Tail recursive parser

In computer science, tail recursive parsers are a derivation from the more common recursive descent parsers. Tail recursive parsers are commonly used to parse left recursive grammars. They use a smaller amount of stack space than regular recursive descent parsers. They are also easy to write. Typical recursive descent parsers make parsing left recursive grammars impossible (because of an infinite loop problem). Tail recursive parsers use a node reparenting technique that makes this allowable.

Example

Given an EBNF Grammar such as the following:

E: T
T: T { '+' F } | F
F: F { '*' I } | I
I: <identifier>

A simple tail recursive parser can be written much like a recursive descent parser. The typical algorithm for parsing a grammar like this using an Abstract syntax tree is:

  1. Parse the next level of the grammar and get its output tree, designate it the first tree, F
  2. While there is terminating token, T, that can be put as the parent of this node:
    1. Allocate a new node, N
    2. Set N's current operator as the current input token
    3. Advance the input one token
    4. Set N's left subtree as F
    5. Parse another level down again and store this as the next tree, X
    6. Set N's right subtree as X
    7. Set F to N
  3. Return N

A C programming language implementation of this parser is shown here:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
 
enum node_type
{
  NODE_NUMBER,
  NODE_OPERATOR,
  NODE_IDENTIFIER,
  NODE_MUL,
  NODE_ADD
};
 
typedef struct _exptree exptree;
 struct _exptree {
	 int val;
     char* token;
     int line_number;
     exptree *left;
     exptree *right;
 };
 
struct _lexval {
  int ival;
  char *sval;
  int line;
  enum node_type typ;
} lexval;
 
int line_number = 0;
FILE *fs,*fd;
 
exptree *parse_e(void);
exptree *parse_t(void);
exptree *parse_f(void);
exptree *parse_i(void);
 
void next_token(){
	while (1){
		int t;
		char *buff;
 
		t = fgetc(fs);
		//printf("after  getchar = %d\n",t);
		if (t == ' ' || t == '\t' || t=='\n'){
		  //printf("white space\n"); /* ignore whitespace */
		}
		else if (t == '\n')
		{
		  line_number++;
		  //printf("line = %i\n",line_number);
		  return;
		}
		else if(t == EOF){
			return;
		} 
		else if(isdigit(t))
		{
		  //ungetc (t, stdin);
		  fseek(fs,ftell(fs)-1,SEEK_SET);
		  fscanf (fs,"%d", &lexval.ival);
		  //printf("in lexer ival = %d\n",lexval.ival);
		  lexval.sval = "";
		  lexval.line = line_number;
		  lexval.typ = NODE_NUMBER;
		  return;
		} else if (t == '*'){
			//printf("lexer mul\n");
			//ungetc (t, stdin);
			//scanf ("%s", buff);
			lexval.sval = "*";
 
			//printf("in lexer ival = %s\n",lexval.sval);
			lexval.line = line_number;
			lexval.typ = NODE_MUL;
			return;
		} else if (t == '+'){
			//printf("lexer add\n");
			//ungetc (t, stdin);
			//scanf ("%s0", buff);
			lexval.sval = "+";
			//printf("in lexer ival = %s\n",lexval.sval);
			lexval.line = line_number;
			lexval.typ = NODE_ADD;
			return;
		}
	}
}
 
exptree* alloc_tree(){
	return (exptree *) malloc(sizeof(exptree));
}
 
exptree *parse_e(void)
 {
	 //printf("in e ival = %d\n",lexval.ival);
     return parse_t();
 }
 
 exptree *parse_t(void)
 {
	//printf("in t\n");
	//printf("in t ival = %d\n",lexval.ival);
    exptree *first_f = parse_f();
 
     while(lexval.typ == NODE_ADD) {
		 //printf("in t loop\n");
         exptree *replace_tree = alloc_tree();
         replace_tree->token = lexval.sval;
         replace_tree->left = first_f;
         //printf("in t loop begin next_token\n");
         next_token();
         //printf("parse_t = %c\n",replace_tree->token);
         replace_tree->right = parse_f();
         replace_tree->val = replace_tree->left->val + replace_tree->right->val;
         printf("hasil t = %d\n",replace_tree->val);
         first_f = replace_tree;
     }
 
     return first_f;
 }
 
 exptree *parse_f(void)
 {
	 //printf("in f\n");
	 //printf("in f ival = %d\n",lexval.ival);
     exptree *first_i = parse_i();
	 //printf("before f loop\n");
     while(lexval.typ == NODE_MUL) {
		 //printf("in f loop\n");
         exptree *replace_tree = alloc_tree();
         replace_tree->token = lexval.sval;
         replace_tree->left = first_i;
         //printf("in f loop begin next_token\n");
         next_token();
         //printf("parse_f = %c\n",replace_tree->token);
         replace_tree->right = parse_i();
 
         replace_tree->val = replace_tree->left->val * replace_tree->right->val;
         printf("hasil f = %d\n",replace_tree->val);
         first_i = replace_tree;
 
     }
	 //printf("about to exit from f\n");
     return first_i;
 }
 
exptree *parse_i(void)
{
	 //printf("in i\n");
     exptree *i = alloc_tree();
     i->left = i->right = NULL;
     i->val = lexval.ival;
     //printf("in i ival = %d\n",lexval.ival);
     //printf("in i = %d\n",i->val);
     next_token();
     //printf("parse_i = %c\n",i->token);
     return i;
 }
 
int main( int argc, char** argv){
 
	if(argc != 2) return 1;
 
	fs = fopen(argv[1], "r");
	if (fs == NULL) {
		perror("Failed to open file ");
	}
 
	//char * = strcat(
	fd = fopen (strcat(argv[1],".c"),"w");
	if (fd == NULL) {
		perror("Failed to open file ");
	}
	char *template = "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <ctype.h>\n#include <sys/types.h>\nvoid main(){\n";
 
	fputs(template,fd);
 
 
	char *buffer = malloc(35);
	 next_token();
 
	exptree *temp = parse_e();
	printf("hasil akhir = %d\n",temp->val);
	snprintf(buffer, 30, "printf(\"hasil = %d\\n\");\n", temp->val);
	fputs(buffer,fd);
	fputs("}",fd);
 
	fclose(fs);
	fclose(fd);
 
	char *substr = strdup(argv[1]);
	char * pch;
	pch = strstr(substr,".lol");
	strncpy(pch,"\0",1);
 
	snprintf(buffer, 30, "gcc -o %s %s", substr,argv[1]);
 
	system(buffer);
 
	return 0;
 }

Further reading