Lanuage Processors LAB Solution Manual: VI Sem, BE (CS&E) 2011

Lanuage Processors LAB
SOLUTION MANUAL
VI Sem, BE (CS&E)
2011
DEPT OF COMPTER SCIENCE & ENGG.
M. I. T., MANIPAL
Contents
1. Preliminary Scanning Applications 03

2. Formal grammar for C Language subset 09
3. Design of Lexical Analyzer 10
4. Design of PredictiveParser 12
5. Design of Code Generator 21
6. Using Lex 29
7. Using Yacc 33
8. Sample Lab Questions 40
9. References 41
Language Processors
Page 2
WEEK 1 & 2: Preliminary Scanning applications
WEEK 1:
Write a program which will take as input a C program consisting of single and
multi-line comments and multiple blank spaces and produces as output the C
program without comments and single blank space.
// This is a single line comment
/* This is a
Multiline Comment */
#include <stdio.h>
#include <fcntl.h>
#include<conio.h>
int main(void)
{
clrscr();
FILE *in, *out,*out2;
in=fopen("try2.txt","w");
char c;
out2=fopen("final.txt","w");
out=fopen("temp.c","r");
char temp='a';
int flag=0;
if(!out)
printf("\nfile cannot be opened");
else
{
flag=1;
temp=c;
while((c=getc(out))!=EOF)
{
printf("%c",c);
if(temp=='/' && c=='*')
do
{
temp=c;
c=getc(out);
printf("%c",c);
if(temp=='*' && c=='/')
{
c=getc(out);
break;
}
}while(c!=EOF);
if(temp=='/' && c=='/')

do
{
temp=c;
c=getc(out);
printf("%c",c);
if(c=='\n')
{
c=getc(out);
break;
}
}while(c!=EOF);
if(c==' ' && temp==' ')

continue;
temp=c;
if(c!='/')
putc(c,out2);
}
}
fclose(out2);
fopen("final.txt","r");
while((c=getc(out2))!=EOF)
printf("%c",c);
fclose(out2);
fclose(out);
getch();
}
WEEK 2:
Write a program, which will read a program written in C, recognize all of the
keywords in that program and print them in upper case letters.
#include <stdio.h>
#include <fcntl.h>
#include<conio.h>
#include<string.h>
#include<stdio.h>
#include<iostream.h>
#include<ctype.h>
char line[80];
char key[][6]={"VOID","MAIN","INT","CHAR","IF","ELSE","WHILE","FOR"};
void findkey()
{
int i,j,k,l=strlen(line),p,pos;
char temp[10]; for(i=0;i<l;i+
+)
{
p=0,pos=i; while(!
isalpha(line[i]))
{ i+
+;
pos++;
}
while(isalpha(line[i]))
temp[p++]=line[i++];
temp[p]='\0';
cout<<"\t*"<<temp<<"*";
for(j=0;j<8;j++)
if(strcmpi(temp,key[j])==0)
{
//cout<<"\nkeyword found is "<<key[j];
int t=0;
for(k=pos;k<pos+strlen(key[j]);k++)
line[k]=key[j][t++];
break;
}
}
//cout<<"\n"<<line;
}
int main(void)
{
clrscr();
FILE *in, *out,*out2;
in=fopen("try2.txt","w");
char c;
out2=fopen("final.txt","w");
out=fopen("temp.c","r");
char temp='a';
int flag=0;
if(!out)
printf("\nfile cannot be opened");
else
{
flag=1;
//temp=c;
while(1)
{
int p=0; while((c=getc(out))!
='\n')
{
line[p++]=c;
if(c==EOF)
break;
}
line[p++]='\n';
line[p++]='\0';
findkey();
for(int i=0;i<strlen(line);i++)
putc(line[i],out2);
putc('\n',out2);
if(c==EOF)
break;
}
}
fclose(out2);
cout<<"\n\n\n\n the modified prog is \n";
fopen("final.txt","r");
while((c=getc(out2))!=EOF)
printf("%c",c);
fclose(out2);
fclose(out);
getch();
}
WEEK 3 – 11: Design of Mini Compiler for C Language for the given subset
The formal grammar (sample):
Program - main () { declarations statement-list }
declarations data-type identifier-list; declarations 
data-type  intchar
identifier-list  idid, identifier-list id[number] , identifier-list | id[number]
statement_list  statement ; statement_list 
statement  assign-stat  decision_stat  looping-
stat assign_stat  id = expn
expn simple-expn eprime
eprimerelop simple-expn|
simple-exp term seprime
seprimeaddop term seprime |
term  factor tprime
tprime  mulop factor tprime |
 factor  idnum
decision-stat  if ( expn ) stat dprime
dprime  else stat | 
looping-stat  while (expn) statfor (assign_stat ; expn ; assign_stat ) stat
relop  = =!=<=>=><
addop  +-
mulop  * /  %
WEEK 3 & 4: Design of Lexical analyzer
To construct an adhoc Lexical Analyzer.
 Identifying different classes of tokens like: keywords, identifiers and special

symbols.
 Selecting a suitable data structure for symbol table (the alternates are
linked list, hashing, array of structures, binary search tree)
 Having selected a data structure, identifying the appropriate fields.
Solution:
#define KEY_CTR 12
FILE *fin, *fout;
char line[80],*ptr,filename[20],token[30];
enum token_class {KEY=20,IDENTIFIER,DIGIT,COMMA,SEMICOLON,COLON,DOT,

EQ,GT,LT,EQUEQU,GEQ,LEQ,NEQ,LB,RB,LSB,RSB,LCB,RCB,
INCR,DECR,SQUOTE,DQUOTE,ADD,SUB,MUL,DIV,MOD,HASH,
DEFAULT,UNDEF};
char keywords[KEY_CTR][10] =
{"void","int","char","float","if","else","while","include","for","break
","continue","main"};
int scanner()
{ int i=0;
while(*ptr==' ' || *ptr=='\t') {ptr++;}
token[0]=*ptr; token[1]='\0';
switch(*ptr)
{
case '#' :ptr++; return HASH; //Single Character Tokens
case '(' :ptr++; return LB;
case ')' :ptr++; return RB;
case '{' :ptr++; return LCB;
case '}' :ptr++; return RCB;
case '[' :ptr++; return LSB;
case ']' :ptr++; return RSB;
case ';' :ptr++; return SEMICOLON;
case '.' :ptr++; return DOT;
case '*' :ptr++; return MUL;
case '/' :ptr++; return DIV;
…
…
case '>' : ptr++;

if(*ptr=='=')
{token[1]='='; token[2]='\0';ptr++; return GEQ;}
else { return GT;}
case '<' : ptr++;
if(*ptr=='=')
{token[1]='='; token[2]='\0';ptr++; return LEQ;}
Else {return LT;}
default : //multi character tokens
if(isalpha(*ptr))
{ i=0;
while(isalnum(*ptr))
{ token[i++]=*ptr; ptr++;}
token[i]='\0';
for(int j=0;j<KEY_CTR;j++)
{ if(strcmp(keywords[j],token)==0)
{ KEYtype=j;
return KEY;
}
}
return IDENTIFIER;
}
else if(isdigit(*ptr))
{ i=0;
while(isdigit(*ptr))
{ token[i++]=*ptr; ptr++;}
token[i]='\0';
return DIGIT;
}
else
{ ptr++; return UNDEF;}
}//end of case
The different data Structures that can be used are:

a) Linked list.
b) Array of Structures
The appropriate fields are Name, Type, Value, etc.

WEEK 5, 6 AND 7: Design of a Predictive Parser
To code and test parser:
 Students should write a formal grammar for the given C subset(Refer

Sample Grammar given above)
 Remove left recursion from each of the productions so that the

underlying grammar can be parsed with a predictive parser.
 The parser obtains a string of tokens from the lexical analyzer and
verifies that the string can be generated by the grammar for the C
language.
 The parser should report syntax errors if any (for eg.: Misspelling an
identifier or keyword, Undeclared or Multiply declared identifier,
Arithmetic or Relational Expressions with unbalanced parentheses
and Expression syntax error etc.) with appropriate line-no.
For a given grammar:

1. Eliminate the Left Recursion
2. Left Factor the grammar
3. Write down function for each Nonterminal the
grammar Eg: Design a parser for the following grammar:
E -> E+T|T
T -> T*F |
F
F -> (E)|id
After doing the modifications

The grammar becomes
E -> TE’
E’ -> +TE’|e
T -> FT’
T’->*FT’ |e
F -> (E)|id
Write the Parser as follows:
Void E()
{ T(
);
EPRIME();
}
Void EPRIME( )
{
If(input-symbol ==”+”)
{
ADVANCE( );
T( );
EPRIME( );
}
}
Void T( )
{
F( );
TPRIME( );
}
Void TPRIME( )
{
if (input-symbol ==‘*’)
{
F( );
TPRIME( );
}
}
void F( )
{
if (input-symbol ==‘(‘)
{
ADVANCE( );
E( );
if (input-symbol==‘)’)
ADVANCE( );
Else if (input-symbol ==‘id’)
ADVANCE( );
else ERROR( );
}
Parser Code for given C language grammar:
void declarations()
{
s=lex();
if(s==KEY && (KEYtype==INT||KEYtype==CHAR))
{ cprintf(" KEYWORD detected\r\n");
idList(KEYtype);
cprintf("End of a Declaration by SEMICOLON\r\n");
declarations();
}
else
{ ep_flag=YES;}//epsilon transition so no error also
}
int Missing_Error=NO;
void idList(int datatype)
{
s=lex();
if (s!=IDENTIFIER)
{// Missing_Error=YES;
if(s==COMMA||s==SEMICOLON)
{ep_flag=YES;
strcpy(errtype,"Identifier Missing");put_error(lineno);}
else
{
sprintf(errtype,"Illegal Character '%s'", token);
put_error(lineno);}
}
//don't use else here cause scan the remaining
strcpy(varname,token);
idList_X(datatype);
void idList_X(int datatype)

{
s=lex();
if (s==COMMA && Missing_Error==NO)
{ ADD_ST(varname,datatype,size);
idList(datatype);
}
else if(s==LSB)
{
s=lex(); //get number
if (s!=DIGIT)
{ep_flag=YES;strcpy(errtype,"Missing Array size after '['");
put_error(lineno);}
else
{size=atoi(token);}
s=lex(); //get ']'

if(s !=RSB)
{ep_flag=YES;
strcpy(errtype,"Missing ']'");
put_error(lineno);
}
idList_X(datatype);
}
else if (s==SEMICOLON && Missing_Error==NO)
{
ADD_ST(varname,datatype,size);
}
else
{ep_flag=YES;
strcpy(errtype,"Missing ';'");
put_error(lineno);
}
Missing_Error=NO;
}
void statement()
{
s=lex();
if (s==LCB){
cprintf("***COMPOUND_STATEMENT\r\n");
comp_stat();
}
else { ep_flag=YES;
cprintf("*****SIMPLE_STATEMENT\r\n");
simple_stat();
}
}
void comp_stat()
{
statement();
s=lex(); //get '}'
if(s!=RCB){
ep_flag=YES;
strcpy(errtype,"Missing '}'");
put_error(lineno);
}
// statement();
}
void simple_stat()
{
s=lex();
if(s==IDENTIFIER)
{ assign_stat();
statement();
}
else if (s==KEY)
{
if(KEYtype== IF)
{ decesion_stat();
statement();
}
else if(KEYtype==WHILE || KEYtype==FOR)
{ looping_stat();
statement();
}
else if (KEYtype==CONTINUE ||KEYtype==BREAK)
{ jump_stat();
statement();
}
}
else{ep_flag=YES;} //because lex has been already called.
}
void assign_stat()
{ cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n",token);
char gotID[30];
strcpy(gotID,token);//get the identifier name
int IDtype=INT;///check from symbol table
if (IDtype==INT)
{
s=lex();
if (s != EQ)
{
ep_flag=YES;
strcpy(errtype,"Missing '='");
put_error(lineno);
}
expn();
s=lex();
if (s != SEMICOLON)
{
ep_flag=YES;
put_error(lineno);
}
}
else if (IDtype==CHAR)
{
//not in the question for handeling them
}
int relop()
{
s=lex();
if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ)

{ …..
}
else
{ep_flag=YES;return NO;}
}
void expn()
{
single_arg_flag=YES;
simple_expn();
strcpy(fa1,fa);
if(relop()==YES) //check if there are real op;
{
simple_expn();
}
else
{ //when we have a single argument ie a=10; a=c; etc...
}
}
void simple_expn()
{
term();
simple_expn_2();
}
void simple_expn_2()
{
s=lex();
if (s==ADD || s==SUB) //operator
{ single_arg_flag=NO;
term();
simple_expn_2();
}
else
{ep_flag=YES;}
}
void term()
{
factor();
term_not();
}
void term_not()
{
s=lex();
if (s==MUL ||s==DIV|| s==MOD)
factor();
term_not();
}
else
{ep_flag=YES;}
int find_in_ST(char stext[])

{
for(int j=0;j<ST_CTR;j++) {
if (strcmp(stext,ST[j].id)==0){return YES;}
}
return NO;//not found
}
void factor()
{
s=lex();
if (s==IDENTIFIER || s==DIGIT)
{
if(s==IDENTIFIER && find_in_ST(token)==NO)
{
sprintf(errtype,"Undefined Symbol '%s'",token);
put_error(lineno);}
strcpy(fa,token);
}
else
{ strcpy(fa,"");
ep_flag=YES;
sprintf(errtype,"Missing ID or Number");
put_error(lineno);
}
}
void parser()
{
fgets(line,80,fin); ptr=line; lineno++;
s=lex();
if (s==KEY && KEYtype==MAIN)
{ cprintf("main() function is defined\r\n");
s=lex();//(
if(s!=LB)
{
ep_flag=YES;
strcpy(errtype,"Missing '('");put_error(lineno);
}
s=lex();//)
if(s!=RB) {
ep_flag=YES;
strcpy(errtype,"Missing ')'");
put_error(lineno);
}
s=lex();//{
if(s!=LCB){
ep_flag=YES;
strcpy(errtype,"Missing main '{'");
put_error(lineno);
}
cprintf("BEGINNING of DECLARATION REGION\r\n");
declarations();
statement();
s=lex();//}
if(s!=RCB){ …..}
}
}
int main()
{
if ((fin = fopen("c:\\source.cpp", "rt")) == NULL)

{
fprintf(stderr, "Cannot open input file.\n");
getch();
return 1;
}
parser();
display_ST();
display_ET();
getch();
clrscr();
fclose(fin);
getch();
return 0;
}
WEEK 8, 9 and 10: Design of Code generator
 The target code to be generated is 8086 assembly language program.
 Registers have to be selected for each of the variables used by the program.
 Code generator will take each line of the source program and generate
its equivalent assembly code.
Solution: Here we, modify the parser and associated functions such that it also generates code
for the language constructs in addition to parsing. The Modified parser code is as follows:
void idList_X(int datatype)

{
s=lex();
if (s==COMMA && Missing_Error==NO)
{ ADD_ST(varname,datatype,size);
if (datatype==INT)
{
fprintf(fout," %-10s DW 0\n",varname);
}
else
{
fprintf(fout," %-10s DB 0\n",varname);
}
idList(datatype);
}
else if(s==LSB)
{
s=lex(); //get number
if (s!=DIGIT)
{
ep_flag=YES;
strcpy(errtype,"Missing Array size after '['");
put_error(lineno);
}
else {size=atoi(token);}
s=lex(); //get ']'
if(s !=RSB)
{
ep_flag=YES;
strcpy(errtype,"Missing ']'");
put_error(lineno);
}
idList_X(datatype);
}
else if (s==SEMICOLON && Missing_Error==NO)
{
if (datatype==INT)
{fprintf(fout," %-10s DW 0\n",varname);}
else
{fprintf(fout," %-10s DB 0\n",varname);}
ADD_ST(varname,datatype,size);
}
else
{
ep_flag=YES;
put_error(lineno);
}
Missing_Error=NO;
}
void statement()
{
s=lex();
if (s==LCB)
{
cprintf("***COMPOUND_STATEMENT\r\n");
comp_stat();
}
else
{
ep_flag=YES;
cprintf("*****SIMPLE_STATEMENT\r\n");
simple_stat();
}
}
void comp_stat()
{
statement();
s=lex(); //get '}'
if(s!=RCB)
{
ep_flag=YES;
strcpy(errtype,"Missing '}'");
put_error(lineno);
}
}
void simple_stat()
{
s=lex();
if(s==IDENTIFIER)
{ assign_stat();
statement();
}
else if (s==KEY)
{
if(KEYtype== IF)
{
decesion_stat();
statement();
}
else if(KEYtype==WHILE || KEYtype==FOR)
{ looping_stat();
statement();
}
else if (KEYtype==CONTINUE ||KEYtype==BREAK)
{ jump_stat();
statement();
}
}
else{ep_flag=YES;} //because lex has been already called.
}
void assign_stat()
{
cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n",token);
char gotID[30];
strcpy(gotID,token);//get the identifier name
int IDtype=INT;///check from symbol table
if (IDtype==INT)
{
s=lex();
if (s != EQ)
{
ep_flag=YES;
strcpy(errtype,"Missing '='");
put_error(lineno);
}
expn();
fprintf(fout," MOV %s,AX\n",gotID);
s=lex();
if (s != SEMICOLON)
{
ep_flag=YES;
put_error(lineno);
}
}
else if (IDtype==CHAR)
{
//not in the question for handeling them
}
int relop()
{
s=lex();
if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ)

{ …..
}
else
{
ep_flag=YES;
return NO;}
}
void expn()
{
single_arg_flag=YES;
simple_expn();
strcpy(fa1,fa);
if(relop()==YES) //check if there are real op;
{
simple_expn();
fprintf(fout," MOV AX,%s\n",fa1);
fprintf(fout," MOV BX,%s\n",fa);
}
else
{ //when we have a single argument ie a=10; a=c; etc...
if(single_arg_flag==YES)
fprintf(fout," MOV AX,%s\n",fa);
}
}
void simple_expn()
{
term();
simple_expn_2();
}
void simple_expn_2()
{
s=lex();
if (s==ADD || s==SUB) //operator
sprintf(tempcode," MOV AX,%s\n",fa);
if (s==ADD)
sprintf(tempcode2," ADD AX,BX");
else
sprintf(tempcode2," SUB AX,BX");
term();
fprintf(fout,"%s",tempcode); //mov AX,a
fprintf(fout," MOV BX,%s\n",fa);//MOV BX,b
fprintf(fout,"%s\n",tempcode2);//ADD c,result
simple_expn_2();
}
else
{ep_flag=YES;}
}
void term()
{
factor();
term_not();
}
void term_not()
{
s=lex();
if (s==MUL ||s==DIV|| s==MOD)
sprintf(tempcode3," MOV AX,%s\n",fa);
if (s==MUL)
sprintf(tempcode4," MUL BX");
else
sprintf(tempcode4," DIV BX");
factor();
fprintf(fout,"%s",tempcode3);//mov AX,a
fprintf(fout," MOV BX,%s\n",fa);//mov BX,b
fprintf(fout,"%s\n",tempcode4);//mul AX,BX
fprintf(fout," MOV temp,AX\n");//temp or dx stores the
//result;
strcpy(fa,"temp");
term_not();
}
else
{ep_flag=YES;}
}
int find_in_ST(char stext[])
{
for(int j=0;j<ST_CTR;j++)
{
if (strcmp(stext,ST[j].id)==0){return YES;}
}
return NO;//not found
}
void factor()
{
s=lex();
if (s==IDENTIFIER || s==DIGIT)
{
if(s==IDENTIFIER && find_in_ST(token)==NO)
{
sprintf(errtype,"Undefined Symbol '%s'",token);
put_error(lineno);
}
strcpy(fa,token);
}
else
{ strcpy(fa,"");
ep_flag=YES;
sprintf(errtype,"Missing ID or Number");
put_error(lineno);
}
}
void parser()
{
fgets(line,80,fin); ptr=line; lineno++;
s=lex();
if (s==KEY && KEYtype==MAIN)
{
cprintf("main() function is defined\r\n");
s=lex();//(
if(s!=LB)
{
ep_flag=YES;
strcpy(errtype,"Missing '('");
put_error(lineno);
}
s=lex();//)
if(s!=RB)
{
ep_flag=YES;
strcpy(errtype,"Missing ')'");
put_error(lineno);
}
s=lex();//{
if(s!=LCB)
{
ep_flag=YES;
strcpy(errtype,"Missing main '{'");
put_error(lineno);
}
cprintf("BEGINNING of DECLARATION REGION\r\n");
declarations();
fprintf(fout,"DATA ENDS\n\nCODE SEGMENT\n");
fprintf(fout,"ASSUME CS:CODE DS:DATA\n”);
fprintf(fout,”START: MOV AX,DATA\n MOV DS,AX\n\n");
statement();
s=lex();//}
if(s!=RCB){ …..}
}
}
int main()
{
clrscr();
if ((fin = fopen("c:\\source.cpp", "rt")) == NULL)

{
fprintf(stderr, "Cannot open input file.\n");
getch();
return 1;
}
if ((fout = fopen("c:\\code.asm", "wt"))== NULL)

{
fprintf(stderr, "Cannot open output file.\n");
getch();
return 1;
}
fprintf(fout,"DATA SEGMENT\n");
fprintf(fout,"temp DW 0\n");
parser();
fprintf(fout,"\n MOV AH,4CH\n INT 21H\n");

fprintf(fout,"CODE ENDS\nEND START\n");
display_ST(); display_ET();
getch();
clrscr();
fclose(fin);
fclose(fout);
getch();
return 0;
}
WEEK 13 AND 14: Usage of LEX and Yacc
WEEK 13: Usage of LEX tool
Steps to use LEX tool:
1. lex filename.l lex.yy.c

2. cc lex.yy.c –lfl  a.out
3. .\a.out  program gets executed
1. Design a lexical analyzer for recognizing following class of tokens:

Keywords (if, then, else), id, num and relational operators using
lex compiler.
%{
/* definition of manifest constants
LT, LE, EQ, NE, GT, GE,
IF, THEN, ELSE, ID, NUMBER, RELOP */
#include<stdio.h>
#define LT 1
#define LE 2
…
%}
/* regular definition */
delim [ \t\n]
ws {delim}+
letter [A-Za-z]
digit [0-9]
id {letter}({letter}|{digit})*
number {digit}+(\.{digit}+)?(E[+\-]?{digit}+)?
%%
{ws} {/* no action and no return */}

if {
printf(“lexeme=%s, tokentype = Reserveword “, yytext);
return (IF);
}
then {
return (THEN);
}
else {
return (ELSE);
}
{id} {
return(ID);
}
{number} {
printf(“lexeme=%s, tokentype = Number “, yytext);
return(NUMBER);
}
“<” {yylav = LT; return(RELOP);}
“<=” {yylav = LE; return(RELOP);}
“=” {yylav = EQ; return(RELOP);}
“<>” {yylav = NE; return(RELOP);}
“>” {yylav = GT; return(RELOP);}
“>=” {yylav = GE; return(RELOP);}
%%
main()
{
yylex();
}
2. Write a lex program to count the number of printf and scanf statements
in a valid c program and replace them with write and read statements
respectively.
%{
#include<stdio.h>
int sc=0;
int pc=0;
void disp(void);
%}
{ws} { }
scanf {fprintf(yyout, “read”);sc++;}
printf { fprintf(yyout, “write”);pc++;}
“.” {disp();}
%%
main()
{
yyin = fopen(“test.l”,”r”);
yyout=fopen(“test.l”,”u”);
yylex();
}
void disp()
{
printf(“read count : %d “,sc);
printf(“write count : %d”,pc);
}
3. Write a lex program to count the number of characters, words, blanks
and lines in a given string.
%{
#include<stdio.h>
int sc=0;
int cc=0;
int wc=0;
int lc=0;
void disp(void);
%}
space [ ]
letter [a-zA-Z0-9]
word {letter}*
%%
{word} {wc++; cc=cc+strlen(yytext);}

{space} {sc++;}
“\n” {lc++;}
“#” {disp();}
%%
main()
{
yylex();
}
void disp()
{
printf(“character count : %d “,cc);
printf(“word count : %d”,wc);
printf(“blanks count : %d “,sc);
printf(“line count : %d”,lc);
}
WEEK 14: Usage of Yacc tool
Steps to use yacc tool:
1. yacc filename.y  y.tab.c
2. cc y.tab.c -lfla.out
3. .\a.out  program executed
1. Write a yacc program to test the validity of a simple expression involving

the operators +,-,*,/.
%{
#include <ctype.h>
#include<stdio.h>
%}
%token id
%left ‘+’ ‘-‘
%left ‘*’ ‘/’
%%
line : expr ‘\n’ {printf(“\n Valid Expression”);}

;
expr : expr ‘+’ expr { }
| expr ‘-‘ expr { }
| expr ‘*’ expr { }
| expr ‘/’ expr { }
| ‘(‘ expr ‘)’ { }
| id { }
;
%%
yylex(){
char c;
c = getchar();
if (isdigit(c))
{
yylval = c –‘0’;
return id;
}
else
return c;
}
yyerror(char *s){
fprintf(stderr, “%s”,s);
}
main()
{
yyparse();
}
2. Write a yacc program to recognize nested if statements and display
the number of levels of nested if.
/* Lex */
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[ \t ]+ ;
if {return IF;}
"(" {return OPP;}
")" {return CLP;}
"{" {return OPB;}
"}" {return CLB;}
"<" | "<=" | ">" | ">=" | "==" | "!=" {return RELOP;}
"&&"|"||"|"&"|"|"|"!" { return LOGOP; }
[+ - / *] {return AROP;}
\n {return NEWL;}
[0-9]+ {return NUMBER;}
";" {return SEM;}
[a-z]+ {return ID;}
%%
/* yacc */
%{
unsigned levels=0;
%}
%token IF OPP CLP OPB CLB RELOP LOGOP AROP
%token NEWL ID SEM NUMBER
%%
ifcond: IF OPP COND CLP NEWL
OPB explist ifcond CLB {levels++;}

;
COND: exp LOGOP exp
| exp RELOP exp
explist: exp SEM
exp: NUMBER AROP NUMBER
| NUMBER
| ID
%%
main()
{
yyparse();
printf("no of levels %d \n",levels);
}
yyerror(s)
char *s;
{
printf("not a valid if stmt\n");
}
yywrap()
{
return(1);
}
3. Write a yacc program to parse the given string 2+3*5
/* Lex */
%{
#include “y.tab.h”
extern int yylval;
%}
d [0-9]
%%
{d}+ {yylval = atoi(yytext); return id;}

“\n” {return ‘\n’;}
. {return yytext[0];}
%%
/* yacc*/
%{
#include <<stdio.h>
%}
%token id
%%
line : expr ‘\n’ { printf(“valid statement”);}
;
expr : expr ‘+’ term {}
| term {}
;
term : term ‘*’ factor {}
| factor {}
;
factor: ‘(‘ expr ‘)’ {}
| id
;
%%
int yyerror(char *s)
{
fprintf(stderr, “%s”,s);
}
main(){ yyparse(); }
Steps to use Yacc tool:
1. lex filename.l lex.yy.c
2. yacc –d filename.y  y.tab.c
3. cc lex.yy.c y.tab.c -lfla.out
4. .\a.out  program executed

Sample Lab Exam questions
1) Write the Lexical Analyzer to identify various tokens for a given language
(say Pascal)
2) Generate the parser for the following

grammar start REPEAT stmt UNTIL
condition ; stmt id= term arithop term
condition  term relop term
term  id | num
arithop  + | -
relop  < | > | =
=
Sample input:- REPEAT count = count + increment UNTIL count = =10
3) Generate the 8086 code for the following
StartREPEAT statement UNTIL condition

Statementid:=expr;
Conditionid<num
Exprterm expr`
Expr`+term expr’ /ε
Term factor term`
Term’*factor term’ /ε
Factorid
Ex:- REPEAT
A:=B+C*D
UNTIL I <10
References:
1. Compiler principles, Techniques and tolls by Alfred V. Aho, Ravi Sethi, Jeffrey D.
Ullman
2. Principles of compiler Design by Alfred V. Aho and Jeffrey D. Ullman
3. Engineering a Compiler by Keith D. Cooper and Linda Torczon
4. Introduction to compiler construction by Thomas W. Parsons
5. Compiler Design in C by Holub A.I

Lanuage Processors LAB Solution Manual: VI Sem, BE (CS&E) 2011

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Lanuage Processors LAB Solution Manual: VI Sem, BE (CS&E) 2011

Uploaded by

Copyright:

Available Formats

Lanuage Processors LAB

DEPT OF COMPTER SCIENCE & ENGG.

1. Preliminary Scanning Applications 03

// This is a single line comment

if(temp=='/' && c=='/')

if(c==' ' && temp==' ')

The formal grammar (sample):

Program - main () { declarations statement-list }

declarations data-type identifier-list; declarations 

identifier-list  idid, identifier-list id[number] , identifier-list | id[number]

statement_list  statement ; statement_list 

statement  assign-stat  decision_stat  looping-

stat assign_stat  id = expn

expn simple-expn eprime

simple-exp term seprime

seprimeaddop term seprime |

term  factor tprime

tprime  mulop factor tprime |

decision-stat  if ( expn ) stat dprime

dprime  else stat | 

looping-stat  while (expn) statfor (assign_stat ; expn ; assign_stat ) stat

To construct an adhoc Lexical Analyzer.

 Identifying different classes of tokens like: keywords, identifiers and special

enum token_class {KEY=20,IDENTIFIER,DIGIT,COMMA,SEMICOLON,COLON,DOT,

while(*ptr==' ' || *ptr=='\t') {ptr++;}

case '>' : ptr++;

The different data Structures that can be used are:

The appropriate fields are Name, Type, Value, etc.

To code and test parser:

 Students should write a formal grammar for the given C subset(Refer

 Remove left recursion from each of the productions so that the

For a given grammar:

grammar Eg: Design a parser for the following grammar:

After doing the modifications

Write the Parser as follows:

void idList_X(int datatype)

s=lex(); //get ']'

if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ)

int find_in_ST(char stext[])

if ((fin = fopen("c:\\source.cpp", "rt")) == NULL)

 The target code to be generated is 8086 assembly language program.

void idList_X(int datatype)

if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ)

if ((fin = fopen("c:\\source.cpp", "rt")) == NULL)

if ((fout = fopen("c:\\code.asm", "wt"))== NULL)

fprintf(fout,"\n MOV AH,4CH\n INT 21H\n");

WEEK 13 AND 14: Usage of LEX and Yacc

WEEK 13: Usage of LEX tool

Steps to use LEX tool:

1. lex filename.l lex.yy.c

1. Design a lexical analyzer for recognizing following class of tokens:

{ws} {/* no action and no return */}

{word} {wc++; cc=cc+strlen(yytext);}

Steps to use yacc tool:

1. yacc filename.y  y.tab.c

3. .\a.out  program executed

1. Write a yacc program to test the validity of a simple expression involving

line : expr ‘\n’ {printf(“\n Valid Expression”);}

extern int yylval;

"(" {return OPP;}

")" {return CLP;}

"{" {return OPB;}

while(ptr==' ' || ptr=='\t') {ptr++;}