Professional Documents
Culture Documents
Compiler Design Lab Manual
Compiler Design Lab Manual
Compiler Design Lab Manual
THEORY :
Lexical analysis involves scanning the program to be compiled and recognizing the
tokens that make up the source statements. Scanners, or lexical analyzers, are usually
designed to recognize keywords, operators, and identifiers, as well as integers,
floating-point numbers, character strings, and other similar items that are written as
part of the source program. The exact set of tokens to be recognized of course
depends upon the programming language being compiled and the grammar used to describe it.
A sequence of input characters that comprises a single token is called a lexeme. A
lexical analyzer can insulate a parser from the lexeme representation of tokens.
The main function that a lexical analyzer performs is the following:
it converts a stream of input characters into a stream of tokens. The
different tokens that our lexical analyzer identifies are as follows:
KEYWORDS: int, char, float, double, if, for, while, else, switch, struct, printf, scanf,
case, break, return, typedef, void
IDENTIFIERS: main, fopen, getch etc
NUMBERS: positive and negative integers, positive and negative floating point
numbers.
OPERATORS: +, ++, -, --, ||, *, ?, /, >, >=, <, <=, =, ==, &, &&.
BRACKETS: [ ], { }, ( ).
STRINGS : Set of characters enclosed within the quotes
COMMENT LINES: Ignores single line, multi line comments.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#define MAX 30
void main()
{
char str[MAX];
int state=0;
int i=0, j, startid=0, endid, startcon, endcon;
clrscr();
for(j=0; j<MAX; j++)
str[j]=NULL;
printf("*** Program on Lexical Analysis ***");
printf("Enter the string: ");
gets(str);
str[strlen(str)]=' ';
printf("Analysis:");
while(str[i]!=NULL)
{
while(str[i]==' ') //To eliminate spaces
i++;
switch(state)
{
case 0: if(str[i]=='i') state=1;
//if
else if(str[i]=='w') state=3; //while
else if(str[i]=='d') state=8; //do
else if(str[i]=='e') state=10; //else
else if(str[i]=='f') state=14; //for
else if(isalpha(str[i]) || str[i]=='_')
{
state=17;
startid=i;
} //identifiers
else if(str[i]=='<') state=19;
else if(str[i]=='>') state=21;
else if(str[i]=='=') state=23;
else if(isdigit(str[i]))
{
state=25; startcon=i;
}
else if(str[i]=='(') state=26;
else if(str[i]==')') state=27;
else if(str[i]==';') state=28;
else if(str[i]=='+') state=29;
else if(str[i]=='-') state=30;
break;
case 1: if(str[i]=='f') state=2;
else { state=17; startid=i-1; i--; }
break;
case 2: if(str[i]=='(' || str[i]==NULL)
{
printf("\nif
: Keyword");
state=0;
i--;
}
else { state=17; startid=i-2; i--; }
break;
case 3: if(str[i]=='h') state=4;
else { state=17; startid=i-1; i--; }
break;
case 4: if(str[i]=='i') state=5;
else { state=17; startid=i-2; i--; }
break;
case 5: if(str[i]=='l') state=6;
else { state=17; startid=i-3; i--; }
break;
case 6: if(str[i]=='e') state=7;
else { state=17; startid=i-4; i--; }
break;
case 7: if(str[i]=='(' || str[i]==NULL)
{
printf("\nwhile : Keyword");
state=0;
i--;
}
else { state=17; startid=i-5; i--; }
break;
case 8: if(str[i]=='o') state=9;
else { state=17; startid=i-1; i--; }
break;
case 9: if(str[i]=='{' || str[i]==' ' || str[i]==NULL || str[i]=='(')
{
printf("\ndo : Keyword");
state=0;
i--;
}
break;
case 10: if(str[i]=='l') state=11;
else { state=17; startid=i-1; i--; }
break;
case 11: if(str[i]=='s') state=12;
else { state=17; startid=i-2; i--; }
break;
case 12: if(str[i]=='e') state=13;
else { state=17; startid=i-3; i--; }
break;
case 13: if(str[i]=='{' || str[i]==NULL)
{
printf("\nelse : Keyword");
state=0;
i--;
}
else { state=17; startid=i-4; i--; }
break;
case 14: if(str[i]=='o') state=15;
else { state=17; startid=i-1; i--; }
break;
case 15: if(str[i]=='r') state=16;
else { state=17; startid=i-2; i--; }
break;
case 16: if(str[i]=='(' || str[i]==NULL)
{
printf("\nfor : Keyword");
state=0;
i--;
}
else { state=17; startid=i-3; i--; }
break;
case 17:
if(isalnum(str[i]) || str[i]=='_')
{
state=18; i++;
}
else
if(str[i]==NULL||str[i]=='<'||str[i]=='>'||str[i]=='('||str[i]==')'||str[i]==';'||str[i]=='='||str[i
]=='+'||str[i]=='-') state=18;
i--;
break;
case 18:
if(str[i]==NULL || str[i]=='<' || str[i]=='>' || str[i]=='(' || str[i]==')' || str[i]==';' ||
str[i]=='=' || str[i]=='+' ||str[i]=='-')
{
endid=i-1;
printf(" ");
for(j=startid; j<=endid; j++)
printf("\n%c", str[j]);
printf("
: Identifier");
state=0;
i--;
}
break;
case 19: if(str[i]=='=') state=20;
else if(isalnum(str[i]) || str[i]=='_')
{
printf("\n<
: Relational operator");
i--;
state=0;
}
break;
case 20: if(isalnum(str[i]) || str[i]=='_')
{
printf("\n<= : Relational operator");
i--;
state=0;
}
break;
case 21: if(str[i]=='=') state=22;
else if(isalnum(str[i]) || str[i]=='_')
{
printf("\n>
: Relational operator");
i--;
state=0;
}
break;
case 22: if(isalnum(str[i]) || str[i]=='_')
{
printf("\n>= : Relational operator");
i--;
state=0;
}
break;
case 23: if(str[i]=='=') state=24;
else
{
printf("\n=
: Assignment operator");
i--;
state=0;
}
break;
case 24: if(isalnum(str[i]))
{
printf("\n=: Relational operator");
state=0;
i--;
}
break;
END:
getch();
}
OUTPUT:
Enter the string: for(x1=0; x1<=10; x1++);
Analysis:
for
(
x1
=
0
;
x1
<=
10
;
x1
+
+
)
;
: Keyword
: Special character
: Identifier
: Assignment operator
: Constant
: Special character
: Identifier
: Relational operator
: Constant
: Special character
: Identifier
: Operator
: Operator
: Special character
: Special character
ALGORITHM:
10
Initially, place the start symbol and the EOI (end of input) symbol on
the prediction stack with the start symbol on top.
20
Put EOI at the end of the input. Make the current token empty.
30
40
Repeat
50
60
While it is an action symbol, call its action routine and pop the next top
symbol off the prediction stack.
70
The action routine may pop zero or more values off the semantics stack
and may push one or zero values back on it.
80
If the current token is empty, call the scanner to read the next input
token into the current token.
90
100
If they match, push the current token onto the semantics stack.
110
If they don't match, an error has been discovered in the input. Execute
some error recovery code.
120
130
140
Choose the right hand side by looking at the next input symbol and
deciding which right hand side will allow parsing to continue.
150
PROGRAM:
#include<string.h>
#include<stdio.h>
#include<conio.h>
/*
 * LL(1) demo: reads two productions, reduces each to the text that follows
 * its '>' (the right-hand side) and reports whether the two right-hand
 * sides are identical.
 *
 * Reconstructed from a listing whose quotes were stripped and whose
 * identifiers were mistyped. Prompts and messages are kept as they appear
 * in the listing.
 * NOTE(review): the acceptance rule (equal right-hand sides => "LL(1)")
 * is reproduced as written; confirm it is the intended check.
 */
void main()
{
    char pro1[MAX];
    char pro2[MAX];
    int n;

    clrscr();
    printf("\n\n\t\t\t.::Demo of LL(1).::.");

    printf("\n\t Enter the production First:");
    /* fgets bounds the read to the buffer; gets() could overflow it. */
    if (fgets(pro1, sizeof pro1, stdin) == NULL)
        pro1[0] = '\0';
    pro1[strcspn(pro1, "\n")] = '\0';

    printf("\n\tEnter the priduction second:");
    if (fgets(pro2, sizeof pro2, stdin) == NULL)
        pro2[0] = '\0';
    pro2[strcspn(pro2, "\n")] = '\0';

    /*
     * Reverse the text and cut it at the first delimiter: what remains in
     * the buffer is the reversed right-hand side. The strrev() calls below
     * turn it back around, so pro1/pro2 end up holding only the RHS.
     */
    strtok(strrev(pro1), ">");
    strtok(strrev(pro2), ">-");

    printf("\n1:productionFirst(");
    printf("%s", strrev(pro1));   /* never use input as the format string */
    printf(")");
    printf("\n2:productionFirst(");
    printf("%s", strrev(pro2));
    printf(")");

    n = strcmp(pro1, pro2);
    if (n == 0)                   /* the listing had an assignment here */
        printf("\nLL(1)Grammer to satisy Left Recursive disjoint set");
    else
        printf("\n Not LL(1)Grammer to satisfy Left Recursive disjoint set");
    getch();
}
10
OUTPUT:
Enter the production First:E----- >E+E
Enter the priduction second: E---- >E
productionFirst E--- >E+E
production second E---- >E
Not LL(1)Grammer to satisfy Left Recursive disjoint set
11
12
PROGRAM:
#include<stdio.h>
#include<ctype.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
#include<iostream.h>
// Work stack used while computing the leading/trailing sets: each entry is a
// (non-terminal, terminal) pair of characters written by push() and read
// back by pop().
struct
{
char nts,trs;
}stack[20];
// lead / trail: flag tables set to 1 by install(); rows are indexed with the
//               value of nt_no(), columns with the value of t_no().
// nop:          number of productions read by read_grammar().
// top:          index of the current top entry of stack[].
int lead[20][20],trail[20][20],nop,top=0;
// nr, tr:       pair most recently removed by pop().
// p:            production strings allocated in read_grammar().
// nt, t:        non-terminal and terminal symbols, one character each.
// starts:       first character of the first production.
// opt, stk:     not referenced by the functions visible in this listing.
char nr,tr,*p[10],nt[10],t[10],opt[10][10],starts,stk[20];
// Reads the grammar from standard input: the terminal symbols into t, the
// non-terminal symbols into nt, the production count into nop and then nop
// production strings into p[]. starts is set to the first character of the
// first production.
void read_grammar()
{
    cout<<"Enter the terminal:";
    cin.width(sizeof t);        // limit the read to the 10-byte buffer
    cin>>t;
    cout<<"Enter the non Terminal:";
    cin.width(sizeof nt);
    cin>>nt;
    cout<<"Enter the no of production:";
    cin>>nop;
    // p[] has room for 10 productions only; keep nop inside that range.
    if(nop<0)
        nop=0;
    if(nop>10)
        nop=10;
    for(int i=0;i<nop;i++)
    {
        p[i]=new char[10];
        cin.width(10);          // each production buffer holds 10 bytes
        cin>>p[i];
    }
    // With no productions there is no p[0] to look at.
    starts=(nop>0)?p[0][0]:'\0';
}
// Returns the index of x in the non-terminal list nt, or -1 when x is the
// NUL character or is not listed. (The original fell off the end of the
// function without returning a value in those cases.)
int nt_no(char x)
{
    if(x!='\0')
    {
        for(int i=0;nt[i]!='\0';i++)
            if(nt[i]==x)
                return(i);
    }
    return(-1);
}
// Returns the index of x in the terminal list t. The end marker '$' that is
// not itself listed maps to strlen(t), i.e. one past the last terminal.
// Returns -1 when x is the NUL character or is not found.
int t_no(char x)
{
    if(x=='\0')
        return(-1);
    for(int i=0;t[i]!='\0';i++)
    {
        if(t[i]==x)
            return(i);
    }
    // Checked outside the loop so that it also works when t is empty.
    if(x=='$')
        return(strlen(t));
    return(-1);
}
// Tells whether x is one of the non-terminal symbols listed in nt.
// Returns 1 if it is, 0 otherwise (the NUL character is never a symbol).
int nonterminal(char x)
{
    if(x=='\0')
        return(0);
    for(const char *sym=nt;*sym!='\0';++sym)
    {
        if(*sym==x)
            return(1);
    }
    return(0);
}
// Tells whether x is one of the terminal symbols listed in t.
// Returns 1 if it is, 0 otherwise (the NUL character is never a symbol).
int terminal(char x)
{
    if(x=='\0')
        return(0);
    // Walk t up to its own terminator. The original bounded the loop with
    // strlen(nt), which misses terminals when t is longer than nt and reads
    // past the end of t when it is shorter.
    for(int i=0;t[i]!='\0';i++)
    {
        if(t[i]==x)
            return(1);
    }
    return(0);
}
// Pushes the pair (nt[r], t[c]) onto the work stack.
// top is advanced first, so entry 0 of stack[] is never written.
void push(int r,int c)
{
    top+=1;
    stack[top].trs=t[c];
    stack[top].nts=nt[r];
}
// Removes the top pair from the work stack and leaves its two symbols in
// the globals nr (non-terminal) and tr (terminal).
void pop()
{
    tr=stack[top].trs;
    nr=stack[top].nts;
    top-=1;
}
// Marks entry (r, c) of the leading table (lt == 1) or the trailing table
// (lt == 2) and pushes the pair onto the work stack. An entry that is
// already set is left alone and nothing is pushed. Any other value of lt
// does nothing.
void install(int r,int c,int lt)
{
    // Both tables are 20x20; ignore indices that fall outside them
    // (for example a "not found" result from a lookup).
    if(r<0||r>=20||c<0||c>=20)
        return;
    if(lt==1)
    {
        if(!lead[r][c])
        {
            lead[r][c]=1;
            push(r,c);
        }
    }
    if(lt==2)
    {
        if(!trail[r][c])
        {
            trail[r][c]=1;
            push(r,c);
        }
    }
}
// Fills the lead table. For every production of every non-terminal, the
// symbol at index 3 is examined (this assumes the "X->..." layout, where
// the right-hand side starts at index 3):
//   - if it is a terminal, it is installed for the left-hand side;
//   - if it is a non-terminal followed by a terminal, that terminal is
//     installed instead.
// Each installed pair is then propagated: whenever a production's first
// right-hand symbol is the popped non-terminal, the popped terminal is
// installed for that production's left-hand side as well.
void leading()
{
    int r,c;
    top=0;
    for(int no=0;nt[no]!='\0';no++)
    {
        for(int i=0;i<nop;i++)
        {
            if(nt[no]!=p[i][0])
                continue;
            char a=p[i][3];
            if(terminal(a))
            {
                r=nt_no(p[i][0]);
                c=t_no(a);
                install(r,c,1);     // 1 selects the leading table
            }
            else if(nonterminal(a) && terminal(p[i][4]))
            {
                r=nt_no(p[i][0]);
                c=t_no(p[i][4]);
                install(r,c,1);
            }
            while(top!=0)
            {
                pop();
                for(int j=0;j<nop;j++)
                {
                    if(p[j][3]==nr)
                    {
                        r=nt_no(p[j][0]);
                        c=t_no(tr);
                        install(r,c,1);
                    }
                }
            }
        }
    }
}
// Fills the trail table. For every production of every non-terminal, the
// last symbol is examined:
//   - if it is a terminal, it is installed for the left-hand side;
//   - if it is a non-terminal preceded by a terminal, that terminal is
//     installed instead.
// Each installed pair is then propagated: whenever a production ends in the
// popped non-terminal, the popped terminal is installed for that
// production's left-hand side as well.
void trailing()
{
    int r,c;
    top=0;
    for(int no=0;nt[no]!='\0';no++)
    {
        for(int i=0;i<nop;i++)
        {
            if(nt[no]!=p[i][0])
                continue;
            int len=strlen(p[i]);
            char a=p[i][len-1];
            if(terminal(a))
            {
                r=nt_no(p[i][0]);
                c=t_no(a);
                install(r,c,2);     // 2 selects the trailing table
            }
            // len>=2 keeps the look-back inside the string.
            else if(len>=2 && nonterminal(a) && terminal(p[i][len-2]))
            {
                r=nt_no(p[i][0]);
                c=t_no(p[i][len-2]);
                // The original passed the string length here, so the
                // entry was normally dropped (or written to lead[]).
                install(r,c,2);
            }
            while(top!=0)
            {
                pop();
                for(int j=0;j<nop;j++)
                {
                    int plen=strlen(p[j]);
                    if(plen>0 && p[j][plen-1]==nr)
                    {
                        r=nt_no(p[j][0]);
                        c=t_no(tr);
                        install(r,c,2);
                    }
                }
            }
        }
    }
}
void main()
{
16
17
OUTPUT:
Enter the terminal: a b
Enter the non Terminal: S
Enter the no of production:S--> ab
leading (S)--> { a }
trailing (S)--> { b}
Operator Precedence table:
a
b
a
<.
b
.>
Enter string to parse :a
String a is accepted
18
4) AIM: Write a program to generate various intermediate code forms:
i) Polish notation
THEORY:
Polish notation, also known as prefix notation, is a form of notation for logic, arithmetic, and
algebra. Its distinguishing feature is that it places operators to the left of their operands. If the
arity of the operators is fixed, the result is a syntax lacking parentheses or other brackets, that can
still be parsed without ambiguity.
The Polish logician Jan Łukasiewicz invented this notation around 1920 in order to simplify
sentential logic. When Polish notation is used as a syntax for mathematical expressions by
interpreters of programming languages, it is readily parsed into abstract syntax trees and can, in
fact, define a one-to-one representation for the same. Because of this, Lisp and related
programming languages define their entire syntax in terms of prefix or postfix expressions.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
char stack[50];
int top=-1;
void in_to_post(char infix[]);
void push (char);
char pop();
/* Reads one infix expression from standard input and prints its
 * conversion through in_to_post(). */
void main()
{
    char infix[25];
    printf("Enter the infix expression");
    /* fgets never writes past the buffer; gets() has no such limit. */
    if (fgets(infix, sizeof infix, stdin) == NULL)
        infix[0] = '\0';
    infix[strcspn(infix, "\n")] = '\0';   /* drop the newline fgets keeps */
    in_to_post(infix);
    getch();
}
void push(char symb)
{
if(top>=49)
{
printf("stack overflow");
getch();
return;
}
else
19
{
top=top+1;
stack[top]=symb;
}
}
/* Removes and returns the top character of the operator stack. On an empty
 * stack (top == -1) it prints "stack empty", waits for a key press and
 * returns 0. */
char pop()
{
    if (top != -1)
        return stack[top--];

    printf("stack empty");
    getch();
    return 0;
}
/*
 * Returns the precedence rank of ch for infix-to-postfix conversion:
 *   4 for '*' and '/', 3 for '+' and '-', 2 for every other character.
 *
 * Operators of the same arithmetic level now share one rank. The original
 * ranked '/' above '*' and gave '-' the default rank, below '+' and equal
 * to '('.
 * NOTE(review): in_to_post is not fully visible here; this assumes it pops
 * while the stacked rank is >= the incoming rank - confirm against it.
 */
int preced(char ch)
{
    switch (ch)
    {
    case '*':
    case '/':
        return(4);
    case '+':
    case '-':
        return(3);
    default:
        return(2);
    }
}
void in_to_post(char infix[])
{
int length;
20
21
postfix[pos++]=temp;
}
postfix[pos++]='\0';
puts(postfix);
return;
}
22
where the definitions and the user subroutines are often omitted.mming languages, called ``host
languages.''
PROGRAM:
#include<iostream.h>
#include<string.h>
#include<ctype.h>
#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
// Token kinds produced by the lexical analyser. The single-character tokens
// use their own character code as value; NAME, NUMBER and END take the
// first three ordinal values.
enum Token_Type
{
    NAME,
    NUMBER,
    END,
    PLUS='+',
    MINUS='-',
    MUL='*',
    DIV='/',
    PRINT=';',
    ASSIGN='=',
    LP='(',
    RP=')'
};
Token_Type current_token=PRINT;   // most recently scanned token
double Number_value;              // value of the last NUMBER token
char Identifier[10]={'\0'};       // text of the last NAME token
23
24
case MUL:
left*=Prim(1);
break;
case DIV:
double d=Prim(1);
if(d!=0)
{
left/=d;
break;
}
return printf("Error:divide by 0");
default:
return left;
}
}
}
// Parses a primary expression and returns its value. Grammar handled:
//   P -> NUMBER
//   P -> NAME
//   P -> NAME = E
//   P -> - P
//   P -> ( E )
// When get is non-zero a fresh token is fetched before parsing.
// On a syntax error a message is printed and, as in the original, the
// result of printf (the number of characters written) is returned.
double Prim(int get)
{
    // Declared ahead of the switch so that no case label jumps over an
    // initialised declaration.
    double v;
    int place=-1;

    if(get) Get_Token();

    switch(current_token)
    {
    case NUMBER:
        v=Number_value;
        Get_Token();
        return v;
    case NAME:
        if(ASSIGN==Get_Token())
        {
            // NOTE(review): place is presumably passed by reference and
            // left at -1 when the name is not yet in the table - confirm
            // against FindinTable.
            v=FindinTable(Identifier,10,place,1);
            if(place==-1)
            {
                strcpy(SymTable[Total_No_ofSymbols].Entry,Identifier);
                place=Total_No_ofSymbols++;
            }
            v=Expr(1);
            SymTable[place].value=v;
        }
        else
            v=FindinTable(Identifier,10,place,0);
        return v;
    case MINUS:
        return -Prim(1);
    case LP:
    {
        // Named "inner" rather than "exp" to avoid shadowing exp() from
        // <math.h>.
        double inner=Expr(1);
        if(current_token!=RP)
        {return printf("Error: ) expected");}
        Get_Token();
        return inner;
    }
    default:
        return printf("Error: primary expected");
    }
}
// Lexical analyser: reads characters from cin, stores the next token in
// current_token and returns it.
//   state 0      - start: skip blanks, recognise single-character tokens
//   states 1, 2  - collect a NAME into Identifier
//   states 3, 4  - collect a NUMBER and convert it into Number_value
// state and ch are static because the character that ends a NAME is
// examined again on the next call (state 3).
// Names and numbers longer than their buffers are truncated. End of input
// is reported as END.
Token_Type Get_Token()
{
    static int state=0;
    static char ch;
    char str[10]={'\0'};
    int len;
    while(1)
    {
        switch(state)
        {
        case 0:
            ch=cin.get();
            if(!cin)
            {
                // Without this check the scanner loops forever once the
                // input is exhausted.
                current_token=END;
                return END;
            }
            switch(ch)
            {
            case ' ':break;
            case '=':current_token=ASSIGN;
                return ASSIGN;
            case '+':current_token=PLUS;
                return PLUS;
            case '-':current_token=MINUS;
                return MINUS;
            case '*':current_token=MUL;
                return MUL;
            case '/':current_token=DIV;
                return DIV;
            case '(':current_token=LP;
                return LP;
            case ')':current_token=RP;
                return RP;
            case ';':current_token=PRINT;
                return PRINT;
            case '\n':current_token=PRINT;
                return PRINT;
            case 'Q':current_token=END;
                return END;
            case 'q':current_token=END;
                return END;
            default:state=1;
            }
            break;
        case 1: //detecting token type NAME in states 1 and 2
            if(isalpha((unsigned char)ch))
            {
                state=2;
                Identifier[0]=ch;
                Identifier[1]='\0';
            }
            else
            {state=3;break;}
            // falls through into state 2 to read the rest of the name
        case 2:
            ch=cin.get();
            if(isalpha((unsigned char)ch)||isdigit((unsigned char)ch))
            {
                len=strlen(Identifier);
                // Append only while the terminator still fits.
                if(len+1<(int)sizeof(Identifier))
                {
                    Identifier[len+1]='\0';
                    Identifier[len]=ch;
                }
                break;
            }
            else
            {
                // ch is kept: state 3 looks at it on the next call.
                state=3;
                current_token=NAME;
                return NAME;
            }
        case 3://detecting token type Number in state 3 and 4
            if(ch=='.'||isdigit((unsigned char)ch))
            {
                state=4;
                str[0]=ch;
                str[1]='\0';
                break;
            }
            else
            {cin.putback(ch);
            state=0;break;}
        case 4:
            ch=cin.get();
            if(ch=='.'||isdigit((unsigned char)ch))
            {
                len=strlen(str);
                if(len+1<(int)sizeof(str))
                {
                    str[len+1]='\0';
                    str[len]=ch;
                }
            }
            else
            {
                cin.putback(ch);
                state=0;
                Number_value=atof(str);
                current_token=NUMBER;
                return NUMBER;
            }
        }
    }
}
void main()
27
{
clrscr();
while(1)
{
Get_Token();
if (current_token==END)break;
cout<<Expr(0)<<"\n";
}
}
28
OUTPUT:
x=2
y=3
x*y+3
output is 9
29
6) AIM: Write a program to generate YACC specification for a few syntactic categories.
THEORY:
Yacc provides a general tool for imposing structure on the input to a computer program. The
Yacc user prepares a specification of the input process; this includes rules describing the input
structure, code to be invoked when these rules are recognized, and a low-level routine to do the
basic input. Yacc then generates a function to control the input process. This function, called a
parser, calls the user-supplied low-level input routine (the lexical analyzer) to pick up the basic
items (called tokens) from the input stream. These tokens are organized according to the input
structure rules, called grammar rules; when one of these rules has been recognized, then user
code supplied for this rule, an action, is invoked; actions have the ability to return values and
make use of the values of other actions.
A yacc specification consists of a mandatory rules section, and optional sections for definitions
and user subroutines.
The declarations section for definitions, if present, must be the first section in the yacc program.
The mandatory rules section follows the definitions; if there are no definitions, then the rules
section is first. In both cases, the rules section must start with the delimiter %%. If there is a
subroutines section, it follows the rules section and is separated from the rules by
another %% delimiter. If there is no second %% delimiter, the rules section continues to the
end of the file.
When all sections are present, a specification file has the format:
declarations
%%
rules
%%
subroutines
PROGRAM:
%{
/* C declarations copied verbatim into the generated parser. */
#include <stdio.h>
int regs[26];   /* values stored by "LETTER = expr", indexed by the LETTER value */
int base;       /* radix of the number being read: 8 after a leading 0, else 10 */
%}

/* yacc directives start with '%'; the listing showed them with '#'. */
%start list
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS    /*supplies precedence for unary minus */

%%      /* beginning of rules section */

/* A list is a sequence of newline-terminated statements. After a syntax
 * error the parser resynchronises at the next newline. */
list:
        /*empty */
    |
        list stat '\n'
    |
        list error '\n'
        {
            yyerrok;
        }
    ;

/* A statement either prints an expression or stores it in a register. */
stat:   expr
        {
            printf("%d\n",$1);
        }
    |
        LETTER '=' expr
        {
            regs[$1] = $3;
        }
    ;

expr:   '(' expr ')'
        {
            $$ = $2;
        }
    |
        expr '*' expr
        {
            $$ = $1 * $3;
        }
    |
        expr '/' expr
        {
            /* Integer division by zero is undefined; report it instead. */
            if ($3 == 0) { yyerror("division by zero"); YYERROR; }
            $$ = $1 / $3;
        }
    |
        expr '%' expr
        {
            if ($3 == 0) { yyerror("division by zero"); YYERROR; }
            $$ = $1 % $3;
        }
    |
        expr '+' expr
        {
            $$ = $1 + $3;
        }
    |
        expr '-' expr
        {
            $$ = $1 - $3;
        }
    |
        expr '&' expr
        {
            $$ = $1 & $3;
        }
    |
        expr '|' expr
        {
            $$ = $1 | $3;
        }
    |
        '-' expr %prec UMINUS
        {
            $$ = -$2;
        }
    |
        LETTER
        {
            $$ = regs[$1];
        }
    |
        number
    ;

/* A number is built digit by digit; a leading 0 selects octal. */
number: DIGIT
        {
            $$ = $1;
            base = ($1==0) ? 8 : 10;
        }
    |
        number DIGIT
        {
            $$ = base * $1 + $2;
        }
    ;

%%      /* beginning of subroutines section */

/* Runs the generated parser and returns its result as the exit status. */
int main()
{
    return(yyparse());
}
/* Error hook called by the parser: writes the message s and a newline to
 * stderr. Returns 0. (The K&R original was implicitly int but returned no
 * value.) */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n",s);
    return(0);
}
/* End-of-input hook: always returns 1. */
int yywrap(void)
{
    return(1);
}
33