Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 27

CS3501-COMPILER DESIGN LABORATORY

LIST OF EXPERIMENTS

1. Using the LEX tool, Develop a lexical analyzer to recognize a few patterns in C. (Ex.
identifiers, constants, comments, operators etc.). Create a symbol table, while
recognizing
identifiers.
2. Implement a Lexical Analyzer using LEX Tool
3. Generate YACC specification for a few syntactic categories.
a. Program to recognize a valid arithmetic expression that uses operator +, -, * and /.
b. Program to recognize a valid variable which starts with a letter followed by any
number of letters or digits.
c. Program to recognize a valid control structures syntax of C language (For loop,
while loop, if-else, if-else-if, switch-case, etc.).
d. Implementation of calculator using LEX and YACC
4. Generate three address code for a simple program using LEX and YACC.
5. Implement type checking using Lex and Yacc.
6. Implement simple code optimization techniques (Constant folding, Strength
reduction and Algebraic transformation)
7. Implement back-end of the compiler for which the three address code is given as
input and the 8086 assembly language code is produced as output.

30 PERIODS
TOTAL: 75 PERIODS

1. DEVELOP A LEXICAL ANALYZER TO RECOGNIZE A FEW


PATTERNS IN C
AIM:
To write and execute a C program to implement the lexical analyzer.
ALGORITHM:
1. Start the program.
2. Declare the file pointer and necessary variables.
3. Open the input file in the read mode.
4. Use the analyze function to analyze the input program and store the identifiers,
keywords and
operator on idhd, keyhd, ophd respectively.
5. Stores the tokens in data structure linked lists.
6. Increment the line number of each token and its occurrences.
7. Using the show function print the linked lists in a tabular format.
8. Stop the program.

PROGRAM:

#include<string.h>
#include<ctype.h>
#include<stdio.h>
void keyword(char str[10])
{
if(strcmp("for",str)==0||strcmp("while",str)==0||strcmp("do",str)==0||
strcmp("int",str)==0||strcmp("float",str)==0||strcmp("char",str)==0||
strcmp("double",str)==0||strcmp("static",str)==0||strcmp("switch",str)==0||
strcmp("case",str)==0)
printf("\n%s is a keyword",str);
else
printf("\n%s is an identifier",str);
}
main()
{
FILE *f1,*f2,*f3;
char c,str[10],st1[10];
int num[100],lineno=0,tokenvalue=0,i=0,j=0,k=0;
printf("\nEnter the c program");/*gets(st1);*/
f1=fopen("input","w");
while((c=getchar())!=EOF)
putc(c,f1);
fclose(f1);
f1=fopen("input","r");
f2=fopen("identifier","w");
f3=fopen("specialchar","w");
while((c=getc(f1))!=EOF)
{
if(isdigit(c))
{
tokenvalue=c-'0';
c=getc(f1);
while(isdigit(c))
{
tokenvalue*=10+c-'0';
c=getc(f1);
}
num[i++]=tokenvalue;
ungetc(c,f1);
}
else
if(isalpha(c))
{
putc(c,f2);
c=getc(f1);
while(isdigit(c)||isalpha(c)||c=='_'||c=='$')
{
putc(c,f2);
c=getc(f1);
}
putc(' ',f2);
ungetc(c,f1);
}
else
if(c==' '||c=='\t')
printf(" ");
else
if(c=='\n')
lineno++;
else
putc(c,f3);
}
fclose(f2);
fclose(f3);
fclose(f1);
printf("\nThe no's in the program are");
for(j=0;j<i;j++)
printf("%d",num[j]);
printf("\n");
f2=fopen("identifier","r");
k=0;
printf("The keywords and identifiersare:");
while((c=getc(f2))!=EOF)
{
if(c!=' ')
str[k++]=c;
else
{
str[k]='\0';
keyword(str);
k=0;
}
}
fclose(f2);
f3=fopen("specialchar","r");
printf("\nSpecial characters are");
while((c=getc(f3))!=EOF)
printf("%c",c);
printf("\n");
fclose(f3);
printf("Total no. of lines are:%d",lineno);
}

INPUT & OUTPUT:

Enter the c program


#include<stdio.h>
main()
{
int a=10,b=20,c;
c=a+b;
printf("%d",c);
}
^Z

The no's in the program are1020


The keywords and identifiersare:
include is an identifier
stdio is an identifier
h is an identifier
main is an identifier
int is a keyword
a is an identifier
b is an identifier
c is an identifier
c is an identifier
a is an identifier
b is an identifier
printf is an identifier
d is an identifier
c is an identifier
Special characters are#<.>(){=,=,;=+;("%",);}
Total no. of lines are:8

RESULT:
Thus the C program for lexical analyzer to recognize few patterns is implemented
and executed successfully.

2. IMPLEMENTATION OF LEXICAL ANALYZER USING LEX


TOOL
AIM:
To implement the lexical analyzer using lex tool for a subset of C language.

ALGORITHM:
1. Start the program.
2. Declare necessary variables and creates token representation using Regular.
3. Print the pre processor or directives, keywords by analysis of the input
program.
4. In the program check whether there are arguments.
5. Declare a file and open it as read mode.
6. Read the file and if any taken in source program matches with RE that all
returned as
integer value.
7. Print the token identified using YYdex() function.
8. Stop the program

PROGRAM:

%{
%}
identifier[a-zA-Z][a-zA-Z0-9]*
%%
#.* {printf("\n%s is a preprocessor directive",yytext);}
int |
float |
char |
double |
while |
do |
if |
break |
continue |
void |
switch |
return |
else |
goto {printf("\n%s is a keyword",yytext);}
{identifier}\( {printf("\n function %s",yytext);}
\{ {printf("\nblock begins");}
\} {printf("\nblock ends");}
\( {printf("\n");ECHO;}
{identifier}(\[[0-9]*\])* {printf("\n%s is an identifier",yytext);}
\".*\" {printf("\n %s is a string ",yytext);}
[0-9]+ {printf("\n%s is a number",yytext);
}
\<= |
\>= |
\< |
\> |
\== {printf("\n %s is a relational operator",yytext);}
\= |
\+ |
\- |
\/ |
\& |
% {printf("\n %s is a operator",yytext);}
.|
\n;
%%
int main(int argc,char **argv)
{
FILE *file;
file=fopen("inp.c","r");
if(!file)
{
printf("could not open the file!!!");
exit(0);
}
yyin=file;
yylex();
printf("\n\n");
return(0);
}
int yywrap()
{
return 1;
}
OUTPUT:

C:\Documents and Settings\admin\Desktop>flex alex.l

C:\Documents and Settings\admin\Desktop>gcc lex.yy.c

C:\Documents and Settings\admin\Desktop>a.exe

#include<stdio.h> is a preprocessor directive

void is a keyword
function main(

block begins

int is a keyword
a is an identifier
b is an identifier
c is an identifier

function printf(
"enter the value for a,b" is a string

function scanf(
"%d%d" is a string
& is a operator
a is an identifier
& is a operator
b is an identifier

c is an identifier
= is a operator
a is an identifier
+ is a operator
b is an identifier

function printf(
"the value of c:%d" is a string
& is a operator
c is an identifier

block ends

RESULT:
Thus the program to implement lexical analyzer using lex tool is executed and
implemented successfully.
3. GENERATE YACC SPECIFICATION FOR A FEW SYNTACTIC
CATEGORIES

A) PROGRAM TO RECOGNIZE A VALID ARITHMETIC EXPRESSION THAT


USES OPERATOR +, - , * AND /.

ALGORITHM:

1. Accept the token generated in lex part as input


2. Specify order of procedure
3. Define rules with end point
4. Parse input string from standard input by calling yyparse() by main function
5. Print the result of any rules matches
6. If none of results defined matches print “invalid expression”
COMPILATION STEPS:

$ lex exp.l
$ yacc –d exp.y
$ cc lex.yy.c y.tab.c –ll –ly
$ ./a.out

PROGRAM:

Yacc<Exp.y>

%token NUMBER ID NL
%left '+' '-'
%left '*' '/'
%%

stmt : exp NL { printf(“Valid Expression”); exit(0);}


;
exp : exp '+' exp
| exp '-' exp
| exp '*' exp
| exp '/' exp
| '(' exp ')'
| ID
| NUMBER
;
%%

int yyerror(char *msg)


{
printf(“Invalid Expression\n”);
exit(0);
}
main ()
{
printf(“Enter the expression\n”);
yyparse();
}

Lex<Exp.L>

%{
#include “y.tab.h”
%}
%%
[0-9]+ { return NUMBER; }
[a-zA-Z][a-zA-Z0-9_]* { return ID; }
\n { return NL ;}
. { return yytext[0]; }
%%

RESULT:
Thus the program for validating arithmetic expressions using Yacc is implemented
and executed successfully.
B) PROGRAM TO RECOGNIZE A VALID VARIABLE WHICH
STARTS WITH A LETTER FOLLOWED BY ANY NUMBER OF
LETTERS OR DIGITS.

PROGRAM:

Yacc<Let.Y>

%token DIGIT LETTER NL UND


%%
stmt : variable NL { printf(“Valid Identifiers\n”); exit(0);}
;
variable : LETTER alphanumeric
;
alphanumeric: LETTER alphanumeric
| DIGIT alphanumeric
| UND alphanumeric
| LETTER
| DIGIT
| UND
;
%%
int yyerror(char *msg)
{
printf(“Invalid Expression\n”);
exit(0);
}
main ()
{
printf(“Enter the variable name\n”);
yyparse();
}
Lex<Let.L>

%{
#include “y.tab.h”
%}
%%
[a-zA-Z] { return LETTER ;}
[0-9] { return DIGIT ; }
[\n] { return NL ;}
[_] { return UND; }
. { return yytext[0]; }
%%
RESULT:
Thus the program is executed successfully.
C. PROGRAM TO RECOGNIZE A VALID CONTROL STRUCTURES SYNTAX
OF C LANGUAGE (FOR LOOP,WHILE LOOP, IF-ELSE, IF-ELSE-IF, SWITCH-
CASE, ETC.).
D) IMPLEMENTATION OF CALCULATOR USING LEX AND YACC

PROGRAM:

Lex<Cal.L>

%{
#include"y.tab.h"
#include<math.h>
%}
%%
([0-9]+|([0-9]*\.[0-9]+)([eE][-+]?[0-9]+)?) {yylval.dval=atof(yytext);return
NUMBER;}
log |
LOG {return LOG;}
In {return nLOG;}
sin |
SIN {return SINE;}
cos |
COS {return COS;}
tan |
TAN {return TAN;}
mem {return MEM;}
[\t];
\$ return 0;
\n|. return yytext[0];
%%

Yacc<Cal.Y>

%{
double memvar;
%}
%union
{
double dval;
}
%token<dval>NUMBER
%token<dval>MEM
%token LOG SINE nLOG COS TAN
%left '-' '+'
%left '*' '/'
%right '^'
%left LOG SINE nLOG COS TAN
%nonassoc UMINUS
%type<dval>expression
%%
start:statement'\n'
|start statement'\n'
;
statement:MEM'='expression {memvar=$3;}
| expression{printf("Answer=%g\n",$1);}
;
expression:expression'+'expression {$$=$1+$3;}
| expression '-' expression {$$=$1-$3;}
| expression '*' expression {$$=$1*$3;}
| expression '/' expression
{
if($3==0)
yyerror("divide by zero");
else
$$=$1/$3;
}
|expression'^'expression {$$=pow($1,$3);}
;
expression:'-'expression %prec UMINUS{$$=-$2;}
|'('expression')'{$$=$2;}
|LOG expression {$$=log($2)/log(10);}
|nLOG expression {$$=log($2);}
|SINE expression {$$=sin($2*3.14/180);}
|COS expression {$$=cos($2*3.14/180);}
|TAN expression {$$=tan($2*3.14/180);}
|NUMBER {$$=$1;}
|MEM {$$=memvar;}
;
%%
main()
{
printf("Enter the expression");
yyparse();}
int yyerror(char *error)
{
printf("%s\n",error);
}

OUTPUT:

[linuxpert@fosslab ~]$ vi cal.l


[linuxpert@fosslab ~]$ lex cal.l
[linuxpert@fosslab ~]$ yacc -d cal.y
[linuxpert@fosslab ~]$ cc lex.yy.c y.tab.c -ll -lm
[linuxpert@fosslab ~]$ ./a.out
Enter the expression(5+2)*(3-1)/(2)
Answer=7

RESULT:
Thus the program to implement calculator using LEX and YACC tool is executed
successfully and output is verified.
4) GENERATE 3 ADDRESS CODE FOR A SIMPLE PROGRAM USING LEX AND
YACC
lex.l
%{
#include<stdio.h>
#include"y.tab.h"
int k=1;
%}

%%
[0-9]+ {
yylval.dval=yytext[0];
return NUM;
}

\n {return 0;}
. {return yytext[0];}
%%

void yyerror(char* str)


{
printf("\n%s",str);
}
char *gencode(char word[],char first,char op,char second)
{
char temp[10];
sprintf(temp,"%d",k);
strcat(word,temp);
k++;
printf("%s = %c %c %c\n",word,first,op,second);

return word; //Returns variable name like t1,t2,t3... properly


}
int yywrap()
{
return 1;
}

main()
{
yyparse();
return 0;
}
yacc.y
%{
#include<stdio.h>
int aaa;
%}

%union{
char dval;
}

%token <dval> NUM


%type <dval> E
%left '+' '-'
%left '*' '/' '%'

%%
statement : E {printf("\nt = %c \n",$1);}
;
E : E '+' E
{
char word[]="t";
char *test=gencode(word,$1,'+',$3);
$$=test;
}
| E '-' E
{
char word[]="t";
char *test=gencode(word,$1,'-',$3);
$$=test;
}
| E '%' E
{
char word[]="t";
char *test=gencode(word,$1,'%',$3);
$$=test;
}
| E '*' E
{
char word[]="t";
char *test=gencode(word,$1,'*',$3);
$$=test;
}
| E '/' E
{
char word[]="t";
char *test=gencode(word,$1,'/',$3);
$$=test;
}
| '(' E ')'
{
$$=$2;
}
| NUM
{
$$=$1;
}
;
%%
Problem: getting garbage value in output
Expected output for expression (2+3)*5 should be like:
t1= 2 + 3
t2= t1 * 5
Obtained output:

t1= 2 + 3
t2= garbage value * 5
RESULT:
Thus the program to implement 3 address code using LEX and YACC tool is
executed successfully and output is verified.
5. IMPLEMENT TYPE CHECKING USING LEX AND YACC.

PROGRAM:

Lex<Cal.L>

%{
#include"y.tab.h"
#include<math.h>
%}
%%
([0-9]+|([0-9]*\.[0-9]+)([eE][-+]?[0-9]+)?) {yylval.dval=atof(yytext);return
NUMBER;}
log |
LOG {return LOG;}
In {return nLOG;}
sin |
SIN {return SINE;}
cos |
COS {return COS;}
tan |
TAN {return TAN;}
mem {return MEM;}
[\t];
\$ return 0;
\n|. return yytext[0];
%%

Yacc<Cal.Y>

%{
double memvar;
%}
%union
{
double dval;
}
%token<dval>NUMBER
%token<dval>MEM
%token LOG SINE nLOG COS TAN
%left '-' '+'
%left '*' '/'
%right '^'
%left LOG SINE nLOG COS TAN
%nonassoc UMINUS
%type<dval>expression
%%
start:statement'\n'
|start statement'\n'
;
statement:MEM'='expression {memvar=$3;}
| expression{printf("Answer=%g\n",$1);}
;
expression:expression'+'expression {$$=$1+$3;}
| expression '-' expression {$$=$1-$3;}
| expression '*' expression {$$=$1*$3;}
| expression '/' expression
{
if($3==0)
yyerror("divide by zero");
else
$$=$1/$3;
}
|expression'^'expression {$$=pow($1,$3);}
;
expression:'-'expression %prec UMINUS{$$=-$2;}
|'('expression')'{$$=$2;}
|LOG expression {$$=log($2)/log(10);}
|nLOG expression {$$=log($2);}
|SINE expression {$$=sin($2*3.14/180);}
|COS expression {$$=cos($2*3.14/180);}
|TAN expression {$$=tan($2*3.14/180);}
|NUMBER {$$=$1;}
|MEM {$$=memvar;}
;
%%
main()
{
printf("Enter the expression");
yyparse();}
int yyerror(char *error)
{
printf("%s\n",error);
}
OUTPUT:

[linuxpert@fosslab ~]$ vi cal.l


[linuxpert@fosslab ~]$ lex cal.l
[linuxpert@fosslab ~]$ yacc -d cal.y
[linuxpert@fosslab ~]$ cc lex.yy.c y.tab.c -ll -lm
[linuxpert@fosslab ~]$ ./a.out

Enter the expression(5+2)*(3-1)/(2)


Answer=7

RESULT:
Thus the program to implement calculator using LEX and YACC tool is executed
successfully and output is verified.
6. IMPLEMENT SIMPLE CODE OPTIMIZATION TECHNIQUES (CONSTANT
FOLDING, STRENGTH REDUCTION AND ALGEBRAIC TRANSFORMATION)

AIM:
To write a C program to implement Code Optimization Techniques.

ALGORITHM:

Input: Set of ‘L’ values with corresponding ‘R’ values.


Output: Intermediate code & Optimized code after eliminating common expressions.

PROGRAM:

#include<stdio.h>
#include<conio.h>
#include<string.h>
struct op
{
char l;
char r[20];
}
op[10],pr[10];
void main()
{
int a,i,k,j,n,z=0,m,q;
char *p,*l;
char temp,t;
char *tem;
clrscr();
printf("Enter the Number of Values:");
scanf("%d",&n);
for(i=0;i<n;i++)
{
printf("left: ");
op[i].l=getche();
printf("\tright: ");
scanf("%s",op[i].r);
}
printf("Intermediate Code\n") ;
for(i=0;i<n;i++)
{
printf("%c=",op[i].l);
printf("%s\n",op[i].r);
}
for(i=0;i<n-1;i++)
{
temp=op[i].l;
for(j=0;j<n;j++)
{
p=strchr(op[j].r,temp);
if(p)
{
pr[z].l=op[i].l;
strcpy(pr[z].r,op[i].r);
z++; }}}
pr[z].l=op[n-1].l;
strcpy(pr[z].r,op[n-1].r);
z++;
printf("nAfter Dead Code Eliminationn");
for(k=0;k<z;k++) {
printf("%ct=",pr[k].l);
printf("%sn",pr[k].r);
}
for(m=0;m<z;m++) {
tem=pr[m].r;
for(j=m+1;j<z;j++) {
p=strstr(tem,pr[j].r);
if(p) {
t=pr[j].l;
pr[j].l=pr[m].l;
for(i=0;i<z;i++) {
l=strchr(pr[i].r,t) ;
if(l) {
a=l-pr[i].r;
printf("pos: %d",a);
pr[i].r[a]=pr[m].l; }}}}}
printf("Eliminate Common Expression\n");
for(i=0;i<z;i++)
{
printf("%c\t=",pr[i].l);
printf("%s\n",pr[i].r);
}
for(i=0;i<z;i++)
{
for(j=i+1;j<z;j++)
{
q=strcmp(pr[i].r,pr[j].r);
if((pr[i].l==pr[j].l)&&!q)
{
pr[i].l='\0';
strcpy(pr[i].r,'\0');
}
}
}
printf("Optimized Code\n");
for(i=0;i<z;i++)
{
if(pr[i].l!='\0')
{
printf("%c=",pr[i].l);
printf("%s\n",pr[i].r);
}
}
getch();
}

INPUT & OUTPUT:


Enter the Number of Values:5
left: a right: 9
left: b right: c+d
left: e right: c+d
left: f right: b+e
left: r right: f
Intermediate Code
a=9
b=c+d
e=c+d
f=b+e
r=f
nAfter Dead Code Eliminationnbt=c+dnet=c+dnft=b+enrt=fnpos: 2Eliminate Common
Ex
pression
b =c+d
b =c+d
f =b+b
r =f
Optimized Code
b=c+d
f=b+b
r=f

RESULT:
Thus the program for code optimization is implemented and executed
successfully, and the output is verified.
7. IMPLEMENT BACK-END OF THE COMPILER FOR WHICH THE THREE
ADDRESS CODE IS GIVEN AS INPUT AND THE 8086 ASSEMBLY LANGUAGE
CODE IS PRODUCED AS OUTPUT.

AIM:
To write a C program to implement Simple Code Generator.

ALGORITHM:

1. Start
2. Get address code sequence.
3. Determine current location of 3 using address (for 1st operand).
4. If current location not already exist generate move (B,O).
5. Update address of A(for 2nd operand).
6. If current value of B and () is null,exist.
7. If they generate operator () A,3 ADPR.
8. Store the move instruction in memory
9. Stop

PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
#include<ctype.h>
#include<graphics.h>
typedef struct
{
char var[10];
int alive;
}
regist;
regist preg[10];
void substring(char exp[],int st,int end)
{
int i,j=0;
char dup[10]="";
for(i=st;i<end;i++)
dup[j++]=exp[i];
dup[j]='0';
strcpy(exp,dup);
}
int getregister(char var[])
{
int i;
for(i=0;i<10;i++)
{
if(preg[i].alive==0)
{
strcpy(preg[i].var,var);
break;
}
}
return(i);
}
void getvar(char exp[],char v[])
{
int i,j=0;
char var[10]="";
for(i=0;exp[i]!='\0';i++)
if(isalpha(exp[i]))
var[j++]=exp[i];
else
break;
strcpy(v,var);
}
void main()
{
char basic[10][10],var[10][10],fstr[10],op;
int i,j,k,reg,vc,flag=0;
clrscr();
printf("\nEnter the Three Address Code:\n");
for(i=0;;i++)
{
gets(basic[i]);
if(strcmp(basic[i],"exit")==0)
break;
}
printf("\nThe Equivalent Assembly Code is:\n");
for(j=0;j<i;j++)
{
getvar(basic[j],var[vc++]);
strcpy(fstr,var[vc-1]);
substring(basic[j],strlen(var[vc-1])+1,strlen(basic[j]));
getvar(basic[j],var[vc++]);
reg=getregister(var[vc-1]);
if(preg[reg].alive==0)
{
printf("\nMov R%d,%s",reg,var[vc-1]);
preg[reg].alive=1;
}
op=basic[j][strlen(var[vc-1])];
substring(basic[j],strlen(var[vc-1])+1,strlen(basic[j]));
getvar(basic[j],var[vc++]);
switch(op)
{
case '+': printf("\nAdd"); break;
case '-': printf("\nSub"); break;
case '*': printf("\nMul"); break;
case '/': printf("\nDiv"); break;
}
flag=1;
for(k=0;k<=reg;k++)
{
if(strcmp(preg[k].var,var[vc-1])==0)
{
printf("R%d, R%d",k,reg);
preg[k].alive=0;
flag=0;
break;
}
}
if(flag)
{
printf(" %s,R%d",var[vc-1],reg);
printf("\nMov %s,R%d",fstr,reg);
}
strcpy(preg[reg].var,var[vc-3]);
getch();
}
}
INPUT & OUTPUT:

Enter the Three Address Code:


a=b+c
c=a*c
exit
The Equivalent Assembly Code is:
Mov R0,b
Add c,R0
Mov a,R0
Mov R1,a
Mul c,R1
Mov c,R1

RESULT:
Thus the program to implement simple code generator is executed successfully
and the output is verified.

You might also like