Professional Documents
Culture Documents
Compiler Lab Report
Compiler Lab Report
Write a series of patterns (regular expressions) suitable for use with egrep.
Create an input file MyFile (containing one word per line) so that you can demonstrate
your work.
MyFile.txt
soumyava
a
s
i
aerious
pointer2005
Robot
KSHITIJ
aerobic
olfactory
ubuntu
crypt
cyst
caesious
Euonia
Derek'obrien
aaseiodfau
aueuiuou
ahghgekgjijoghu
aaeeiioouu
bAlEfUl
Results:
soumyava
aerious
aerobic
olfactory
caesious
Euonia
aaseiodfau
aueuiuou
ahghgekgjijoghu
aaeeiioouu
a
i
aueuiuou
aaeeiioouu
aerious
caesious
ahghgekgjijoghu
Assignment 2
1> Write a simple lex program which recognizes the verbs and the articles in a
sentence. Thus with an input "This is the book", your scanner must print "is is a
verb" and "the is an article".
2> Construct a scanner for the simple C-like language using the lex/flex tool. Given
a legal input (valid program in the language) the scanner should print a stream of two
tuples in the following format <tokenid, tokenname>.
For the following program segment
int main()
{
int first, second, third;
third=first+second;
}
3> Write a lexer that will identify the largest integer from a stream of characters like
the one below
gh56ef89f345h983exer340yacc
The lexer should return 983 in the above example. Assume that the stream only has
letters and digits. i.e., no other characters.
PROBLEM 1
%{
/*
 * Problem 1: report the verbs (is, be, are, was) and the articles
 * (a, an, the) found in the input, ignoring letter case.
 *
 * The words are matched on their own, without surrounding blanks, so that
 * two adjacent keywords ("is the") are both recognised.  Longest-match
 * makes the catch-all word rule win for longer words such as "This", and
 * rule order makes the keyword rules win on a tie.
 * Characters matched by no rule (blanks, newlines, punctuation) are echoed
 * by lex's default rule.
 */
#include <stdio.h>
%}
%%
[iI][sS] printf("'%s' is a verb\n",yytext);
[bB][eE] printf("'%s' is a verb\n",yytext);
[aA][rR][eE] printf("'%s' is a verb\n",yytext);
[wW][aA][sS] printf("'%s' is a verb\n",yytext);
[aA] printf("'%s' is an article\n",yytext);
[aA][nN] printf("'%s' is an article\n",yytext);
[tT][hH][eE] printf("'%s' is an article\n",yytext);
[a-zA-Z]+ ; /* any other word: discard */
%%
RESULT:
PROBLEM 2
%{
/*
 * Problem 2: scanner for a small C-like language.
 * Each recognised token is printed as a <tokenid,tokenname> pair; the
 * numeric ids are the constants defined below.
 * Quotes, ';', '{', '}', '=' and blanks/tabs are consumed silently.
 * Characters with no rule (for example ',') are echoed by lex's default
 * rule.
 */
#include <stdio.h>
#define INT 260
#define MAIN 265
#define IDENT 270
#define CONSTANT 275
#define OPERATOR 280
#define KEYWORD 285
%}
%%
"int" {printf(" <%d,%s> \n",INT,yytext);}
"main()" {printf(" <%d,main> \n",MAIN);}
"return"|"if"|"else" {printf(" <%d,keyword> \n",KEYWORD);}
[a-zA-Z][a-zA-Z0-9]* {printf(" <%d,ident> \n",IDENT);}
"+"|"-"|"*"|"/" {printf(" <%d,operator> \n",OPERATOR);}
\" ;
[0-9]+ {printf(" <%d,integer_constant> \n",CONSTANT);}
\n {printf("\n");}
\; ;
\{ ;
\} ;
\= ;
[ \t]+ /* ignore whitespace */;
%%
Test.c
int main()
{
int first,second,third;
third=first+second ;
}
RESULT:
<260,int>
<265,main>
<260,int>
<270,ident>
, <270,ident>
, <270,ident>
<270,ident>
<270,ident>
<280,operator>
<270,ident>
PROBLEM 3
%{
/*
 * Problem 3: find the largest integer embedded in a stream of letters
 * and digits (e.g. "gh56ef89f345h983exer340yacc" gives 983).
 */
#include <stdio.h>
#include <stdlib.h> /* atoi */

/* Largest number seen so far; keeps its initial value if no number occurs. */
static int max_val = -32768;
%}
Int [0-9]+
%%
[a-zA-Z] { printf("char = %c ",*yytext); }
{Int} {
    /* convert once, then keep the running maximum */
    int value = atoi(yytext);
    printf("number = %s\n",yytext);
    if (value > max_val)
        max_val = value;
}
%%
/* End-of-input hook: returning 1 reports that there is no further input. */
int yywrap(void)
{
    return 1;
}

/* Scan standard input to the end, then report the maximum. */
int main(void)
{
    yylex();
    printf("Maximum value is = %d \n", max_val);
    return 0;
}
1. Construct a parser for a grammar that you have used for your last assignment
2. Construct a parser for a C-like language. The parser should work with the lexer that has
been
designed in assignment 3.
The above parser should give warning messages when any error occurs, such as
when a variable is used without a preceding definition.
Description of the language is given below:
The keywords of the language are:
else if int return void
Special symbols are:
+ - * / < > = ; , ( ) { } /*…*/
Other tokens are ID and NUM. defined by the following regular expressions:
ID = letter letter*
NUM = digit digit*
letter = a|…|z|A|…|Z
digit = 0|…|9
White spaces (blanks, newlines, tabs) must be ignored. Comments must be skipped
by the lexer.
A BNF grammar for the language is as follows:
program → declaration-list
declaration-list → declaration-list declaration | declaration
declaration → var-declaration | fun-declaration
var-declaration → type ID ;
type → int | void
fun-declaration → type ID ( params ) compound-stmt
params → type ID
compound-stmt → { local-declarations statement-list }
local-declarations → local-declarations var-declaration | empty
statement-list → statement-list statement | empty
statement → expression-stmt | compound-stmt | selection-stmt | return-stmt
expression-stmt → expression ; | ;
selection-stmt → if ( expression ) statement | if ( expression ) statement else statement
return-stmt → return ;
expression → var = expression | simple-expression
var → ID
simple-expression → exp relop exp | exp
relop → < | >
exp → exp addop term | term
addop → + | -
term → term mulop factor | factor
mulop → * | /
factor → ( expression ) | var | NUM
PROBLEM 1:
NewLex.l
%{
/*
 * Scanner for the arithmetic-expression parser (Newyacc.y).
 * Delivers NUM tokens with their value in yylval and passes the
 * characters + * ( ) and newline through as single-character tokens.
 */
#include <stdlib.h>
#include "y.tab.h"
void yyerror(char *);
extern int yylval;
%}
digit [0-9]
num {digit}+
%%
{num} {
    /* numeric literal: its value travels to the parser in yylval */
    yylval = atoi(yytext);
    return NUM;
}
[+*()\n] { return yytext[0]; }
[ \t] { /* blanks and tabs are skipped */ }
. { /* anything else is treated as an error */ yyerror("Invalid character\n"); }
%%
Newyacc.y
%{
#include <ctype.h>
#include <stdio.h>
%}
%token NUM
%%
lines : lines exp '\n' {printf("value : %d \n",$2);}
| lines '\n'
|
;
/*
 * End-of-input hook called by the lex-generated scanner.
 * Always returns 1, which by lex convention means there is no further
 * input to switch to.
 */
int yywrap(void)
{
    return 1;
}
PROBLEM 2
LFile.l
%{
/*
 * Scanner for the C-like language parsed by YaccFile.y.
 * Keywords return their own token code, identifiers are entered into the
 * symbol table through install_id() and returned as ID, numbers are
 * returned as NUM with their value in yylval, and the single-character
 * operators and punctuation are returned as themselves.
 */
#include<stdio.h>
#include<stdlib.h> /* atoi */
#include "y.tab.h"
#define TableSize 29
/* Symbol table; the parser refers to the same table through an extern
 * declaration. */
struct SYMTAB{
char symb[16]; /* identifier text */
int kind; /* the parser stores 0 for variables/parameters, 1 for functions */
int type; /* the parser stores the value of the declaring type token */
int empty; /* the parser's main() sets this to -1 for every slot */
}symbol[TableSize];
extern int yylval;
int full=0; /* NOTE(review): presumably a "table full" flag - confirm in install_id */
%}
/* regular definitions */
delim [ \t\n]
ws {delim}+
letter [A-Za-z]
digit [0-9]
id {letter}{letter}*
num {digit}{digit}*
%%
{ws} {/*no action*/}
if {yylval=IF; return(IF);}
else {yylval=ELSE; return(ELSE);}
int {yylval=INT; return(INT);}
return {yylval=RETURN; return(RETURN);}
void {yylval=VOID; return(VOID);}
{id} { yylval=install_id(yytext); return(ID); }
{num} { yylval=atoi(yytext); return(NUM);}
"+"|"-"|"*"|"/"|"("|")"|"{"|"}"|";"|"<"|">"|"="|"," {return yytext[0];}
"/*"([^*]|\*+[^*/])*\*+"/" {/*ignore comment: stops at the first closing marker and may span lines*/}
"//".* {/*ignore comment*/}
%%
error=1;
full=0;
}
}
if(!error)
enter_symbol(hashval,s);
return(hashval);
}
YaccFile.y
%{
/*
 * Parser for the C-like language.
 * Identifier tokens carry, as their semantic value, the index of their
 * entry in the symbol table; the actions record what each identifier was
 * declared as and report identifiers that look undeclared.
 */
#include<stdio.h>
#define TableSize 29
/* Symbol table defined by the scanner (LFile.l). */
extern struct SYMTAB{
char symb[16];
int kind; /* 0 = variable or parameter, 1 = function (set by the actions below) */
int type; /* semantic value of the declaring type token (INT or VOID) */
int empty;
}symbol[TableSize];
%}
%token ID
%token NUM IF ELSE RETURN VOID INT
%%
program : declist ;
declist : declist dec
| dec ;
dec : vardec
| fundec ;
/* Every action sits inside its rule, before the terminating ';'. */
vardec : type ID ';' {symbol[$2].kind=0;
symbol[$2].type=$1;} ;
type : INT {$$ = $1;}
| VOID {$$= $1;} ;
fundec : type ID '(' params ')' compoundstmt
{symbol[$2].kind=1; symbol[$2].type=$1;} ;
params : type ID {symbol[$2].kind=0;
symbol[$2].type=$1;} ;
compoundstmt : '{' localdec statementlist '}' ;
localdec : localdec vardec
| ;
statementlist : statementlist statement
| ;
statement : expstmt
| compoundstmt
| selectstmt
| returnstmt ;
expstmt : exp ';'
| ';' ;
selectstmt : IF '(' exp ')' statement
| IF '(' exp ')' statement ELSE statement ;
/* The ';' token is part of the statement, as in the BNF "return ;". */
returnstmt : RETURN ';' ;
exp : var '=' exp
| simpleexp;
/* NOTE(review): the undeclared test relies on kind and type being negative
 * for entries that were never declared - confirm where they are initialised. */
var : ID {if(symbol[$1].kind<0 && symbol[$1].type<0)
printf("Error:Identifier %s not defined\n",symbol[$1].symb);} ;
simpleexp : expr relop expr
| expr ;
relop : '<'
| '>' ;
expr : expr addop term
| term ;
addop : '+'
| '-' ;
term : term mulop factor
| factor ;
mulop : '*'
| '/' ;
factor : '(' exp ')'
| ID {if(symbol[$1].kind<0 && symbol[$1].type<0)
printf("Error:Identifier %s not defined\n",symbol[$1].symb);}
| NUM ;
%%
/*
 * Parser driver: sets the `empty` field of every symbol-table slot to -1,
 * then runs the parser and returns yyparse()'s result as the exit status.
 */
int main(void)
{
    int i;
    for (i = 0; i < TableSize; i++) {
        symbol[i].empty = -1;
    }
    return yyparse();
}
/*
 * End-of-input hook called by the lex-generated scanner.
 * Always returns 1, which by lex convention means there is no further
 * input to switch to.
 */
int yywrap(void)
{
    return 1;
}
ASSIGNMENT -3
LL(1) PARSER .c
#include <stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity limits of the symbol and rule tables. */
#define TERMINALS 20
#define NONTERMINALS 20
#define NUMBER_OF_RULES 20

/* ---------------- Data structures ---------------- */

/*
 * The grammar, one production per row, written as "<lhs>><rhs>": the
 * left-hand symbol sits at index 0 and the right-hand side starts at
 * index 2.  Upper-case letters are non-terminals; every other character
 * is a terminal.  '#' presumably stands for the empty string - confirm
 * against NULL_VAL.  Unused rows stay empty.
 */
char Grammar[20][20] = {
    [0] = "E>TJ",
    [1] = "J>+TJ",
    [2] = "J>#",
    [3] = "T>FI",
    [4] = "I>*FI",
    [5] = "I>#",
    [6] = "F>(E)",
    [7] = "F>i"
};

/* Terminal characters and their table indices, filled in by main(). */
char tokens[20] = {0};
int token_ids[20] = {0};

/* Next id handed to a newly registered non-terminal; tID is presumably the
 * counterpart for terminals - confirm in AddElementToTerminalList. */
int nID = 100;
int tID = 200;

/* Number of rows of Grammar that hold a production. */
int Productions = 8;
}terminals;
}nonterminals;
terminals TerminalList[TERMINALS];
nonterminals NonTerminalList[NONTERMINALS];
}FOLLOW_SET;
int id;
int hasNull;
char FIRST[20];//assume that follow set cannot contain more than 19 elements.
}FIRST_SET;
char AUX_FIRST[20];
/*
 * One entry of the production-rule table.
 */
struct tagRules {
    int Left;      /* id of the non-terminal on the left-hand side */
    int Right[20]; /* ids of the right-hand-side symbols, closed by the sentinel 999 */
    char Prod[20]; /* right-hand side as text, copied from Grammar */
};
typedef struct tagRules Rules;

/*
 * LL(1) parsing table: first index is a non-terminal id minus
 * NONTERMINALS_BASE, second a terminal id minus TERMINALS_BASE; each cell
 * holds the right-hand side to expand, or an empty string.
 */
char ParsingTable[20][20][20];
/*
* Type: stackElementT
* -------------------
* This is the type of the objects entered in the stack.
* Edit it to change the type of things to be placed in
* the stack.
*/
/* Array-backed stack of stackElementT values. */
typedef struct {
stackElementT *contents; /* element storage; NULL once the stack has been torn down */
int maxSize; /* capacity recorded when the stack is initialized */
int top; /* index of the top element; -1 means the stack is empty */
} stackT;
/*
* Function: StackInit
* Usage: StackInit(&stack, maxSize);
* -------------------------
* A new stack variable is initialized. The initialized
* stack is made empty. MaxSize is used to determine the
* maximum number of characters that can be held in the
* stack.
*/
/*
* Functions: StackPush, StackPop
* Usage: StackPush(&stack, element); element = StackPop(&stack);
* --------------------------------------------
* These are the fundamental stack operations that add an element to
* the top of the stack and remove an element from the top of the stack.
* A call to StackPop on an empty stack or to StackPush on a full stack
* is an error. Make use of StackIsEmpty()/StackIsFull() (see below)
* to avoid these errors.
*/
/*
* Functions: StackIsEmpty, StackIsFull
* Usage: if (StackIsEmpty(&stack)) ...
* -----------------------------------
* These return a true value if the stack is empty
* or full (respectively).
*/
/*
Functions: GetTopEl
* This returns the top-most element of the stack
*/
stackElementT GetTopEl(stackT *stackP);
/*
function :PushString
This routine pushes a string in the Stack
*/
void PushString(stackT *stackP ,char* string);
if (newContents == NULL) {
fprintf(stderr, "Insufficient memory to initialize stack.\n");
exit(1); /* Exit, returning error code. */
}
stackP->contents = newContents;
stackP->maxSize = maxSize;
stackP->top = -1; /* I.e., empty */
}
stackP->contents = NULL;
stackP->maxSize = 0;
stackP->top = -1; /* I.e., empty */
}
if (StackIsFull(stackP)) {
fprintf(stderr, "\nCan't push element on stack: stack is full.\n");
exit(1); /* Exit, returning error code. */
}
stackP->contents[++stackP->top] = element;
if (StackIsEmpty(stackP)) {
fprintf(stderr, "\nCan't pop element from stack: stack is empty.\n");
exit(1); /* Exit, returning error code. */
}
return stackP->contents[stackP->top--];
}
stackElementT x;
x=StackPop(stackP);
StackPush(stackP,x);
return x;
/* Reset the non-terminal table: flag each of its 20 slots as unused. */
void Init_NonTerminalTable(){
    int slot=0;
    while(slot<20){
        NonTerminalList[slot].present=0;
        slot++;
    }
}
/* Reset the terminal table: flag each of its 20 slots as unused. */
void Init_TerminalTable(){
    int slot=0;
    while(slot<20){
        TerminalList[slot].present=0;
        slot++;
    }
}
}
/*
 * Collect the terminal symbols of the grammar.
 * Walks the right-hand side of every production (characters from index 2
 * onwards) and registers each character that is not an upper-case letter
 * as a terminal.  '$' is registered last.
 */
void populateTerminal(){
    int i,j;
    for(i=0;i<Productions;i++){
        /* test the index before reading, so a row without a terminator
         * is never read past its 20th character */
        for(j=2;j<20 && Grammar[i][j]!='\0';j++){
            if(! (Grammar[i][j] >= 'A' && Grammar[i][j] <='Z') )
                AddElementToTerminalList(Grammar[i][j]);
        }
    }
    AddElementToTerminalList('$');
}
/*
 * Register non-terminal `c` in NonTerminalList unless it is already there.
 * A new entry goes into the first unused slot and receives the next id
 * from nID.  When all 20 slots are in use the symbol is not added and a
 * message is written to stderr.
 */
void AddElementToNonTerminalList(char c){
    int i;
    /* test the index before reading the slot */
    for(i=0;i<20 && NonTerminalList[i].present==1;i++){
        if(NonTerminalList[i].name == c)
            return;
    }
    if(i==20){
        /* no free slot: writing NonTerminalList[20] would overrun the table */
        fprintf(stderr,"\nNon-terminal table is full; cannot add '%c'\n",c);
        return;
    }
    NonTerminalList[i].id = nID ++;
    NonTerminalList[i].name=c;
    NonTerminalList[i].present=1;
}
/*
 * Register the left-hand symbol of every non-empty Grammar row as a
 * non-terminal.  All 20 rows are visited; rows whose first character is
 * '\0' are skipped.
 */
void populateNonTerminal(){
    int row;
    for(row=0;row<20;row++){
        char lhs=Grammar[row][0];
        if(lhs == '\0')
            continue;
        AddElementToNonTerminalList(lhs);
    }
}
for(i=0;i<NONTERMINALS;i++){
if(NonTerminalList[i].name == c)
return NonTerminalList[i].id;
}
printf("\n Error occured in searching NonTerminals");
return 0;
}
void CreateRuleTable(){
int i,j,k;
for(i=0;i<Productions;i++){
for(j=0;j<20;j++)
RULES[i].Prod[j]='\0';
RULES[i].Left = SearchNonTerminals(Grammar[i][0]);
for(j=2,k=0;Grammar[i][j]!='\0';j++,k++){
RULES[i].Prod[k]=Grammar[i][j];
if(Grammar[i][j] >='A' && Grammar[i][j]<='Z')
RULES[i].Right[k]=SearchNonTerminals(Grammar[i][j]);
else
RULES[i].Right[k]=SearchTerminals(Grammar[i][j]);
}
RULES[i].Right[k] = 999;/*sentinel*/
}
void Init_First(){
int i,j;
for(i=0;i<NONTERMINALS + TERMINALS;i++){
FIRST_TABLE[i].hasNull = 0;
for(j=0;j<20;j++){
FIRST_TABLE[i].FIRST[j]='\0';
}
for(i=0;TerminalList[i].present==1;i++){
FIRST_TABLE[i].id = TerminalList[i].id;
FIRST_TABLE[i].FIRST[0]=TerminalList[i].name;
for(j=0;NonTerminalList[j].present==1;i++,j++){
FIRST_TABLE[i].id = NonTerminalList[j].id;
}
}
void Init_Follow(){
int i,j,k;
for(i=0;NonTerminalList[i].present==1;i++){
FOLLOW_TABLE[i].id = NonTerminalList[i].id;
FOLLOW_TABLE[i].FOLLOW[0]='$';
for(j=1;j<20;j++)
FOLLOW_TABLE[i].FOLLOW[j]='\0';
for(k=0;TerminalList[k].present==1;k++,i++){
FOLLOW_TABLE[i].id = TerminalList[k].id;
for(j=0;j<20;j++)
FOLLOW_TABLE[i].FOLLOW[j]='\0';
}
}
int i,j;
int p,q;
int flag=0;
for(i=0;FOLLOW_TABLE[i].id!=x;i++);
for(j=0;FIRST_TABLE[j].id!=y;j++);
for(q=0;FIRST_TABLE[j].FIRST[q]!='\0';q++){
int i,j;
int p,q;
int flag=0;
for(i=0;FOLLOW_TABLE[i].id!=x;i++);
for(j=0;FOLLOW_TABLE[j].id!=y;j++);
//printf("\nFOLLOW : %d,%d",x,y);
//printf("\nFOLLOW[%d]: %s",x,FOLLOW_TABLE[i].FOLLOW);
//printf("\nFOLLOW[%d]: %s",y,FOLLOW_TABLE[j].FOLLOW);
for(q=0;FOLLOW_TABLE[j].FOLLOW[q]!='\0';q++){
if(flag==0){
FOLLOW_TABLE[i].FOLLOW[p]=FOLLOW_TABLE[j].FOLLOW[q];
}
}
}
for(i=0;FIRST_TABLE[i].id!=x;i++);
for(j=0;FIRST_TABLE[j].id!=y;j++);
for(q=0;FIRST_TABLE[j].FIRST[q]!='\0';q++){
if(flag==0){
FIRST_TABLE[i].FIRST[p]=FIRST_TABLE[j].FIRST[q];
if(FIRST_TABLE[i].FIRST[p] == NULL_VAL)
FIRST_TABLE[i].hasNull=1;
}
}
for(q=0;FIRST_TABLE[j].FIRST[q]!='\0';q++){
if(flag==0){
AUX_FIRST[p]=FIRST_TABLE[j].FIRST[q];
AUX_FIRST[p+1]='\0';
}
}
}
char SearchTerminalNames(int id){
int i=0;
for(i=0;i<TERMINALS;i++){
if(TerminalList[i].id == id)
return TerminalList[i].name;
}
return '\0';
if(NonTerminalList[i].id == id)
return NonTerminalList[i].name;
}
return '\0';
}
/*
 * Count the symbols stored across all FIRST sets, i.e. the summed lengths
 * of the FIRST strings of the first NONTERMINALS + TERMINALS table entries.
 */
int Total_Firsts(){
    int total=0;
    int row;
    for(row=0;row<NONTERMINALS +TERMINALS;row++){
        int pos=0;
        while(FIRST_TABLE[row].FIRST[pos]!='\0'){
            total++;
            pos++;
        }
    }
    return total;
}
/*
 * Count the symbols stored across all FOLLOW sets, i.e. the summed lengths
 * of the FOLLOW strings of the first NONTERMINALS + TERMINALS table entries.
 */
int Total_Follows(){
    int total=0;
    int row;
    for(row=0;row<NONTERMINALS +TERMINALS;row++){
        int pos=0;
        while(FOLLOW_TABLE[row].FOLLOW[pos]!='\0'){
            total++;
            pos++;
        }
    }
    return total;
}
/*
 * Count how many of the first NONTERMINALS + TERMINALS entries of
 * FIRST_TABLE have their hasNull flag set to 1.
 */
int Total_Nulls(){
    int nullable=0;
    int row=0;
    while(row<NONTERMINALS +TERMINALS){
        nullable += (FIRST_TABLE[row].hasNull == 1);
        row++;
    }
    return nullable;
}
void ComputeFirstAndFollow(){
int i,j,k;
int m,n;
int x,y;
int flag1,allnull;//flags
int id;
int count_First,count_Follow,count_Null;
for(i=0;i<TERMINALS;i++){
if(TerminalList[i].name == NULL_VAL)
FIRST_TABLE[i].hasNull=1;
}
do{
count_First=Total_Firsts();
count_Follow=Total_Follows();
count_Null=Total_Nulls();
for(i=0;i<Productions;i++){
flag1=0;
for(j=0;RULES[i].Right[j]!=999 && flag1==0;j++){
k=0;
while(FIRST_TABLE[k].id!=RULES[i].Right[j]){
k++;
}
if(FIRST_TABLE[k].hasNull==0)
flag1=1;
}//end for j________
if(flag1==0){
id=RULES[i].Left;
k=0;
while(FIRST_TABLE[k].id!=RULES[i].Left){
//printf("\n %d",k);
k++;
}
FIRST_TABLE[k].hasNull=1; //make it nullable
}//end if_____
for(j=0;RULES[i].Right[j]!=999 ;j++){
allnull=1;
for(k=0;k<=j-1 && allnull==1;k++){
m=0;
while(FIRST_TABLE[m].id!=RULES[i].Right[k])
m++;
if(FIRST_TABLE[m].hasNull==0)
allnull=0;
}//end for k
if(allnull==1){
if(RULES[i].Right[j]<=nID)
Union_FollowFollow(RULES[i].Right[j],RULES[i].Left);
}
for(n=j+1; RULES[i].Right[n]!=999;n++){
allnull=1;
for(k=j+1;k<=n-2 && allnull==1 ; k++){
//printf("\n k3:%d",k);
m=0;
while(FIRST_TABLE[m].id!=RULES[i].Right[k])
m++;
if(FIRST_TABLE[m].hasNull==0)
allnull=0;
}
if(allnull==1){
if(RULES[i].Right[j]<=nID)
Union_FirstFollow(RULES[i].Right[j],RULES[i].Right[n]);
}
}//end for n___
}// end for j___
}//end for i ___
void CreateParsingTable(){
int i,j,k,m,n,t;
int id;
int flag=0,flag2=0;
for(i=0;i<Productions;i++){
j=0;
flag=0;
do{
for(m=0;FIRST_TABLE[m].id!=RULES[i].Right[j];m++);
Union_AuxFirstFirst(FIRST_TABLE[m].id);
if(FIRST_TABLE[m].hasNull ==1 )
j++;
else
flag=1;
if(RULES[i].Right[j]==999)
flag=1;
}while(flag!=1);
flag2=0;
for(k=0;AUX_FIRST[k]!='\0';k++){
if(AUX_FIRST[k]!='#'){
strcpy(ParsingTable[RULES[i].Left-
NONTERMINALS_BASE][SearchTerminals(AUX_FIRST[k])- TERMINALS_BASE],RULES[i].Prod);
printf( "\n%d
,%d,%s",RULES[i].Left,SearchTerminals(AUX_FIRST[k]),RULES[i].Prod);
}
else
flag2=1;
for(t=0;t<20;t++)
AUX_FIRST[t]='\0';
if(flag2==1){
for(n=0;FOLLOW_TABLE[n].id!=RULES[i].Left;n++);
for(m=0;FOLLOW_TABLE[n].FOLLOW[m]!='\0';m++){
strcpy(ParsingTable[RULES[i].Left-
NONTERMINALS_BASE][SearchTerminals(FOLLOW_TABLE[n].FOLLOW[m])-
TERMINALS_BASE],"#");
}//end if___________
int main(){
int i,j;
char string[20];int length;
stackT stack;
char parse_tree[100];
int parse_tree_index=0;
int index=0;
char buf,temp;
Init_NonTerminalTable();
Init_TerminalTable();
populateNonTerminal();
populateTerminal();
CreateRuleTable();
Init_First();
Init_Follow();
ComputeFirstAndFollow();
CreateParsingTable();
for(i=0,j=0;i<20;i++,j++){
if(TerminalList[i].present ==1 /*&& TerminalList[i].name!=NULL_VAL*/){
tokens[j]=TerminalList[i].name;
token_ids[j]=TerminalList[i].id - TERMINALS_BASE;
printf("\n %d :%d,%c",j,token_ids[j],tokens[j]);
}
}
printf("Enter the input string :");
scanf("%s",string);
length =strlen(string);
for(i=0;i<length;i++){
if(isdigit(string[i])){
string[i]='i'; //converting numbers to id
}
}
printf("\n\n%s\n\n",string);
StackInit(&stack,100);
StackPush(&stack,'$');
StackPush(&stack,START_SYMBOL); //start symbol
for(i=0;i<100;i++)
{
parse_tree[i]='\0';
}
parse_tree[parse_tree_index]=START_SYMBOL;
while(index<length){
buf=string[index];
//printf("\ntoken : %c\n",buf);
if(buf=='$'){printf("parsing completed successfully");break;}
if(GetTopEl(&stack)==tokens[i]){
StackPop(&stack);
index++;
}
else{
//access the parsing table
temp=StackPop(&stack);
if(ParsingTable[SearchNonTerminals(temp)-
NONTERMINALS_BASE][token_ids[i]][0]=='\0'){
printf("parsing error");
exit(0);
}
PushString(&stack,ParsingTable[SearchNonTerminals(temp)-
NONTERMINALS_BASE][token_ids[i]]);
printf("\n%c->%s\n",temp,ParsingTable[SearchNonTerminals(temp)-
NONTERMINALS_BASE][token_ids[i]]);
}//end else
}//end while
return 0;