Compiler Design Lab

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 27

GREATER NOIDA INSTITUTE OF TECHNOLOGY

NAME Anup Yadav


UNIVERSITY ROLL NO 2201320109005
COLLEGE ID 2200017
SECTION ‘A’ 3rd Year
SUBJECT COMPILER DESIGN LAB
SUBJECT CODE (KCS552)
SUBMITTED TO Mohd. JAWED KHAN
COLLEGE CODE 132

DATE: ……………………………..

TEACHER SIGNATURE: …………………………….

Department of Computer Science and Engineering


G.N.I.O.T. , Greater Noida
(Affiliated to UPTU LUCKNOW)

1
INDEX

S.No OBJECT DATE SIGNATURE REMARKS

1.
C Program to count the
number of tokens

2.
C program to find the
nature of tokens
3.
C program to identify
whether a given line is
comment or not.

4.
C program to check
whether the given string is
accepted by the DFA or
not?

5.
C Program to eliminate
the left recursion in
compiler design

6.
C Program to eliminate
the left factoring in
compiler design

7.
C program to find
whether the given
grammar is LL(1) or not

2
PROGRAM 01
C Program to calculate the number of tokens

Objective: To calculate tokens generated by the lexical analyser and to find its
validity and type as well.

Theory:
What is a token?
A lexical token is a sequence of characters that can be treated as a unit in
the grammar of the programming languages.

Example of tokens:
Type token (id, number, real, . . . )
Punctuation tokens (IF, void, return, . . . )
Alphabetic tokens (keywords)
Keywords; Examples-for, while, if etc.
Identifier; Examples-Variable name, function name, etc.
Operators; Examples '+', '++', '-' etc.
Separators; Examples ',' ';' etc

PROGRAM

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.


/*
 * Token separators recognised by the scanner: the space character, the
 * operator characters + - * / > < =, the punctuation , and ; and the
 * bracket pairs () [] {}. Every other character, including '\0', is
 * reported as a non-delimiter.
 */
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR.


/* Single-character operators handled here: + - * / > < = */
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.


/*
 * A valid identifier is non-empty, starts with a letter or underscore and
 * continues with letters, digits or underscores only. Every character is
 * checked, not just the first one, so strings such as "a.b" are rejected.
 * A NULL pointer is treated as "not an identifier".
 */
bool validIdentifier(char *str)
{
    if (!str || str[0] == '\0')
        return false;

    for (int i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        bool letter = (c >= 'a' && c <= 'z') ||
                      (c >= 'A' && c <= 'Z') || c == '_';
        bool digit = (c >= '0' && c <= '9');

        /* digits are allowed everywhere except in the first position */
        if (!letter && !(digit && i > 0))
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD.


/*
 * Compares str (case-sensitively) against the 32 keywords of ISO C90.
 * Returns false for NULL.
 */
bool isKeyword(char *str)
{
    static const char *const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (!str)
        return false;

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.


/*
 * Accepts an optional leading '-' followed by one or more decimal digits.
 * Returns false for NULL, the empty string, a lone "-", and any string
 * containing a non-digit after the optional sign.
 */
bool isInteger(char *str)
{
    if (!str)
        return false;

    int i = (str[0] == '-') ? 1 : 0;

    /* at least one digit must follow the optional sign */
    if (str[i] == '\0')
        return false;

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return false;
    }
    return true;
}

// Returns 'true' if the string is a REAL NUMBER.


/*
 * Accepts an optional leading '-' followed by decimal digits containing
 * exactly one '.', with at least one digit overall ("3.14", ".5", "5.").
 * Returns false for NULL, "", ".", plain integers, and strings with more
 * than one decimal point or any other character.
 */
bool isRealNumber(char *str)
{
    if (!str)
        return false;

    int digits = 0;
    int dots = 0;

    for (int i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.')
            dots++;
        else if (str[i] >= '0' && str[i] <= '9')
            digits++;
        else
            return false;
    }
    return dots == 1 && digits > 0;
}

// Extracts the SUBSTRING.


/*
 * Returns a newly allocated, NUL-terminated copy of str[left..right]
 * (both indices inclusive). The caller owns the result and must free() it.
 *
 * An empty range (right < left) yields an empty string. NULL is returned
 * when str is NULL, left is negative, or the allocation fails.
 */
char *subString(char *str, int left, int right)
{
    if (!str || left < 0)
        return NULL;

    size_t len = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = malloc(len + 1);

    if (!subStr)
        return NULL;

    memcpy(subStr, str + left, len);
    subStr[len] = '\0';
    return subStr;
}

// Parsing the input STRING.


/*
 * Splits str into tokens, prints the class of each one and finally the
 * total token count.
 *
 * Delimiters separate lexemes; a delimiter that is also an operator is
 * printed and counted as a token itself, other delimiters are skipped.
 * Each lexeme is classified in this order: keyword, integer, real number,
 * valid identifier, otherwise "not a valid identifier".
 *
 * The scan never reads past the terminating NUL and every extracted
 * lexeme is freed after it has been reported.
 */
void parse(char *str)
{
    if (!str)
        return;

    int len = strlen(str);
    int count = 0;
    int pos = 0;

    while (pos < len) {
        if (isDelimiter(str[pos]) == true) {
            if (isOperator(str[pos]) == true) {
                printf("'%c' IS AN OPERATOR\n", str[pos]);
                count++;
            }
            pos++;
            continue;
        }

        /* collect the run of non-delimiter characters starting here */
        int start = pos;
        while (pos < len && isDelimiter(str[pos]) == false)
            pos++;

        char *subStr = subString(str, start, pos - 1);
        if (!subStr) {
            fprintf(stderr, "parse: out of memory\n");
            break;
        }

        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        count++;

        free(subStr);
    }

    printf("Total number of token is %d\n", count);
}

// DRIVER FUNCTION
int main(void)
{
    /*
     * Sample statement to tokenise; the buffer holds at most 100 bytes.
     * NOTE(review): the quotes around Hello world are typographic
     * (non-ASCII) characters, probably introduced by a word processor --
     * confirm the intended input.
     */
    char str[100] = "int a = 3; printf(‘Hello world’); ";

    parse(str);
    return 0;
}

OUTPUT

7
PROGRAM 02
C Program to calculate the number of tokens and determine its validity and
type:-

Objective: To calculate tokens generated by the lexical analyser and to find its
validity and type as well.

Theory:
What is a token?
A lexical token is a sequence of characters that can be treated as a unit in
the grammar of the programming languages.

Example of tokens:
Type token (id, number, real, . . . )
Punctuation tokens (IF, void, return, . . . )
Alphabetic tokens (keywords)
Keywords; Examples-for, while, if etc.
Identifier; Examples-Variable name, function name, etc.
Operators; Examples '+', '++', '-' etc.
Separators; Examples ',' ';' etc

PROGRAM

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.


/*
 * Token separators recognised by the scanner: the space character, the
 * operator characters + - * / > < =, the punctuation , and ; and the
 * bracket pairs () [] {}. Every other character, including '\0', is
 * reported as a non-delimiter.
 */
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR.


/* Single-character operators handled here: + - * / > < = */
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.


/*
 * A valid identifier is non-empty, starts with a letter or underscore and
 * continues with letters, digits or underscores only. Every character is
 * checked, not just the first one, so strings such as "a.b" are rejected.
 * A NULL pointer is treated as "not an identifier".
 */
bool validIdentifier(char *str)
{
    if (!str || str[0] == '\0')
        return false;

    for (int i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        bool letter = (c >= 'a' && c <= 'z') ||
                      (c >= 'A' && c <= 'Z') || c == '_';
        bool digit = (c >= '0' && c <= '9');

        /* digits are allowed everywhere except in the first position */
        if (!letter && !(digit && i > 0))
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD.


/*
 * Compares str (case-sensitively) against the 32 keywords of ISO C90.
 * Returns false for NULL.
 */
bool isKeyword(char *str)
{
    static const char *const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (!str)
        return false;

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.


/*
 * Accepts an optional leading '-' followed by one or more decimal digits.
 * Returns false for NULL, the empty string, a lone "-", and any string
 * containing a non-digit after the optional sign.
 */
bool isInteger(char *str)
{
    if (!str)
        return false;

    int i = (str[0] == '-') ? 1 : 0;

    /* at least one digit must follow the optional sign */
    if (str[i] == '\0')
        return false;

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return false;
    }
    return true;
}

// Returns 'true' if the string is a REAL NUMBER.


/*
 * Accepts an optional leading '-' followed by decimal digits containing
 * exactly one '.', with at least one digit overall ("3.14", ".5", "5.").
 * Returns false for NULL, "", ".", plain integers, and strings with more
 * than one decimal point or any other character.
 */
bool isRealNumber(char *str)
{
    if (!str)
        return false;

    int digits = 0;
    int dots = 0;

    for (int i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.')
            dots++;
        else if (str[i] >= '0' && str[i] <= '9')
            digits++;
        else
            return false;
    }
    return dots == 1 && digits > 0;
}

// Extracts the SUBSTRING.


/*
 * Returns a newly allocated, NUL-terminated copy of str[left..right]
 * (both indices inclusive). The caller owns the result and must free() it.
 *
 * An empty range (right < left) yields an empty string. NULL is returned
 * when str is NULL, left is negative, or the allocation fails.
 */
char *subString(char *str, int left, int right)
{
    if (!str || left < 0)
        return NULL;

    size_t len = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = malloc(len + 1);

    if (!subStr)
        return NULL;

    memcpy(subStr, str + left, len);
    subStr[len] = '\0';
    return subStr;
}

// Parsing the input STRING.


/*
 * Splits str into tokens, prints the class of each one and finally the
 * total token count.
 *
 * Delimiters separate lexemes; a delimiter that is also an operator is
 * printed and counted as a token itself, other delimiters are skipped.
 * Each lexeme is classified in this order: keyword, integer, real number,
 * valid identifier, otherwise "not a valid identifier".
 *
 * The scan never reads past the terminating NUL and every extracted
 * lexeme is freed after it has been reported.
 */
void parse(char *str)
{
    if (!str)
        return;

    int len = strlen(str);
    int count = 0;
    int pos = 0;

    while (pos < len) {
        if (isDelimiter(str[pos]) == true) {
            if (isOperator(str[pos]) == true) {
                printf("'%c' IS AN OPERATOR\n", str[pos]);
                count++;
            }
            pos++;
            continue;
        }

        /* collect the run of non-delimiter characters starting here */
        int start = pos;
        while (pos < len && isDelimiter(str[pos]) == false)
            pos++;

        char *subStr = subString(str, start, pos - 1);
        if (!subStr) {
            fprintf(stderr, "parse: out of memory\n");
            break;
        }

        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        count++;

        free(subStr);
    }

    printf("Total number of token is %d\n", count);
}

// DRIVER FUNCTION
int main(void)
{
    /*
     * Sample statement to tokenise; the buffer holds at most 100 bytes.
     * NOTE(review): the quotes around Hello world are typographic
     * (non-ASCII) characters, probably introduced by a word processor --
     * confirm the intended input.
     */
    char str[100] = "int a = 3; printf(‘Hello world’); ";

    parse(str);
    return 0;
}

OUTPUT

12
Program 03
C program to identify whether a given line is comment or not.

Objective: To find whether a given line is a comment line or not.


Theory:-
Read the input string.
Check whether the string is starting with ‘/’ and check next character ‘/’ or ‘*’
If condition satisfies print comment
Else not a comment.

Program:-
#include<stdio.h>
/*
 * Returns 1 when line is a C comment, 0 otherwise.
 *
 * A line counts as a comment when it starts with "//", or when it starts
 * with the opening marker (slash, star) and the closing marker (star,
 * slash) appears somewhere from index 2 onwards. The scan stops at the
 * terminating NUL, so it never looks past the text that was entered.
 */
static int isCommentLine(const char *line)
{
    if (line[0] != '/')
        return 0;
    if (line[1] == '/')
        return 1;
    if (line[1] != '*')
        return 0;

    for (int i = 2; line[i] != '\0'; i++) {
        if (line[i] == '*' && line[i + 1] == '/')
            return 1;
    }
    return 0;
}

/*
 * Reads one line from standard input and reports whether it is a comment.
 * Input is read with fgets() so it cannot overflow the buffer.
 */
int main(void)
{
    char com[256];

    printf("\n Enter Text : ");

    if (fgets(com, sizeof com, stdin) == NULL)
        com[0] = '\0';

    /* fgets() keeps the trailing newline; drop it before classifying */
    for (int i = 0; com[i] != '\0'; i++) {
        if (com[i] == '\n') {
            com[i] = '\0';
            break;
        }
    }

    if (isCommentLine(com))
        printf("\n It is a Comment.");
    else
        printf("\n It is Not a Comment.");

    return 0;
}
Output:-

14
Program 04
C program to check whether the given string is accepted by the DFA or
not?

Objective:- Design a deterministic finite automaton (DFA) that accepts


the language L = (aa)*b+

Theory:-

There are 3 steps involved in deciding whether a string is accepted:


Construct an FA for (aa)*, i.e. an even number of a’s (possibly zero).
Construct an FA for b+, i.e. one or more b’s.
Concatenate the two FAs to make a single DFA.
Any other combination results in the rejection of the input string.

Description:
The given DFA has the following states. Ending in state 3 leads to the acceptance
of the string, whereas ending in state 0, 1, 2 or 4 leads to its rejection.
DFA State Transition Diagram:

Program:-

#include <stdio.h>
#include <string.h>

// Current state of the DFA, updated by the state functions as each input
// character is consumed: 0 to 4 are the states Q0 to Q4, and -1 marks an
// invalid input symbol.
int dfa = 0;

// This function handles the starting state (Q0) of the DFA.

15
/* Transition out of Q0: 'a' goes to Q1, 'b' goes to Q3. */
void start(char c)
{
    switch (c) {
    case 'a':
        dfa = 1;
        break;
    case 'b':
        dfa = 3;
        break;
    default:
        dfa = -1;   /* -1 flags a symbol outside the alphabet {a, b} */
        break;
    }
}

// This function is for the first state (Q1) of DFA


/* Transition out of Q1: 'a' goes to Q2, 'b' goes to Q4, anything else is invalid. */
void state1(char c)
{
    switch (c) {
    case 'a':
        dfa = 2;
        break;
    case 'b':
        dfa = 4;
        break;
    default:
        dfa = -1;
        break;
    }
}

// This function is for the second state (Q2) of DFA


/* Transition out of Q2: 'a' goes back to Q1, 'b' goes to Q3, anything else is invalid. */
void state2(char c)
{
    switch (c) {
    case 'a':
        dfa = 1;
        break;
    case 'b':
        dfa = 3;
        break;
    default:
        dfa = -1;
        break;
    }
}

// This function is for the third state (Q3)of DFA


/* Transition out of Q3: 'b' stays in Q3, 'a' goes to Q4, anything else is invalid. */
void state3(char c)
{
    switch (c) {
    case 'b':
        dfa = 3;
        break;
    case 'a':
        dfa = 4;
        break;
    default:
        dfa = -1;
        break;
    }
}

// This function is for the fourth state (Q4) of DFA


/* Q4: every further symbol, whatever it is, sets the state to -1. */
void state4(char c)
{
    (void)c;    /* the input symbol is deliberately ignored */
    dfa = -1;
}

/*
 * Runs the DFA over str and returns 1 when it ends in state 3, 0 otherwise.
 *
 * The shared state variable dfa is reset to 0 on entry, so the function
 * can be called repeatedly; without the reset a second call would resume
 * from wherever the previous string left the automaton.
 */
int isAccepted(char str[])
{
    int i, len = strlen(str);

    dfa = 0;

    for (i = 0; i < len; i++) {
        switch (dfa) {
        case 0:
            start(str[i]);
            break;
        case 1:
            state1(str[i]);
            break;
        case 2:
            state2(str[i]);
            break;
        case 3:
            state3(str[i]);
            break;
        case 4:
            state4(str[i]);
            break;
        default:
            /* dfa is -1: an earlier symbol was rejected */
            return 0;
        }
    }
    return dfa == 3;
}

// driver code
int main(void)
{
    /* sample input: six a's followed by four b's */
    char str[] = "aaaaaabbbb";

    if (isAccepted(str))
        printf("ACCEPTED");
    else
        printf("NOT ACCEPTED");
    return 0;
}

Output:-

18
PROGRAM 05
C Program to eliminate the left recursion in compiler design

Objective: To remove the left recursion from a given grammar in compiler design
Theory:
Left recursion is eliminated by converting the grammar into a right recursive
grammar.
If we have the left-recursive pair of productions-
A → Aα / β
(Left Recursive Grammar)
where β does not begin with an A.
Then, we can eliminate left recursion by replacing the pair of productions
with-
A → βA’
A’ → αA’ / ∈
(Right Recursive Grammar)
This right recursive grammar functions same as left recursive grammar.

PROGRAM
#include<stdio.h>
#include<string.h>
#define SIZE 10
/*
 * Reads up to 10 productions of the form A->Aa|b and, for each one that is
 * immediately left recursive, prints the equivalent right-recursive pair
 *     A->bA'   and   A'->aA'|E
 * using the single character after the left-recursive non-terminal as
 * alpha and the single character after '|' as beta.
 *
 * Input is validated: the production count must be 1..10, each production
 * is read with a width limit so it cannot overflow its row, and rows are
 * zero-initialised so short input is never read as garbage.
 */
int main(void)
{
    enum { MAX_PRODUCTIONS = 10, RHS_START = 3 /* first char after "->" */ };
    char production[MAX_PRODUCTIONS][SIZE] = {{0}};
    int num;

    printf("Enter Number of Production : ");
    if (scanf("%d", &num) != 1 || num < 1 || num > MAX_PRODUCTIONS) {
        printf("Invalid number of productions (expected 1 to %d).\n",
               MAX_PRODUCTIONS);
        return 1;
    }

    printf("Enter the grammar as E->E-A :\n");
    for (int i = 0; i < num; i++) {
        /* field width 9 leaves room for the NUL in a SIZE (10) byte row */
        if (scanf("%9s", production[i]) != 1) {
            printf("Failed to read production %d.\n", i + 1);
            return 1;
        }
    }

    for (int i = 0; i < num; i++) {
        char non_terminal = production[i][0];
        int index = RHS_START;

        printf("\nGRAMMAR : : : %s", production[i]);

        if (strlen(production[i]) <= (size_t)RHS_START ||
            non_terminal != production[i][RHS_START]) {
            printf(" is not left recursive.\n");
            continue;
        }

        char alpha = production[i][RHS_START + 1];
        printf(" is left recursive.\n");

        /* advance to the '|' that introduces the non-recursive alternative */
        while (production[i][index] != 0 && production[i][index] != '|')
            index++;

        if (production[i][index] != 0 && production[i][index + 1] != 0) {
            char beta = production[i][index + 1];

            printf("Grammar without left recursion:\n");
            printf("%c->%c%c\'", non_terminal, beta, non_terminal);
            printf("\n%c\'->%c%c\'|E\n", non_terminal, alpha, non_terminal);
        } else {
            printf(" can't be reduced\n");
        }
    }
    return 0;
}

OUTPUT

20
PROGRAM 06
C Program to eliminate the left factoring in compiler design

Objective: To remove the left factoring from a given grammar in compiler design
Theory:
In LL(1) parsing, even if a context-free grammar is unambiguous and free of
left recursion, it may still not be LL(1). One reason is that two alternatives
of a production share a common prefix; left factoring removes that prefix.

What is Left Factoring ?

Consider a part of regular grammar,

E->aE+bcD
E->aE+cBD

Here, grammar is non-left recursive, and unambiguous but there is left factoring.

How to resolve ?

E=aB | aC | aD | ............
then,

E=aX
X=B | C | D |...........

So, the above grammar will be as :

E=aE+X
X=bcD | cBD

PROGRAM
#include<stdio.h>
#include<string.h>
/*
 * Left-factors a production A->alt1|alt2 read from standard input.
 *
 * The longest common prefix of the two alternatives is factored out:
 *     A->prefix X
 *     X->rest1|rest2
 * Everything after the first '|' is treated as the second alternative.
 *
 * The line is read with fgets(), so it cannot overflow the buffer; a
 * production without '|' is reported as an error; and prefix matching
 * stops at the first differing character.
 */
int main(void)
{
    char gram[20], part1[20], part2[20], modifiedGram[20], newGram[20];

    printf("Enter Production : A->");
    if (fgets(gram, sizeof gram, stdin) == NULL)
        return 1;
    gram[strcspn(gram, "\n")] = '\0';   /* drop the newline kept by fgets */

    char *bar = strchr(gram, '|');
    if (bar == NULL) {
        printf("\n Production must contain two alternatives separated by '|'\n");
        return 1;
    }

    /* split around the first '|' */
    size_t len1 = (size_t)(bar - gram);
    memcpy(part1, gram, len1);
    part1[len1] = '\0';
    snprintf(part2, sizeof part2, "%s", bar + 1);

    /* length of the longest common prefix of the two alternatives */
    size_t pos = 0;
    while (part1[pos] != '\0' && part1[pos] == part2[pos])
        pos++;

    /* A -> <common prefix>X ; pos is at most 18, so this fits in 20 bytes */
    memcpy(modifiedGram, part1, pos);
    modifiedGram[pos] = 'X';
    modifiedGram[pos + 1] = '\0';

    /* X -> <rest of alt1>|<rest of alt2> ; never longer than the input */
    snprintf(newGram, sizeof newGram, "%s|%s", part1 + pos, part2 + pos);

    printf("\n A->%s", modifiedGram);
    printf("\n X->%s\n", newGram);
    return 0;
}

OUTPUT

22
Program 07
C program to find whether the given grammar is LL(1) or not ?

Objective:- Design a Predictive Parser for the following grammar


G: { E-> TE’ , E’ -> +TE’ | 0, T-> FT’ , T’-> *FT’|0 , F-> (E) | id }

Given the parse Table:

ALGORITHM / PROCEDURE :

Input: string w$, Predictive Parsing table M


Output: A Left Most Derivation of the input string if it is valid , error otherwise.

Step1: Start
Step2: Declare a character array w[10] and Z as an array
Step3: Enter the string with $ at the end
Step4: if (A(w[z]) then increment z and check for (B(w[z])) and if satisfies
increment z and check for ‘d’ if d is present then increment and
check for (D(w[z]))
Step5: if step 4 is satisfied then the string is accepted
Else string is not
Step 6: Exit

Program:-

// *** IMPLEMENTATION OF PREDICTIVE / NON-RECURSIVE DESCENT PARSING *** //
#include<stdio.h>
#include<conio.h>
#include<ctype.h>
char ch;

/* Token codes produced by lexer() and compared against by the parser. */
#define id 0      /* identifier: a letter followed by letters/digits */
#define CONST 1   /* run of decimal digits */
#define mulop 2   /* '*' */
#define addop 3   /* '+' */
#define op 4      /* '(' */
#define cp 5      /* ')' -- the code F() expects after a parenthesised expression */
#define err 6     /* any character the lexer does not recognise */
#define col 7     /* end of the input line */
#define size 50   /* capacity of lexbuff in bytes */

int token;            /* most recent token code returned by lexer() */
char lexbuff[size];   /* the input line being parsed */
int lookahead = 0;    /* index of the next unread character in lexbuff */

/* Prototypes: every routine below is called before it is defined. */
int parser(void);
int E(void);
int T(void);
int EPRIME(void);
int TPRIME(void);
int F(void);
int lexer(void);
/*
 * Reads one line into lexbuff and hands it to parser().
 * The line is read with fgets(), which is bounded by the buffer size and
 * keeps the trailing newline.
 */
int main(void)
{
    clrscr();   /* NOTE(review): non-standard call, presumably from <conio.h> (Turbo C era) -- confirm */
    printf(" Enter the string :");
    if (fgets(lexbuff, sizeof lexbuff, stdin) == NULL)
        lexbuff[0] = '\0';   /* no input: parse an empty line */
    parser();
    return 0;
}
parser()
{
if(E())
printf("valid string");
else
printf("invalid string");
getch();
return 0;
}
/* E -> T E' : returns 1 when both parts match, 0 otherwise. */
int E(void)
{
    /* && short-circuits, so EPRIME() runs only after T() has succeeded */
    return (T() && EPRIME()) ? 1 : 0;
}
/* T -> F T' : returns 1 when both parts match, 0 otherwise. */
int T(void)
{
    /* && short-circuits, so TPRIME() runs only after F() has succeeded */
    return (F() && TPRIME()) ? 1 : 0;
}
/*
 * E' -> + T E' | epsilon
 * Returns 1 on a match (including the empty alternative), 0 on failure.
 */
int EPRIME(void)
{
    token = lexer();
    if (token != addop)
        return 1;           /* epsilon: nothing to consume */

    lookahead++;            /* consume the '+' */
    return (T() && EPRIME()) ? 1 : 0;
}
/*
 * T' -> * F T' | epsilon
 * Returns 1 on a match (including the empty alternative), 0 on failure.
 */
int TPRIME(void)
{
    token = lexer();
    if (token != mulop)
        return 1;           /* epsilon: nothing to consume */

    lookahead++;            /* consume the '*' */
    return (F() && TPRIME()) ? 1 : 0;
}
F()
{
token=lexer();
if(token==id)
return 1;
else
{
if(token==4)
{
if(E())

25
{
if(token==5)
return 1;
else
return 0;
}
else
return 0;
}
else
return 0;
}
}
lexer()
{
if(lexbuff[lookahead]!='\n')
{
while(lexbuff[lookahead]=='\t')
lookahead++;
if(isalpha(lexbuff[lookahead]))
{
while(isalnum(lexbuff[lookahead]))
lookahead++;
return(id);
}
else
{
if(isdigit(lexbuff[lookahead]))
{
while(isdigit(lexbuff[lookahead]))
lookahead++;
return CONST;
}
else
{
if(lexbuff[lookahead]=='+')
{
return(addop);
}
else
{
if(lexbuff[lookahead]=='*')
{
return(mulop);
}
else
{
if(lexbuff[lookahead]=='(')
{

26
lookahead++;
return(op);
}
else
{
if(lexbuff[lookahead]==')')
{
return(op);
}
else
{
return(err);
}
}
}
}
}
}
}
else
return (col);
}

Output:-

27

You might also like