SPCC Manual (FINAL)

Experiment No – 1
AIM: To study and implement 2 Pass Assembler.
THEORY:
1) ASSEMBLER - An assembler program creates object code by translating combinations of
mnemonics and syntax for operations and addressing modes into their numerical equivalents. This
representation typically includes an operation code ("opcode") as well as other control bits.
Most assemblers also include macro facilities for performing textual substitution
– e.g., to generate common short sequences of instructions as inline, instead of called subroutines.
2) ASSEMBLY LEVEL LANGUAGE: An assembly or assembler language, often abbreviated asm, is a
low-level programming language for a computer, or other programmable device, in which there is a
very strong (generally one-to-one) correspondence between the language and the architecture's
machine code instructions.
3) Number of Passes:
There are two types of assemblers based on how many passes through the source are needed to
produce the executable program.
a) One-pass assemblers go through the source code once. Any symbol used before it is defined
will require "errata" at the end of the object code (or, at least, no earlier than the point where
the symbol is defined) telling the linker or the loader to "go back" and overwrite a placeholder
which had been left where the as yet undefined symbol was used.
b) Multi-pass assemblers create a table with all symbols and their values in the first passes, then
use the table in later passes to generate code.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
/* Location counter: address of the next byte to be assembled. */
int LC = 0;
/* Operands of the USNG directive as recorded by pass 1. */
int b_reg[2];
/*
 * One line of the assembly source program.
 *
 * Every field is a NUL-terminated string.  The arrays are sized so that the
 * longest values used in prog[] ("START"/"LTROG" as mnemonic, "DATA(4)" as
 * second operand) still leave room for the terminator; with the previous
 * sizes those strings were stored unterminated and strcmp()/strlen() on them
 * read past the end of the field.
 */
struct Instruction
{
    char label[8];  /* optional label, "" when the line has none */
    char nemo[8];   /* mnemonic or assembler directive */
    char op1[8];    /* first operand, "" when absent */
    char op2[8];    /* second operand, "" when absent */
};
/*
 * Machine-Operation Table (MOT) entry.
 *
 * type is widened to 3 bytes so that the two-letter format codes ("RR",
 * "RX") are stored with their terminating NUL.
 */
struct M
{
    char nemo[5];   /* instruction mnemonic */
    int bytes;      /* instruction length in bytes */
    char type[3];   /* instruction format code, e.g. "RR" or "RX" */
};
/* Pseudo-Operation Table (POT) entry: only the directive's mnemonic. */
struct P {
    char nemo[7];   /* directive name, NUL-terminated */
};
/* Symbol-table entry; pass 1 fills one in for every labelled source line. */
struct symbol_table {
    char name[5];    /* label text */
    int rel_add;     /* value of the location counter where the label was seen */
    int bytes;       /* length attribute (pass 1 stores 4) */
    char add_type;   /* address-type flag set by pass 1 */
};

/* The symbol table itself: room for four labels. */
struct symbol_table st[4];
/*
 * Literal-table entry: the literal text without its leading '='
 * (for example "F10").  The member declaration was missing its ';'.
 */
struct lit_table {
    char literal[4];
};

/* Literals collected by pass 1, in order of first appearance. */
struct lit_table lt[3];
/*
 * One record of assembler output.  Pass 1 writes ps1[], pass 2 reads ps1[]
 * and writes ps2[].  The last member declaration was missing its ';'.
 */
struct ass_output
{
    int r_add;      /* relative address of the record */
    char nemo[5];   /* mnemonic, a literal's text, or "END" */
    char op1[5];    /* first operand */
    char op2[10];   /* second operand */
};

/* Output of pass 1 and of pass 2 respectively. */
struct ass_output ps1[20], ps2[20];
/*
 * The sample source program that pass1()/pass2() assemble.
 * Each row is { label, mnemonic, operand 1, operand 2 }; "" marks an absent
 * field.  The directive spellings "USNG" and "LTROG" are the ones POT[] and
 * pass1() compare against, so they must stay in step with those.
 */
struct Instruction prog[14] = {
    { "TEST", "START", "",   ""        },
    { "BEGI", "BALR",  "15", "0"       },
    { "",     "USNG",  "2",  "15"      },
    { "",     "SR",    "4",  "4"       },
    { "",     "L",     "3",  "=F10"    },   /* literal operand */
    { "LOOP", "L",     "2",  "DATA(4)" },   /* symbol with an index */
    { "",     "A",     "2",  "=F49"    },
    { "",     "ST",    "2",  "DATA(4)" },
    { "",     "A",     "4",  "=F4"     },
    { "",     "BCT",   "3",  "*-16"    },   /* location-counter relative */
    { "",     "BR",    "14", ""        },
    { "",     "LTROG", "",   ""        },
    { "DATA", "DC",    "F1", ""        },
    { "",     "END",   "",   ""        }
};
/*
 * Machine-Operation Table: mnemonic, length in bytes, instruction format.
 *
 * ST is a 4-byte RX instruction.  It was listed here as 2-byte RR, which
 * contradicted pass1(): pass1 groups ST with L, A and BCT and advances the
 * location counter by 4 for it.
 */
struct M MOT[7] = {
    { "BALR", 2, "RR" },
    { "SR",   2, "RR" },
    { "L",    4, "RX" },
    { "A",    4, "RX" },
    { "ST",   4, "RX" },
    { "BCT",  4, "RX" },
    { "BR",   2, "RR" }
};
/*
 * Pseudo-Operation Table: the assembler directives pass1() recognises.
 * The initializer was never closed; the terminating "};" is restored.
 */
struct P POT[5] = {
    { "USNG"  },
    { "START" },
    { "LTROG" },
    { "DC"    },
    { "END"   }
};
/* Both passes are defined after main(), so declare them before use. */
void pass1(void);
void pass2(void);

/*
 * Print the source program held in prog[], then run pass 1 and pass 2.
 * Returns 0.
 */
int main()
{
    const int lines = (int)(sizeof prog / sizeof prog[0]);
    int i;

    printf("\n----------------SOURCE PROGRAM is:-----------------\n");
    for (i = 0; i < lines; i++) {
        printf("%s\t%s\t%s\t%s\n",
               prog[i].label, prog[i].nemo, prog[i].op1, prog[i].op2);
    }

    pass1();
    pass2();
    return 0;
}
void pass1()
{
int i=0,stp=0,L,nmo=0,ps1p=0,ltp=0,cnt,len;
char literal[5],ch;
printf("\n------------------Output of PASS1 is:-------------------");
while(strcmp(prog[i].nemo,"END")!=0)
//if label exists in the instruction store label in the symbol table
if(strlen(prog[i].label) != 0){
strcpy(st[stp].name ,prog[i].label);
st[stp].bytes = 4;
st[stp].add_type = "R";
st[stp].rel_add = LC;
stp++;
//check nemonic in POT
for(nmo=0;strcmp(prog[i].nemo,POT[nmo].nemo) !=0 && nmo <=4;nmo++);
if(strcmp(prog[i].nemo,"USNG")==0){
b_reg[0] = (int)prog[i].op1;
b_reg[1] = prog[i].op2[0] - '0';
i++;
continue;}
if(strcmp(prog[i].nemo,"LTROG")==0){
LC = LC + ((int)b_reg[1]-1);
//store literal table in PASS1
for(ltp=0;ltp <= 2;ltp++){
ps1[ps1p].r_add = LC;
strcpy(ps1[ps1p].nemo , lt[ltp].literal);
printf("\n%d\t
%s",ps1[ps1p].r_add,ps1[ps1p].nemo);
LC = LC + 4;
ps1p++;}i++;
continue}
if(strcmp(prog[i].nemo,"START")==0){
i++;
continue;}
if(strcmp(prog[i].nemo,"DC")==0){
strcpy(ps1[ps1p].op1 , prog[i].op1);
printf("\n%d\t%s",LC,ps1[ps1p].op1);
LC = LC + 4;
i++;
continue;}
//check nemonic in MOT
for(nmo=0;strcmp(MOT[nmo].nemo,prog[i].nemo)!=0;nmo++);
printf("\n%d\t",ps1[ps1p].r_add);
strcpy(ps1[ps1p].nemo , prog[i].nemo);
printf("%s\t",ps1[ps1p].nemo);
strcpy(ps1[ps1p].op1, prog[i].op1); //process operand 1
printf("%s\t",ps1[ps1p].op1);
if(strcmp(prog[i].nemo,"SR")==0||strcmp(prog[i].nemo,"BALR")==0||strcmp(prog[i].nemo,"BR")==0)
//process operand 2 for RR type of instruction
strcpy(ps1[ps1p].op2 , prog[i].op2);
len = strlen(ps1[ps1p].op2);
ps1[ps1p].op2[len] = '\0';
printf("%s",ps1[ps1p].op2);
LC = LC + 2;
//break}
if(strcmp(prog[i].nemo,"L")==0||strcmp(prog[i].nemo,"A")==0||strcmp(prog[i].nemo,"ST")==0||
strcmp(prog[i].nemo,"BCT")==0){//process operand 2 if it is literal
if(prog[i].op2[0] == '='){
for(cnt=1;prog[i].op2[cnt] != '\0'; cnt++ )
literal[cnt-1] = prog[i].op2[cnt];
literal[cnt-1] = '\0';
//Store literal in literal table
strcpy(lt[ltp].literal,literal);
ltp++;
}
if(strcmp(prog[i].op2,"DATA(4)") == 0)
strcpy(ps1[ps1p].op2 , "-(4,");
else
strcpy(ps1[ps1p].op2 , "-(0,");
strcat(ps1[ps1p].op2,"15)");
len = strlen(ps1[ps1p].op2);
ps1[ps1p].op2[len] = '\0';
printf("%s",ps1[ps1p].op2);
LC = LC + 4; }i++;ps1p++}
strcpy(ps1[ps1p].nemo , "END");
printf("\n%d\t%s",LC,ps1[ps1p].nemo);
printf("\n------------------SYMBOL TABLE is---------------------");
for(stp=0;strlen(st[stp].name) != 0;stp++)
{
printf("\n%d\t%s",st[stp].rel_add,st[stp].name);}
printf("\n---------------------LITERAL TABLE is-------------------");
for(ltp=0;strlen(lt[ltp].literal) != 0;ltp++)
printf("\n%s",lt[ltp].literal);
void pass2()
{int ps1p=0,i,j,stp,ps2p=0,s_val,r,cnt,ltp;
char str[7],ch[2],literal[5];
j=0;
printf("\n------------------Output of PASS2 is -----------------");
while(strcmp(ps1[ps1p].nemo,"END")!= 0){
//If instruction does not have operand 1 and 2
if(strlen(ps1[ps1p].op1) == 0 && strlen(ps1[ps1p].op2) == 0){
ps2[ps2p].r_add = ps1[ps1p].r_add;
printf("\n%d",ps2[ps2p].r_add);
strcpy(ps2[ps2p].nemo,ps1[ps1p].nemo);
printf("\t%s",ps2[ps2p].nemo);}
else if(strlen(ps1[ps1p].op1) != 0 && strlen(ps1[ps1p].op2) == 0){//if instruction have only
operand 1 like BR
printf("\t%s",ps2[ps2p].nemo);
strcpy(ps2[ps2p].op1,ps1[ps1p].op1);
printf("\t%s",ps2[ps2p].op1);}
else{
for(i=j;strcmp(prog[i].nemo,ps1[ps1p].nemo) != 0;i++);
//if operand 2 is symbol or literal
printf("\t%s",ps2[ps2p].nemo);
strcpy(ps2[ps2p].op1,ps1[ps1p].op1);
printf("\t%s",ps2[ps2p].op1);
//Operand 2 is symbol
if(ps1[ps1p].op2[0] == '-' && prog[i].op2[0] != '='){
//printf("\nSearch %s in symbol table",prog[i].op2);
if(strcmp(prog[i].op2,"DATA(4)") == 0)
{
for(stp=0;strcmp(st[stp].name,"DATA") != 0;stp++);
s_val = st[stp].rel_add;
//printf("\nNemonic is %s and S_val
is %d",prog[i].nemo,st[stp].rel_add);
}
else
if(prog[i].op2[0] == '*')
{
s_val = ps1[ps1p].r_add - 16;
}
else{
for(stp=0;strcmp(st[stp].name,prog[i].op2) != 0;stp++);
s_val = st[stp].rel_add;
}
//convert offset of symbol to character
if(s_val!=0){
r = s_val%10;
ch[1] = 48+(r);
s_val = s_val/10}
if(s_val!=0){
r = s_val%10;
ch[0] =48+(r);}
//copy offset to operand 2 of PASS 2
strcpy(ps2[ps2p].op2,ch);
//Get index and base register details from PASS 1
for(r=1;ps1[ps1p].op2[r]!='\0';r++){
str[r-1] = ps1[ps1p].op2[r];
} strcat(ps2[ps2p].op2,str);
}
else
if(prog[i].op2[0] == '='){
//Operand 2 is literal
for(cnt=1;prog[i].op2[cnt] != '\0'; cnt++ ) //Read literal from source prog.
literal[cnt-1] = prog[i].op2[cnt]
ltp = ps1p;
//Search literal in PASS 1
while(strcmp(ps1[ltp].nemo,literal) != 0){
ltp++;}
//Get the relative address of literal from PASS 1
s_val = ps1[ltp].r_add;
//Convert offset to character
while(s_val!=0){
r = s_val%10;
ch[1] = 48+(r);
s_val = s_val/10;
s_val = s_val/10;}
//Copy offset to operand 2 of PASS
strcpy(ps2[ps2p].op2,ch);
//Get index and base register details from PASS 1
for(r=1;ps1[ps1p].op2[r]!='\0';r++){
str[r-1] = ps1[ps1p].op2[r];}
strcat(ps2[ps2p].op2,str);}else
strcpy(ps2[ps2p].nemo,"END");
printf("\n%d\t%s",ps2[ps2p].r_add,ps2[ps2p].nemo)}
OUTPUT:
Experiment No – 2
AIM: To study and implement 2 Pass Macro Processor.
THEORY:
1) MACRO - A macro in computer science is a rule or pattern that specifies how a certain input
sequence (often a sequence of characters) should be mapped to a replacement output sequence
(also often a sequence of characters) according to a defined procedure. The mapping process that
instantiates (transforms) a macro use into a specific sequence is known as macro expansion. A
facility for writing macros may be provided as part of a software application or as a part of a
programming language. In the former case, macros are used to make tasks using the application less
repetitive. In the latter case, they are a tool that allows a programmer to enable code reuse or even
to design domain-specific languages.
2) MACRO Processor - A macro processor is a program that copies a stream of text from one place to
another, making a systematic set of replacements as it does so. Macro processors are often
embedded in other programs, such as assemblers and compilers. Sometimes they are standalone
programs that can be used to process any kind of text.
3) MACRO in Assembly Language: In assembly language, the term "macro" represents a more
comprehensive concept than it does in some other contexts, such as in the C programming language,
where its #define directive typically is used to create short single line macros. Assembler macro
instructions, like macros in PL/I and some other languages, can be lengthy "programs" by
themselves, executed by interpretation by the assembler during assembly.
An organization using assembly language that has been heavily extended using such a macro suite
can be considered to be working in a higher-level language, since such programmers are not
working with a computer's lowest-level conceptual elements.
Underlining this point, macros were used to implement an early virtual machine in SNOBOL4
(1967), which was written in the SNOBOL Implementation Language (SIL), an assembly language
for a virtual machine, which was then targeted to physical machines by transpiled to a native
assembler via a macro assembler. This allowed a high degree of portability for the time.
PROGRAM:
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
#include<string.h>
/*
 * f1 = MACIN.DAT   (source program, read)
 * f2 = NAMETAB.DAT (macro name, read)
 * f3 = DEFTAB.DAT  (macro definition, read)
 * f4 = EXPAND.DAT  (expanded program, written then displayed)
 * f5 = ARGTAB.DAT  (macro call arguments, written then displayed)
 */
FILE *f1,*f2,*f3,*f4,*f5;

/* Read one "label opcode operand" line from in; 1 on success, 0 otherwise. */
static int read_triple(FILE *in, char *lbl, char *opc, char *opr)
{
    /* The field width keeps each token inside its 20-byte buffer. */
    return fscanf(in, "%19s%19s%19s", lbl, opc, opr) == 3;
}

/* Copy the whole of fp, from its beginning, to standard output. */
static void show_file(FILE *fp)
{
    int ch;     /* int, not char, so that EOF is distinguishable */

    rewind(fp);
    while ((ch = fgetc(fp)) != EOF)
        putchar(ch);
}

/*
 * Macro processor.
 *
 * Reads MACIN.DAT line by line.  A macro definition (MACRO ... MEND) in the
 * input is skipped.  A line whose opcode equals the name stored in
 * NAMETAB.DAT is a macro call: its operand is appended to ARGTAB.DAT and the
 * body from DEFTAB.DAT is written to EXPAND.DAT, preceded by the call line,
 * with the fixed parameter substitutions shown below.  Finally all five
 * files are listed on standard output.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when a data file cannot be opened.
 *
 * Defects fixed relative to the previous listing:
 *   - "goto l1" jumped past the end-of-file test, so the loop never ended,
 *   - mname and c were read before being set,
 *   - no fopen() result was checked,
 *   - the input files were displayed from their end-of-file position and
 *     the output files were re-opened without being closed,
 *   - rewind(f5) after each write made every call overwrite ARGTAB.DAT,
 *   - the "Definition Table" and "Name Table" headings showed each other's
 *     file,
 *   - fgetc() results were stored in a char before comparing with EOF.
 *
 * NOTE(review): the block that expands DEFTAB had lost its condition, and
 * nothing copied ordinary lines to EXPAND.DAT.  Both are restored here as
 * "opcode equals macro name" and "copy the line unchanged" - confirm against
 * the intended output.
 */
int main(void)
{
    char lbl[20], opc[20], opr[20], mname[20];
    char dlbl[20], dopc[20], dopr[20];
    int first;

    f1 = fopen("MACIN.DAT", "r");
    f2 = fopen("NAMETAB.DAT", "r");
    f3 = fopen("DEFTAB.DAT", "r");
    f4 = fopen("EXPAND.DAT", "w+");     /* "w+": written now, read back later */
    f5 = fopen("ARGTAB.DAT", "w+");
    if (f1 == NULL || f2 == NULL || f3 == NULL || f4 == NULL || f5 == NULL) {
        perror("macro processor: cannot open data file");
        return EXIT_FAILURE;
    }

    /* An empty name table means no line is ever treated as a macro call. */
    if (fscanf(f2, "%19s", mname) != 1)
        mname[0] = '\0';

    while (read_triple(f1, lbl, opc, opr)) {
        if (strcmp(opc, "MACRO") == 0) {
            /* Skip the definition, including its MEND line. */
            while (strcmp(opc, "MEND") != 0 && read_triple(f1, lbl, opc, opr))
                ;
            continue;
        }

        if (strcmp(opc, mname) != 0) {
            /* Ordinary statement: copy it to the expanded program. */
            fprintf(f4, " %s\t%s\t%s\n", lbl, opc, opr);
            continue;
        }

        /* Macro call: record the arguments, then expand the definition. */
        fprintf(f5, " %s", opr);
        first = 1;
        rewind(f3);
        while (read_triple(f3, dlbl, dopc, dopr)) {
            if (strcmp(dopc, "MEND") == 0 || strcmp(dopc, "MACRO") == 0)
                continue;

            /* Positional parameters ?1..?3 are replaced by fixed text. */
            if (strcmp(dopr, "=X'?1'") == 0)
                strcpy(dopr, "=X'F1'");
            if (strcmp(dopr, "?2,X") == 0)
                strcpy(dopr, "BUFFER,X");
            if (strcmp(dopr, "?3") == 0)
                strcpy(dopr, "LENGTH");

            if (first) {
                /* The call line itself goes out once, before the body. */
                fprintf(f4, " %s\t%s\t%s\n", lbl, opc, opr);
                first = 0;
            }
            fprintf(f4, " %s\t%s\t%s\n", dlbl, dopc, dopr);
        }
    }

    printf("\n INPUT\n\n Macro Program before expanded \n");
    printf(" ---------------------------------\n");
    show_file(f1);
    printf("\n Definition Table \n");
    printf(" ---------------------------------\n");
    show_file(f3);
    printf("\n Name Table \n");
    printf(" ---------------------------------\n");
    show_file(f2);

    getchar();  /* pause until Enter; standard replacement for getch() */

    printf("\n\n OUTPUT\n\n Macro Program after expanded \n");
    printf(" ---------------------------------\n\n");
    show_file(f4);
    printf("\n Argument Table \n");
    printf(" ---------------------------------\n\n");
    show_file(f5);

    fclose(f1);
    fclose(f2);
    fclose(f3);
    fclose(f4);
    fclose(f5);
    return EXIT_SUCCESS;
}
OUTPUT:
Experiment No – 3
AIM: Implementation of Lexical Analyzer
THEORY:
1) MACRO - A macro in computer science is a rule or pattern that specifies how a certain input
sequence (often a sequence of characters) should be mapped to a replacement output sequence
(also often a sequence of characters) according to a defined procedure. The mapping process that
instantiates (transforms) a macro use into a specific sequence is known as macro expansion. A
facility for writing macros may be provided as part of a software application or as a part of a
programming language. In the former case, macros are used to make tasks using the application less
repetitive. In the latter case, they are a tool that allows a programmer to enable code reuse or even
to design domain-specific languages.
2) MACRO Processor - A macro processor is a program that copies a stream of text from one place to
another, making a systematic set of replacements as it does so. Macro processors are often
embedded in other programs, such as assemblers and compilers. Sometimes they are standalone
programs that can be used to process any kind of text.
3) MACRO in Assembly Language: In assembly language, the term "macro" represents a more
comprehensive concept than it does in some other contexts, such as in the C programming language,
where its #define directive typically is used to create short single line macros. Assembler macro
instructions, like macros in PL/I and some other languages, can be lengthy "programs" by
themselves, executed by interpretation by the assembler during assembly.
Since macros can have 'short' names but expand to several or indeed many lines of code, they can
be used to make assembly language programs appear to be far shorter, requiring fewer lines of
source code, as with higher level languages. They can also be used to add higher levels of structure
to assembly programs, optionally introduce embedded debugging code via parameters and other
similar features.
PROGRAM:
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
#include<string.h>
/*
 * NOTE(review): this experiment is titled "Lexical Analyzer", but the listing
 * is the macro processor of Experiment 2 with its fopen() calls for the three
 * input files missing, so rewind() was called on null streams.  The program
 * is repaired as a macro processor below; the input file names are the ones
 * the Experiment 2 listing opens.  Replace it if a lexical analyzer was
 * intended.
 *
 * f1 = MACIN.DAT   (source program, read)
 * f2 = NAMETAB.DAT (macro name, read)
 * f3 = DEFTAB.DAT  (macro definition, read)
 * f4 = EXPAND.DAT  (expanded program, written then displayed)
 * f5 = ARGTAB.DAT  (macro call arguments, written then displayed)
 */
FILE *f1,*f2,*f3,*f4,*f5;

/* Read one "label opcode operand" line from in; 1 on success, 0 otherwise. */
static int read_triple(FILE *in, char *lbl, char *opc, char *opr)
{
    /* The field width keeps each token inside its 20-byte buffer. */
    return fscanf(in, "%19s%19s%19s", lbl, opc, opr) == 3;
}

/* Copy the whole of fp, from its beginning, to standard output. */
static void show_file(FILE *fp)
{
    int ch;     /* int, not char, so that EOF is distinguishable */

    rewind(fp);
    while ((ch = fgetc(fp)) != EOF)
        putchar(ch);
}

/*
 * Reads MACIN.DAT line by line.  A macro definition (MACRO ... MEND) in the
 * input is skipped.  A line whose opcode equals the name stored in
 * NAMETAB.DAT is a macro call: its operand is appended to ARGTAB.DAT and the
 * body from DEFTAB.DAT is written to EXPAND.DAT, preceded by the call line,
 * with the fixed parameter substitutions shown below.  Finally all five
 * files are listed on standard output.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when a data file cannot be opened.
 *
 * Besides the missing fopen() calls this fixes: a "goto l1" that bypassed
 * the end-of-file test (endless loop), mname read before being set, the
 * lost condition on the expansion block, files displayed from their
 * end-of-file position, swapped table headings, and fgetc() results stored
 * in a char before comparing with EOF.
 */
int main(void)
{
    char lbl[20], opc[20], opr[20], mname[20];
    char dlbl[20], dopc[20], dopr[20];
    int first;

    f1 = fopen("MACIN.DAT", "r");
    f2 = fopen("NAMETAB.DAT", "r");
    f3 = fopen("DEFTAB.DAT", "r");
    f4 = fopen("EXPAND.DAT", "w+");     /* "w+": written now, read back later */
    f5 = fopen("ARGTAB.DAT", "w+");
    if (f1 == NULL || f2 == NULL || f3 == NULL || f4 == NULL || f5 == NULL) {
        perror("macro processor: cannot open data file");
        return EXIT_FAILURE;
    }

    /* An empty name table means no line is ever treated as a macro call. */
    if (fscanf(f2, "%19s", mname) != 1)
        mname[0] = '\0';

    while (read_triple(f1, lbl, opc, opr)) {
        if (strcmp(opc, "MACRO") == 0) {
            /* Skip the definition, including its MEND line. */
            while (strcmp(opc, "MEND") != 0 && read_triple(f1, lbl, opc, opr))
                ;
            continue;
        }

        if (strcmp(opc, mname) != 0) {
            /* Ordinary statement: copy it to the expanded program. */
            fprintf(f4, " %s\t%s\t%s\n", lbl, opc, opr);
            continue;
        }

        /* Macro call: record the arguments, then expand the definition. */
        fprintf(f5, " %s", opr);
        first = 1;
        rewind(f3);
        while (read_triple(f3, dlbl, dopc, dopr)) {
            if (strcmp(dopc, "MEND") == 0 || strcmp(dopc, "MACRO") == 0)
                continue;

            /* Positional parameters ?1..?3 are replaced by fixed text. */
            if (strcmp(dopr, "=X'?1'") == 0)
                strcpy(dopr, "=X'F1'");
            if (strcmp(dopr, "?2,X") == 0)
                strcpy(dopr, "BUFFER,X");
            if (strcmp(dopr, "?3") == 0)
                strcpy(dopr, "LENGTH");

            if (first) {
                /* The call line itself goes out once, before the body. */
                fprintf(f4, " %s\t%s\t%s\n", lbl, opc, opr);
                first = 0;
            }
            fprintf(f4, " %s\t%s\t%s\n", dlbl, dopc, dopr);
        }
    }

    printf("\n INPUT\n\n Macro Program before expanded \n");
    printf(" ---------------------------------\n");
    show_file(f1);
    printf("\n Definition Table \n");
    printf(" ---------------------------------\n");
    show_file(f3);
    printf("\n Name Table \n");
    printf(" ---------------------------------\n");
    show_file(f2);

    getchar();  /* pause until Enter; standard replacement for getch() */

    printf("\n\n OUTPUT\n\n Macro Program after expanded \n");
    printf(" ---------------------------------\n\n");
    show_file(f4);
    printf("\n Argument Table \n");
    printf(" ---------------------------------\n\n");
    show_file(f5);

    fclose(f1);
    fclose(f2);
    fclose(f3);
    fclose(f4);
    fclose(f5);
    return EXIT_SUCCESS;
}
OUTPUT:
Experiment No – 4
AIM: To study and implement operator precedence parser.
THEORY:
Parser:
Shift Reduce Parser:
Operator Precedence Parser:
Operator precedence parsing is a kind of shift-reduce parsing method. It is applied to a small
class of operator grammars.
A grammar is said to be operator precedence grammar if it has two properties:
o No R.H.S. of any production contains ε (the empty string).
o No two non-terminals are adjacent.
Operator precedence can only be established between the terminals of the grammar. It ignores
the non-terminals.
There are the three operator precedence relations:
a ⋗ b means that terminal "a" has the higher precedence than terminal "b".
a ⋖ b means that terminal "a" has the lower precedence than terminal "b".
a ≐ b means that the terminal "a" and "b" both have same precedence.
Precedence table:
Parsing Action
o At both ends of the given input string, add the $ symbol.
o Now scan the input string from left to right until the ⋗ is encountered.
o Scan towards left over all the equal precedence until the first left most ⋖ is encountered.
o Everything between left most ⋖ and right most ⋗ is a handle.
o $ on $ means parsing is successful.
Example
Grammar
1. E → E+T/T
2. T → T*F/F
3. F → id
Given string:
1. w = id + id * id
Let us consider a parse tree for it as follows:
On the basis of above tree, we can design following operator precedence table:
Now let us process the string with the help of the above precedence table:
PROGRAM:
/*OPERATOR PRECEDENCE PARSER*/
#include<stdio.h>
#include<conio.h>
/*
 * Operator-precedence parser driven by a user-supplied precedence table.
 *
 * Input (stdin): the number of terminals n (1..10), the n terminal
 * characters as one word, one relation character per table cell
 * ('<', '=', '>' or anything else for "no relation"), and the string to
 * parse, which has to end in '$'.
 *
 * The stack starts as "$".  A '<' or '=' relation between the terminal on
 * top of the stack and the next input character shifts: the relation
 * character and then the input character are pushed.  A '>' relation
 * reduces: the stack is popped back to, and including, the nearest '<'.
 * The string is accepted when '$' is on top of the stack and '$' is the
 * next input character.
 *
 * Defects fixed relative to the previous listing:
 *   - table cells were 1 byte wide but filled with scanf("%s"), and cleared
 *     through the out-of-range index [1],
 *   - col/row kept a stale or uninitialised value for characters that are
 *     not terminals,
 *   - the reduce loop could run below the bottom of the stack and shifts
 *     could run past its top,
 *   - inputs were read without length limits,
 *   - strlen() was used without <string.h>; clrscr()/getch() from the
 *     non-standard <conio.h> are no longer called.
 */
int main(void)
{
    char stack[2 * 20 + 2] = "";    /* two pushes per input character */
    char ip[20] = "";
    char opt[10][10];
    char ter[11] = "";
    int i, j, k, n, top = 0, col, row;

    printf("Enter the no.of terminals:");
    if (scanf("%d", &n) != 1 || n < 1 || n > 10) {
        printf("\nNumber of terminals must be between 1 and 10");
        return 1;
    }
    printf("\nEnter the terminals:");
    if (scanf("%10s", ter) != 1)
        return 1;
    for (k = 0; k < n; k++) {
        if (ter[k] == '\0') {
            printf("\nFewer terminals entered than announced");
            return 1;
        }
    }

    printf("\nEnter the table values:\n");
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            printf("Enter the value for %c %c:", ter[i], ter[j]);
            /* " %c" skips white space and reads exactly one character. */
            if (scanf(" %c", &opt[i][j]) != 1)
                return 1;
        }
    }

    printf("\nOPERATOR PRECEDENCE TABLE:\n");
    for (i = 0; i < n; i++)
        printf("\t%c", ter[i]);
    printf("\n");
    for (i = 0; i < n; i++) {
        printf("\n%c", ter[i]);
        for (j = 0; j < n; j++)
            printf("\t%c", opt[i][j]);
    }

    stack[top] = '$';
    printf("\nEnter the input string:");
    if (scanf("%19s", ip) != 1)
        return 1;

    i = 0;
    printf("\nSTACK\t\t\tINPUT STRING\t\t\tACTION\n");
    printf("\n%s\t\t\t%s\t\t\t", stack, ip);
    for (;;) {
        /* Table row for the stack top, column for the next input char. */
        col = -1;
        row = -1;
        for (k = 0; k < n; k++) {
            if (stack[top] == ter[k])
                col = k;
            if (ip[i] == ter[k])
                row = k;
        }

        if (stack[top] == '$' && ip[i] == '$') {
            printf("String is accepted");
            break;
        }
        if (col < 0 || row < 0) {
            /* Not a terminal, or input exhausted before reaching '$'. */
            printf("\nString is not accepted");
            break;
        }

        if (opt[col][row] == '<' || opt[col][row] == '=') {
            if (top + 2 >= (int)sizeof stack) {
                printf("\nString is not accepted");
                break;
            }
            stack[++top] = opt[col][row];
            stack[++top] = ip[i];
            printf("Shift %c", ip[i]);
            i++;
        } else if (opt[col][row] == '>') {
            while (top > 0 && stack[top] != '<')
                --top;
            if (top == 0) {
                /* No '<' marker left: there is no handle to reduce. */
                printf("\nString is not accepted");
                break;
            }
            top = top - 1;
            printf("Reduce");
        } else {
            printf("\nString is not accepted");
            break;
        }

        /* Next trace line: stack contents, then the remaining input. */
        printf("\n");
        for (k = 0; k <= top; k++)
            printf("%c", stack[k]);
        printf("\t\t\t");
        for (k = i; ip[k] != '\0'; k++)
            printf("%c", ip[k]);
        printf("\t\t\t");
    }
    return 0;
}
OUTPUT:
Enter the no.of terminals:4
Enter the terminals:+*i$
Enter the table values:
Enter the value for + +:>
Enter the value for + *:<
Enter the value for + i:<
Enter the value for + $:>
Enter the value for * +:>
Enter the value for * *:>
Enter the value for * i:<
Enter the value for * $:>
Enter the value for i +:>
Enter the value for i *:>
Enter the value for i i:=
Enter the value for i $:>
Enter the value for $ +:<
Enter the value for $ *:<
Enter the value for $ i:<
Enter the value for $ $:A
OPERATOR PRECEDENCE TABLE:
+*i$
+><<>
*>><>
i>>=>
$<<<A
Enter the input string:i+i*i$
STACK INPUT STRING ACTION
$ i+i*i$ Shift i
$<i +i*i$ Reduce
$ +i*i$ Shift +
$<+ i*i$ Shift i
$<+<i *i$ Reduce
$<+ *i$ Shift *
$<+<* i$ Shift i
$<+<*<i $ Reduce
$<+<* $ Reduce
$<+ $ Reduce
$ $ String is accepted
Experiment No – 5
AIM: To study and implement intermediate code generation phase of compiler.
THEORY:
A source code can directly be translated into its target machine code, then why at all we need to
translate the source code into an intermediate code which is then translated to its target code? Let us
see the reasons why we need an intermediate code.
• If a compiler translates the source language to its target machine language without having the
option for generating intermediate code, then for each new machine, a full native compiler is
required.
• Intermediate code eliminates the need of a new full compiler for every unique machine by
keeping the analysis portion same for all the compilers.
• The second part of compiler, synthesis, is changed according to the target machine.
• It becomes easier to apply the source code modifications to improve code performance by
applying code optimization techniques on the intermediate code.
Intermediate code generator receives input from its predecessor phase, semantic analyzer, in the form
of an annotated syntax tree. That syntax tree then can be converted into a linear representation, e.g.,
postfix notation. Intermediate code tends to be machine independent code. Therefore, code generator
assumes to have unlimited number of memory storage (register) to generate code.
PROGRAM:
Program:
#include<stdio.h>
#include<conio.h>
#include<string.h>
/* Fields of the quadruple currently being translated. */
char op[2],arg1[5],arg2[5],result[5];

/*
 * Translate the quadruples in input.txt ("op arg1 arg2 result", one per
 * line) into register code written to output.txt.
 *
 *   + - * /   ->  MOV R0,arg1 / <ADD|SUB|MUL|DIV> R0,arg2 / MOV result,R0
 *   =         ->  MOV R0,arg1 / MOV result,R0   (arg2 is a placeholder)
 *
 * Quadruples with any other operator are ignored.
 * Returns 0 on success, 1 when a file cannot be opened.
 *
 * Defects fixed relative to the previous listing:
 *   - only '+' loaded R0 and stored the result; '*', '-' and '/' emitted a
 *     single instruction and '=' emitted nothing, which does not produce
 *     the output documented for this experiment,
 *   - the feof()-controlled loop handled the last quadruple twice when the
 *     file ends in a newline,
 *   - fscanf("%s") could overflow the 2- and 5-byte buffers,
 *   - fopen() results were not checked.
 */
int main(void)
{
    FILE *fp1,*fp2;
    const char *mnemonic;

    fp1 = fopen("input.txt", "r");
    if (fp1 == NULL) {
        perror("input.txt");
        return 1;
    }
    fp2 = fopen("output.txt", "w");
    if (fp2 == NULL) {
        perror("output.txt");
        fclose(fp1);
        return 1;
    }

    /* The field widths match the buffer sizes declared above. */
    while (fscanf(fp1, "%1s%4s%4s%4s", op, arg1, arg2, result) == 4) {
        switch (op[0]) {
        case '+': mnemonic = "ADD"; break;
        case '-': mnemonic = "SUB"; break;
        case '*': mnemonic = "MUL"; break;
        case '/': mnemonic = "DIV"; break;
        case '=':
            /* Plain copy: no arithmetic instruction in between. */
            fprintf(fp2, "\nMOV R0,%s", arg1);
            fprintf(fp2, "\nMOV %s,R0", result);
            continue;
        default:
            continue;
        }
        fprintf(fp2, "\nMOV R0,%s", arg1);
        fprintf(fp2, "\n%s R0,%s", mnemonic, arg2);
        fprintf(fp2, "\nMOV %s,R0", result);
    }

    fclose(fp1);
    fclose(fp2);
    return 0;
}
input.txt:
+ a b t1
* c d t2
- t1 t2 t
= t ? x
OUTPUT:
MOV R0,a
ADD R0,b
MOV t1,R0
MOV R0,c
MUL R0,d
MOV t2,R0
MOV R0,t1
SUB R0,t2
MOV t,R0
MOV R0,t
MOV x,R0
Experiment No – 6
AIM: To study and implement Code generation phase of compiler
THEORY:
1) Definition: Code generation can be considered as the final phase of compilation. Through post code
generation, optimization process can be applied on the code, but that can be seen as a part of code
generation phase itself. The code generated by the compiler is an object code of some lower-level
programming language, for example, assembly language.
2) Directed Acyclic Graph: Directed Acyclic Graph (DAG) is a tool that depicts the structure of basic
blocks, helps to see the flow of values flowing among the basic blocks, and offers optimization too.
DAG provides easy transformation on basic blocks. DAG can be understood here:
Leaf nodes represent identifiers, names or constants.
Interior nodes represent operators.
Interior nodes also represent the results of expressions or the identifiers/name where the values
are to be stored or assigned.
Example:
t0 = a + b
t1 = t0 + c
d = t0 + t1
[t0 = a + b] [t1 = t0 + c] [d = t0 + t1]
[t0 = a + b]
[t1 = t0 + c]
[d = t0 + t1]
3) Peephole Optimization: This optimization technique works locally on the source code to
transform it into an optimized code. By locally, we mean a small portion of the code block at hand.
These methods can be applied on intermediate codes as well as on target codes. A bunch of
statements is analyzed and are checked for the following possible optimization.
4) Redundant instruction elimination: At source code level, the following can be done by the user:
Version 1:
int add_ten(int x)
{
int y, z;
y = 10;
z = x + y;
return z;
}
Version 2:
int add_ten(int x)
{
int y;
y = 10;
y = x + y;
return y;
}
Version 3:
int add_ten(int x)
{
int y = 10;
return x + y;
}
Version 4:
int add_ten(int x)
{
return x + 10;
}
At compilation level, the compiler searches for instructions redundant in nature. Multiple loading and
storing of instructions may carry the same meaning even if some of them are removed. For example:
MOV x, R0
MOV R0, R1
We can delete the first instruction and re-write the sentence as:
MOV x, R1
5) Unreachable code: Unreachable code is a part of the program code that is never accessed because
of programming constructs. Programmers may have accidentally written a piece of code that can
never be reached.
Example:
int add_ten(int x)
{
return x + 10;
printf("value of x is %d", x);
}
In this code segment, the printf statement will never be executed as the program control returns back
before it can execute, hence printf can be removed.
6) Flow of control optimization: There are instances in a code where the program control jumps
back and forth without performing any significant task. These jumps can be removed. Consider the
following chunk of code:
...
MOV R1, R2
GOTO L1
...
L1 : GOTO L2
L2 : INC R1
In this code, label L1 can be removed as it passes the control to L2. So instead of jumping to L1 and then to
L2, the control can directly reach L2, as shown below:
...
MOV R1, R2
GOTO L2
...
L2 : INC R1
7) Algebraic expression simplification: There are occasions where algebraic expressions can be
made simple. For example, the expression a = a + 0 can be replaced by a itself and the expression a =
a + 1 can simply be replaced by INC a.
8) Strength reduction: There are operations that consume more time and space. Their ‘strength’ can
be reduced by replacing them with other operations that consume less time and space, but produce
the same result.
9) Accessing machine instructions: The target machine can deploy more sophisticated instructions,
which can have the capability to perform specific operations much more efficiently. If the target code can
accommodate those instructions directly, that will not only improve the quality of code, but also
yield more efficient results.
10) Code Generator: A code generator is expected to have an understanding of the target machine’s
runtime environment and its instruction set. The code generator should take the following things
into consideration to generate the code:
• Target language: The code generator has to be aware of the nature of the target language for which
the code is to be transformed. That language may facilitate some machine-specific instructions to help
the compiler generate the code in a more convenient way. The target machine can have either CISC or
RISC processor architecture.
• IR Type: Intermediate representation has various forms. It can be in Abstract Syntax Tree (AST)
structure, Reverse Polish Notation, or 3-address code.
• Selection of instruction: The code generator takes Intermediate Representation as input and
converts (maps) it into target machine’s instruction set. One representation can have many ways
(instructions) to convert it, so it becomes the responsibility of the code generator to choose the
appropriate instructions wisely.
• Register allocation: A program has a number of values to be maintained during the execution. The
target machine’s architecture may not allow all of the values to be kept in the CPU memory or registers.
Code generator decides what values to keep in the registers. Also, it decides the registers to be used to
keep these values.
• Ordering of instructions: At last, the code generator decides the order in which the instruction
will be executed. It creates schedules for instructions to execute them.
11) Descriptors: The code generator has to track both the registers (for availability) and addresses
(location of values) while generating the code. For both of them, the following two descriptors are
used:
• Register descriptor : Register descriptor is used to inform the code generator about the
availability of registers. Register descriptor keeps track of values stored in each register. Whenever a
new register is required during code generation, this descriptor is consulted for register availability.
• Address descriptor : Values of the names (identifiers) used in the program might be stored at
different locations while in execution. Address descriptors are used to keep track of memory locations
where the values of identifiers are stored. These locations may include CPU registers, heaps, stacks,
memory or a combination of the mentioned locations.
Code generator keeps both the descriptor updated in real-time. For a load statement, LD R1, x, the code
generator:
updates the Register Descriptor R1 that has value of x and
updates the Address Descriptor (x) to show that one instance of x is in R1.
12) Code Generation: Basic blocks consist of a sequence of three-address instructions. The code
generator takes this sequence of instructions as input.
Note: If the value of a name is found at more than one place (register, cache, or memory), the register’s
value will be preferred over the cache and main memory. Likewise cache’s value will be preferred over
the main memory. Main memory is barely given any preference.
getReg: Code generator uses getReg function to determine the status of available registers and the
location of name values. getReg works as follows:
If variable Y is already in register R, it uses that register.
Else if some register R is available, it uses that register.
Else if both the above options are not possible, it chooses a register that requires minimal number of
load and store instructions.
For an instruction x = y OP z, the code generator may perform the following actions. Let us assume that
L is the location (preferably register) where the output of y OP z is to be saved:
Call function getReg, to decide the location of L.
Determine the present location (register or memory) of y by consulting the Address Descriptor of y. If y
is not presently in register L, then generate the following instruction to copy the value of y to L:
MOV y’, L
where y’ represents the copied value of y.
Determine the present location of z using the same method used in step 2 for y and generate the
following instruction:
OP z’, L
where z’ represents the copied value of z.
PROGRAM:
#include<stdio.h>
/* Defined after main(), so declared here. */
int getreg(void);

/* One generated machine instruction: mnemonic and two operand strings. */
struct code {
    char nemo[4];
    char op1[3];
    char op2[3];
};

enum { MAX_STMT = 4, MAX_CODE = 7 };

/* Store one instruction in *ins and print it. */
static void emit(struct code *ins, const char *nemo,
                 const char *op1, const char *op2)
{
    snprintf(ins->nemo, sizeof ins->nemo, "%s", nemo);
    snprintf(ins->op1, sizeof ins->op1, "%s", op1);
    snprintf(ins->op2, sizeof ins->op2, "%s", op2);
    printf("\n%s\t%s\t%s", ins->nemo, ins->op1, ins->op2);
}

/* Index of the address descriptor that names `name`, or -1 if none does. */
static int find_desc(char add_dis[][4], int count, char name)
{
    int k;

    for (k = 0; k < count; k++) {
        if (add_dis[k][0] == name)
            return k;
    }
    return -1;
}

/*
 * Code generator for the four fixed three-address statements in stmt[]
 * (form "X=Y-Z" or "X=Y+Z", one character per name).
 *
 *   X=Y-Z : take a fresh register Rn, emit "MOV Y,Rn" and "SUB Z,Rn", and
 *           record the address descriptor X -> Rn.
 *   X=Y+Z : look up the registers holding Z and Y, emit "ADD Rz,Ry", and
 *           rename Y's descriptor to X (the sum now lives in Ry).  After the
 *           last statement the result is stored with "MOV Ry,X".
 *
 * An address descriptor is the 4-byte string { name, 'R', digit, NUL }.
 *
 * Defects fixed relative to the previous listing:
 *   - struct members and "return r++" were missing semicolons,
 *   - descriptors were 3 bytes wide but written at index [3],
 *   - the register digits of the ADD and final MOV operands were never
 *     assigned and SUB/ADD were never printed,
 *   - descriptor searches had no upper bound,
 *   - getreg() was called before being declared and strcpy() was used
 *     without <string.h>.
 */
int main(void)
{
    char stmt[MAX_STMT][6] = {{"T=A-B"},{"U=A-C"},{"V=T+U"},{"W=V+U"}};
    struct code c[MAX_CODE];
    char add_dis[MAX_STMT][4];
    char var[2] = "";       /* a one-character variable name as a string */
    char reg_a[3] = "R";    /* "Rn" for the first register operand */
    char reg_b[3] = "R";    /* "Rn" for the second register operand */
    int n_desc = 0;         /* descriptors in use */
    int cp = 0;             /* instructions emitted so far */
    int i, reg, src, dst;
    char op;

    for (i = 0; i < MAX_STMT; i++) {
        printf("\n%s", stmt[i]);
        op = stmt[i][3];
        switch (op) {
        case '-':
            if (n_desc >= MAX_STMT || cp + 2 > MAX_CODE) {
                printf("\nCode or descriptor table full");
                break;
            }
            reg = getreg();
            reg_a[1] = (char)reg;

            var[0] = stmt[i][2];
            emit(&c[cp++], "MOV", var, reg_a);
            var[0] = stmt[i][4];
            emit(&c[cp++], "SUB", var, reg_a);

            /* Assign an address descriptor to the variable left of '='. */
            add_dis[n_desc][0] = stmt[i][0];
            add_dis[n_desc][1] = 'R';
            add_dis[n_desc][2] = (char)reg;
            add_dis[n_desc][3] = '\0';
            printf("\nAddress Discriptor of ");
            printf("%c is ", add_dis[n_desc][0]);
            printf("%c%c", add_dis[n_desc][1], add_dis[n_desc][2]);
            n_desc++;
            break;

        case '+':
            /* The second source operand becomes the instruction's first
             * operand, the first source operand its second. */
            src = find_desc(add_dis, n_desc, stmt[i][4]);
            dst = find_desc(add_dis, n_desc, stmt[i][2]);
            if (src < 0 || dst < 0 || cp + 2 > MAX_CODE) {
                printf("\nNo register holds an operand of %s", stmt[i]);
                break;
            }
            reg_a[1] = add_dis[src][2];
            reg_b[1] = add_dis[dst][2];
            emit(&c[cp++], "ADD", reg_a, reg_b);

            /* The destination register now holds the variable left of '='. */
            add_dis[dst][0] = stmt[i][0];
            printf("\nAddress Discriptor of %c is %c%c",
                   add_dis[dst][0], add_dis[dst][1], add_dis[dst][2]);

            if (i == MAX_STMT - 1) {
                /* Last statement: store the result back to memory. */
                var[0] = stmt[i][0];
                emit(&c[cp++], "MOV", reg_b, var);
            }
            break;

        default:
            break;
        }
    }
    return 0;
}

/*
 * Hand out register numbers as ASCII digits: '0' on the first call, '1' on
 * the second, and so on.
 */
int getreg(void)
{
    static int r = '0';

    return r++;
}
OUTPUT:
Experiment No – 7
AIM: To study and implement LEX and YACC in Lexical Analyzer.
THEORY:
1) LEX: Lex is a computer program that generates lexical analyzers like scanners or lexers. Lex is
commonly used with the yacc parser generator. Lex, originally written by Mike Lesk and Eric
Schmidt and described in 1975, is the standard lexical analyzer generator on many Unix systems,
and an equivalent tool is specified as part of the POSIX standard.
Lex reads an input stream specifying the lexical analyzer and outputs source code implementing the
lexer in the C programming language.
Structure of LEX: The structure of a Lex file is intentionally similar to that of a yacc file; files are
divided into three sections, separated by lines that contain only two percent signs, as follows:
Definition section
%%
Rules section
%%
C code section
The definition section defines macros and imports header files written in C. It is also possible to
write any C code here, which will be copied verbatim into the generated source file.
The rules section associates regular expression patterns with C statements. When the lexer sees text
in the input matching a given pattern, it will execute the associated C code.
The C code section contains C statements and functions that are copied verbatim to the generated
source file.
2) YACC: Yacc is a computer program for the Unix operating system. It is a LALR parser generator,
generating a parser, the part of a compiler that tries to make syntactic sense of the source code,
specifically a LALR parser, based on an analytic grammar written in a notation similar to BNF.
Yacc itself used to be available as the default parser generator on most Unix systems, though it has
since been supplanted as the default by more recent, largely compatible, programs.
Description of YACC: YACC is an acronym for "Yet Another Compiler Compiler". It is a LALR parser
generator, generating a parser, the part of a compiler that tries to make syntactic sense of the
source code, specifically a LALR parser, based on an analytic grammar written in a notation similar
to BNF.
It was originally developed in the early 1970s by Stephen C. Johnson at AT&T Corporation and
written in the B programming language, but soon rewritten in C. It appeared as part of Version 3
Unix, and a full description of Yacc was published in 1975.
The input to Yacc is a grammar with snippets of C code (called "actions") attached to its rules. Its
output is a shift-reduce parser in C that executes the C snippets associated with each rule as soon as
the rule is recognized. Typical actions involve the construction of parse trees. Using an example
from Johnson, if the call node (label, left, right) constructs a binary parse tree node with the
specified label and children, then the rule
expr : expr '+' expr { $$ = node('+', $1, $3); }
recognizes summation expressions and constructs nodes for them. The special identifiers $$, $1 and $3
refer to items on the parser's stack.
LEX Example:
/*** Definition section ***/

%{
/* C code to be copied verbatim */
#include <stdio.h>
%}

/* This tells flex to read only one input file */
%option noyywrap

%%
    /*** Rules section ***/
    /* Comments in this section must be indented: at the start of a line  */
    /* flex expects a pattern and would try to read the comment as one.   */

    /* [0-9]+ matches a string of one or more digits */
[0-9]+  {
            /* yytext is a string containing the matched text. */
            printf("Saw an integer: %s\n", yytext);
        }

.|\n    {   /* Ignore all other characters. */   }

%%
/*** C Code section ***/

int main(void)
{
    /* Call the lexer, then quit. */
    yylex();
    return 0;
}
PROGRAM:
hello1.l:
%{
/*
 * Scanner for the hello1 example: returns HI for a line "hi" or "oi" and
 * BYE for a line "tchau" or "bye"; any other character is reported through
 * yyerror(), which terminates the program.
 */
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"

/* Used in the rules below and defined at the end of this file. */
int yyparse(void);
int yyerror(const char *msg);
%}
%%
("hi"|"oi")"\n"        { return HI; }
("tchau"|"bye")"\n"    { return BYE; }
.                      { yyerror("unexpected character"); }
%%
int main(void)
{
    yyparse();
    return 0;
}
/* Called at end of input: returning 1 tells the scanner there is no further
   input file, so scanning stops instead of resuming. */
int yywrap(void)
{
    return 1;
}
/* Error handler shared with the parser, which passes a message string.
   The message is not shown; the program prints "Error" and exits with
   status 1. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("Error\n");
    exit(1);
}
hello1.y:
%{
/*
 * Parser for the hello1 example: accepts one HI token followed by one BYE
 * token, printing a message for each; the BYE action ends the program.
 */
#include <stdio.h>
#include <stdlib.h>

/* Both functions are provided by hello1.l. */
int yylex(void);
int yyerror(const char *msg);
%}
%token HI BYE
%%
program:
    hi bye
    ;
hi:
    HI { printf("Hello World\n"); }
    ;
bye:
    BYE { printf("Bye World\n"); exit(0); }
    ;
OUTPUT:

SPCC Manual (FINAL)

Uploaded by

Copyright:

Available Formats

You might also like

SPCC Manual (FINAL)

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

SPCC Manual (FINAL)

Uploaded by

Copyright:

Available Formats

Experiment No – 1

AIM: To study and implement 2 Pass Assembler.

1) ASSEMBLER - An assembler program creates object code by translating combinations of

b_reg[1] = prog[i].op2[0] - '0';

[t0 = a + b] [t1 = t0 + c] [d = t0 + t1]

You might also like