Carpio Quiz Compiler Design

Tristan Jay C.
Bermudo
BSCS-2A
1. 3 Parts of Compiler
# 1 PARSING
## a. Lexical Analysis/Tokenization (vocabulary)
## b. Syntactic Analysis/Parsing (grammar)
# 2 TRANSFORMATION
## a. Traversal
## b. Transform via Traversal
# 3 CODE GENERATION
## a. Stringify!
# 1 PARSING
## a. Lexical Analysis/Tokenization (vocabulary)
## b. Syntactic Analysis/Parsing (grammar)
# 2 TRANSFORMATION
## a. Traversal
## b. Transform via Traversal
# 3 CODE GENERATION
## a. Stringify!
2. Tokenizer Function
#Tokenizer function receives starting input

# e.g. (add 2 (subtract 4 2))
def tokenizer(input_expression):
    """Split a Lisp-style expression string into a flat list of tokens.

    Each token is a dict with a ``'type'`` and a ``'value'`` key. The
    recognised token types are:

    * ``'left_paren'``  -- the character ``(``
    * ``'right_paren'`` -- the character ``)``
    * ``'number'``      -- a run of one or more characters matching ``[0-9]``
    * ``'name'``        -- a run of one or more characters matching ``[a-z]``
      (case-insensitive)

    Whitespace separates tokens and produces no token of its own.

    Args:
        input_expression: The source text, e.g. ``"(add 2 (subtract 4 2))"``.

    Returns:
        A list of token dicts in source order (empty for empty input).

    Raises:
        ValueError: If a character matches none of the patterns above.
    """
    # Imported locally because no module-level import of ``re`` is visible
    # in this file.
    import re

    # Search patterns for the three character classes the lexer knows about.
    letter = re.compile(r"[a-z]", re.I)
    digit = re.compile(r"[0-9]")
    white_space = re.compile(r"\s")

    length = len(input_expression)
    # Cursor into input_expression.
    current = 0
    # Tokens collected so far.
    tokens = []

    while current < length:
        char = input_expression[current]

        # Whitespace only separates tokens; skip it.
        if white_space.match(char):
            current += 1
            continue

        if char == '(':
            tokens.append({'type': 'left_paren', 'value': '('})
            current += 1
            continue

        if char == ')':
            tokens.append({'type': 'right_paren', 'value': ')'})
            current += 1
            continue

        # Numbers may span several digits. The bounds check stops the scan
        # at the end of the input instead of indexing past it.
        if digit.match(char):
            start = current
            while current < length and digit.match(input_expression[current]):
                current += 1
            tokens.append({
                'type': 'number',
                'value': input_expression[start:current],
            })
            continue

        # Names may span several letters; same bounds-checked scan as above.
        if letter.match(char):
            start = current
            while current < length and letter.match(input_expression[current]):
                current += 1
            tokens.append({
                'type': 'name',
                'value': input_expression[start:current],
            })
            continue

        # Anything else is not part of the language.
        raise ValueError('what are THOSE?: ' + char)

    return tokens
3. Parsing Function
#The parse function creates an Abstract Syntax Tree given the computed
#tokens from the previous function
def parser(tokens):
    """Build an Abstract Syntax Tree from a list of token dicts.

    Every token is a dict with ``'type'`` and ``'value'`` keys. A ``'number'``
    token becomes a ``NumberLiteral`` node. A ``'left_paren'`` token opens a
    ``CallExpression`` node: the following ``'name'`` token supplies the
    node's ``'name'``, and every node parsed up to the matching
    ``'right_paren'`` token is appended to its ``'params'`` list.

    Args:
        tokens: Sequence of token dicts, in source order.

    Returns:
        A dict ``{'type': 'Program', 'body': [...]}`` whose ``'body'`` holds
        one node per top-level expression (empty for an empty token list).

    Raises:
        TypeError: If a token of an unexpected type is encountered; the
            exception argument is the offending token type.
        ValueError: If the tokens end before an open expression is closed.
    """
    # Cursor into ``tokens``; shared with the nested helpers through the
    # closure so separate parser() calls do not interfere with each other.
    current = 0

    def peek():
        """Return the token under the cursor, failing cleanly at the end."""
        if current >= len(tokens):
            raise ValueError('unexpected end of input: unclosed expression')
        return tokens[current]

    def walk():
        """Parse one node starting at the cursor and advance past it."""
        nonlocal current
        token = peek()
        token_type = token.get('type')

        # If a number is encountered, return a "NumberLiteral" node.
        if token_type == 'number':
            current += 1
            return {
                'type': 'NumberLiteral',
                'value': token.get('value'),
            }

        # If open parentheses encountered, return a "CallExpression" node.
        if token_type == 'left_paren':
            # Skip past the parenthesis, we're not storing that.
            current += 1
            token = peek()
            if token.get('type') != 'name':
                raise TypeError(token.get('type'))

            # Store the name of the operation.
            node = {
                'type': 'CallExpression',
                'name': token.get('value'),
                'params': [],
            }
            current += 1

            # Expressions nest, so each parameter is parsed recursively.
            # The cursor is re-read on every pass because walk() moves it.
            while peek().get('type') != 'right_paren':
                node['params'].append(walk())

            # Consume the closing parenthesis.
            current += 1
            return node

        # Error if unknown type encountered.
        raise TypeError(token_type)

    # 4. Initializing an Empty Abstract Syntax Tree
    ast = {
        'type': 'Program',
        'body': [],
    }
    # Then populate it by calling walk() until the cursor reaches the end
    # of the token list.
    while current < len(tokens):
        ast['body'].append(walk())
    # Return the completed AST.
    return ast
5. Code Generation Function

##last part! Code generation
#a recursive stringify function that iterates
#through the newly created AST, node by node, continually
#building a string output given the values in each node.
def codeGenerator(node):
    """Recursively turn an AST node into its output source string.

    Handled node types (selected by ``node['type']``):

    * ``'Program'``             -- each entry of ``node['body']`` is
      generated and the results are joined with newlines.
    * ``'Identifier'``          -- yields ``node['name']``.
    * ``'NumberLiteral'``       -- yields ``node['value']`` as a string.
    * ``'ExpressionStatement'`` -- yields the code for
      ``node['expression']`` followed by ``;``.
    * ``'CallExpression'``      -- yields ``callee(arg, arg, ...)`` built
      from ``node['callee']`` and ``node['arguments']``.

    Args:
        node: An AST node dict with at least a ``'type'`` key.

    Returns:
        The generated code for ``node`` as a string.

    Raises:
        TypeError: If ``node['type']`` is not one of the types above; the
            exception argument is the unrecognised type.
    """
    node_type = node['type']

    if node_type == 'Program':
        return '\n'.join(map(codeGenerator, node['body']))

    if node_type == 'Identifier':
        return node['name']

    if node_type == 'NumberLiteral':
        # Coerce to str so the joins above/below never receive a non-string.
        return str(node['value'])

    if node_type == 'ExpressionStatement':
        expression = codeGenerator(node['expression'])
        return '%s;' % expression

    if node_type == 'CallExpression':
        callee = codeGenerator(node['callee'])
        params = ', '.join(map(codeGenerator, node['arguments']))
        return "%s(%s)" % (callee, params)

    raise TypeError(node_type)
#finally, let's put it all together

def compiler(input_expression):
    """Run the full pipeline on an input expression and return the output.

    The stages are, in order: ``tokenizer`` -> ``parser`` -> ``transformer``
    -> ``codeGenerator``.

    Args:
        input_expression: The source text to compile.

    Returns:
        Whatever ``codeGenerator`` produces for the transformed AST.
    """
    # Given an input expression, create a set of tokens.
    tokens = tokenizer(input_expression)
    # Create an abstract syntax tree given those tokens.
    ast = parser(tokens)
    # Create a transformed AST given the existing one.
    # NOTE(review): `transformer` is not defined in the visible part of this
    # file -- confirm it is provided elsewhere, otherwise this line raises
    # NameError.
    new_ast = transformer(ast)
    # Stringify the transformed AST into an output expression.
    return codeGenerator(new_ast)
def main():
    """Compile a sample expression and print the result."""
    # Named `expression` rather than `input` so the builtin is not shadowed.
    expression = "(add 2 (subtract 4 2))"
    output = compiler(expression)
    print(output)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Carpio Quiz Compiler Design

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Carpio Quiz Compiler Design

Uploaded by

Copyright:

Available Formats

Tristan Jay C.

#Tokenizer function receives starting input

#if open parentheses encountered, return a "CallExpression" node

4. Initializing an Empty Abstract Syntax Tree

5. Code Generation Function

#finally, let's put it all together

You might also like