#!/usr/bin/python3
# ==========================================================
# my programming language interpreter
#
# Design Questions:
# a. Move some of the external functions into the object?
# b. Use Python's dataclass decorator?
# c. Keep variables as strings and only convert to
# do arithmetic?
# d. Create a more sophisticated (complex) tokenizer?
# e. Add user input capability?
# ==========================================================
import re
import sys
# **********************************************************
# class definition - my programming language interpreter
# **********************************************************
class Program():
    """State of one program being interpreted.

    Holds the source lines, their token lists, the variable and goto
    tables and a few counters, plus methods that print that state.
    """

    def __init__(self):
        self.filepath = ''            # file the program was read from
        self.verbose = False          # print progress messages when True
        self.variable_dict = {}       # variable name -> value
        self.goto_dict = {}           # loop label -> program line index
        self.program_lines = []       # source text, one entry per line
        self.token_lines = []         # token list for each program line
        self.lines_read = 0           # lines read from the file
        self.lines_in_program = 0     # lines kept as program lines
        self.current_cmd = 0          # index of the line being executed

    # ----------------------------------------------------------
    # ---- interpreter's internal state
    # ----------------------------------------------------------
    def internals(self, title=None):
        """Print the whole interpreter state, optionally under a title."""
        print()
        print('---- internals --------------------')
        if title is not None:
            print(title)
        print()
        print(f'filepath "{self.filepath}"')
        print(f'verbose is {self.verbose}')
        print(f'lines read {self.lines_read}')
        print(f'program lines {self.lines_in_program}')
        print(f'current cmd {self.current_cmd}')
        # ---- program lines, token lines, then both dictionaries
        self.display_program_lines()
        self.display_token_lines()
        self.display_variable_dictionary()
        self.display_goto_dictionary()
        print()

    # ------------------------------------------------------
    # ---- display program lines
    # ------------------------------------------------------
    def display_program_lines(self, both=False):
        """List the program lines; when *both* is true also list the
        token list stored at the same index."""
        print('---- program lines ----------------')
        for number, text in enumerate(self.program_lines):
            print(f'[{number:02}] {text}')
            if both:
                print(f'[{number:02}] {self.token_lines[number]}')

    # ------------------------------------------------------
    # ---- display token lines
    # ------------------------------------------------------
    def display_token_lines(self):
        """List the token list of every tokenized line."""
        print('----- tokens lines -----------------')
        for number, tokens in enumerate(self.token_lines):
            print(f'[{number:02}] {tokens}')

    # ------------------------------------------------------
    # ---- display variable dictionary
    # ------------------------------------------------------
    def display_variable_dictionary(self):
        """Print the number of variables followed by each name and value."""
        print('---- variable dictionary -----------')
        print(f' length: {len(self.variable_dict)}')
        # ---- an empty dictionary simply prints no entries
        for name, value in self.variable_dict.items():
            print(f' {name} = {value}')

    # ------------------------------------------------------
    # ---- display goto dictionary
    # ------------------------------------------------------
    def display_goto_dictionary(self):
        """Print the number of goto labels followed by each label and
        the line index it maps to."""
        print('---- goto dictionary ---------------')
        print(f' length {len(self.goto_dict)}')
        for label, line_index in self.goto_dict.items():
            print(f' "{label}" is {line_index}')
# **********************************************************
# end of class definition
# **********************************************************
# ----------------------------------------------------------
# ---- toggle verbose messages flag
# ----------------------------------------------------------
def verbose(p):
    """Flip the program object's verbose flag (on -> off, off -> on)."""
    p.verbose = not p.verbose
# ----------------------------------------------------------
# ---- return a variable's value
# ----------------------------------------------------------
def is_variable(p, s):
    """Look up *s* in the program's variable dictionary.

    Returns (True, value) when *s* is a known variable, else (False, 0).
    """
    known = s in p.variable_dict
    return (True, p.variable_dict[s]) if known else (False, 0)
# ----------------------------------------------------------
# ---- convert a string to an integer
# ----------------------------------------------------------
def is_integer(s):
    """Try to convert *s* to an integer with int().

    Returns (True, n) on success and (False, 0) when int() rejects the
    value. Only conversion failures are caught; exceptions such as
    SystemExit or KeyboardInterrupt are no longer swallowed.
    """
    try:
        return (True, int(s))
    except (ValueError, TypeError, OverflowError):
        return (False, 0)
# ----------------------------------------------------------
# ---- get a number's or variable's value
# ----------------------------------------------------------
def get_value(p, s):
    """Resolve *s* to a value: integer constant first, then variable.

    Returns (True, value) on success. Otherwise prints a value error
    message and returns (False, 0).
    """
    ok, number = is_integer(s)
    if not ok:
        # ---- not a constant, maybe a variable
        ok, number = is_variable(p, s)
    if ok:
        return (True, number)
    print()
    print(f'value error: "{s}" is not '
          'a constant or a variable')
    return (False, 0)
# ----------------------------------------------------------
# ---- evaluate an expression
# ---- returning the "tf" flag as False means that the
# ---- expression is invalid in some way.
# ----------------------------------------------------------
def is_expression(p,tokens:list) -> tuple:
# ---- 1 token?
if len(tokens) == 1:
tf,n = get_value(p,tokens[0])
if tf: return (True,n)
# ---- 3 tokens?
if len(tokens) == 3:
tf,a = get_value(p,tokens[0])
if not tf: return (False,0)
tf,b = get_value(p,tokens[2])
if not tf: return (False,0)
if tokens[1] == '+': return (True,a+b)
if tokens[1] == '-': return (True,a-b)
if tokens[1] == '<':
return (True,1) if a < b else (True,0)
if tokens[1] == '>':
return (True,1) if a > b else (True,0)
if tokens[1] == '=':
return (True,1) if a == b else (True,0)
print()
print(f'Expression error: {tokens}')
return (False,0)
# ----------------------------------------------------------
# ---- read a program from a file, save it in a
# ---- program object, and execute it
# ----------------------------------------------------------
def execute_file(filepath: str) -> bool:
    """Read a program from *filepath* into a Program object and run it.

    Every line read is counted in lines_read. Blank lines and lines
    starting with '#' are dropped; the remaining (stripped) lines are
    stored as program lines and counted in lines_in_program.

    Returns the result of execute_program(), or False (after printing
    an error message) when the file cannot be opened or read.
    """
    p = Program()
    p.filepath = filepath
    try:
        with open(p.filepath, 'r') as f:
            for line in f:
                p.lines_read += 1
                line = line.strip()
                if not line:
                    continue                # empty string
                if line.startswith('#'):
                    continue                # comment
                p.program_lines.append(line)
                p.lines_in_program += 1
    except OSError as err:
        # ---- report the problem the same way other errors are reported
        # ---- instead of ending with a traceback
        print()
        print(f'File error: cannot read "{filepath}" ({err})')
        return False
    print()
    p.display_program_lines()
    return execute_program(p)
# ----------------------------------------------------------
# ---- tokenize a line from the program
# ---- Note: Tokens are separated by one or more
# ---- commas and/or spaces
# ----------------------------------------------------------
def tokenize_line(line) -> tuple:
    """Split one program line into tokens.

    Returns (True, tokens) on success and (False, []) when an IF line
    does not have the form "IF <expression> : <command>".

    PRINT keeps the rest of the line as one untouched token (it is
    parsed when the command executes). IF is split into its expression
    and its command, and the command is tokenized recursively. Any
    other line is upper-cased and split on commas and/or spaces.
    """
    up_line = line.upper()
    # ---- print command? (special processing)
    if up_line.startswith('PRINT'):
        txt = line[5:].strip()
        toks = ['PRINT', txt] if txt else ['PRINT']
        return (True, toks)
    # ---- if command? (special processing)
    if up_line.startswith('IF'):
        # ---- break line into two parts: expression and command.
        # ---- the expression match is non-greedy so the split is at
        # ---- the FIRST " : "; a " : " inside the command (for
        # ---- example in a print string) stays with the command
        res = re.search(r'^.. (.+?) : (.+)$', line)
        if res is None:
            return (False, [])
        # ---- tokenize expression
        e_toks = res.group(1).replace(',', ' ').upper().split()
        # ---- tokenize command (stripped so keyword detection works
        # ---- when extra spaces follow the colon)
        tf, c_toks = tokenize_line(res.group(2).strip())
        if not tf:
            return (False, [])
        return (True, ['IF'] + e_toks + [':'] + c_toks)
    # ---- other command
    return (True, up_line.replace(',', ' ').split())
# ----------------------------------------------------------
# ---- tokenize the lines in the program and store
# ---- the tokens separately in the program object
# ----------------------------------------------------------
def tokenize_program(p) -> bool:
    """Tokenize every program line and append the result to p.token_lines.

    Stops at the first line the tokenizer rejects, prints which line
    failed, and returns False. Returns True when all lines tokenized.
    """
    for line_no, source in enumerate(p.program_lines):
        if p.verbose:
            print(f'tokenize: {source}')
        ok, tokens = tokenize_line(source)
        if ok:
            p.token_lines.append(tokens)
            continue
        # ---- tokenizer rejected this line
        print()
        print(f'program tokenizer failed (line {line_no})')
        print(source)
        print()
        return False
    return True
# ----------------------------------------------------------
# ---- EXIT command
# ----------------------------------------------------------
def execute_exit_command(p, tokens: list):
    """EXIT: end the interpreter by calling sys.exit().

    Neither argument is used and the function never returns.
    """
    sys.exit()
# ----------------------------------------------------------
# ---- IF command
# ----------------------------------------------------------
def execute_if_command(p, tokens: list) -> bool:
    """IF <expression> : <command>

    The expression is either one token (colon at index 2) or three
    tokens (colon at index 4). The command runs only when the
    expression evaluates to a true (non-zero) value.

    Returns False when the token list is malformed, the expression is
    invalid, or the executed command fails; True otherwise.
    """
    # ---- locate the colon that separates expression and command;
    # ---- searching (rather than indexing) keeps short or malformed
    # ---- token lists from raising IndexError
    try:
        colon = tokens.index(':')
    except ValueError:
        colon = -1
    if colon not in (2, 4):
        print()
        print(f'If error: unknown expression ({tokens})')
        return False
    if_cmd = tokens[colon + 1:]
    if not if_cmd:
        print()
        print(f'If error: missing command ({tokens})')
        return False
    # ---- evaluate the expression
    tf, val = is_expression(p, tokens[1:colon])
    if not tf:
        return False
    # ---- expression result is False?
    if not val:
        return True
    # ---- expression result is True, execute the command
    if not execute_command(p, if_cmd):
        return False
    return True
# ----------------------------------------------------------
# ---- GOTO command
# ----------------------------------------------------------
def execute_goto_command(p, tokens: list) -> bool:
    """GOTO <location>

    Sets p.current_cmd to the line index recorded for <location> in
    p.goto_dict. Returns False (after printing an error) when the
    location is missing from the command or is not in p.goto_dict.
    """
    # ---- a bare GOTO has no location token
    if len(tokens) < 2:
        print()
        print('Goto error: missing location')
        return False
    label = tokens[1]
    if label in p.goto_dict:
        p.current_cmd = p.goto_dict[label]
        return True
    print()
    print(f'Goto error: location {label} does not exist')
    return False
# ----------------------------------------------------------
# ---- LOOP command
# ----------------------------------------------------------
def execute_loop_command(p, tokens: list) -> bool:
    """LOOP <location>

    Records p.current_cmd under <location> in p.goto_dict so a GOTO
    can jump to it. Executing the same LOOP line again (which happens
    when a GOTO jumps to an earlier location) is accepted; defining
    the same location on a different line is an error.

    Returns False (after printing an error) when the location is
    missing from the command or already defined on another line.
    """
    # ---- a bare LOOP has no location token
    if len(tokens) < 2:
        print()
        print('Loop error: missing location')
        return False
    label = tokens[1]
    if label not in p.goto_dict:
        p.goto_dict[label] = p.current_cmd
        return True
    # ---- same line executed again, nothing to do
    if p.goto_dict[label] == p.current_cmd:
        return True
    print()
    print(f'Loop error: '
          f'location ({label}) already exists')
    return False
# ----------------------------------------------------------
# ---- PRINT command (requires special processing)
# ----------------------------------------------------------
def execute_print_command(p, tokens: list) -> bool:
    """PRINT [item, item, ...]

    tokens[1] (if present) is the raw text after the PRINT keyword.
    Items are separated by commas and/or spaces. Text between double
    quotes is printed as written; any other item is upper-cased and
    resolved with get_value(). Everything is printed on one line.

    Returns False when an item cannot be resolved or when the text
    ends inside a string; True otherwise.
    """
    # ---- PRINT with nothing after it prints an empty line
    if len(tokens) == 1:
        print()
        return True
    text = tokens[1].strip()
    end = len(text)
    pos = 0               # index of the character being examined
    pending = ''          # characters of the value collected so far
    state = 'search'      # 'search', 'string' or 'value'
    while True:
        at_end = pos >= end
        ch = None if at_end else text[pos]
        if state == 'search':
            # ---- look for the start of something that is
            # ---- not a comma or a space
            if at_end:
                break
            if ch == ',' or ch == ' ':
                pos += 1
            elif ch == '"':           # start of string
                pos += 1
                state = 'string'
            else:                     # start of value (not consumed yet)
                pending = ''
                state = 'value'
        elif state == 'string':
            if at_end:
                break
            if ch == '"':             # end of string
                pos += 1
                state = 'search'
            else:
                print(ch, end='')
                pos += 1
        else:
            # ---- state is 'value': a separator or the end of the
            # ---- text ends the value, anything else extends it
            if at_end or ch == ',' or ch == ' ':
                ok, val = get_value(p, pending.upper())
                if not ok:
                    return False
                print(val, end='')
                pos += 1
                state = 'search'
            else:
                pending += ch
                pos += 1
    print()
    # ---- leaving the loop in any state but 'search' means the
    # ---- text ended in the middle of an item
    if state != 'search':
        print()
        print(f'Print error: ({text})')
        print(f'Print error: state={state}')
        print()
        return False
    return True
# ----------------------------------------------------------
# ---- SET command
# ----------------------------------------------------------
def execute_set_command(p, tokens: list) -> bool:
    """SET <variable>, <expression>

    Evaluates the expression in tokens[2:] and stores the result in
    p.variable_dict under the variable name tokens[1]. The name must
    consist of letters only.

    Returns False (after printing an error) when the name is missing
    or not alphabetic, or when the expression is invalid.
    """
    # ---- isalpha must be CALLED; the bare method object is always
    # ---- truthy, so the name was never actually validated
    if len(tokens) < 2 or not tokens[1].isalpha():
        print()
        print(f'Set error: missing or invalid variable name ({tokens})')
        return False
    var = tokens[1]
    tf, val = is_expression(p, tokens[2:])
    if not tf:
        print()
        print(f'Command error: '
              f'{p.program_lines[p.current_cmd]}')
        return False
    p.variable_dict[var] = val
    return True
# ----------------------------------------------------------
# ---- execute a command (a list of tokens)
# ----------------------------------------------------------
def execute_command(p,tokens:list) -> bool:
if p.verbose:
print(f'execute_command: {tokens}')
match tokens[0]:
case 'EXIT':
tf = execute_exit_command(p,tokens)
if not tf: return False
case 'GOTO':
tf = execute_goto_command(p,tokens)
if not tf: return False
case 'IF':
tf = execute_if_command(p,tokens)
if not tf: return False
case 'LOOP':
tf = execute_loop_command(p,tokens)
if not tf: return False
case 'PRINT':
tf = execute_print_command(p,tokens)
if not tf: return False
case 'SET':
tf = execute_set_command(p,tokens)
if not tf: return False
# ---- special commands
case 'INTERNALS':
p.internals('My Programming Language')
case 'VERBOSE':
verbose(p)
# ---- what?
case _:
print()
print(f'unknown key word ({tokens[0]}) '\
f'on line {idx}')
print()
return False
return True
# ----------------------------------------------------------
# ---- execute a program one line at a time
# ----------------------------------------------------------
def execute_program(p):
    """Tokenize the program held in *p*, then run it line by line.

    p.current_cmd is the program counter: it starts at 0, a command
    may change it while executing, and it is advanced by one after
    every successful command.

    Returns False when tokenizing or any command fails, True when
    execution runs past the last line.
    """
    if p.verbose:
        p.display_program_lines()
        print()
    # ---- tokenize the lines in the program
    if not tokenize_program(p):
        return False
    # ---- process each program line's list of tokens
    p.current_cmd = 0
    while p.current_cmd < len(p.token_lines):
        line_no = p.current_cmd
        if p.verbose:
            print(f'processing line {line_no}')
        if not execute_command(p, p.token_lines[line_no]):
            return False
        p.current_cmd += 1
    return True
# ----------------------------------------------------------
# ---- alpha tests (initial testing)
# ---- Note: comments and blank lines are removed from
# ---- input files when they are read in,
# ---- therefore they are not in this test code.
# ----------------------------------------------------------
def alpha_tests():
    """Run one of the canned test programs through the interpreter.

    The programs are stored without comments or blank lines because
    those are removed when a program is read from a file.
    """
    canned = {
        # ---- test set, and print (also expressions)
        'test1': [
            'set x, 100',
            'set y, 300',
            'set z, x - y',
            'print',
            'print "x is not ",200,", it is ",x',
            'print',
            'print "x=",x," y=",y," z is ",z',
            'set a, x > y',
            'set b, x < y',
            'print',
            'print "x > y is ",a," x < y is ",b',
            'internals',
        ],
        # ---- test if
        'test2': [
            'set x, 100',
            'if x > 10 : set x, x + 1',
            'internals',
        ],
        # ---- test loop, goto, print
        'test3': [
            'print',
            'set x, 1',
            'loop abc',
            'print "x is ",x',
            'set x, x + 1',
            'if x < 10 : goto abc',
            'print "x final value ",x',
            'internals',
        ],
        # ---- test program
        'test4': [
            'print',
            'set x, 100',
            'set y, 200',
            'print "x = ",x',
            'print "y = ",y',
            'set a, x + y',
            'set b, x - y',
            'set c, x < y',
            'set d, x > y',
            'set e, x = y',
            'print "x + y is ",a',
            'print "x - y is ",b',
            'print "x < y is ",c',
            'print "x > y is ",d',
            'print "x = y is ",e',
        ],
    }
    # ---- program to run (change the key to run another one)
    prog_lines = canned['test3']
    # ---- create a program object and load the lines into it
    p = Program()
    p.verbose = False
    p.program_lines = prog_lines
    p.lines_in_program = len(prog_lines)
    p.display_program_lines()
    # ---- execute the program and report a failure
    if not execute_program(p):
        print()
        print('program failed')
        print()
# ----------------------------------------------------------
# ---- main
# ----------------------------------------------------------
if __name__ == '__main__':
    # ---- no command line argument: test a "canned" program
    # ---- (unchanged default behaviour)
    if len(sys.argv) < 2:
        alpha_tests()
        sys.exit()
    # ---- otherwise load and execute the program file named by the
    # ---- first argument. this path used to sit behind an
    # ---- unconditional sys.exit() and could never run; the unused
    # ---- Program object it created has been dropped
    program_file = sys.argv[1]
    print()
    if execute_file(program_file):
        print('program worked')
    else:
        print('program failed')
    print()