#!/usr/bin/python3 # ========================================================== # my programming language interperter # # Design Questions: # a. Move some of the external functions into the object? # b. Use Python's dataclass decorator? # c. Keep variables as strings and only convert to # do arithmetic? # d. Create a more sophisticated (complex) tokenizer? # e. Add user input capability? # ========================================================== import re import sys # ********************************************************** # class definition - my programming language interperter # ********************************************************** class Program(): def __init__(self): self.filepath = '' self.verbose = False self.variable_dict = {} self.goto_dict = {} self.program_lines = [] self.token_lines = [] self.lines_read = 0 self.lines_in_program = 0 self.current_cmd = 0 # ---------------------------------------------------------- # ---- interperter's internal state # ---------------------------------------------------------- def internals(self,title=None): print() print('---- internals --------------------') if title is not None: print(title) print() print(f'filepath "{self.filepath}"') print(f'verbose is {self.verbose}') print(f'lines read {self.lines_read}') print(f'program lines {self.lines_in_program}') print(f'current cmd {self.current_cmd}') # ---- program lines self.display_program_lines() # ---- token lines self.display_token_lines() # ---- variable dictionary self.display_variable_dictionary() # ---- goto dictionary self.display_goto_dictionary() print() return # ------------------------------------------------------ # ---- display program lines # ------------------------------------------------------ def display_program_lines(self,both=False): print('---- program lines ----------------') for idx,line in enumerate(self.program_lines): print(f'[{idx:02}] {line}') if both: print(f'[{idx:02}] {self.token_lines[idx]}') # ------------------------------------------------------ # ---- display token lines # ------------------------------------------------------ def display_token_lines(self): print('----- tokens lines -----------------') for idx,line in enumerate(self.token_lines): print(f'[{idx:02}] {line}') # ------------------------------------------------------ # ---- display variable dictionary # ------------------------------------------------------ def display_variable_dictionary(self): print('---- variable dictionary -----------') l = len(self.variable_dict) print(f' length: {l}') if l > 0: for key, value in self.variable_dict.items(): print(f' {key} = {value}') # ------------------------------------------------------ # ---- display goto dictionary # ------------------------------------------------------ def display_goto_dictionary(self): print('---- goto dictionary ---------------') l = len(self.goto_dict) print(f' length {l}') if l > 0: for key, value in self.goto_dict.items(): print(f' "{key}" is {value}') # ********************************************************** # end of class definition # ********************************************************** # ---------------------------------------------------------- # ---- toggle verbose messages flag # ---------------------------------------------------------- def verbose(p): if p.verbose: p.verbose = False else: p.verbose = True # ---------------------------------------------------------- # ---- return a variable's value # ---------------------------------------------------------- def is_variable(p,s): if s in p.variable_dict: return (True,p.variable_dict[s]) return (False,0) # ---------------------------------------------------------- # ---- convert a string to an integer # ---------------------------------------------------------- def is_integer(s): try: n = int(s) return (True,n) except: return (False,0) # ---------------------------------------------------------- # ---- get a number's or variable's value # ---------------------------------------------------------- def get_value(p,s): tf,num = is_integer(s) if tf: return (True,num) tf,num = is_variable(p,s) if tf: return (True,num) print() print(f'value error: "{s}" is not ' +\ 'a constant or a variable') return (False,0) # ---------------------------------------------------------- # ---- evaluate an expression # ---- returning the "tf" flag as False means that the # ---- expression is invalid in some way. # ---------------------------------------------------------- def is_expression(p,tokens:list) -> tuple: # ---- 1 token? if len(tokens) == 1: tf,n = get_value(p,tokens[0]) if tf: return (True,n) # ---- 3 tokens? if len(tokens) == 3: tf,a = get_value(p,tokens[0]) if not tf: return (False,0) tf,b = get_value(p,tokens[2]) if not tf: return (False,0) if tokens[1] == '+': return (True,a+b) if tokens[1] == '-': return (True,a-b) if tokens[1] == '<': return (True,1) if a < b else (True,0) if tokens[1] == '>': return (True,1) if a > b else (True,0) if tokens[1] == '=': return (True,1) if a == b else (True,0) print() print(f'Expression error: {tokens}') return (False,0) # ---------------------------------------------------------- # ---- read a program from a file, save it in a # ---- program object, and execute it # ---------------------------------------------------------- def execute_file(filepath:str) -> bool: p = Program() p.filepath = filepath with open(p.filepath,'r') as f: for line in f: p.lines_read += 1 line = line.strip() if not line: continue # empty string if line.startswith('#'): continue # comment p.program_lines.append(line) p.lines_in_program += 1 print() p.display_program_lines() return execute_program(p) # ---------------------------------------------------------- # ---- tokenize a line from the program # ---- Note: Tokens are be separated by one or more # ---- commas and/or spaces # ---------------------------------------------------------- def tokenize_line(line) -> list: up_line = line.upper() # ---- print command? (special processing) if up_line.startswith('PRINT'): txt = line[5:].strip() if txt: toks = ['PRINT', txt] else: toks = ['PRINT'] return(True,toks) # ---- if command? (special processing) if up_line.startswith('IF'): # ---- break line into two parts: # ---- expression and command pattern = r'^.. (.+) : (.+)$' res = re.search(pattern,line) if res is None: return (False,[]) # ---- tokenize expression e_toks = re.sub(',',' ', res.group(1)).upper().split() # ---- tokenize command tf,c_toks = tokenize_line(res.group(2)) if not tf: return (False,[]) toks = ['IF'] + e_toks + [':'] + c_toks return (True,toks) # ---- other command toks = re.sub(',',' ',up_line).split() return (True,toks) # ---------------------------------------------------------- # ---- tokenize the lines in the program and store # ---- the tokens separately in the program object # ---------------------------------------------------------- def tokenize_program(p) -> bool: for idx,line in enumerate(p.program_lines): if p.verbose: print(f'tokenize: {line}') tf,toks = tokenize_line(line) if not tf: print() print(f'program tokenizer failed (line {idx})') print(line) print() return False p.token_lines.append(toks) return True # ---------------------------------------------------------- # ---- EXIT command # ---------------------------------------------------------- def execute_exit_command(p,tokens:list): ##print(f'execute_exit_command: {tokens}') sys.exit() # ---------------------------------------------------------- # ---- IF command # ---------------------------------------------------------- def execute_if_command(p,tokens:list) -> bool: ##print(f'execute_if_command: {tokens}') # ---- split into expression and command if tokens[2] == ':': exp = tokens[1:2] tf,val = is_expression(p,exp) if not tf: return False if_cmd = tokens[3:] elif tokens[4] == ':': exp = tokens[1:4] tf,val = is_expression(p,exp) if not tf: return False if_cmd = tokens[5:] else: print() print(f'If error: unknow expression ({tokens})') return False # ---- expression results is False? if not val: return True # ---- expression results is True, execute the command tf = execute_command(p,if_cmd) if not tf: return False return True # ---------------------------------------------------------- # ---- GOTO command # ---------------------------------------------------------- def execute_goto_command(p,tokens:list) -> bool: ##print(f'execute_goto_command: {tokens}') if tokens[1] in p.goto_dict: p.current_cmd = p.goto_dict[tokens[1]] return True print() print(f'Goto error: location {tokens[1]} does no exist') return False # ---------------------------------------------------------- # ---- LOOP command # ---------------------------------------------------------- def execute_loop_command(p,tokens:list) -> bool: ##print(f'execute_loop_command: {tokens}') if tokens[1] not in p.goto_dict: p.goto_dict[tokens[1]] = p.current_cmd return True print() print(f'Loop error: '\ f'location ({tokens[1]}) already exists') return False # ---------------------------------------------------------- # ---- PRINT command (requires special processing) # ---------------------------------------------------------- def execute_print_command(p,tokens:list) -> bool: ##print(f'execute_print_command: {tokens}') if len(tokens) == 1: print() return True ss = tokens[1].strip() str_idx = 0 # print string index value = '' # temporary value holder state = 'search' # searching for something while True: match state: case 'search': # ---- search for the start of somthing # ---- that is not a comma or space if str_idx >= len(ss): break if ss[str_idx] == ',' or ss[str_idx] == ' ': str_idx += 1 elif ss[str_idx] == '"': # start of string str_idx += 1 state = 'string' else: # start of value value = '' state = 'value' case 'string': # found string if str_idx >= len(ss): break if ss[str_idx] == '"': # found end of string str_idx += 1 state = 'search' else: print(ss[str_idx],end='') str_idx += 1 case 'value': # found a value # ---- if end of variable or constant # ---- print variable or constant # ---- if not add character to value string if str_idx >= len(ss) \ or ss[str_idx] == ',' \ or ss[str_idx] == ' ': tf,val = get_value(p,value.upper()) if not tf: return False print(val,end='') str_idx += 1 state = 'search' else: value += ss[str_idx] str_idx += 1 print() if state != 'search': print() print(f'Print error: ({ss})') print(f'Print error: state={state}') print() return False return True # ---------------------------------------------------------- # ---- SET command # ---------------------------------------------------------- def execute_set_command(p,tokens:list) -> bool: ##print(f'execute_set_command: {tokens}') if not tokens[1].isalpha: return False var = tokens[1] tf,val = is_expression(p,tokens[2:]) if not tf: print() print(f'Command error: '\ f'{p.program_lines[p.current_cmd]}') return False p.variable_dict[var] = val return True # ---------------------------------------------------------- # ---- execute a command (a list of tokens) # ---------------------------------------------------------- def execute_command(p,tokens:list) -> bool: if p.verbose: print(f'execute_command: {tokens}') match tokens[0]: case 'EXIT': tf = execute_exit_command(p,tokens) if not tf: return False case 'GOTO': tf = execute_goto_command(p,tokens) if not tf: return False case 'IF': tf = execute_if_command(p,tokens) if not tf: return False case 'LOOP': tf = execute_loop_command(p,tokens) if not tf: return False case 'PRINT': tf = execute_print_command(p,tokens) if not tf: return False case 'SET': tf = execute_set_command(p,tokens) if not tf: return False # ---- special commands case 'INTERNALS': p.internals('My Programming Language') case 'VERBOSE': verbose(p) # ---- what? case _: print() print(f'unknown key word ({tokens[0]}) '\ f'on line {idx}') print() return False return True # ---------------------------------------------------------- # ---- execute a program one line at a time # ---------------------------------------------------------- def execute_program(p): if p.verbose: p.display_program_lines() print() # ---- tokenize the lines in the program tf = tokenize_program(p) if not tf: return False p.current_cmd = 0 # program counter # ---- process each program line's list of tokens while p.current_cmd < len(p.token_lines): if p.verbose: print(f'processing line {p.current_cmd}') tf = execute_command(p,p.token_lines[p.current_cmd]) if not tf: return False p.current_cmd += 1 return True # ---------------------------------------------------------- # ---- alpha tests (initial testing) # ---- Note: comments and blank lines are removed from # ---- input files when they are read in, # ---- therefore they are not in this test code. # ---------------------------------------------------------- def alpha_tests(): # ---- test set, and print (also expressions) test1 = [ 'set x, 100', 'set y, 300', 'set z, x - y', 'print', 'print "x is not ",200,", it is ",x', 'print', 'print "x=",x," y=",y," z is ",z', 'set a, x > y', 'set b, x < y', 'print', 'print "x > y is ",a," x < y is ",b', 'internals' ] # ---- test if test2 = [ 'set x, 100', 'if x > 10 : set x, x + 1', 'internals' ] # ---- test loop, goto, print test3 = [ 'print', 'set x, 1', 'loop abc', 'print "x is ",x', 'set x, x + 1', 'if x < 10 : goto abc', 'print "x final value ",x', 'internals', ] # ---- test program test4 = [ 'print', 'set x, 100', 'set y, 200', 'print "x = ",x', 'print "y = ",y', 'set a, x + y', 'set b, x - y', 'set c, x < y', 'set d, x > y', 'set e, x = y', 'print "x + y is ",a', 'print "x - y is ",b', 'print "x < y is ",c', 'print "x > y is ",d', 'print "x = y is ",e' ] ##prog_lines = test1 ##prog_lines = test2 prog_lines = test3 ##prog_lines = test4 # ---- create a program object p = Program() # ---- add lines of code, etc. to the program object p.verbose = False p.program_lines = prog_lines p.lines_in_program = len(prog_lines) p.display_program_lines() # ---- execute the program tf = execute_program(p) # ---- did it work? if not tf: print() print('program failed') print() # ---------------------------------------------------------- # ---- main # ---------------------------------------------------------- if __name__ == '__main__': # ---- test a "canned" program alpha_tests() sys.exit() # ---- load and execute a program from a file program_file = 'my_programming_language.txt' p = Program() print() if execute_file(program_file): print('program worked') else: print('program failed') print()