# solution_245.py

#!/usr/bin/python3
# ==========================================================
# my programming language interpreter
#
# Design Questions:
# a. Move some of the external functions into the object?
# b. Use Python's dataclass decorator?
# c. Keep variables as strings and only convert to
#    do arithmetic?
# d. Create a more sophisticated (complex) tokenizer?
# e. Add user input capability?
# ==========================================================

import re
import sys

# **********************************************************
# class definition - my programming language interpreter
# **********************************************************

class Program:
    """Hold everything the interpreter knows about one program.

    Attributes set by the constructor:
      filepath          path of the source file ('' when none)
      verbose           when True, helpers print trace messages
      variable_dict     variable name -> current value
      goto_dict         location name -> program line index
      program_lines     source lines of the program
      token_lines       token list for each program line
      lines_read        count of lines read from the file
      lines_in_program  count of lines kept as program lines
      current_cmd       index of the line being executed
    """

    def __init__(self):
        self.filepath = ''
        self.verbose = False
        self.variable_dict = {}
        self.goto_dict = {}
        self.program_lines = []
        self.token_lines = []
        self.lines_read = 0
        self.lines_in_program = 0
        self.current_cmd = 0

    # ----------------------------------------------------------
    # ---- interpreter's internal state
    # ----------------------------------------------------------

    def internals(self,title=None):
        """Print the scalar state followed by every table.

        When a title is given it is printed (plus a blank line)
        directly under the section header.
        """
        print()
        print('---- internals --------------------')

        if title is not None:
            print(title)
            print()

        # ---- scalar state, one labelled line each

        for report_line in (
                f'filepath        "{self.filepath}"',
                f'verbose is      {self.verbose}',
                f'lines read      {self.lines_read}',
                f'program lines   {self.lines_in_program}',
                f'current cmd     {self.current_cmd}'):
            print(report_line)

        # ---- tables: program lines, token lines, variables, gotos

        self.display_program_lines()
        self.display_token_lines()
        self.display_variable_dictionary()
        self.display_goto_dictionary()

        print()

    # ------------------------------------------------------
    # ---- display program lines
    # ------------------------------------------------------

    def display_program_lines(self,both=False):
        """Print each program line with its two-digit index.

        With both=True the token list stored at the same index
        is printed directly under each program line.
        """
        print('---- program lines ----------------')
        for number, text in enumerate(self.program_lines):
            tag = f'[{number:02}]'
            print(f'{tag} {text}')
            if both:
                print(f'{tag} {self.token_lines[number]}')

    # ------------------------------------------------------
    # ---- display token lines
    # ------------------------------------------------------

    def display_token_lines(self):
        """Print each token list with its two-digit index."""
        print('----- tokens lines -----------------')
        for number, tokens in enumerate(self.token_lines):
            print(f'[{number:02}] {tokens}')

    # ------------------------------------------------------
    # ---- display variable dictionary
    # ------------------------------------------------------

    def display_variable_dictionary(self):
        """Print the number of variables, then name = value pairs."""
        print('---- variable dictionary -----------')
        print(f'  length: {len(self.variable_dict)}')
        for name in self.variable_dict:
            print(f'  {name} = {self.variable_dict[name]}')

    # ------------------------------------------------------
    # ---- display goto dictionary
    # ------------------------------------------------------

    def display_goto_dictionary(self):
        """Print the number of locations, then each name and index."""
        print('---- goto dictionary ---------------')
        print(f'  length {len(self.goto_dict)}')
        for name in self.goto_dict:
            print(f'  "{name}" is {self.goto_dict[name]}')

# **********************************************************
# end of class definition
# **********************************************************

# ----------------------------------------------------------
# ---- toggle verbose messages flag
# ----------------------------------------------------------

def verbose(p):
    """Flip the verbose flag stored on the program object p."""
    p.verbose = not p.verbose
        
# ----------------------------------------------------------
# ---- return a variable's value
# ----------------------------------------------------------

def is_variable(p,s):
    """Look up s in the program's variable dictionary.

    Returns (True, value) when s is a known variable name,
    otherwise (False, 0).
    """
    try:
        found = p.variable_dict[s]
    except KeyError:
        return (False,0)
    return (True,found)

# ----------------------------------------------------------
# ---- convert a string to an integer
# ----------------------------------------------------------

def is_integer(s):
    """Try to convert s to an integer.

    Returns (True, n) when int(s) succeeds, otherwise (False, 0).

    Only conversion failures are treated as "not an integer";
    unrelated exceptions such as KeyboardInterrupt propagate
    instead of being silently swallowed.
    """
    try:
        return (True,int(s))
    except (TypeError, ValueError, OverflowError):
        return (False,0)

# ----------------------------------------------------------
# ---- get a number's or variable's value
# ----------------------------------------------------------

def get_value(p,s):
    """Resolve s to a value: integer constant first, then variable.

    Returns (True, value) on success. When s is neither, an
    error message is printed and (False, 0) is returned.
    """
    ok,number = is_integer(s)

    if not ok:
        ok,number = is_variable(p,s)

    if ok:
        return (True,number)

    print()
    print(f'value error: "{s}" is not a constant or a variable')
    return (False,0)

# ----------------------------------------------------------
# ---- evaluate an expression
# ---- returning the "tf" flag as False means that the
# ---- expression is invalid in some way.
# ----------------------------------------------------------

def is_expression(p,tokens:list) -> tuple:

    # ---- 1 token?

    if len(tokens) == 1:
        
        tf,n = get_value(p,tokens[0])
        if tf: return (True,n)

    # ---- 3 tokens?

    if len(tokens) == 3:
        
        tf,a = get_value(p,tokens[0])
        if not tf: return (False,0)
        
        tf,b = get_value(p,tokens[2])
        if not tf: return (False,0)

        if tokens[1] == '+': return (True,a+b)
        if tokens[1] == '-': return (True,a-b)

        if tokens[1] == '<':
            return (True,1) if a < b else (True,0)
        if tokens[1] == '>':
            return (True,1) if a > b else (True,0)
        if tokens[1] == '=':
            return (True,1) if a == b else (True,0)

    print()
    print(f'Expression error: {tokens}')
    return (False,0)

# ----------------------------------------------------------
# ---- read a program from a file, save it in a
# ---- program object, and execute it
# ----------------------------------------------------------

def execute_file(filepath:str) -> bool:
    """Load a program from a text file and execute it.

    Each line is stripped; blank lines and lines starting
    with '#' are counted as read but not stored as program
    lines. The stored lines are displayed and then passed
    to execute_program().

    Returns the result of execute_program(), or False (after
    printing a file error) when the file cannot be opened or
    read, so a bad path is reported like any other failure
    instead of ending in a traceback.
    """
    p = Program()

    p.filepath = filepath

    # ---- only the file access is guarded; errors raised
    # ---- while executing the program are not caught here

    try:
        with open(p.filepath,'r') as f:
            for raw in f:
                p.lines_read += 1
                line = raw.strip()
                if not line or line.startswith('#'):
                    continue          # empty string or comment
                p.program_lines.append(line)
                p.lines_in_program += 1
    except (OSError, UnicodeError) as err:
        print()
        print(f'File error: cannot read "{filepath}" ({err})')
        return False

    print()
    p.display_program_lines()

    return execute_program(p)

# ----------------------------------------------------------
# ---- tokenize a line from the program
# ---- Note: Tokens are separated by one or more
# ----       commas and/or spaces
# ----------------------------------------------------------

def tokenize_line(line) -> tuple:
    """Split one program line into a list of tokens.

    Returns (True, tokens) on success or (False, []) when an
    IF line does not have the "IF expression : command" shape.

    PRINT keeps the rest of the line as one unmodified token
    so string case and spacing survive. IF is split into its
    expression tokens, a ':' token, and the tokens of the
    nested command. Every other line is upper-cased and split
    on commas and whitespace.
    """

    # ---- surrounding whitespace must not hide the keyword
    # ---- (the command part of an IF can arrive with leading
    # ---- blanks, which used to route PRINT to the generic path)

    line    = line.strip()
    up_line = line.upper()

    # ---- print command? (special processing)

    if up_line.startswith('PRINT'):

        txt = line[5:].strip()

        return (True,['PRINT', txt] if txt else ['PRINT'])

    # ---- if command? (special processing)

    if up_line.startswith('IF'):

        # ---- break line into two parts: expression and command.
        # ---- The expression group is non-greedy so the split
        # ---- happens at the FIRST " : "; a later " : " inside a
        # ---- PRINT string stays part of the command.

        res = re.search(r'^..\s+(.+?)\s+:\s+(.+)$',line)

        if res is None: return (False,[])

        # ---- tokenize expression

        e_toks = res.group(1).replace(',',' ').upper().split()

        # ---- tokenize command

        tf,c_toks = tokenize_line(res.group(2))
        if not tf: return (False,[])

        return (True,['IF'] + e_toks + [':'] + c_toks)

    # ---- other command

    return (True,up_line.replace(',',' ').split())

# ----------------------------------------------------------
# ---- tokenize the lines in the program and store
# ---- the tokens separately in the program object
# ----------------------------------------------------------

def tokenize_program(p) -> bool:
    """Tokenize every program line and append the result to p.token_lines.

    Stops at the first line the tokenizer rejects, prints the
    line index and text, and returns False. Returns True when
    every line was tokenized.
    """
    for line_no,source in enumerate(p.program_lines):

        if p.verbose:
            print(f'tokenize: {source}')

        ok,tokens = tokenize_line(source)

        if ok:
            p.token_lines.append(tokens)
            continue

        # ---- report the offending line and give up

        print()
        print(f'program tokenizer failed (line {line_no})')
        print(source)
        print()
        return False

    return True

# ----------------------------------------------------------
# ---- EXIT command
# ----------------------------------------------------------

def execute_exit_command(p,tokens:list):
    """Stop the interpreter by raising SystemExit.

    Neither argument is used; this function never returns.
    """
    raise SystemExit()

# ----------------------------------------------------------
# ---- IF command
# ----------------------------------------------------------

def execute_if_command(p,tokens:list) -> bool:
    """Execute "IF expression : command".

    The expression is either one token (tokens[1], with ':' at
    index 2) or three tokens (tokens[1:4], with ':' at index 4).
    The command after the ':' runs only when the expression
    evaluates to a true (non-zero) value.

    Returns False when the line is malformed, the expression
    is invalid, or the nested command fails; True otherwise
    (including when the expression is false and nothing runs).
    """

    # ---- locate the ':' separator; the length checks keep a
    # ---- short token list from raising IndexError

    if len(tokens) > 2 and tokens[2] == ':':
        colon = 2
    elif len(tokens) > 4 and tokens[4] == ':':
        colon = 4
    else:
        print()
        print(f'If error: unknow expression ({tokens})')
        return False

    # ---- split into expression and command

    tf,val = is_expression(p,tokens[1:colon])
    if not tf: return False

    # ---- expression results is False?

    if not val: return True

    # ---- expression results is True, execute the command
    # ---- (an empty command cannot be dispatched)

    if_cmd = tokens[colon+1:]

    if not if_cmd:
        print()
        print(f'If error: missing command ({tokens})')
        return False

    if not execute_command(p,if_cmd): return False

    return True

# ----------------------------------------------------------
# ---- GOTO command
# ----------------------------------------------------------

def execute_goto_command(p,tokens:list) -> bool:
    """Execute "GOTO location".

    Sets p.current_cmd to the line index stored for the
    location in p.goto_dict and returns True. Prints an error
    and returns False when the location name is missing or
    is not in p.goto_dict.
    """

    # ---- a bare GOTO has no location token to look up

    if len(tokens) < 2:
        print()
        print('Goto error: missing location name')
        return False

    location = tokens[1]

    if location in p.goto_dict:
        p.current_cmd = p.goto_dict[location]
        return True

    print()
    print(f'Goto error: location {location} does no exist')
    return False

# ----------------------------------------------------------
# ---- LOOP command
# ----------------------------------------------------------

def execute_loop_command(p,tokens:list) -> bool:
    """Execute "LOOP location": record the current line as a GOTO target.

    Stores p.current_cmd under the location name in p.goto_dict
    and returns True. Executing the same LOOP line again (for
    example an inner loop re-entered by an outer loop) is
    accepted because the name still refers to the same line.

    Prints an error and returns False when the location name
    is missing or already refers to a different line.
    """

    # ---- a bare LOOP has no location token to record

    if len(tokens) < 2:
        print()
        print('Loop error: missing location name')
        return False

    location = tokens[1]

    # ---- new name, or the same statement being re-executed

    if location not in p.goto_dict \
       or p.goto_dict[location] == p.current_cmd:
        p.goto_dict[location] = p.current_cmd
        return True

    print()
    print(f'Loop error: '\
          f'location ({location}) already exists')
    return False

# ----------------------------------------------------------
# ---- PRINT command (requires special processing)
# ----------------------------------------------------------

def execute_print_command(p,tokens:list) -> bool:
    """Execute PRINT: write strings and values on one output line.

    tokens[1], when present, is the raw text after the PRINT
    keyword. It is scanned left to right: commas and spaces
    separate items, text between double quotes is printed as
    is, and any other item is resolved with get_value() (after
    upper-casing) and its value printed.

    Returns True on success. Returns False immediately when a
    value cannot be resolved, or after printing a print error
    when a string has no closing quote.
    """

    # ---- PRINT with nothing after it: just an empty line

    if len(tokens) == 1:
        print()
        return True

    text  = tokens[1].strip()
    size  = len(text)
    pos   = 0               # index of the next unread character
    state = 'search'        # becomes 'string' on an unclosed quote

    while pos < size:

        ch = text[pos]

        # ---- separators between items

        if ch == ',' or ch == ' ':
            pos += 1
            continue

        # ---- quoted string: print everything up to the
        # ---- closing quote (or to the end if there is none)

        if ch == '"':
            closing = text.find('"',pos + 1)
            if closing < 0:
                print(text[pos + 1:],end='')
                state = 'string'
                break
            print(text[pos + 1:closing],end='')
            pos = closing + 1
            continue

        # ---- variable or constant: runs to the next
        # ---- comma, space, or end of text

        stop = pos
        while stop < size and text[stop] != ',' and text[stop] != ' ':
            stop += 1

        tf,val = get_value(p,text[pos:stop].upper())
        if not tf: return False
        print(val,end='')

        pos = stop + 1      # also steps over the separator

    print()

    if state != 'search':
        print()
        print(f'Print error: ({text})')
        print(f'Print error: state={state}')
        print()
        return False

    return True

# ----------------------------------------------------------
# ---- SET command
# ----------------------------------------------------------

def execute_set_command(p,tokens:list) -> bool:
    """Execute "SET variable, expression".

    Evaluates tokens[2:] with is_expression() and stores the
    result in p.variable_dict under the name tokens[1].

    Returns True on success. Prints an error and returns False
    when the variable name is missing or not purely alphabetic,
    or when the expression is invalid.
    """

    # ---- validate the variable name; isalpha must be CALLED
    # ---- (the bare method object is always true, which used
    # ---- to disable this check entirely)

    if len(tokens) < 2 or not tokens[1].isalpha():
        print()
        print(f'Set error: missing or invalid '\
              f'variable name ({tokens})')
        return False

    var = tokens[1]
    tf,val = is_expression(p,tokens[2:])

    if not tf:
        print()
        print(f'Command error: '\
              f'{p.program_lines[p.current_cmd]}')
        return False

    p.variable_dict[var] = val

    return True

# ----------------------------------------------------------
# ---- execute a command (a list of tokens)
# ----------------------------------------------------------

def execute_command(p,tokens:list) -> bool:

    if p.verbose:
        print(f'execute_command: {tokens}')

    match tokens[0]:
        case 'EXIT':
            tf = execute_exit_command(p,tokens)
            if not tf: return False
        case 'GOTO':
            tf = execute_goto_command(p,tokens)
            if not tf: return False
        case 'IF':
            tf = execute_if_command(p,tokens)
            if not tf: return False
        case 'LOOP':
            tf = execute_loop_command(p,tokens)
            if not tf: return False
        case 'PRINT':
            tf = execute_print_command(p,tokens)
            if not tf: return False
        case 'SET':
            tf = execute_set_command(p,tokens)
            if not tf: return False

            # ---- special commands
            
        case 'INTERNALS':
            p.internals('My Programming Language')
        case 'VERBOSE':
            verbose(p)

        # ---- what?

        case _:
            print()
            print(f'unknown key word ({tokens[0]}) '\
                  f'on line {idx}')
            print()
            return False

    return True

# ----------------------------------------------------------
# ---- execute a program one line at a time
# ----------------------------------------------------------

def execute_program(p):
    """Tokenize the program held in p, then run it line by line.

    p.current_cmd is the program counter: it starts at 0 and
    is advanced by one after each command, so a command that
    changes it (GOTO) resumes on the line after its target.

    Returns False as soon as tokenizing or any command fails,
    True when execution runs past the last line.
    """

    if p.verbose:
        p.display_program_lines()
        print()

    # ---- tokenize the lines in the program

    if not tokenize_program(p):
        return False

    # ---- process each program line's list of tokens

    p.current_cmd = 0              # program counter

    while True:

        if p.current_cmd >= len(p.token_lines):
            return True

        if p.verbose:
            print(f'processing line {p.current_cmd}')

        if not execute_command(p,p.token_lines[p.current_cmd]):
            return False

        p.current_cmd += 1

# ----------------------------------------------------------
# ---- alpha tests (initial testing)
# ---- Note: comments and blank lines are removed from
# ----       input files when they are read in,
# ----       therefore they are not in this test code.
# ----------------------------------------------------------

def alpha_tests():
    """Run one of the built-in sample programs through the interpreter.

    The sample programs are kept in a dictionary; change the
    "selected" name to run a different one. The chosen program
    is displayed, executed, and 'program failed' is printed
    when execution returns False.
    """

    programs = {

        # ---- test set, and print (also expressions)

        'test1': [
            'set x, 100',
            'set y, 300',
            'set z, x - y',
            'print',
            'print "x is not ",200,", it is ",x',
            'print',
            'print "x=",x," y=",y," z is ",z',
            'set a, x > y',
            'set b, x < y',
            'print',
            'print "x > y is ",a,"  x < y is ",b',
            'internals',
        ],

        # ---- test if

        'test2': [
            'set x, 100',
            'if x > 10 : set x, x + 1',
            'internals',
        ],

        # ---- test loop, goto, print

        'test3': [
            'print',
            'set x, 1',
            'loop abc',
            'print "x is ",x',
            'set x, x + 1',
            'if x < 10 : goto abc',
            'print "x final value ",x',
            'internals',
        ],

        # ---- test program

        'test4': [
            'print',
            'set x, 100',
            'set y, 200',
            'print "x     = ",x',
            'print "y     = ",y',
            'set a, x + y',
            'set b, x - y',
            'set c, x < y',
            'set d, x > y',
            'set e, x = y',
            'print "x + y is ",a',
            'print "x - y is ",b',
            'print "x < y is ",c',
            'print "x > y is ",d',
            'print "x = y is ",e',
        ],
    }

    # ---- which sample program to run

    selected   = 'test3'
    prog_lines = programs[selected]

    # ---- create a program object and load the lines into it

    p = Program()

    p.verbose          = False
    p.program_lines    = prog_lines
    p.lines_in_program = len(prog_lines)

    p.display_program_lines()

    # ---- execute the program and report a failure

    if not execute_program(p):
        print()
        print('program failed')

    print()

# ----------------------------------------------------------
# ---- main
# ----------------------------------------------------------

if __name__ == '__main__':

    # ---- True:  run a "canned" program and exit
    # ---- False: load and execute a program from a file
    # ---- (an explicit switch replaces code that was left
    # ---- unreachable behind an unconditional sys.exit())

    run_alpha_tests = True

    if run_alpha_tests:
        alpha_tests()
        sys.exit()

    # ---- load and execute a program from a file
    # ---- (execute_file creates its own program object)

    program_file = 'my_programming_language.txt'

    print()
    if execute_file(program_file):
        print('program worked')
    else:
        print('program failed')
    print()