#!/usr/bin/python3
# ====================================================================
# tokenize a numerical expression string
# return a list of (token,precedence) tuples
#
# Notes: this code does not check for expression syntax errors.
#        that should be done elsewhere.
# ====================================================================

import re

import user_interface as ui

TOKFLAG = False

# -----------------------------------------------------------------
# ---- compile regx
# ----
# ---- regx to find tokens:
# ---- 99.99, 99, **, //, (, ), *, /, +, -, %, unary-, unary+
# ----
# ---- search order is important
# ---- (long token patterns followed by shorter patterns)
# -----------------------------------------------------------------

# ---- format: regx, name, token type/precedence

REGXLIST = [
    ##(r'[+-]?\d+(\.\d*)?', 'number', 0),   unary (+,-)?
    (r'\d+(\.\d*)?', 'number',    0),
    (r'\*\*',        'power',     3),
    (r'\/\/',        'floor',     4),
    (r'\(',          'sub-start', 1),
    (r'\)',          'sub-end',   2),
    (r'\*',          'multiply',  4),
    (r'\/',          'divide',    4),
    (r'%',           'remainder', 4),
    (r'\+',          'add',       5),
    (r'-',           'subtract',  5)
]

REGXPATTERNS = []
for pat in REGXLIST:
    REGXPATTERNS.append((re.compile(pat[0]), pat[1], pat[2]))

# --------------------------------------------------------------------
# ---- return the token at the start of the string
# --------------------------------------------------------------------

def _get_token(string):
    for pat in REGXPATTERNS:
        res = re.match(pat[0], string)
        if res is not None:
            return (True, res, pat)
    print('no regx match found')
    return (False, None, None)
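# --------------------------------------------------------------------
# ---- example: expected _get_token() results (illustration only,
# ---- based on the REGXPATTERNS table above; the middle value is
# ---- a re.Match object, abbreviated here)
# ----
# ----   _get_token('12.5 + 3') -> (True, <match '12.5'>, (regx, 'number', 0))
# ----   _get_token('**2')      -> (True, <match '**'>,   (regx, 'power', 3))
# ----   _get_token('abc')      -> (False, None, None)
# --------------------------------------------------------------------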
# --------------------------------------------------------------------
# ---- convert an infix expression string to a list of tokens
# --------------------------------------------------------------------

def convert_to_tokens(infix_string):

    tokens = []

    # ---- remove leading/trailing blanks before tokenizing

    infix_string = infix_string.strip()

    # ---- tokenize the string

    while len(infix_string) > 0:

        tf,res,pat = _get_token(infix_string)

        # ---- found token match?

        if not tf:
            return (False, tokens)

        # ---- add the token to the list of tokens
        # ---- include precedence

        end = res.end()
        token = infix_string[:end]

        if TOKFLAG:
            x = (token, pat[2])
            print(f'tokenizer found: {x}')

        tokens.append((token, pat[2]))

        # ---- skip the stuff we have already processed
        # ---- and any leading/trailing blanks

        infix_string = infix_string[end:].strip()

    # ---- return the tokens we have found

    return (True, tokens)

# --------------------------------------------------------------------
# ---- display list of tokens
# ---- (tokens are (token,precedence) tuples)
# --------------------------------------------------------------------

def display_token_list(tokens: list, title: str = None, indexed: bool = False):

    if title is not None:
        print(title)

    if indexed:
        for i, tok in enumerate(tokens):
            print(f'[{i:2}] {tok[0]:<9} {tok[1]}')
    else:
        print(' '.join([tok[0] for tok in tokens]))

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    idxflag = False

    while True:

        # ---- ask the user to enter an infix numerical expression

        print()
        ie = ui.get_user_input('Enter infix expression: ')
        if not ie:
            break

        # ---- toggle tokenizer debug flag

        if ie == 'tok' or ie == 'tokflag':
            TOKFLAG = not TOKFLAG
            continue

        # ---- toggle indexed display flag

        if ie == 'idx' or ie == 'index' or ie == 'indexed':
            idxflag = not idxflag
            continue

        # ---- convert the infix numerical expression to tokens

        tf,tokens = convert_to_tokens(ie)
        if not tf:
            print()
            print('syntax error in expression')
            continue

        print()
        display_token_list(tokens, indexed=idxflag, title='Token List:')
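# --------------------------------------------------------------------
# ---- example: expected convert_to_tokens() output (illustration
# ---- only; the precedence numbers come from REGXLIST above)
# ----
# ----   convert_to_tokens('3 + 4 * (2 - 1)')
# ----   -> (True, [('3', 0), ('+', 5), ('4', 0), ('*', 4), ('(', 1),
# ----              ('2', 0), ('-', 5), ('1', 0), (')', 2)])
# --------------------------------------------------------------------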