solution_222c.py

#!/usr/bin/python3
# ====================================================================
# tokenize a numerical expression string
# return a list of (token, precedence) tuples
#
# Notes: this code does not check the expression for syntax errors;
#        that should be done elsewhere.
# ====================================================================

import re
import user_interface as ui

# ---- TOKFLAG: debug switch; when True each token is printed as
# ---- it is found (toggled from the main loop with 'tok'/'tokflag')

TOKFLAG = False

# -----------------------------------------------------------------
# ---- compile regx
# ----
# ---- regx to find tokens:
# ----    99.99, 99, **, //, (, ), *, /, +, -, %
# ---- (unary + and - are not tokenized separately; a leading sign
# ----  matches the add/subtract patterns)
# ----
# ---- search order is important
# ---- (long token patterns followed by shorter patterns)
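# ----
# ---- e.g. r'\*\*' must be tried before r'\*'; otherwise '2**3'
# ---- would tokenize as multiply,multiply instead of power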
# -----------------------------------------------------------------

# ---- format: regx, name, token type/precedence

REGXLIST = [
    ##(r'[+-]?\d+(\.\d*)?', 'number', 0),  # would also consume unary +/-
    (r'\d+(\.\d*)?',      'number',    0),
    (r'\*\*',             'power',     3),
    (r'\/\/',             'floor',     4),
    (r'\(',               'sub-start', 1),
    (r'\)',               'sub-end',   2),
    (r'\*',               'multiply',  4),
    (r'\/',               'divide',    4),
    (r'%',                'remainder', 4),
    (r'\+',               'add',       5),
    (r'-',                'subtract',  5)
]

REGXPATTERNS = []
for regx, name, prec in REGXLIST:
    REGXPATTERNS.append((re.compile(regx), name, prec))
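
# ---- e.g. REGXPATTERNS[0] holds the compiled number regx:
# ----    (re.compile(r'\d+(\.\d*)?'), 'number', 0)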

# --------------------------------------------------------------------
# ---- return the token match at the start of the string
# ---- as (matched?, match object, (regx, name, precedence))
# --------------------------------------------------------------------

def _get_token(string):

    for pat in REGXPATTERNS:

        res = pat[0].match(string)

        if res is not None:
            return (True,res,pat)

    print('no regx match found')
    return (False,None,None)
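
# ---- example: _get_token('12+3') matches the number regx and
# ---- returns (True, <re.Match of '12'>, (<regx>, 'number', 0))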

# --------------------------------------------------------------------
# ---- convert an infix expression string to a list of
# ---- (token, precedence) tuples
# ---- return (True, tokens) on success or (False, partial tokens)
# ---- if a piece of the string matches no regx
# --------------------------------------------------------------------

def convert_to_tokens(infix_string):

    tokens = []

    # ---- tokenize the string
    # ---- (strip leading/trailing blanks first so that each
    # ----  regx match starts exactly at the next token)

    infix_string = infix_string.strip()

    while len(infix_string) > 0:

        tf,res,pat = _get_token(infix_string)

        # ---- found token match?

        if not tf:
           return (False,tokens)

        # ---- extract the end of the match from the search results
        # ---- (re.match anchors at index 0, so the match always
        # ----  starts at the beginning of the string)

        end = res.end()

        # ---- add the token to the list of tokens
        # ---- include precedence

        token = infix_string[:end]

        if TOKFLAG:
            x = (token,pat[2])
            print(f'tokenizer found: {x}')

        tokens.append((token,pat[2]))

        # ---- skip the stuff we have already processed
        # ---- and any leading/trailing blanks

        infix_string = infix_string[end:].strip()

    # ---- return the tokens we have found
    
    return (True,tokens)
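
# ---- example: convert_to_tokens('2 + 3*4') returns
# ----    (True, [('2',0), ('+',5), ('3',0), ('*',4), ('4',0)])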

# --------------------------------------------------------------------
# ---- display list of tokens
# --------------------------------------------------------------------

def display_token_list(tokens:list, title:str|None=None, indexed:bool=False):
    if title is not None: print(title)
    if indexed:
        for i,tok in enumerate(tokens):
            print(f'[{i:2}] {tok[0]:<9} {tok[1]}')
    else:
        print(' '.join([tok[0] for tok in tokens]))
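
# ---- example: with indexed=True each token prints on its own
# ---- '[ i] token  precedence' line; otherwise all tokens print
# ---- on a single line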

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    idxflag = False
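    # ---- special inputs: an empty line exits; 'tok'/'tokflag'
    # ---- toggles token debug printing; 'idx'/'index'/'indexed'
    # ---- toggles the indexed token display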
    
    while True:

        # ---- ask the user to enter an infix numerical expression

        print()
        ie = ui.get_user_input('Enter infix expression: ')

        if not ie: break

        # ---- toggle token debug flag

        if ie == 'tok' or ie == 'tokflag':
            TOKFLAG = not TOKFLAG
            continue

        # ---- toggle indexed flag

        if ie == 'idx' or ie == 'index' or ie == 'indexed':
            idxflag = not idxflag
            continue

        # ---- convert the infix numerical expression to tokens

        tf,tokens = convert_to_tokens(ie)

        if not tf:
            print()
            print('syntax error in expression')
            continue

        print()
        display_token_list(tokens,indexed=idxflag,title='Token List:')