#!/usr/bin/python3
# ====================================================================
# tokenize an infix numerical expression string
# return a list of (token, precedence) tuples
#
# Note: this code does not check for expression syntax errors;
# that should be done elsewhere.
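#
# Example (a sketch; the precedence values come from REGXLIST below):
#
#   convert_to_tokens('3.5*(2+4)')
#     -> (True, [('3.5', 0), ('*', 4), ('(', 1), ('2', 0),
#                ('+', 5), ('4', 0), (')', 2)])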
# ====================================================================
import re
import user_interface as ui
TOKFLAG = False
# -----------------------------------------------------------------
# ---- compile the regex patterns
# ----
# ---- regex patterns to find tokens:
# ---- 99.99, 99, **, //, (, ), *, /, +, -, %, unary-, unary+
# ----
# ---- search order is important: long token patterns must precede
# ---- their shorter prefixes (e.g. '**' before '*', '//' before '/');
# ---- a small order check below verifies this
# -----------------------------------------------------------------
# ---- format: regex, name, token type/precedence
REGXLIST = [
##(r'[+-]?\d+(\.\d*)?', 'number', 0), unary (+,-)?
(r'\d+(\.\d*)?', 'number', 0),
(r'\*\*', 'power', 3),
(r'//', 'floor', 4),
(r'\(', 'sub-start', 1),
(r'\)', 'sub-end', 2),
(r'\*', 'multiply', 4),
(r'/', 'divide', 4),
(r'%', 'remainder', 4),
(r'\+', 'add', 5),
(r'-', 'subtract', 5)
]
REGXPATTERNS = []
for regx, name, prec in REGXLIST:
    REGXPATTERNS.append((re.compile(regx), name, prec))
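# ---- order check (a minimal sketch): the first pattern that
# ---- matches '**' must be 'power', not 'multiply'; this guards
# ---- the longest-match ordering of REGXLIST
for _regx, _name, _prec in REGXPATTERNS:
    if _regx.match('**'):
        assert _name == 'power'
        break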
# --------------------------------------------------------------------
# ---- match the token at the start of the string
# ---- return (found, match object, pattern entry)
# --------------------------------------------------------------------
def _get_token(string):
    for pat in REGXPATTERNS:
        res = pat[0].match(string)
        if res is not None:
            return (True, res, pat)
    print('no regex match found')
    return (False, None, None)
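# ---- example (hypothetical inputs):
# ----   _get_token('3+4') -> (True, <match '3'>, (regex, 'number', 0))
# ----   _get_token('abc') -> (False, None, None)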
# --------------------------------------------------------------------
# ---- convert an infix expression string to a list of
# ---- (token, precedence) tuples; return (status, tokens)
# --------------------------------------------------------------------
def convert_to_tokens(infix_string):
    tokens = []
    # ---- remove leading/trailing blanks before tokenizing
    infix_string = infix_string.strip()
    # ---- tokenize the string
    while len(infix_string) > 0:
        tf, res, pat = _get_token(infix_string)
        # ---- found token match?
        if not tf:
            return (False, tokens)
        # ---- add the token to the list of tokens
        # ---- include precedence
        end = res.end()
        token = infix_string[:end]
        if TOKFLAG:
            print(f'tokenizer found: {(token, pat[2])}')
        tokens.append((token, pat[2]))
        # ---- skip the stuff we have already processed
        # ---- and any leading/trailing blanks
        infix_string = infix_string[end:].strip()
    # ---- return the tokens we have found
    return (True, tokens)
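# --------------------------------------------------------------------
# ---- quick self-test (a minimal sketch, callable by hand; the
# ---- expected tuples assume the precedence values in REGXLIST)
# --------------------------------------------------------------------
def _self_test():
    ok, toks = convert_to_tokens('3.5 * (2 + 4) ** 2')
    assert ok
    assert toks == [('3.5', 0), ('*', 4), ('(', 1), ('2', 0),
                    ('+', 5), ('4', 0), (')', 2), ('**', 3), ('2', 0)]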
# --------------------------------------------------------------------
# ---- display list of tokens
# --------------------------------------------------------------------
def display_token_list(tokens: list, title: str = None, indexed: bool = False):
    if title is not None:
        print(title)
    if indexed:
        for i, tok in enumerate(tokens):
            print(f'[{i:2}] {tok[0]:<9} {tok[1]}')
    else:
        print(' '.join([tok[0] for tok in tokens]))
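# ---- example output (indexed=True, hypothetical token list):
# ----   [ 0] 3.5       0
# ----   [ 1] *         4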
# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------
if __name__ == '__main__':
idxflag = False
while True:
# ---- ask the user to enter an infix numerical expression
print()
ie = ui.get_user_input('Enter infix expression: ')
if not ie: break
        # ---- toggle token debug flag
        if ie in ('tok', 'tokflag'):
            TOKFLAG = not TOKFLAG
            continue
# ---- toggle indexed flag
        if ie in ('idx', 'index', 'indexed'):
idxflag = not idxflag
continue
# ---- convert the infix numerical expression to tokens
tf,tokens = convert_to_tokens(ie)
if not tf:
print()
print('syntax error in expression')
continue
print()
display_token_list(tokens,indexed=idxflag,title='Token List:')