#!/usr/bin/python3
# ===============================================================
# Locate MD tags using an alternate parser
# ===============================================================
# ---- MD tags to search for, as (tag text, tag length) pairs.
# ---- locate_next_tag() tries them in this order at each position
# ---- and returns the first one that matches, so a longer tag must
# ---- come before any shorter tag that is a prefix of it
# ---- (r'\\ ' before r'\\', '##' before '#').
TAGS = [
    (r'__', 2),
    (r'**', 2),
    (r'//', 2),
    (r'\\ ', 3),
    (r'\\', 2),
    (r'##', 2),
    (r'#', 1),
]
# ---- when True, match_tag() and locate_next_tag() print a trace
# ---- of every call
VERBOSE = False
# ---------------------------------------------------------------
# ---- does a string start with a given md tag?
# ---------------------------------------------------------------
def match_tag(line,tag):
    """Return True if `line` begins with the MD tag described by `tag`.

    line -- the string to test; the tag must start at index 0.
    tag  -- a (tag_text, tag_length) pair, as stored in TAGS.

    An empty line, or a line shorter than the declared tag length,
    never matches.
    """
    if VERBOSE:
        print(f'match_tag(line={line}) tag={tag}')
    tag_text, tag_len = tag[0], tag[1]
    # ---- no line to test, or line too short to hold the tag?
    if not line or len(line) < tag_len:
        return False
    # ---- compare the whole tag text against the start of the line
    # ---- (testing only line[0] would accept '_x' as '__' and could
    # ---- never match a tag made of different characters)
    return line.startswith(tag_text)
# ---------------------------------------------------------------
# ---- locate the next md tag in the string (line)
# ---- return the tag found and the starting index
# ---- for the next search
# ---------------------------------------------------------------
def locate_next_tag(line):
    """Find the first MD tag in `line`, scanning left to right.

    At each character position the entries of TAGS are tried in
    list order via match_tag(); the first hit wins.

    Returns a (tag_text, next_index) tuple where next_index is the
    position just past the tag (where the next search should start).
    If no tag is found, returns (None, len(line)).
    """
    if VERBOSE:
        print(f'locate_next_tag({line})')
    # ---- try every start position in turn
    for pos in range(len(line)):
        # ---- the rest of the line from this position on
        remainder = line[pos:]
        for tag in TAGS:
            if match_tag(remainder,tag):
                # ---- tag[1] is the tag length; skip past the tag
                return (tag[0],pos + tag[1])
    # ---- no tag anywhere in the line
    return (None,len(line))
# ---------------------------------------------------------------
# ---- parse a string containing MD tags
# ---- display the tags found and the starting index for
# ---- the next search
# ---------------------------------------------------------------
def parse_md_string(s):
    """Report every MD tag found in the string `s`.

    Calls locate_next_tag() repeatedly, each time on the part of the
    string that follows the previously found tag, and prints the
    string being searched, the tag found, and the index at which the
    next search starts. Stops once the returned index reaches the end
    of the current string.

    Always returns True.
    """
    finished = False
    while not finished:
        print()
        print(f'parsing string ({s}) len={len(s)}')
        tag_str,nxt_idx = locate_next_tag(s)
        print(f'found: tag_str={tag_str} nxt_idx={nxt_idx}')
        # ---- nothing left to search once the index hits the end
        finished = nxt_idx >= len(s)
        if not finished:
            # ---- the next sub-string to search
            s = s[nxt_idx:]
    return True
# ---------------------------------------------------------------
# ---- main
# ---------------------------------------------------------------
##infile = 'md01.md'
##infile = 'md02.md'
infile = 'md03.md'
# ---- stays 0 if the file has no lines
line_number = 0
# ---- start as "success" so that an empty input file does not
# ---- leave tf unbound when it is tested after the loop
tf = True
with open(infile,'r') as fin:
    # ---- line_number counts from 1 and, after a break, holds the
    # ---- number of the line that failed
    for line_number,line in enumerate(fin,start=1):
        # ---- remove leading/trailing whitespace (including \n)
        line = line.strip()
        # ---- process a line (string); stop at the first failure
        tf = parse_md_string(line)
        if not tf:
            break
print()
if not tf:
    print(f'processing line {line_number} failed')
    print(f'line: "{line}"')
else:
    print(f'sucessfuly processed {line_number} lines')
print()