#!/usr/bin/python3
# ===============================================================
# Locate MD tags using an alternate parser
# ===============================================================

# ---- recognised tags as (tag text, tag length) pairs
# ---- order matters: the first matching entry wins, so a longer
# ---- tag must be listed before any shorter tag that is a prefix
# ---- of it (r'\\ ' before r'\\', '##' before '#')
TAGS = [
    (r'__', 2),
    (r'**', 2),
    (r'//', 2),
    (r'\\ ', 3),
    (r'\\', 2),
    (r'##', 2),
    (r'#', 1),
]

# ---- set to True to trace every match_tag/locate_next_tag call
VERBOSE = False


# ---------------------------------------------------------------
# ---- match a string and a md tag?
# ---------------------------------------------------------------

def match_tag(line, tag):
    """Return True when `line` begins with the text of `tag`.

    line -- the string to test; only its start is examined
    tag  -- a (tag text, tag length) pair as stored in TAGS

    Returns False for an empty line or a line shorter than the tag.
    """
    if VERBOSE:
        print(f'match_tag(line={line}) tag={tag}')

    # ---- no line to test? (empty string)
    if not line:
        return False

    # ---- tag exceeds the length of the line?
    if len(line) < tag[1]:
        return False

    # ---- tag match?
    # ---- the whole tag text is compared against the start of the
    # ---- line; comparing every tag character with line[0] alone
    # ---- accepted '_x' as '__' and could never match a tag made
    # ---- of different characters such as r'\\ '
    return line.startswith(tag[0])


# ---------------------------------------------------------------
# ---- locate the next md tag in the string (line)
# ---- return the tag found and the starting index
# ---- for the next search
# ---------------------------------------------------------------

def locate_next_tag(line):
    """Find the first tag from TAGS that occurs in `line`.

    Returns a (tag text, next index) tuple where next index is the
    position just past the tag. When no tag is present the result
    is (None, len(line)).
    """
    if VERBOSE:
        print(f'locate_next_tag({line})')

    # ---- try every tag at every character position, left to right
    for l_idx in range(len(line)):
        for tag in TAGS:
            if match_tag(line[l_idx:], tag):
                return (tag[0], l_idx + tag[1])

    return (None, len(line))


# ---------------------------------------------------------------
# ---- parse a string containing MD tags
# ---- display the tags found and the starting index for
# ---- the next search
# ---------------------------------------------------------------

def parse_md_string(s):
    """Print every tag found in `s`, scanning left to right.

    After each tag the search continues on the remainder of the
    string. Always returns True once the string is exhausted.
    """
    while True:
        print()
        print(f'parsing string ({s}) len={len(s)}')

        tag_str, nxt_idx = locate_next_tag(s)
        print(f'found: tag_str={tag_str} nxt_idx={nxt_idx}')

        # ---- nothing left to search?
        if nxt_idx >= len(s):
            return True

        # ---- the next sub-string to search
        # ---- (nxt_idx is at least 1 here so the string shrinks)
        s = s[nxt_idx:]


# ---------------------------------------------------------------
# ---- main
# ---------------------------------------------------------------

# ---- alternate inputs: 'md01.md', 'md02.md'
infile = 'md03.md'


def main():
    """Parse every line of `infile` and report the outcome."""
    line_number = 0
    line = ''
    tf = True  # an empty file counts as success (0 lines processed)

    with open(infile, 'r') as fin:
        for line in fin:
            # ---- increment line number
            line_number += 1

            # ---- remove leading/trailing whitespace (includes \n)
            line = line.strip()

            # ---- process a line (string)
            tf = parse_md_string(line)
            if not tf:
                break

    print()
    if not tf:
        print(f'processing line {line_number} failed')
        print(f'line: "{line}"')
    else:
        print(f'sucessfuly processed {line_number} lines')
    print()


# ---- only run the driver when executed as a script so that the
# ---- parser functions can be imported without opening the file
if __name__ == '__main__':
    main()