# solution_122e.py

#!/usr/bin/python3
# ===============================================================
# Locate MD tags using an alternate parser
# ===============================================================

# ---- MD tags as (tag_text, tag_length) tuples
# ---- order matters: the search takes the first entry that matches,
# ---- so a longer tag is listed ahead of any shorter tag it starts
# ---- with (backslash-backslash-space ahead of backslash-backslash,
# ---- '##' ahead of '#')

TAGS = [ (text,len(text)) for text in
         (r'__', r'**', r'//', r'\\ ', r'\\', r'##', r'#') ]

VERBOSE = False     # True: print a trace line on every function call


# ---------------------------------------------------------------
# ---- does the string (line) start with the md tag?
# ---------------------------------------------------------------

def match_tag(line,tag):
    """Return True when `line` begins with the text of `tag`.

    line: string to test; only its leading characters are examined.
    tag:  (tag_text, tag_length) tuple, e.g. ('**', 2).

    Returns False for an empty line, for a line shorter than the
    tag length, or when the leading characters differ from the
    tag text.
    """

    if VERBOSE:
        print(f'match_tag(line={line}) tag={tag}')

    # ---- no line to test? (empty string)

    if not line:
        return False

    # ---- tag exceeds the length of the line?

    if len(line) < tag[1]:
        return False

    # ---- tag match?
    # ---- compare the whole tag text against the start of the line;
    # ---- testing only line[0] against each tag character would
    # ---- accept '_x' as '__' and could never match a tag made of
    # ---- different characters

    return line.startswith(tag[0])

# ---------------------------------------------------------------
# ---- locate the next md tag in the string (line)
# ---- return the tag found and the starting index
# ----    for the next search
# ---------------------------------------------------------------

def locate_next_tag(line):
    """Scan `line` left to right for the first MD tag listed in TAGS.

    Returns a (tag_text, next_index) tuple, where next_index is the
    index at which the tag was found plus the tag's length entry,
    i.e. where the next search should start.
    Returns (None, len(line)) when no tag is found.
    """

    if VERBOSE:
        print(f'locate_next_tag({line})')

    # ---- try each start position in turn; at every position the
    # ---- first entry of TAGS accepted by match_tag() wins

    for start in range(len(line)):

        remainder = line[start:]

        for tag in TAGS:
            if match_tag(remainder,tag):
                return (tag[0],start + tag[1])

    # ---- ran off the end of the line without finding a tag

    return (None,len(line))


# ---------------------------------------------------------------
# ---- parse a string containing MD tags
# ---- display the tags found and the starting index for
# ----     the next search
# ---------------------------------------------------------------

def parse_md_string(s):
    """Report every MD tag found in `s`, one search at a time.

    For each search this prints the (sub-)string being parsed and
    its length, then the tag found and the index where the next
    search starts.  The search repeats on the text after that index
    until the index reaches the end of the string.

    Always returns True.
    """

    remaining = s
    finished = False

    while not finished:

        print()
        print(f'parsing string ({remaining}) len={len(remaining)}')

        tag_str,nxt_idx = locate_next_tag(remaining)

        print(f'found: tag_str={tag_str}  nxt_idx={nxt_idx}') 

        # ---- nothing left after this tag (or no tag was found)?

        finished = nxt_idx >= len(remaining)

        # ---- otherwise continue with the text after the tag

        if not finished:
            remaining = remaining[nxt_idx:]

    return True

# ---------------------------------------------------------------
# ---- main
# ---------------------------------------------------------------

# ---- input file (alternate test inputs: 'md01.md', 'md02.md')

##infile = 'md01.md'
##infile = 'md02.md'
infile   = 'md03.md'

# ---- initialise everything the summary below reads, so an empty
# ---- input file does not leave tf or line undefined

line_number = 0     # number of lines read so far
line = ''           # last line read (reported on failure)
tf = True           # status of the last parse_md_string() call

with open(infile,'r') as fin:

    for line_number,line in enumerate(fin,start=1):

        # ---- remove leading/trailing whitespace
        # ---- (strip() with no argument also removes the '\n')

        line = line.strip()

        # ---- process a line (string); stop at the first failure

        tf = parse_md_string(line)

        if not tf:
            break

# ---- summary

print()
if not tf:
    print(f'processing line {line_number} failed')
    print(f'line: "{line}"')
else:
    print(f'sucessfuly processed {line_number} lines')
print()