# solution_248e.py

#!/usr/bin/python3
# ==================================================================
# interpret/display JPEG file header information
#
# en.wikipedia.org/wiki/JPEG
# docs.fileformat.com/image/jpeg/
# en.wikipedia.org/wiki/JPEG_File_Interchange_Format
# stackoverflow.com/questions/26715684/parsing-jpeg-sos-marker
# ==================================================================

import sys
from hex_dump import hex_dump

# -----------------------------------------------------------------
# ---- segment SOI - start of image
# -----------------------------------------------------------------

def segment_soi(mark,flag,idx,byts):
    """Announce the SOI (start of image) marker.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the byte following the marker
    byts       -- the bytes being parsed (not read here)

    Nothing is consumed, so idx is handed back unchanged.
    """
    banner = f'segment SOI {mark:02X} {flag:02X} {idx}'
    print(banner)
    return idx

# -----------------------------------------------------------------
# ---- segment JFIF APP0 - marker segment
# -----------------------------------------------------------------

def segment_jfif_appo(mark,flag,idx,byts):
    """Print the JFIF APP0 segment header fields; return the next index.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The return value is idx plus the value of the length field, i.e.
    the length is taken to include its own two bytes.
    """
    print(f'segment JFIF APPO {mark:02X} {flag:02X} {idx}')

    slen = int.from_bytes(byts[idx:idx+2],'big')

    # ---- walk the payload with a separate cursor so idx keeps pointing
    # ---- at the start of the segment for the return value
    pos = idx + 2

    # ---- 5-byte identifier; undecodable bytes are replaced instead of
    # ---- raising, and a trailing NUL terminator is not displayed
    sid = byts[pos:pos+5].decode('UTF-8',errors='replace').rstrip('\x00')
    pos += 5

    # ---- major / minor version bytes follow the identifier
    version1 = int.from_bytes(byts[pos:pos+1],'big')
    pos += 1
    version2 = int.from_bytes(byts[pos:pos+1],'big')

    print(f'seg       = {mark:02X} {flag:02X} marker segment')
    print(f'seg len   = {slen}')
    print(f'seg id    = {sid}')
    print(f'JFIF ver  = {version1}.{version2:02}')

    return idx + slen

# -----------------------------------------------------------------
# ---- segment JFIF APPn - application specific
# -----------------------------------------------------------------

def segment_jfif_appn(mark,flag,idx,byts):
    """Announce an APPn segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The payload is not interpreted; the segment is skipped using its
    length field, which is taken to include its own two bytes.
    """
    print(f'segment JFIF APPn {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment SOF0 - start of frame
# -----------------------------------------------------------------

def segment_sofo(mark,flag,idx,byts):
    """Print the SOF0 segment length and leading bytes; return next index.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The first four bytes after the length field are shown raw. The
    return value is idx plus the length field's value.
    """
    print(f'segment SOFO {mark:02X} {flag:02X} {idx}')

    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    form = byts[idx+2:idx+6]

    print(f'seg       = {mark:02X} {flag:02X} start of frame')
    print(f'seg len   = {slen}')
    print(f'format    = {form}')

    return idx + slen
    
# -----------------------------------------------------------------
# ---- segment SOF2 - start of frame
# -----------------------------------------------------------------

def segment_sof2(mark,flag,idx,byts):
    """Announce an SOF2 segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The payload is not interpreted; the segment is skipped using its
    length field, which is taken to include its own two bytes.
    """
    print(f'segment SOF2 {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment DHT - define huffman table
# -----------------------------------------------------------------

def segment_dht(mark,flag,idx,byts):
    """Announce a DHT segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The table data is not interpreted; the segment is skipped using its
    length field, which is taken to include its own two bytes.
    """
    print(f'segment DHT {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment DQT - define quantization table(s)
# -----------------------------------------------------------------

def segment_dqt(mark,flag,idx,byts):
    """Announce a DQT segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The table data is not interpreted; the segment is skipped using its
    length field, which is taken to include its own two bytes.
    """
    print(f'segment DQT {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment DRI - define restart interval
# -----------------------------------------------------------------

def segment_dri(mark,flag,idx,byts):
    """Announce a DRI segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The restart interval is not interpreted; the segment is skipped
    using its length field, which is taken to include its own two bytes.
    """
    print(f'segment DRI {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment SOS - start of scan
# ---- do some "fancy stuff" to skip past SOS segment
# -----------------------------------------------------------------

def segment_sos(mark,flag,idx,byts):
    """Skip a SOS segment and its scan data; return the next marker's index.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the first byte after the SOS marker
    byts       -- the bytes being parsed (bytes or bytearray)

    Scans forward for a 0xFF byte whose successor is not one of the
    values that may appear inside scan data (see skip_flags). The index
    of that 0xFF byte is returned, so the caller can parse the marker
    itself rather than landing in the middle of the following segment.

    If no such marker exists before the end of byts, a message is
    printed and the program exits through sys.exit().
    """

    # ---- 0x00      : byte-stuffed 0xFF that is part of the scan data
    # ---- 0xFF      : fill byte that may precede a marker
    # ---- 0xD0-0xD7 : restart markers embedded in the scan data
    skip_flags = frozenset((0x00,0xFF,*range(0xD0,0xD8)))

    print(f'segment SOS {mark:02X} {flag:02X} {idx}')

    # ---- a marker needs one byte after its 0xFF, so the search for
    # ---- 0xFF stops before the final byte
    last = len(byts) - 1

    while idx < last:

        idx = byts.find(0xFF,idx,last)
        if idx < 0:
            break

        if byts[idx+1] in skip_flags:
            idx += 1
            continue

        print(f'found non skip flag {byts[idx+1]:02X} at {idx+1}')
        return idx

    print()
    print('scanning SOS for next segment marker 0xFF')
    print('found end of bytes - did not find marker')
    sys.exit()


# -----------------------------------------------------------------
# ---- segment RSTn - restart
# -----------------------------------------------------------------

def segment_rstn(mark,flag,idx,byts):
    """Announce an RSTn (restart) marker.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the byte following the marker
    byts       -- the bytes being parsed (not read here)

    Restart markers are stand-alone: no length field or payload follows
    them, so nothing is consumed and idx is returned unchanged.
    """
    print(f'segment RSTn {mark:02X} {flag:02X} {idx}')
    return idx
   
# -----------------------------------------------------------------
# ---- segment COM - comments
# -----------------------------------------------------------------

def segment_com(mark,flag,idx,byts):
    """Announce a COM (comment) segment and return the index just past it.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the segment's 2-byte big-endian length field
    byts       -- the bytes being parsed

    The comment text is not displayed; the segment is skipped using its
    length field, which is taken to include its own two bytes.
    """
    print(f'segment COM {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts argument, not from a module-level global
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen

# -----------------------------------------------------------------
# ---- segment EOI - end of image
# -----------------------------------------------------------------

def segment_eoi(mark,flag,idx,byts):
    """Announce the EOI (end of image) marker.

    mark, flag -- the two marker bytes the caller already consumed
    idx        -- index of the byte following the marker
    byts       -- the bytes being parsed (not read here)

    Nothing is consumed, so idx is handed back unchanged.
    """
    banner = f'segment EOI {mark:02X} {flag:02X} {idx}'
    print(banner)
    return idx

# ------------------------------------------------------------------
# ---- display JPEG file header segments
# ------------------------------------------------------------------

def display_jpeg_header(file_path:str,jpeg_bytes:bytes) -> None:

    print('---- jpeg Header Segments -----------------------')

    print(f'file      = {file_path}')
    print(f'file size = {len(jpeg_bytes)}')

    idx = 0                # ---- first byte index

    while True:

        if idx > len(jpeg_bytes)-1: break

        # ---- get marker and marker flag

        m1 = int.from_bytes(jpeg_bytes[idx:idx+1],'big')

        if m1 != 0xFF:
            print()
            print(f'no segment marker found ' +\
                  f'index={idx} marker={m1:02X}')
            print()
            if idx-16 < 0: idx = 0
            hex_dump(jpeg_bytes,idx-16,idx+31)
            return False

        idx += 1                # ---- next byte index

        m2 = int.from_bytes(jpeg_bytes[idx:idx+1],'big')

        idx += 1                # ---- next byte index

        # ---- process marker flag
       
        match m2:
       
            case 0xD8:
                idx = segment_soi(m1,m2,idx,jpeg_bytes)
                continue

            case 0xE0:
                idx = segment_jfif_appo(m1,m2,idx,jpeg_bytes)

            case 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5:
                idx = segment_jfif_appn(m1,m2,idx,jpeg_bytes)
        
            case 0XC0:
                idx = segment_sofo(m1,m2,idx,jpeg_bytes)

            case 0XC2:
                idx = segment_sofo2(m1,m2,idx,jpeg_bytes)
                
            case 0XC4:
                idx = segment_dht(m1,m2,idx,jpeg_bytes)

            case 0XDB:
                idx = segment_dqt(m1,m2,idx,jpeg_bytes)

            case 0XDD:
                idx = segment_dri(m1,m2,idx,jpeg_bytes)

            case 0XDA:
                idx = segment_sos(m1,m2,idx,jpeg_bytes)

            case 0xD0 | 0xD1 | 0xD2 | 0xD3 | 0xD4 | 0xD5:
                idx = segment_rstn(m1,m2,idx,jpeg_bytes)

            case 0xE0 | 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5:
                idx = segment_appn(m1,m2,idx,jpeg_bytes)

            case 0xEF:
                idx = segment_com(m1,m2,idx,jpeg_bytes)

            case 0xD9:
                idx = segment_eoi(m1,m2,idx,jpeg_bytes)
                break

            case _:
                idx -= 1
                print()
                print(f'bad/illegal marker flag {m2:02X} idx={idx}')
                print()
                hex_dump(jpeg_bytes,idx-16,idx+31)
                return False

    return True
                                          
# ------------------------------------------------------------------
# ---- open jpeg file and read it directly into a byte array
# ----
# ---- It may not be necessary to read in the complete file
# ---- if you are only accessing the headers. For example,
# ---- to read the first 1000 bytes?
# ----
# ----   def load_jpeg_to_array(jpeg_file_path):
# ----       f = open(jpeg_file_path,'rb')
# ----       jpeg_bytes = f.read(1000)
# ----       f.close()
# ----       return jpeg_bytes
# ----
# ----   or
# ----
# ----   def load_jpeg_to_array(jpeg_file_path):
# ----       with open(jpeg_file_path,'rb') as f:
# ----           jpeg_bytes = f.read(1000)
# ----           return jpeg_bytes
# ----
# ------------------------------------------------------------------
    
def load_jpeg_to_array(jpeg_file_path:str) -> bytes:
    """Read the whole file at jpeg_file_path in binary mode.

    Returns the file contents as an immutable bytes object (what
    file.read() yields for a file opened with 'rb'). Errors from
    open() are not caught here.
    """
    with open(jpeg_file_path,'rb') as file:
        return file.read()

# ------------------------------------------------------------------
# ---- main
# ------------------------------------------------------------------

if __name__ == '__main__':

    # ---- files to report on
    targets = ('mona_lisa_small_a.jpg',)

    print()

    for path in targets:

        # ---- NOTE(review): keep this bound at module scope under the
        # ---- name jpeg_bytes; code elsewhere in the file may read it
        # ---- as a global
        jpeg_bytes = load_jpeg_to_array(path)

        print()
        if display_jpeg_header(path,jpeg_bytes) is not True:
            print()
            print('error: display JPEG headers failed')
            break

        # ---- dump the tail of the file, ending at its final index
        print()
        print('last 32 bytes of file')
        tail_end = len(jpeg_bytes) - 1
        hex_dump(jpeg_bytes, tail_end - 32, tail_end)

    print()