#!/usr/bin/python3 # ================================================================== # interpret/display JPEG file header information # # en.wikipedia.org/wiki/JPEG # docs.fileformat.com/image/jpeg/ # en.wikipedia.org/wiki/JPEG_File_Interchange_Format # stackoverflow.com/questions/26715684/parsing-jpeg-sos-marker # ================================================================== import sys from hex_dump import hex_dump # ----------------------------------------------------------------- # ---- segment SOI - start of image # ----------------------------------------------------------------- def segment_soi(mark,flag,idx,byts): print(f'segment SOI {mark:02X} {flag:02X} {idx}') return idx # ----------------------------------------------------------------- # ---- segment JFIF APPO - marker segment # ----------------------------------------------------------------- def segment_jfif_appo(mark,flag,idx,byts): idxx = idx print(f'segment JFIF APPO {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') idx += 2 sid = str(jpeg_bytes[idx:idx+5].decode('UTF-8')) idx + 5 version1 = int.from_bytes(jpeg_bytes[idx:idx+1],'big') idx += 1 version2 = int.from_bytes(jpeg_bytes[idx:idx+1],'big') idx += 1 print(f'seg = {mark:02X} {flag:02X} marker segment') print(f'seg len = {slen}') print(f'seg id = {sid}') print(f'JFFI ver = {version1}.{version2:02}') return idxx + slen # ----------------------------------------------------------------- # ---- segment JFIF APPn - application specific # ----------------------------------------------------------------- def segment_jfif_appn(mark,flag,idx,byts): idxx = idx print(f'segment JFIF APPn {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment SOFO - start of frame # ----------------------------------------------------------------- def segment_sofo(mark,flag,idx,byts): idxx = idx print(f'segment SOFO {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') idx += 2 form = jpeg_bytes[idx:idx+4] print(f'seg = {mark:02X} {flag:02X} start of frame') print(f'seg len = {slen}') print(f'format = {form}') return idxx + slen # ----------------------------------------------------------------- # ---- segment SOF2 - start of frame # ----------------------------------------------------------------- def segment_sof2(mark,flag,idx,byts): idxx = idx print(f'segment SOF2 {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment DHT - define huffman table # ----------------------------------------------------------------- def segment_dht(mark,flag,idx,byts): idxx = idx print(f'segment DHT {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment DQI - define quantization table(s) # ----------------------------------------------------------------- def segment_dqt(mark,flag,idx,byts): idxx = idx print(f'segment DQT {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment DRI - define restart interval # ----------------------------------------------------------------- def segment_dri(mark,flag,idx,byts): idxx = idx print(f'segment DRI {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment SOS - start of scan # ---- do some "fancy stuff" to skip past SOS segment # ----------------------------------------------------------------- def segment_sos(mark,flag,idx,byts): skip_flags = [0x00,0xD0,0xD1,0xD2,0xD3,0xD4,0xd5,0xD6,0xD7] print(f'segment SOS {mark:02X} {flag:02X} {idx}') max_find_idx = idx + 20 while idx < len(byts)-1: ##if idx%500 == 0: print(f'processing SOS idx {idx}') if byts[idx] == 0xFF: ##print(f'found at {idx} {byts[idx]:02X} {byts[idx+1]:02X}') if byts[idx+1] in skip_flags: idx += 1 continue print(f'found non skip flag {byts[idx+1]:02X} at {idx+1}') return idx + 2 if idx > max_find_idx: sys.exit() idx += 1 print() print('scanning SOS for next segment marker 0xFF') print('found end of bytes - did not find marker') sys.exit() # ----------------------------------------------------------------- # ---- segment RSTn - restart # ----------------------------------------------------------------- def segment_rstn(mark,flag,idx,byts): idxx = idx print(f'segment RSTn {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment COM - comments # ----------------------------------------------------------------- def segment_com(mark,flag,idx,byts): idxx = idx print(f'segment COM {mark:02X} {flag:02X} {idx}') slen = int.from_bytes(jpeg_bytes[idx:idx+2],'big') return idxx + slen # ----------------------------------------------------------------- # ---- segment EOI - end of image # ----------------------------------------------------------------- def segment_eoi(mark,flag,idx,byts): print(f'segment EOI {mark:02X} {flag:02X} {idx}') return idx # ------------------------------------------------------------------ # ---- display JPEG file header segments # ------------------------------------------------------------------ def display_jpeg_header(file_path:str,jpeg_bytes:bytes) -> None: print('---- jpeg Header Segments -----------------------') print(f'file = {file_path}') print(f'file size = {len(jpeg_bytes)}') idx = 0 # ---- first byte index while True: if idx > len(jpeg_bytes)-1: break # ---- get marker and marker flag m1 = int.from_bytes(jpeg_bytes[idx:idx+1],'big') if m1 != 0xFF: print() print(f'no segment marker found ' +\ f'index={idx} marker={m1:02X}') print() if idx-16 < 0: idx = 0 hex_dump(jpeg_bytes,idx-16,idx+31) return False idx += 1 # ---- next byte index m2 = int.from_bytes(jpeg_bytes[idx:idx+1],'big') idx += 1 # ---- next byte index # ---- process marker flag match m2: case 0xD8: idx = segment_soi(m1,m2,idx,jpeg_bytes) continue case 0xE0: idx = segment_jfif_appo(m1,m2,idx,jpeg_bytes) case 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5: idx = segment_jfif_appn(m1,m2,idx,jpeg_bytes) case 0XC0: idx = segment_sofo(m1,m2,idx,jpeg_bytes) case 0XC2: idx = segment_sofo2(m1,m2,idx,jpeg_bytes) case 0XC4: idx = segment_dht(m1,m2,idx,jpeg_bytes) case 0XDB: idx = segment_dqt(m1,m2,idx,jpeg_bytes) case 0XDD: idx = segment_dri(m1,m2,idx,jpeg_bytes) case 0XDA: idx = segment_sos(m1,m2,idx,jpeg_bytes) case 0xD0 | 0xD1 | 0xD2 | 0xD3 | 0xD4 | 0xD5: idx = segment_rstn(m1,m2,idx,jpeg_bytes) case 0xE0 | 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5: idx = segment_appn(m1,m2,idx,jpeg_bytes) case 0xEF: idx = segment_com(m1,m2,idx,jpeg_bytes) case 0xD9: idx = segment_eoi(m1,m2,idx,jpeg_bytes) break case _: idx -= 1 print() print(f'bad/illegal marker flag {m2:02X} idx={idx}') print() hex_dump(jpeg_bytes,idx-16,idx+31) return False return True # ------------------------------------------------------------------ # ---- open jpeg file and read it directly into a byte array # ---- # ---- It may not be necessary to read in the complete file # ---- if you are only accessing the headers. For example, # ---- to read the first 1000 bytes? # ---- # ---- def load_jpeg_to_array(jpeg_file_path): # ---- f = open(jpeg_file_path,'rb') # ---- jpeg_bytes = f.read(1000) # ---- close(f) # ---- return jpeg_bytes # ---- # ---- or # ---- # ---- def load_jpeg_to_array(jpeg_file_path): # ---- with open(jpeg_file_path,'rb') as f: # ---- jpeg_bytes = f.read(1000) # ---- return jpeg_bytes # ---- # ------------------------------------------------------------------ def load_jpeg_to_array(jpeg_file_path:str,) -> bytearray: with open(jpeg_file_path,'rb') as file: jpeg_bytes = file.read() return jpeg_bytes # ------------------------------------------------------------------ # ---- main # ------------------------------------------------------------------ if __name__ == '__main__': print() jpeg_file_paths = [ 'mona_lisa_small_a.jpg' ] for file_path in jpeg_file_paths: jpeg_bytes = load_jpeg_to_array(file_path) print() tf = display_jpeg_header(file_path,jpeg_bytes) if tf is not True: print() print('error: display JPEG headers failed') break print() print('last 32 bytes of file') max_idx = len(jpeg_bytes) - 1 hex_dump(jpeg_bytes, max_idx - 32, max_idx) print()