#!/usr/bin/python3
# ==================================================================
# interpret/display JPEG file header information
#
# en.wikipedia.org/wiki/JPEG
# docs.fileformat.com/image/jpeg/
# en.wikipedia.org/wiki/JPEG_File_Interchange_Format
# stackoverflow.com/questions/26715684/parsing-jpeg-sos-marker
# ==================================================================
import sys
from hex_dump import hex_dump
# -----------------------------------------------------------------
# ---- segment SOI - start of image
# -----------------------------------------------------------------
def segment_soi(mark,flag,idx,byts):
    """Report an SOI (start of image) marker.

    No bytes are consumed: the index is handed back untouched.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- current byte index; returned as-is
    byts -- image bytes (not read here)
    """
    banner = f'segment SOI {mark:02X} {flag:02X} {idx}'
    print(banner)
    return idx
# -----------------------------------------------------------------
# ---- segment JFIF APP0 - marker segment
# -----------------------------------------------------------------
def segment_jfif_appo(mark,flag,idx,byts):
    """Display the JFIF APP0 segment header and return the next index.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
            (the byte right after the marker)
    byts -- image bytes being parsed

    Returns the index of the length field plus the segment length,
    i.e. where the caller should look for the next marker.
    """
    seg_start = idx
    print(f'segment JFIF APPO {mark:02X} {flag:02X} {idx}')
    # ---- all reads go through the byts parameter so the function
    # ---- does not depend on a module-level variable being defined
    slen = int.from_bytes(byts[idx:idx+2],'big')
    idx += 2
    # ---- 5-byte identifier; tolerate non-UTF-8 bytes and drop the
    # ---- trailing NUL terminator so it is not printed
    sid = byts[idx:idx+5].decode('UTF-8',errors='replace').rstrip('\x00')
    idx += 5    # ---- step past the identifier to the version bytes
    version1 = int.from_bytes(byts[idx:idx+1],'big')
    idx += 1
    version2 = int.from_bytes(byts[idx:idx+1],'big')
    idx += 1
    print(f'seg = {mark:02X} {flag:02X} marker segment')
    print(f'seg len = {slen}')
    print(f'seg id = {sid}')
    print(f'JFFI ver = {version1}.{version2:02}')
    return seg_start + slen
# -----------------------------------------------------------------
# ---- segment JFIF APPn - application specific
# -----------------------------------------------------------------
def segment_jfif_appn(mark,flag,idx,byts):
    """Report an APPn (application specific) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment JFIF APPn {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment SOF0 - start of frame
# -----------------------------------------------------------------
def segment_sofo(mark,flag,idx,byts):
    """Display the start-of-frame (0xC0) segment header and skip it.

    Prints the segment length and the first four payload bytes (raw).

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns the index of the length field plus the segment length.
    """
    seg_start = idx
    print(f'segment SOFO {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    idx += 2
    form = byts[idx:idx+4]
    print(f'seg = {mark:02X} {flag:02X} start of frame')
    print(f'seg len = {slen}')
    print(f'format = {form}')
    return seg_start + slen
# -----------------------------------------------------------------
# ---- segment SOF2 - start of frame
# -----------------------------------------------------------------
def segment_sof2(mark,flag,idx,byts):
    """Report a start-of-frame (0xC2) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment SOF2 {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment DHT - define huffman table
# -----------------------------------------------------------------
def segment_dht(mark,flag,idx,byts):
    """Report a DHT (define huffman table) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment DHT {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment DQT - define quantization table(s)
# -----------------------------------------------------------------
def segment_dqt(mark,flag,idx,byts):
    """Report a DQT (define quantization table) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment DQT {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment DRI - define restart interval
# -----------------------------------------------------------------
def segment_dri(mark,flag,idx,byts):
    """Report a DRI (define restart interval) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment DRI {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment SOS - start of scan
# ---- do some "fancy stuff" to skip past SOS segment
# -----------------------------------------------------------------
def segment_sos(mark,flag,idx,byts):
    """Skip past a SOS (start of scan) segment and its scan data.

    Scans forward from idx for the next 0xFF byte that starts a real
    marker and returns the index of that 0xFF, so the caller can read
    the marker pair itself (the same contract as the other segment_*
    functions, which return the position of the next marker).

    An 0xFF followed by one of these bytes is not a segment boundary
    and is skipped:
      0x00        stuffed zero after a literal 0xFF data byte
      0xD0-0xD7   RSTn restart markers inside the scan
      0xFF        fill byte preceding a marker

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the first byte after the SOS marker
    byts -- image bytes being parsed

    Exits the program (sys.exit) if the end of the data is reached
    without finding a marker.
    """
    skip_flags = frozenset((0x00,0xFF,*range(0xD0,0xD8)))
    print(f'segment SOS {mark:02X} {flag:02X} {idx}')
    last_idx = len(byts) - 1    # ---- byts[idx+1] must stay in range
    while idx < last_idx:
        if byts[idx] == 0xFF and byts[idx+1] not in skip_flags:
            print(f'found non skip flag {byts[idx+1]:02X} at {idx+1}')
            return idx
        idx += 1
    print()
    print('scanning SOS for next segment marker 0xFF')
    print('found end of bytes - did not find marker')
    sys.exit()
# -----------------------------------------------------------------
# ---- segment RSTn - restart
# -----------------------------------------------------------------
def segment_rstn(mark,flag,idx,byts):
    """Report an RSTn (restart) marker.

    RSTn is a standalone marker: it has no length field and no
    payload, so no bytes are consumed and idx is returned unchanged
    (interpreting the following two bytes as a length would skip an
    arbitrary amount of data).

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the first byte after the marker; returned as-is
    byts -- image bytes (not read here)
    """
    print(f'segment RSTn {mark:02X} {flag:02X} {idx}')
    return idx
# -----------------------------------------------------------------
# ---- segment COM - comments
# -----------------------------------------------------------------
def segment_com(mark,flag,idx,byts):
    """Report a COM (comment) segment and skip it.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- index of the segment's 2-byte big-endian length field
    byts -- image bytes being parsed

    Returns idx plus the segment length (index of the next marker).
    """
    print(f'segment COM {mark:02X} {flag:02X} {idx}')
    # ---- read from the byts parameter, not a module-level variable
    slen = int.from_bytes(byts[idx:idx+2],'big')
    return idx + slen
# -----------------------------------------------------------------
# ---- segment EOI - end of image
# -----------------------------------------------------------------
def segment_eoi(mark,flag,idx,byts):
    """Report an EOI (end of image) marker.

    No bytes are consumed: the index is handed back untouched.

    mark -- first marker byte (printed as two hex digits)
    flag -- second marker byte (printed as two hex digits)
    idx  -- current byte index; returned as-is
    byts -- image bytes (not read here)
    """
    banner = f'segment EOI {mark:02X} {flag:02X} {idx}'
    print(banner)
    return idx
# ------------------------------------------------------------------
# ---- display JPEG file header segments
# ------------------------------------------------------------------
def display_jpeg_header(file_path:str,jpeg_bytes:bytes) -> None:
print('---- jpeg Header Segments -----------------------')
print(f'file = {file_path}')
print(f'file size = {len(jpeg_bytes)}')
idx = 0 # ---- first byte index
while True:
if idx > len(jpeg_bytes)-1: break
# ---- get marker and marker flag
m1 = int.from_bytes(jpeg_bytes[idx:idx+1],'big')
if m1 != 0xFF:
print()
print(f'no segment marker found ' +\
f'index={idx} marker={m1:02X}')
print()
if idx-16 < 0: idx = 0
hex_dump(jpeg_bytes,idx-16,idx+31)
return False
idx += 1 # ---- next byte index
m2 = int.from_bytes(jpeg_bytes[idx:idx+1],'big')
idx += 1 # ---- next byte index
# ---- process marker flag
match m2:
case 0xD8:
idx = segment_soi(m1,m2,idx,jpeg_bytes)
continue
case 0xE0:
idx = segment_jfif_appo(m1,m2,idx,jpeg_bytes)
case 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5:
idx = segment_jfif_appn(m1,m2,idx,jpeg_bytes)
case 0XC0:
idx = segment_sofo(m1,m2,idx,jpeg_bytes)
case 0XC2:
idx = segment_sofo2(m1,m2,idx,jpeg_bytes)
case 0XC4:
idx = segment_dht(m1,m2,idx,jpeg_bytes)
case 0XDB:
idx = segment_dqt(m1,m2,idx,jpeg_bytes)
case 0XDD:
idx = segment_dri(m1,m2,idx,jpeg_bytes)
case 0XDA:
idx = segment_sos(m1,m2,idx,jpeg_bytes)
case 0xD0 | 0xD1 | 0xD2 | 0xD3 | 0xD4 | 0xD5:
idx = segment_rstn(m1,m2,idx,jpeg_bytes)
case 0xE0 | 0xE1 | 0xE2 | 0xE3 | 0xE4 | 0xE5:
idx = segment_appn(m1,m2,idx,jpeg_bytes)
case 0xEF:
idx = segment_com(m1,m2,idx,jpeg_bytes)
case 0xD9:
idx = segment_eoi(m1,m2,idx,jpeg_bytes)
break
case _:
idx -= 1
print()
print(f'bad/illegal marker flag {m2:02X} idx={idx}')
print()
hex_dump(jpeg_bytes,idx-16,idx+31)
return False
return True
# ------------------------------------------------------------------
# ---- open jpeg file and read it directly into a byte array
# ----
# ---- It may not be necessary to read in the complete file
# ---- if you are only accessing the headers. For example,
# ---- to read the first 1000 bytes?
# ----
# ---- def load_jpeg_to_array(jpeg_file_path):
# ----     f = open(jpeg_file_path,'rb')
# ----     jpeg_bytes = f.read(1000)
# ----     f.close()
# ----     return jpeg_bytes
# ----
# ---- or
# ----
# ---- def load_jpeg_to_array(jpeg_file_path):
# ----     with open(jpeg_file_path,'rb') as f:
# ----         jpeg_bytes = f.read(1000)
# ----     return jpeg_bytes
# ----
# ------------------------------------------------------------------
def load_jpeg_to_array(jpeg_file_path:str) -> bytes:
    """Read a whole file in binary mode and return its contents.

    jpeg_file_path -- path of the file to read

    Returns the file contents as an immutable bytes object (this is
    what a binary-mode read() produces; it is not a bytearray).
    Any error from open() or read() propagates to the caller.
    """
    with open(jpeg_file_path,'rb') as file:
        return file.read()
# ------------------------------------------------------------------
# ---- main
# ------------------------------------------------------------------
if __name__ == '__main__':
    # ---- script entry point: for each listed file, load it, print
    # ---- its header segments, then dump the tail of the file
    print()
    # ---- files to inspect (paths are relative to the current directory)
    jpeg_file_paths = [ 'mona_lisa_small_a.jpg' ]
    for file_path in jpeg_file_paths:
        jpeg_bytes = load_jpeg_to_array(file_path)
        print()
        tf = display_jpeg_header(file_path,jpeg_bytes)
        # ---- anything other than True is a failure; stop processing
        # ---- the remaining files
        if tf is not True:
            print()
            print('error: display JPEG headers failed')
            break
        print()
        print('last 32 bytes of file')
        max_idx = len(jpeg_bytes) - 1   # ---- index of the final byte
        # ---- presumably hex_dump takes (data, first index, last index)
        # ---- - confirm against the hex_dump module
        # ---- NOTE(review): max_idx - 32 is negative when the file
        # ---- holds fewer than 33 bytes
        hex_dump(jpeg_bytes, max_idx - 32, max_idx)
        print()