# solution_238.py

#!/usr/bin/python3
# ====================================================================
# demonstrate tabulate module
#
# Note: It only works with a small number of columns and rows
# ====================================================================

import csv
from tabulate import tabulate

# --------------------------------------------------------------------
# ---- use string split
# ---- 
# ---- Note:
# ---- a. str.split(',') does not recognize commas that are
# ----    inside a quoted column value. the csv module will
# ----    parse such a row correctly.
# ---- b. If return_count is None, all of the CSV
# ----    rows are returned.
# --------------------------------------------------------------------

def collect_raw_csv_data_string_split(filename:str,
                                      return_count:int=None) -> list:
    """Read a CSV file using str.split(',') and return its data rows.

    The first line is treated as the header: its column count is
    printed and used to validate every following line. The header
    itself is not returned. Lines whose column count differs from the
    header's are reported on stdout and skipped.

    Because a plain split is used, a comma inside a quoted value is
    treated as a separator, so such lines are reported as having a bad
    column count.

    Args:
        filename: path of the CSV file to read.
        return_count: maximum number of data rows to return. None
            returns every valid row; zero or less returns no rows.

    Returns:
        A list of rows; each row is a list of column strings.
    """

    column_count = 0
    line_count   = 0
    header_line  = True

    rows = []

    with open(filename,'r') as f:
        for line in f:
            line_count += 1

            lst = line.strip().split(',')

            if header_line:

                # ---- the header fixes the expected column count

                header_line = False
                column_count = len(lst)
                print()
                print(f'column count = {column_count}')

            elif len(lst) != column_count:

                # ---- report the malformed line and do not keep it

                print('bad column count: ' +\
                     f'line={line_count:<3} '   +\
                     f'count={len(lst)}')

            else:
                rows.append(lst)

            # ---- stop as soon as return_count rows are collected.
            # ---- (checked after the header too, so a return_count
            # ---- of 0 reads only the header line.)

            if return_count is not None and len(rows) >= return_count:
                break

    print()
    print(f'line count = {line_count}')

    return rows

# --------------------------------------------------------------------
# ---- use CSV module
# ----
# ---- Note: If return_count is None, all of the CSV
# ----       rows are returned.
# --------------------------------------------------------------------

def collect_raw_csv_data(csvfile:str,
                         colums:list[int]=None,
                         return_count:int=None) -> list:
    """Read a CSV file with the csv module and return its data rows.

    The first row is treated as the header: its column names are
    printed (underscores shown as spaces) and its length is used to
    validate every following row. The header itself is not returned.
    Rows whose column count differs from the header's are reported on
    stdout and skipped.

    Args:
        csvfile: path of the CSV file to read.
        colums: indexes of the columns to keep, in the order they
            should appear in each returned row. None keeps every
            column. (The parameter name keeps its original spelling
            so existing keyword callers continue to work.)
        return_count: maximum number of data rows to return. None
            returns every valid row; zero or less returns no rows.

    Returns:
        A list of rows; each row is a list of column strings.

    Raises:
        IndexError: if an index in colums is outside the header's
            column range.
    """

    # ---- use the argument that was passed in, not a module-level
    # ---- variable that happens to be named "columns"

    columns = colums

    column_count = 0
    row_count    = 0
    header_line  = True

    lists = []

    with open(csvfile,'r',newline='') as f:

        csvreader = csv.reader(f,delimiter=',',quotechar='"')

        for row in csvreader:

            row_count += 1

            if header_line:

                # ---- get the column count of the first row.
                # ---- this is used to verify the number of
                # ---- columns in all of the remaining rows.

                header_line = False
                column_count = len(row)

                # ---- display the row's column names

                print()
                print('Raw CVS Data Header Column Names')
                for i,col in enumerate(row):
                    print(f'[{i:<2}] {col.replace("_"," ")}')

            elif len(row) != column_count:

                # ---- report the malformed row and do not keep it

                print('bad column count: '  +\
                     f'row={row_count:<3} ' +\
                     f'count={len(row)}')

            elif columns is None:

                # ---- keep all of the columns

                lists.append(row)

            else:

                # ---- keep only the selected columns

                lists.append([row[col] for col in columns])

            # ---- stop as soon as return_count rows are collected.
            # ---- the limit is based on the rows actually kept, so
            # ---- the header and skipped rows do not count toward it.

            if return_count is not None and len(lists) >= return_count:
                break

    return lists

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    # ---- demo settings: the CSV file to read, the indexes of the
    # ---- columns to keep, and the header text shown for them

    filename = 'tabulate_data.csv'
    columns  = [0,1,8]
    headers  = ['First Name','Last Name','Phone Number']

    # ---- read the raw CSV data (list of lists), passing 10
    # ---- as the return_count

    data_rows = collect_raw_csv_data(filename,columns,10)

    print()
    print(f'{len(data_rows)} rows returned')
    print()

    # ---- make a copy ordered by last name (index 1 of each row);
    # ---- data_rows keeps the file order

    sorted_rows = list(data_rows)
    sorted_rows.sort(key=lambda entry:entry[1])

    # ---- dump the rows as plain Python lists, first in file
    # ---- order and then in sorted order

    for banner,rows in (
            ('---- Unsorted Rows -----------------------------',data_rows),
            ('---- Sorted Rows -------------------------------',sorted_rows)):
        print()
        print(banner)
        print()
        for row in rows:
            print(row)

    # ---- show the sorted rows with tabulate, once in its default
    # ---- format and once in the "grid" format

    for banner,options in (
            ('---- Simple Table ------------------------------',{}),
            ('---- Grid Table --------------------------------',{'tablefmt':'grid'})):
        print()
        print(banner)
        print()
        print(tabulate(sorted_rows,headers,**options))