# solution_231b.py

#!/usr/bin/python3
# ====================================================================
# estimate the size of a population using a random sample
# of the population (average 100 samples)
# ====================================================================

# --------------------------------------------------------------------
# ---- estimate size of a population using a random sample
# --------------------------------------------------------------------

def estimate_size(sample:list,verbose:bool=False) -> float:
    """Estimate a population's size from a sample of its members.

    The estimate is the largest sampled value plus the average gap
    between neighbouring values of the sorted sample.  The first gap is
    measured from 1 up to the smallest value, so the values are
    presumably drawn from a population numbered 1..N.

    sample  -- the sampled values
    verbose -- when True, print the first ten gaps followed by a summary

    Returns None for an empty sample, the lone value itself for a
    one-item sample, otherwise max + (sum of gaps)/(number of gaps).
    """

    if verbose:
        print()

    item_count = len(sample)

    # ---- nothing to estimate from

    if item_count == 0:
        return None

    # ---- a single item is handed back unchanged

    if item_count < 2:
        return sample[0]

    # ---- ascending order, so neighbours in the list bound each gap

    ordered = sorted(sample)
    largest = max(ordered)

    # ---- accumulate the gaps; a leading 0 acts as the predecessor of
    # ---- the smallest value, which makes the first gap (value - 1)

    gap_total = 0
    gap_count = 0

    for index, (lower, upper) in enumerate(zip([0] + ordered, ordered)):

        gap = upper - lower - 1
        gap_total += gap

        # ---- only the first ten gaps are listed, then an ellipsis

        if verbose and index < 10:
            print(f'gap between {lower:4} to {upper:4} is {gap}')

        if verbose and index == 10:
            print('...')

        gap_count += 1

    estimate = largest + gap_total/gap_count

    if verbose:
        print()
        print(f'number of gaps  : {gap_count}')
        print(f'sum of gaps     : {gap_total}')
        print(f'average gap size: {gap_total/len(ordered)}')
        print(f'sample size     : {len(ordered)}')
        print(f'sample sorted   : {ordered}')

    return estimate

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    # Demo driver: draw repeated random samples from a population
    # numbered 1..population_size, estimate the population size from
    # each sample, and report the averaged estimate and its error.

    import random
    # NOTE(review): 'ui' is not used anywhere in this block; the import
    # is kept in case the module is relied on for import-time effects.
    import user_interface as ui

    sample_size     = 20
    population_size = 2000
    trials          = 100

    print()
    print(f'sample     size: {sample_size}')
    print(f'population size: {population_size}')

    # ---- a range is enough for random.sample (no list needs building)

    population = range(1,population_size+1)

    size_sum       = 0.0
    difference_sum = 0.0
    percent_sum    = 0.0

    for _ in range(trials):

        # ---- draw a NEW sample on every trial; sampling once outside
        # ---- the loop would average the same estimate over and over

        sample = random.sample(population,sample_size)

        estimated_population_size = estimate_size(sample)
        size_sum       += estimated_population_size

        diff           = estimated_population_size - len(population)
        difference_sum += diff
        percent_sum    += diff/len(population)*100.0

    size_avg       = size_sum/trials
    difference_avg = difference_sum/trials
    percent_avg    = percent_sum/trials

    print()
    print(f'{trials} samples averaged')
    print(f'actual    population size is {len(population)}')
    print(f'estimated population size is {size_avg:.1f}')
    print(f'difference                is {difference_avg:.1f}')
    print(f'percent difference        is {percent_avg:.1f}%')