#!/usr/bin/python3
# ====================================================================
# estimate the size of a population using a random sample
# of the population (average 100 samples)
# ====================================================================

import random
from typing import Optional

# --------------------------------------------------------------------
# ---- estimate size of a population using a random sample
# --------------------------------------------------------------------

def estimate_size(sample: list, verbose: bool = False) -> Optional[float]:
    """Estimate a population's size from a sample of its serial numbers.

    The estimate is the largest sampled value plus the average gap
    between consecutive sampled values, where the gap between 1 and
    the smallest sampled value is counted as the first gap.

    Args:
        sample: Sampled values; they are sorted internally, so any
            order is accepted.
        verbose: When true, print the first ten gaps and a summary of
            the calculation.

    Returns:
        None for an empty sample, the single item itself for a
        one-item sample, otherwise the estimate as a float.
    """
    if verbose:
        print()

    sample_length = len(sample)

    # ---- the sample has no items
    if sample_length == 0:
        return None

    # ---- the sample needs at least two items
    if sample_length < 2:
        return sample[0]

    # ---- sort sample ascending
    sample_sorted = sorted(sample)

    # ---- get maximum sample value (last item of the sorted sample)
    sample_max = sample_sorted[-1]

    # ---- gaps in the sample (don't forget 1 to s[0])
    gaps = [sample_sorted[0] - 1]
    gaps.extend(high - low - 1
                for low, high in zip(sample_sorted, sample_sorted[1:]))

    sample_gap_count = len(gaps)
    sample_gap_sum = sum(gaps)

    if verbose:
        print(f'gap between {0:4} to {sample_sorted[0]:4} is {gaps[0]}')
        # only the gaps ending at indexes 1..9 are listed; zip stops
        # at the shortest of its arguments
        for low, high, gap in zip(sample_sorted,
                                  sample_sorted[1:10],
                                  gaps[1:10]):
            print(f'gap between {low:4} to ' +
                  f'{high:4} is {gap}')
        if sample_length > 10:
            print('...')

    population_size = sample_max + sample_gap_sum/sample_gap_count

    if verbose:
        print()
        print(f'number of gaps : {sample_gap_count}')
        print(f'sum of gaps : {sample_gap_sum}')
        print(f'average gap size: {sample_gap_sum/len(sample_sorted)}')
        print(f'sample size : {len(sample_sorted)}')
        print(f'sample sorted : {sample_sorted}')

    return population_size

# --------------------------------------------------------------------
# ---- average the estimate over several independent random samples
# --------------------------------------------------------------------

def run_trials(population_size: int, sample_size: int,
               trials: int = 100) -> tuple:
    """Average estimate_size() over independently drawn samples.

    The population is the integers 1..population_size. A new random
    sample is drawn for every trial so the averages reflect the
    estimator rather than a single draw.

    Args:
        population_size: Actual size of the population.
        sample_size: Number of items drawn per trial (at least 1).
        trials: Number of samples to draw and average (at least 1).

    Returns:
        A tuple (average estimate, average difference from the actual
        size, average percent difference).

    Raises:
        ValueError: If sample_size or trials is less than 1, or (from
            random.sample) if sample_size exceeds population_size.
    """
    if sample_size < 1:
        raise ValueError('sample_size must be at least 1')
    if trials < 1:
        raise ValueError('trials must be at least 1')

    population = range(1, population_size + 1)

    size_sum = 0.0
    difference_sum = 0.0
    percent_sum = 0.0

    for _ in range(trials):
        # draw a fresh sample each trial; reusing one sample would
        # just repeat the same estimate
        sample = random.sample(population, sample_size)
        estimated_population_size = estimate_size(sample)
        size_sum += estimated_population_size
        diff = estimated_population_size - population_size
        difference_sum += diff
        percent_sum += diff/population_size*100.0

    return (size_sum/trials, difference_sum/trials, percent_sum/trials)

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

def main(sample_size: int = 20, population_size: int = 2000,
         trials: int = 100) -> None:
    """Run the trials and print the averaged results."""
    print()
    print(f'sample size: {sample_size}')
    print(f'population size: {population_size}')

    size_avg, difference_avg, percent_avg = run_trials(
        population_size, sample_size, trials)

    print()
    print(f'{trials} samples averaged')
    print(f'actual population size is {population_size}')
    print(f'estimated population size is {size_avg:.1f}')
    print(f'difference is {difference_avg:.1f}')
    print(f'percent difference is {percent_avg:.1f}%')


if __name__ == '__main__':

    # NOTE(review): 'ui' is not referenced anywhere in this script;
    # the import is kept in case its side effects are relied on.
    import user_interface as ui

    main()