#!/usr/bin/python3
# =========================================================
# Read a text file and report statistics about the
# words found in it
# =========================================================

import math
import sys
import os
import platform

# ---------------------------------------------------------
# global variables
#
# punctuation   punctuation characters
# wdict         dict  - holds words and word counts
# wline         count - lines in file
# wtotal        count - total number of words found in file
# wunique       count - unique words in file
# ---------------------------------------------------------

punctuation = '''.?!,;:-_()[]{}"'/\\'''

wdict = dict()
wline = 0
wtotal = 0
wunique = 0


# ---------------------------------------------------------
# am I running Python 3?
# ---------------------------------------------------------
def RunningPython3():
    """Return True when the interpreter's major version is 3."""
    return sys.version_info[0] == 3


# ---------------------------------------------------------
# get user input (Python 2 or 3)
# ---------------------------------------------------------
def GetUserInput(prompt, py3):
    """Prompt the user and return the reply with surrounding
    whitespace removed.

    prompt -- text displayed to the user
    py3    -- True to call input(), False to call raw_input()
    """
    if py3:
        i = input(prompt)
    else:
        i = raw_input(prompt)
    return i.strip()


# ---------------------------------------------------------
# pause program
# ---------------------------------------------------------
def Pause(py3):
    """Print a blank line and wait until the user presses enter."""
    print('')
    GetUserInput('Press enter to continue ', py3)


# ---------------------------------------------------------
# clear the screen
# ---------------------------------------------------------
def ClearScreen():
    """Clear the terminal using the platform's shell command."""
    # 'cls' only exists on Windows; every other platform
    # (Linux, macOS, BSD, ...) provides 'clear'
    if platform.system() == 'Windows':
        os.system('cls')
    else:
        os.system('clear')


# ---------------------------------------------------------
# test for a punctuation character at the end of a string
# ---------------------------------------------------------
# punctuation characters are: period, question mark,
# exclamation mark, comma, semicolon, colon, dash,
# hyphen, parentheses, brackets, braces, apostrophe,
# quote marks and ellipsis.
# ---------------------------------------------------------
# Note: depending on OS, editor settings, etc. dashes,
#       hyphens, apostrophes, ellipsis, and quote marks
#       may appear differently in the text file
# ---------------------------------------------------------
def HasPunctuation(str, py3):
    """Return True if the last character of str is one of the
    characters in the global punctuation string.

    An empty string has no last character and yields False.
    py3 is not used; it is kept so existing callers still work.
    """
    return bool(str) and str[-1] in punctuation


# ---------------------------------------------------------
# process a text file
# ---------------------------------------------------------
def ProcessTextFile(file, py3):
    """Read file line by line and count the words on every
    non-blank line.

    Adds one to the global wline for each line read; the word
    counters are updated by ProcessTextLine.
    Errors raised by open() are passed on to the caller.
    """
    global wline

    # 'with' closes the file even if processing a line fails
    with open(file, 'r') as inFile:
        for line in inFile:
            line = line.strip()
            if line:
                ##print(line)
                ProcessTextLine(line, py3)
            wline += 1


# ---------------------------------------------------------
# process a line (string) of text
#
# convert words to lowercase and remove any punctuation at
# the end of words - count the words
# ---------------------------------------------------------
def ProcessTextLine(line, py3):
    """Count the words in one line of text.

    Each whitespace-separated token is lowercased and all of its
    trailing punctuation is removed. Tokens with nothing left
    (for example a lone dash) are not counted as words.

    Updates the globals wdict, wtotal and wunique.
    Returns the number of words counted on this line.
    """
    global wtotal, wunique

    wc = 0

    for w in line.split():

        w = w.lower()

        # strip every trailing punctuation character, so that
        # 'end."' is stored as 'end' and not as 'end.'
        while HasPunctuation(w, py3):
            w = w[0:-1]

        # token consisted only of punctuation - not a word
        if not w:
            continue

        if w in wdict:
            wdict[w] += 1
        else:
            wdict[w] = 1
            wunique += 1

        wc += 1
        wtotal += 1

    return wc


# ---------------------------------------------------------
# main
# ---------------------------------------------------------
if __name__ == '__main__':

    py3 = RunningPython3()

    # file to analyze: the first command line argument if one
    # was given, otherwise the default text file
    ##file = 'gettysburg_address.txt'
    file = 'declaration_of_independence.txt'
    if len(sys.argv) > 1:
        file = sys.argv[1]

    # report an unreadable file instead of showing a traceback
    try:
        ProcessTextFile(file, py3)
    except IOError as e:
        print('Unable to read {}: {}'.format(file, e))
        sys.exit(1)

    # -----------------------------------------------------
    # display the words found in the text file
    # -----------------------------------------------------
    ##
    ##print('')
    ##
    ###normal order
    ##for k,v in wdict.items():
    ##    print('{}: {}'.format(k,v))
    ##
    ###sort on key
    ##for k in sorted(wdict):
    ##    print('{}: {}'.format(k,wdict[k]))
    ##
    ###sort on value (count)
    ##i = 0
    ##for k,v in sorted(wdict.items(), reverse=True,
    ##                  key=lambda kv: (kv[1],kv[0])):
    ##    print('[{:02}] {:>4}: {}'.format(i,v,k))
    ##    i += 1

    ##Pause(py3)

    # -----------------------------------------------------
    # display text file statistics
    # -----------------------------------------------------

    print('')
    print('Text file: {}'.format(file))
    print('{} unique words in text file'.format(wunique))
    print('{} words in text file'.format(wtotal))

    Pause(py3)

    # -----------------------------------------------------
    # search the dictionary for a specific word
    # -----------------------------------------------------

    while True:

        ClearScreen()

        print('------ Search for Word ------')
        print('')
        w = GetUserInput('Enter search word: ', py3)

        # an empty reply ends the search loop
        if not w:
            break

        # words are stored in lowercase
        w = w.lower()

        if w in wdict:
            print('')
            print('Found {}, word count is {}'.format(w, wdict[w]))
        else:
            print('')
            print('{} not found'.format(w))

        Pause(py3)

    print('')