# Load the system dictionary, one word per line, skipping blank lines.
# `open` inside a `with` block closes the file as soon as it has been read
# (the Python 2-only `file` builtin left it open and does not exist in
# Python 3).
with open('/usr/share/dict/words') as wordFile:
    strippedLines = (line.strip() for line in wordFile)
    words = [entry for entry in strippedLines if entry]

#--------------------------------------------------------------------------
# Goal: create a lowercased copy of every word
lowercaseWords = [w.lower() for w in words]

#--------------------------------------------------------------------------
# Goal: create a list of all words that were originally lowercased
originallyLowercaseWords = [w for w in words if w.islower()]

#--------------------------------------------------------------------------
# Goal: find all words that contain an apostrophe
apostrophed = [w for w in words if "'" in w]
# A single pre-formatted string prints identically under Python 2 and 3.
print('There are %d words with an apostrophe' % len(apostrophed))

#--------------------------------------------------------------------------
# Goal: find all words with 5 or more lowercase 's' characters.
# We can do this with standard control structures or list comprehension
#manyS = []
#for w in words:
#    if w.count('s') >= 5:
#        manyS.append(w)
manyS = [w for w in words if w.count('s') >= 5]
print("There are %d words having 5 or more 's' characters" % len(manyS))

#--------------------------------------------------------------------------
# Goal: compute average number of characters and number of vowels per word
totalChars = 0
totalVowels = 0
for w in words:
    totalChars += len(w)
    lowered = w.lower()  # count uppercase vowels as well
    totalVowels += sum(lowered.count(vowel) for vowel in 'aeiou')

if words:
    # %s applies str() to the float, matching what the print statement did
    print('Average word length is %s' % (float(totalChars) / len(words)))
    print('Average number of vowels is %s' % (float(totalVowels) / len(words)))
else:
    # An empty word list would otherwise raise ZeroDivisionError here.
    print('No words were loaded, so the averages are undefined')

#--------------------------------------------------------------------------
# Goal: classic spelling rule is "i before e except after c, or when sounded..."
# We will look for those exceptions to the typical rule: words that contain
# 'ei' but do not contain 'cei' anywhere. The test is case-sensitive.
special = [w for w in words if 'ei' in w and 'cei' not in w]
print("There are %d words that are exceptions to the 'i before e' rule" % len(special))

#--------------------------------------------------------------------------
# Goal: find words that have consecutive repeated characters (e.g., filled)
# Each word is recorded at most once, however many doubled pairs it has
# (e.g., 'coffee' has both 'ff' and 'ee').
doubled = []
for w in words:
    lowered = w.lower()
    # Pair every character with its successor. The earlier index loop used
    # range(len(lowered)-3), which never examined the last two pairs and so
    # missed words such as 'tree', 'ball' and 'bee'.
    if any(first == second for first, second in zip(lowered, lowered[1:])):
        doubled.append(w)
print("There are %d words that have consecutive repeated letters" % len(doubled))


def _hasSubsequence(word, pattern):
    """Return True if the characters of pattern occur in word, in order.

    The characters need not be adjacent. An empty pattern matches every word.
    """
    start = 0  # only word[start:] is still eligible to match
    for letter in pattern:
        found = word.find(letter, start)
        if found == -1:
            return False
        start = found + 1
    return True


#--------------------------------------------------------------------------
# Goal: find words that have 'abc' as subsequence
abc = [w for w in words if _hasSubsequence(w, 'abc')]
print("There are %d words with 'abc' as a subsequence" % len(abc))

#--------------------------------------------------------------------------
# Goal: find words that have a particular pattern as a subsequence
try:
    readLine = raw_input  # Python 2
except NameError:
    readLine = input  # Python 3 renamed raw_input to input
sub = readLine("Desired subsequence: ")
good = [w for w in words if _hasSubsequence(w, sub)]
print("There are %d words with '%s' as a subsequence" % (len(good), sub))