# Load every non-blank entry of the system dictionary, stripped of its
# surrounding whitespace.  open() inside a with-block replaces the
# Python-2-only file() builtin and guarantees the handle is closed; each
# line is stripped only once.
with open('/usr/share/dict/words') as wordFile:
    words = [entry for entry in (line.strip() for line in wordFile) if entry]

# If we wanted to ensure lowercase, we could use a lowercaseWords list
lowercaseWords = [w.lower() for w in words]

#--------------------------------------------------------------------------
# Goal: find all words with 5 or more lowercase 's' characters.
# We can do this with standard control structures or list comprehension
#manyS = []
#for w in words:
#    if w.count('s') >= 5:
#        manyS.append(w)
manyS = [w for w in words if w.count('s') >= 5]

#--------------------------------------------------------------------------
# Goal: compute average number of characters and number of vowels per word
totalChars = 0
totalVowels = 0
for w in words:
    totalChars += len(w)
    lowered = w.lower()     # count vowels case-insensitively
    for vowel in 'aeiou':
        totalVowels += lowered.count(vowel)

# Divisor for all per-entry averages.  Falling back to 1 keeps an empty
# dictionary from raising ZeroDivisionError (every total is 0 in that case,
# so the reported averages are simply 0.0).
numEntries = len(words) or 1

# Single-argument print(...) produces the same output under Python 2 and 3.
print('Average word length is %s' % (float(totalChars) / numEntries))
print('Average number of vowels is %s' % (float(totalVowels) / numEntries))

#--------------------------------------------------------------------------
# Goal: compute number of pieces per entry
# I was assuming strings like 'Rhode Island' appeared in the dictionary.
# (as it turns out, we see that there are no such entries on our system)
numPieces = 0
multiPiece = 0
for w in words:
    pieces = w.split()      # whitespace-separated parts of the entry
    numPieces += len(pieces)
    if len(pieces) > 1:
        multiPiece += 1

print('Average number of pieces per entry is %s' % (float(numPieces) / numEntries))
print('Number of multi-piece entries is %s' % multiPiece)

#--------------------------------------------------------------------------
# Goal: classic spelling rule is "i before e except after c, or when sounded..."
# We will look for those exceptions to the typical rule:
# entries that contain 'ei' but do not contain 'cei' anywhere.
special = [w for w in words if 'ei' in w and 'cei' not in w]

#--------------------------------------------------------------------------
# Goal: find words that have consecutive repeated characters (e.g., filled)
# Every adjacent pair of characters is examined, including the final pair,
# and each word is recorded at most once even when it contains several
# doubled letters (e.g., 'coffee' has both 'ff' and 'ee').
def _hasDoubledLetter(word):
    """Return True if word contains two identical adjacent characters.

    The comparison is case-insensitive, so 'Aardvark' counts as doubled.
    Words shorter than two characters never match.
    """
    lowered = word.lower()
    # zip pairs each character with its successor; any() stops at first hit
    return any(first == second for first, second in zip(lowered, lowered[1:]))

doubled = [w for w in words if _hasDoubledLetter(w)]