# Load the system word list: one entry per line, with surrounding whitespace
# removed and blank lines skipped.  open() is used instead of the file()
# builtin (which Python 3 no longer provides), and the with-block closes the
# file as soon as the list has been built.
with open('/usr/share/dict/words') as wordFile:
    stripped = (line.strip() for line in wordFile)   # strip each line once
    words = [entry for entry in stripped if entry]

# If we wanted to ensure lowercase, we could use a lowercaseWords list
lowercaseWords = [w.lower() for w in words]

#--------------------------------------------------------------------------
# Goal: find all words with 5 or more lowercase 's' characters.
# We can do this with standard control structures or list comprehension
    
# Explicit-loop version, kept for comparison with the comprehension below:
#manyS = []
#for w in words:
#    if w.count('s') >= 5:
#        manyS.append(w)

# Comprehension version: keep every entry whose count of lowercase 's'
# characters is at least five (an uppercase 'S' is not counted).
manyS = [entry for entry in words if entry.count('s') >= 5]

#--------------------------------------------------------------------------
# Goal:  compute average number of characters and number of vowels per word

totalChars = 0
totalVowels = 0
for w in words:
    totalChars += len(w)
    lowered = w.lower()
    for vowel in 'aeiou':
        totalVowels += lowered.count(vowel)

print 'Average word length is',  float(totalChars) / len(words)
print 'Average number of vowels is', float(totalVowels) / len(words)

#--------------------------------------------------------------------------
# Goal:  compute number of pieces per entry
# I was assuming strings like 'Rhode Island' appeared in the dictionary.
# (as it turns out, we see that there are no such entries on our system)

numPieces = 0
multiPiece = 0
for w in words:
    pieces = w.split()
    numPieces += len(pieces)
    if len(pieces) > 1:
        multiPiece += 1

print 'Average number of pieces per entry is', float(numPieces) / len(words)
print 'Number of multi-piece entries is', multiPiece
    
#--------------------------------------------------------------------------
# Goal:  classic spelling rule is "i before e except after c, or when sounded..."
# We will look for those exceptions to the typical rule

# Keep entries that contain 'ei' and contain no 'cei' anywhere in the entry.
# The test is case-sensitive: it is applied to each entry exactly as stored,
# so a capitalized 'Ei' or 'Cei' does not match.
special = [entry for entry in words if 'ei' in entry and 'cei' not in entry]

#--------------------------------------------------------------------------
# Goal:  find words that have consecutive repeated characters (e.g., filled)
# Note:  every adjacent pair of characters is examined, including the last
#        one (e.g., the 'll' in 'ball'), and the comparison ignores case.
#        A word is recorded at most once, even when it contains several
#        doubled pairs (e.g., 'coffee' has both 'ff' and 'ee').

doubled = []
for w in words:
    lowered = w.lower()
    # zip pairs each character with its successor; any() stops at the first
    # matching pair, so the word is appended only once.
    if any(first == second for first, second in zip(lowered, lowered[1:])):
        doubled.append(w)