# Load the system dictionary (one word per line), dropping surrounding
# whitespace and skipping blank lines.  The 'with' block guarantees the
# file handle is closed as soon as the list has been built.
with open('/usr/share/dict/words') as wordFile:
    strippedLines = (line.strip() for line in wordFile)
    words = [entry for entry in strippedLines if entry]

#--------------------------------------------------------------------------
# Goal: create a list containing the lowercase form of every word

# Same length and order as 'words'; every entry is converted to lowercase.
lowercaseWords = [word.lower() for word in words]

#--------------------------------------------------------------------------
# Goal: create a list of all words that were originally lowercased

# Keep only the entries for which str.islower() is true, i.e. words that
# contain no uppercase letters as written in the dictionary.
originallyLowercaseWords = [word for word in words if word.islower()]

#--------------------------------------------------------------------------
# Goal: create a list of all words that contain an apostrophe

apostrophed = [w for w in words if "'" in w]
print 'There are', len(apostrophed), 'words with an apostrophe'

#--------------------------------------------------------------------------
# Goal: find all words with 5 or more lowercase 's' characters.
# We can do this with standard control structures or list comprehension
    
#manyS = []
#for w in words:
#    if w.count('s') >= 5:
#        manyS.append(w)

manyS = [w for w in words if w.count('s') >= 5]
print 'There are', len(manyS), "words having 5 or more 's' characters"

#--------------------------------------------------------------------------
# Goal:  compute average number of characters and number of vowels per word

totalChars = 0
totalVowels = 0
for w in words:
    totalChars += len(w)
    lowered = w.lower()
    for vowel in 'aeiou':
        totalVowels += lowered.count(vowel)

print 'Average word length is',  float(totalChars) / len(words)
print 'Average number of vowels is', float(totalVowels) / len(words)

#--------------------------------------------------------------------------
# Goal:  classic spelling rule is "i before e except after c, or when sounded..."
# We will look for those exceptions to the typical rule

special = []
for w in words:
    if 'ei' in w and 'cei' not in w:
        special.append(w)
print "There are", len(special), "words that are exceptions to the 'i before e' rule"

#--------------------------------------------------------------------------
# Goal:  find words that have consecutive repeated characters (e.g., filled)
# Note:  our current implementation is flawed in that a word like 'coffee'
#        is added twice (once for the 'ff' and again for the 'ee')

doubled = []
for w in words:
    lowered = w.lower()
    included = False
    for i in range(len(lowered)-3):
        piece = lowered[i:i+2]
        if piece[0] == piece[1] and not included:
            doubled.append(w)
	    included = True
print "There are", len(doubled), "words that have consecutive repeated letters"


#--------------------------------------------------------------------------
# Goal:  find words that have 'abc' as subsequence

abc = []
for w in words:
    a = w.find('a')     # index of a (-1 if not found)
    if a != -1:
        b = w.find('b', 1+a)
        if b != -1:
            c = w.find('c', 1+b)
            if c != -1:
                abc.append(w)
print 'There are', len(abc), "words with 'abc' as a subsequence"

#--------------------------------------------------------------------------
# Goal:  find words that have a particular pattern as a subsequence

sub = raw_input("Desired subsequence: ")
good = []
for w in words:
    failure = False
    j = 0                # we will only consider w[j: ]
    for letter in sub:
        if not failure:
            k = w.find(letter, j)
            if k == -1:
                failure = True
            else:
                j = k+1
    if not failure:
        good.append(w)
print 'There are', len(good), "words with '" + sub + "' as a subsequence"