# Source: http://www.sijitao.net/2441.html
def remove_similar(lists, similarity=0.9):
    """Remove near-duplicate items from ``lists`` in place.

    Each item is compared with every item that follows it using
    ``difflib.SequenceMatcher(None, earlier, later).ratio()``. Whenever the
    ratio is greater than or equal to ``similarity`` the *later* item is
    deleted, so the first occurrence of each group of similar items is the
    one that survives and the relative order of survivors is preserved.

    Args:
        lists: A mutable list of sequences accepted by
            ``difflib.SequenceMatcher`` (e.g. strings). It is modified
            in place.
        similarity: Threshold in the range 0..1; pairs whose ratio reaches
            this value are treated as duplicates. Defaults to 0.9.

    Returns:
        The same list object that was passed in, after removal.
    """
    # Imported locally so the function is self-contained even when the
    # enclosing module does not import difflib at the top level.
    import difflib

    i = 0
    while i < len(lists):
        j = i + 1
        while j < len(lists):
            ratio = difflib.SequenceMatcher(None, lists[i], lists[j]).ratio()
            if ratio >= similarity:
                # Deleting shifts the next candidate into slot j, so j must
                # not advance on this branch.
                del lists[j]
            else:
                j += 1
        i += 1
    return lists