单词来源于 COCA 单词库。
效果如下(按单词长度升序排列,长度相同的单词按字母顺序排列):
2个字母的单词
3个字母的单词
4个字母的单词
5个字母的单词
…
n个字母的单词
def sort_words_by_length_and_alphabet(text):
    """Return the unique words of *text*, ordered by length, then alphabetically.

    The text is split on whitespace. Every token has its ASCII punctuation
    (``string.punctuation``) removed and is converted to lower case before
    duplicates are eliminated, so ``"The"``, ``"the"`` and ``"the,"`` all
    count as the same word.

    Args:
        text: The raw text to extract words from.

    Returns:
        A list of distinct lower-case words sorted by ``(len(word), word)``.
        Tokens that consist solely of punctuation are omitted.
    """
    import string  # function-scope import keeps the block self-contained

    # Build the punctuation-stripping table once instead of once per word.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # Normalise (strip + lower-case) BEFORE de-duplicating; doing it the other
    # way round lets "The" and "the" both survive and produce duplicates.
    unique_words = {
        token.translate(strip_punctuation).lower() for token in text.split()
    }

    # A token made only of punctuation collapses to '' — that is not a word.
    unique_words.discard('')

    # Sort by length first and alphabetically within the same length.
    return sorted(unique_words, key=lambda word: (len(word), word))


# Example usage
if __name__ == "__main__":
    # Read the entire input file into a single string.
    with open('COCA_20000.txt', mode='r', encoding='utf-8') as source:
        text = source.read()

    # Extract the words, sort them, and show the result on the console.
    sorted_words = sort_words_by_length_and_alphabet(text)
    print("按单词长度和字母顺序升序排序的结果:", sorted_words)

    # Save the sorted words to the output file, one word per line.
    with open('sorted_words.txt', mode='w', encoding='utf-8') as target:
        target.writelines(word + '\n' for word in sorted_words)
    print("已保存到文件 sorted_words.txt")