algorithm-review/chapter12/3_top-k-frequent-words.py

#!/usr/bin/env python
# coding=utf-8

#######################################################################################
# Leetcode 692 前K个高频单词
#
# 给一非空的单词列表，返回前 k 个出现次数最多的单词。
# 返回的答案应该按单词出现频率由高到低排序。如果不同的单词有相同出现频率，按字母顺序排序。
#
# 示例 1：
#   输入: ["i", "love", "leetcode", "i", "love", "coding"], k = 2
#   输出: ["i", "love"]
#   解析: "i" 和 "love" 为出现次数最多的两个单词，均为2次。
#       注意，按字母顺序 "i" 在 "love" 之前。
#
# 示例 2：
#   输入: ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"], k = 4
#   输出: ["the", "is", "sunny", "day"]
#   解析: "the", "is", "sunny" 和 "day" 是出现次数最多的四个单词，
#       出现次数依次为 4, 3, 2 和 1 次。
#
# 注意：
#   1. 假定 k 总为有效值， 1 ≤ k ≤ 集合元素数。
#   2. 输入的单词均由小写字母组成。
#
# 扩展练习：
#   尝试以 O(n log k) 时间复杂度和 O(n) 空间复杂度解决。
#######################################################################################

import collections
import heapq
from typing import List

class Solution:
    """LeetCode 692: pick the k most frequent words from a word list."""

    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words, most frequent first.

        Words that occur equally often are ordered alphabetically.

        Approach:
        1. Count how many times each word occurs.
        2. Rank every distinct word by ``(-frequency, word)`` so that a
           higher frequency ranks first and ties fall back to
           lexicographic order.
        3. Select the ``k`` smallest ranks with ``heapq.nsmallest``
           rather than fully sorting all distinct words; this is the
           O(n log k) approach the problem statement asks for.

        :param words: the input words.
        :param k: how many words to return; the problem statement
            guarantees ``1 <= k <= number of distinct words``.
        :return: the ``k`` selected words, ordered by descending
            frequency and then alphabetically.
        """
        count = collections.Counter(words)
        # Negating the count turns "most frequent" into "smallest key", so a
        # single ascending selection handles both the frequency order and
        # the alphabetical tie-break. Keys are unique because each distinct
        # word appears exactly once in the counter.
        return heapq.nsmallest(k, count, key=lambda w: (-count[w], w))


if __name__ == '__main__':
    # Demo run: print the computed answer for the first example from the
    # problem statement next to the expected answer.
    sample_words = ["i", "love", "leetcode", "i", "love", "coding"]
    top_two = Solution().topKFrequent(sample_words, 2)
    print(top_two, "= [\"i\", \"love\"]")