1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
| public class SearchSuggestionTrie {
private TrieNode root;
public SearchSuggestionTrie() {
this.root = new TrieNode();
}
// Trie节点
private static class TrieNode {
private Map<Character, TrieNode> children;
private boolean isEndOfWord;
private int frequency; // 查询频率
private Set<String> suggestions; // 热门建议
public TrieNode() {
this.children = new HashMap<>();
this.isEndOfWord = false;
this.frequency = 0;
this.suggestions = new TreeSet<>();
}
}
// 插入查询词汇
public void insert(String query, int frequency) {
TrieNode current = root;
for (char ch : query.toCharArray()) {
current.children.putIfAbsent(ch, new TrieNode());
current = current.children.get(ch);
// 更新热门建议
current.suggestions.add(query);
if (current.suggestions.size() > 10) {
// 保持只有前10个建议
String leastFrequent = current.suggestions.iterator().next();
current.suggestions.remove(leastFrequent);
}
}
current.isEndOfWord = true;
current.frequency += frequency;
}
// 获取自动补全建议
public List<String> getAutoComplete(String prefix) {
TrieNode current = root;
// 找到前缀对应的节点
for (char ch : prefix.toCharArray()) {
current = current.children.get(ch);
if (current == null) {
return new ArrayList<>();
}
}
// 收集所有以该前缀开始的词汇
List<String> suggestions = new ArrayList<>();
collectSuggestions(current, prefix, suggestions);
// 按频率排序
suggestions.sort((a, b) -> {
int freqA = getQueryFrequency(a);
int freqB = getQueryFrequency(b);
return Integer.compare(freqB, freqA);
});
return suggestions.subList(0, Math.min(suggestions.size(), 10));
}
// 递归收集建议
private void collectSuggestions(TrieNode node, String prefix, List<String> suggestions) {
if (node.isEndOfWord) {
suggestions.add(prefix);
}
for (Map.Entry<Character, TrieNode> entry : node.children.entrySet()) {
collectSuggestions(entry.getValue(), prefix + entry.getKey(), suggestions);
}
}
// 获取查询频率
public int getQueryFrequency(String query) {
TrieNode current = root;
for (char ch : query.toCharArray()) {
current = current.children.get(ch);
if (current == null) {
return 0;
}
}
return current.frequency;
}
// 拼写检查和纠错
public List<String> getSpellingSuggestions(String query, int maxDistance) {
List<String> suggestions = new ArrayList<>();
findSimilarWords(root, "", query, 0, maxDistance, suggestions);
return suggestions;
}
// 使用编辑距离查找相似词汇
private void findSimilarWords(TrieNode node, String current, String target,
int distance, int maxDistance, List<String> suggestions) {
if (distance > maxDistance) {
return;
}
if (node.isEndOfWord && distance <= maxDistance) {
suggestions.add(current);
}
for (Map.Entry<Character, TrieNode> entry : node.children.entrySet()) {
char ch = entry.getKey();
TrieNode child = entry.getValue();
// 递归探索不同的编辑操作
if (current.length() < target.length()) {
if (ch == target.charAt(current.length())) {
// 匹配
findSimilarWords(child, current + ch, target, distance, maxDistance, suggestions);
} else {
// 替换
findSimilarWords(child, current + ch, target, distance + 1, maxDistance, suggestions);
}
}
// 插入
findSimilarWords(child, current + ch, target, distance + 1, maxDistance, suggestions);
}
// 删除操作
if (current.length() < target.length()) {
findSimilarWords(node, current, target.substring(1), distance + 1, maxDistance, suggestions);
}
}
}
|