1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
| public class URLBloomFilter {
private final BitSet bitSet;
private final int size;
private final int hashFunctionCount;
private final MessageDigest[] digestFunctions;
public URLBloomFilter(int expectedElements, double falsePositiveRate) {
this.size = calculateOptimalSize(expectedElements, falsePositiveRate);
this.hashFunctionCount = calculateOptimalHashFunctions(expectedElements, size);
this.bitSet = new BitSet(size);
// 初始化多个哈希函数
this.digestFunctions = new MessageDigest[hashFunctionCount];
try {
for (int i = 0; i < hashFunctionCount; i++) {
digestFunctions[i] = MessageDigest.getInstance("SHA-256");
}
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("SHA-256 not available", e);
}
}
// 计算最优位数组大小
private int calculateOptimalSize(int expectedElements, double falsePositiveRate) {
return (int) (-expectedElements * Math.log(falsePositiveRate) / (Math.log(2) * Math.log(2)));
}
// 计算最优哈希函数数量
private int calculateOptimalHashFunctions(int expectedElements, int size) {
return Math.max(1, (int) Math.round((double) size / expectedElements * Math.log(2)));
}
// 添加URL到布隆过滤器
public void add(String url) {
for (int i = 0; i < hashFunctionCount; i++) {
int hash = hash(url, i);
bitSet.set(Math.abs(hash % size));
}
}
// 检查URL是否可能存在
public boolean mightContain(String url) {
for (int i = 0; i < hashFunctionCount; i++) {
int hash = hash(url, i);
if (!bitSet.get(Math.abs(hash % size))) {
return false;
}
}
return true;
}
// 生成多个哈希值
private int hash(String url, int seed) {
MessageDigest digest = digestFunctions[seed % digestFunctions.length];
digest.reset();
digest.update(url.getBytes(StandardCharsets.UTF_8));
digest.update(String.valueOf(seed).getBytes(StandardCharsets.UTF_8));
byte[] hashBytes = digest.digest();
int hash = 0;
// 将字节数组转换为int
for (int i = 0; i < Math.min(4, hashBytes.length); i++) {
hash = (hash << 8) | (hashBytes[i] & 0xFF);
}
return hash;
}
// 获取当前误判率估计
public double getCurrentFalsePositiveRate() {
int setBits = bitSet.cardinality();
return Math.pow((double) setBits / size, hashFunctionCount);
}
// 重置布隆过滤器
public void clear() {
bitSet.clear();
}
// 获取统计信息
public BloomFilterStats getStats() {
BloomFilterStats stats = new BloomFilterStats();
stats.setSize(size);
stats.setHashFunctionCount(hashFunctionCount);
stats.setBitsSet(bitSet.cardinality());
stats.setCurrentFalsePositiveRate(getCurrentFalsePositiveRate());
return stats;
}
public static class BloomFilterStats {
private int size;
private int hashFunctionCount;
private int bitsSet;
private double currentFalsePositiveRate;
// getter和setter方法
public int getSize() { return size; }
public void setSize(int size) { this.size = size; }
public int getHashFunctionCount() { return hashFunctionCount; }
public void setHashFunctionCount(int hashFunctionCount) { this.hashFunctionCount = hashFunctionCount; }
public int getBitsSet() { return bitsSet; }
public void setBitsSet(int bitsSet) { this.bitsSet = bitsSet; }
public double getCurrentFalsePositiveRate() { return currentFalsePositiveRate; }
public void setCurrentFalsePositiveRate(double currentFalsePositiveRate) {
this.currentFalsePositiveRate = currentFalsePositiveRate;
}
}
// 分布式布隆过滤器
public static class DistributedBloomFilter {
private final RedisTemplate<String, String> redisTemplate;
private final String keyPrefix;
private final int size;
private final int hashFunctionCount;
private static final String LUA_SCRIPT_ADD = """
local key = KEYS[1]
local positions = cjson.decode(ARGV[1])
for i, pos in ipairs(positions) do
redis.call('setbit', key, pos, 1)
end
redis.call('expire', key, 86400)
return 1
""";
private static final String LUA_SCRIPT_CHECK = """
local key = KEYS[1]
local positions = cjson.decode(ARGV[1])
for i, pos in ipairs(positions) do
if redis.call('getbit', key, pos) == 0 then
return 0
end
end
return 1
""";
private final RedisScript<Long> addScript;
private final RedisScript<Long> checkScript;
public DistributedBloomFilter(RedisTemplate<String, String> redisTemplate,
String keyPrefix,
int expectedElements,
double falsePositiveRate) {
this.redisTemplate = redisTemplate;
this.keyPrefix = keyPrefix;
this.size = calculateOptimalSize(expectedElements, falsePositiveRate);
this.hashFunctionCount = calculateOptimalHashFunctions(expectedElements, size);
this.addScript = new DefaultRedisScript<>(LUA_SCRIPT_ADD, Long.class);
this.checkScript = new DefaultRedisScript<>(LUA_SCRIPT_CHECK, Long.class);
}
public void add(String url) {
String key = keyPrefix + ":bloom";
List<Integer> positions = calculatePositions(url);
List<String> keys = Collections.singletonList(key);
String positionsJson = JsonUtils.toJson(positions);
redisTemplate.execute(addScript, keys, positionsJson);
}
public boolean mightContain(String url) {
String key = keyPrefix + ":bloom";
List<Integer> positions = calculatePositions(url);
List<String> keys = Collections.singletonList(key);
String positionsJson = JsonUtils.toJson(positions);
Long result = redisTemplate.execute(checkScript, keys, positionsJson);
return result != null && result == 1;
}
private List<Integer> calculatePositions(String url) {
List<Integer> positions = new ArrayList<>();
try {
MessageDigest digest = MessageDigest.getInstance("SHA-256");
for (int i = 0; i < hashFunctionCount; i++) {
digest.reset();
digest.update(url.getBytes(StandardCharsets.UTF_8));
digest.update(String.valueOf(i).getBytes(StandardCharsets.UTF_8));
byte[] hash = digest.digest();
int hashInt = Math.abs(ByteBuffer.wrap(hash).getInt());
positions.add(hashInt % size);
}
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("SHA-256 not available", e);
}
return positions;
}
private int calculateOptimalSize(int expectedElements, double falsePositiveRate) {
return (int) (-expectedElements * Math.log(falsePositiveRate) / (Math.log(2) * Math.log(2)));
}
private int calculateOptimalHashFunctions(int expectedElements, int size) {
return Math.max(1, (int) Math.round((double) size / expectedElements * Math.log(2)));
}
}
}
|