1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
| /**
* 压缩位图实现 - 使用游程编码(Run-Length Encoding)
*/
public class CompressedBitmap {
// 游程:存储连续0或1的起始位置和长度
private static class Run {
int start;
int length;
boolean value; // true表示1,false表示0
Run(int start, int length, boolean value) {
this.start = start;
this.length = length;
this.value = value;
}
}
private java.util.List<Run> runs;
private int size;
public CompressedBitmap(int size) {
this.size = size;
this.runs = new java.util.ArrayList<>();
// 初始时整个位图都是0
if (size > 0) {
runs.add(new Run(0, size, false));
}
}
/**
* 设置指定位置为1
*/
public void set(int index) {
if (index < 0 || index >= size) {
throw new IndexOutOfBoundsException("Index: " + index);
}
// 查找包含该索引的游程
for (int i = 0; i < runs.size(); i++) {
Run run = runs.get(i);
if (index >= run.start && index < run.start + run.length) {
if (run.value) {
// 已经是1,无需操作
return;
}
// 需要分割当前游程
splitRun(i, index, true);
return;
}
}
}
/**
* 分割游程
*/
private void splitRun(int runIndex, int splitIndex, boolean newValue) {
Run originalRun = runs.get(runIndex);
if (splitIndex == originalRun.start) {
// 在游程开始处分割
if (originalRun.length == 1) {
// 整个游程都变成新值
originalRun.value = newValue;
} else {
// 插入新的单位游程
runs.add(runIndex, new Run(splitIndex, 1, newValue));
originalRun.start++;
originalRun.length--;
}
} else if (splitIndex == originalRun.start + originalRun.length - 1) {
// 在游程结尾处分割
originalRun.length--;
runs.add(runIndex + 1, new Run(splitIndex, 1, newValue));
} else {
// 在游程中间分割
int leftLength = splitIndex - originalRun.start;
int rightStart = splitIndex + 1;
int rightLength = originalRun.start + originalRun.length - rightStart;
// 修改原游程为左半部分
originalRun.length = leftLength;
// 插入新的单位游程
runs.add(runIndex + 1, new Run(splitIndex, 1, newValue));
// 插入右半部分
if (rightLength > 0) {
runs.add(runIndex + 2, new Run(rightStart, rightLength, originalRun.value));
}
}
// 合并相邻的相同值游程
mergeRuns();
}
/**
* 合并相邻的相同值游程
*/
private void mergeRuns() {
for (int i = runs.size() - 1; i > 0; i--) {
Run current = runs.get(i);
Run previous = runs.get(i - 1);
if (current.value == previous.value &&
previous.start + previous.length == current.start) {
// 合并两个游程
previous.length += current.length;
runs.remove(i);
}
}
}
/**
* 检查指定位置是否为1
*/
public boolean get(int index) {
if (index < 0 || index >= size) {
return false;
}
for (Run run : runs) {
if (index >= run.start && index < run.start + run.length) {
return run.value;
}
}
return false;
}
/**
* 统计1的个数
*/
public int cardinality() {
int count = 0;
for (Run run : runs) {
if (run.value) {
count += run.length;
}
}
return count;
}
/**
* 获取压缩率
*/
public double getCompressionRatio() {
int originalBits = size;
int compressedSize = runs.size() * 12; // 每个Run约12字节
return 1.0 - (double) compressedSize / (originalBits / 8.0);
}
/**
* 打印游程信息(用于调试)
*/
public void printRuns() {
System.out.println("压缩位图游程信息:");
for (int i = 0; i < runs.size(); i++) {
Run run = runs.get(i);
System.out.printf("游程%d: [%d, %d) = %s (长度: %d)\n",
i, run.start, run.start + run.length,
run.value ? "1" : "0", run.length);
}
System.out.printf("总游程数: %d, 压缩率: %.2f%%\n",
runs.size(), getCompressionRatio() * 100);
}
}
|