-
Notifications
You must be signed in to change notification settings - Fork 21k
Expand file tree
/
Copy pathArithmeticCoding.java
More file actions
157 lines (133 loc) · 5.59 KB
/
ArithmeticCoding.java
File metadata and controls
157 lines (133 loc) · 5.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package com.thealgorithms.compression;
import java.math.BigDecimal;
import java.math.MathContext;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* An implementation of the Arithmetic Coding algorithm.
*
* <p>
* Arithmetic coding is a form of entropy encoding used in lossless data
* compression. It encodes an entire message into a single number, a fraction n
* where (0.0 <= n < 1.0). Unlike Huffman coding, which assigns a specific
* bit sequence to each symbol, arithmetic coding represents the message as a
* sub-interval of the [0, 1) interval.
* </p>
*
* <p>
* This implementation uses BigDecimal for precision to handle the shrinking
* intervals, making it suitable for educational purposes to demonstrate the
* core logic.
* </p>
*
* <p>
* Time Complexity: O(n*m) for compression and decompression where n is the
* length of the input and m is the number of unique symbols, due to the need
* to calculate symbol probabilities.
* </p>
*
* <p>
* References:
* <ul>
* <li><a href="https://en.wikipedia.org/wiki/Arithmetic_coding">Wikipedia:
* Arithmetic coding</a></li>
* </ul>
* </p>
*/
public final class ArithmeticCoding {
private ArithmeticCoding() {
}
/**
* Compresses a string using the Arithmetic Coding algorithm.
*
* @param uncompressed The string to be compressed.
* @return The compressed representation as a BigDecimal number.
* @throws IllegalArgumentException if the input string is null or empty.
*/
public static BigDecimal compress(String uncompressed) {
if (uncompressed == null || uncompressed.isEmpty()) {
throw new IllegalArgumentException("Input string cannot be null or empty.");
}
Map<Character, Symbol> probabilityTable = calculateProbabilities(uncompressed);
BigDecimal low = BigDecimal.ZERO;
BigDecimal high = BigDecimal.ONE;
for (char symbol : uncompressed.toCharArray()) {
BigDecimal range = high.subtract(low);
Symbol sym = probabilityTable.get(symbol);
high = low.add(range.multiply(sym.high()));
low = low.add(range.multiply(sym.low()));
}
return low; // Return the lower bound of the final interval
}
/**
* Decompresses a BigDecimal number back into the original string.
*
* @param compressed The compressed BigDecimal number.
* @param length The length of the original uncompressed string.
* @param probabilityTable The probability table used during compression.
* @return The original, uncompressed string.
*/
public static String decompress(BigDecimal compressed, int length, Map<Character, Symbol> probabilityTable) {
StringBuilder decompressed = new StringBuilder();
// Create a sorted list of symbols for deterministic decompression, matching the
// order used in calculateProbabilities
List<Map.Entry<Character, Symbol>> sortedSymbols = new ArrayList<>(probabilityTable.entrySet());
sortedSymbols.sort(Map.Entry.comparingByKey());
BigDecimal low = BigDecimal.ZERO;
BigDecimal high = BigDecimal.ONE;
for (int i = 0; i < length; i++) {
BigDecimal range = high.subtract(low);
// Find which symbol the compressed value falls into
for (Map.Entry<Character, Symbol> entry : sortedSymbols) {
Symbol sym = entry.getValue();
// Calculate the actual range for this symbol in the current interval
BigDecimal symLow = low.add(range.multiply(sym.low()));
BigDecimal symHigh = low.add(range.multiply(sym.high()));
// Check if the compressed value falls within this symbol's range
if (compressed.compareTo(symLow) >= 0 && compressed.compareTo(symHigh) < 0) {
decompressed.append(entry.getKey());
// Update the interval for the next iteration
low = symLow;
high = symHigh;
break;
}
}
}
return decompressed.toString();
}
/**
* Calculates the frequency and probability range for each character in the
* input string in a deterministic order.
*
* @param text The input string.
* @return A map from each character to a Symbol object containing its
* probability range.
*/
public static Map<Character, Symbol> calculateProbabilities(String text) {
Map<Character, Integer> frequencies = new HashMap<>();
for (char c : text.toCharArray()) {
frequencies.put(c, frequencies.getOrDefault(c, 0) + 1);
}
// Sort the characters to ensure a deterministic order for the probability table
List<Character> sortedKeys = new ArrayList<>(frequencies.keySet());
Collections.sort(sortedKeys);
Map<Character, Symbol> probabilityTable = new HashMap<>();
BigDecimal currentLow = BigDecimal.ZERO;
int total = text.length();
for (char symbol : sortedKeys) {
BigDecimal probability = BigDecimal.valueOf(frequencies.get(symbol)).divide(BigDecimal.valueOf(total), MathContext.DECIMAL128);
BigDecimal high = currentLow.add(probability);
probabilityTable.put(symbol, new Symbol(currentLow, high));
currentLow = high;
}
return probabilityTable;
}
/**
* Helper class to store the probability range [low, high) for a symbol.
*/
public record Symbol(BigDecimal low, BigDecimal high) {
}
}