239 lines
6.0 KiB
C++
239 lines
6.0 KiB
C++
#include <iostream>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <queue>
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
using namespace std;
|
|
|
|
// Running count of internal nodes created so far. Every internal node takes
// the current value as its creation_order and then increments the counter,
// which gives the heap comparator a deterministic tie-break.
int g_creation_counter = 0;

// One node of the Huffman tree: its weight, its character (leaves only) and
// its left/right children.
//
// Ownership: a node owns both children; destroying a node deletes the whole
// subtree below it. Copying is therefore disabled, because two copies would
// both try to delete the same children.
struct HuffmanNode {
    int weight;                  // value passed in as w by either constructor
    char character;              // symbol stored in a leaf; '\0' for an internal node
    HuffmanNode* left;           // owned child, may be nullptr
    HuffmanNode* right;          // owned child, may be nullptr
    bool is_leaf;                // true when built by the leaf constructor
    int first_appearance_index;  // value passed as first_idx for a leaf; INT_MAX for an internal node
    int creation_order;          // g_creation_counter value for an internal node; INT_MAX for a leaf

    // Builds a leaf carrying character c with weight w; first_idx is stored
    // as first_appearance_index.
    HuffmanNode(int w, char c, int first_idx)
        : weight(w),
          character(c),
          left(nullptr),
          right(nullptr),
          is_leaf(true),
          first_appearance_index(first_idx),
          creation_order(std::numeric_limits<int>::max()) {}

    // Builds an internal node that takes ownership of l and r and consumes
    // the next value of g_creation_counter.
    HuffmanNode(int w, HuffmanNode* l, HuffmanNode* r)
        : weight(w),
          character('\0'),
          left(l),
          right(r),
          is_leaf(false),
          first_appearance_index(std::numeric_limits<int>::max()),
          creation_order(g_creation_counter++) {}

    // The destructor deletes the children, so a copy would lead to a double
    // delete. Nodes are only ever handled through pointers.
    HuffmanNode(const HuffmanNode&) = delete;
    HuffmanNode& operator=(const HuffmanNode&) = delete;

    // Recursively frees the subtree rooted at this node.
    ~HuffmanNode() {
        delete left;
        delete right;
    }
};
|
|
|
|
// 优先队列里用的比较器,符合题目的几条 tie-break 规则
|
|
struct NodeComparer {
|
|
bool operator()(const HuffmanNode* a, const HuffmanNode* b) const {
|
|
if (a->weight != b->weight) {
|
|
return a->weight > b->weight;
|
|
}
|
|
|
|
if (a->is_leaf != b->is_leaf) {
|
|
return !a->is_leaf && b->is_leaf;
|
|
}
|
|
|
|
if (a->is_leaf) {
|
|
return a->first_appearance_index > b->first_appearance_index;
|
|
} else {
|
|
return a->creation_order > b->creation_order;
|
|
}
|
|
}
|
|
};
|
|
|
|
// 根据文本构建 Huffman 树,同时补全频率和首出现位置
|
|
HuffmanNode* buildHuffmanTree(const string& text,
|
|
map<char, int>& freq_map,
|
|
map<char, int>& first_occ_map) {
|
|
for (size_t i = 0; i < text.size(); ++i) {
|
|
const char ch = text[i];
|
|
freq_map[ch]++;
|
|
if (!first_occ_map.count(ch)) {
|
|
first_occ_map[ch] = static_cast<int>(i);
|
|
}
|
|
}
|
|
|
|
priority_queue<HuffmanNode*, vector<HuffmanNode*>, NodeComparer> pq;
|
|
g_creation_counter = 0;
|
|
|
|
for (const auto& entry : freq_map) {
|
|
pq.push(new HuffmanNode(entry.second, entry.first, first_occ_map[entry.first]));
|
|
}
|
|
|
|
if (pq.empty()) return nullptr;
|
|
if (pq.size() == 1) return pq.top();
|
|
|
|
while (pq.size() > 1) {
|
|
HuffmanNode* node1 = pq.top(); pq.pop();
|
|
HuffmanNode* node2 = pq.top(); pq.pop();
|
|
|
|
HuffmanNode* parent = new HuffmanNode(
|
|
node1->weight + node2->weight,
|
|
node1,
|
|
node2
|
|
);
|
|
|
|
pq.push(parent);
|
|
}
|
|
|
|
return pq.top();
|
|
}
|
|
|
|
// 递归得到所有字符的编码
|
|
void generateCodes(const HuffmanNode* node, string& buffer, map<char, string>& codes) {
|
|
if (node == nullptr) return;
|
|
|
|
if (node->is_leaf) {
|
|
codes[node->character] = buffer;
|
|
return;
|
|
}
|
|
|
|
buffer.push_back('0');
|
|
generateCodes(node->left, buffer, codes);
|
|
buffer.pop_back();
|
|
|
|
buffer.push_back('1');
|
|
generateCodes(node->right, buffer, codes);
|
|
buffer.pop_back();
|
|
}
|
|
|
|
// 用树解码,遇到问题就返回 INVALID
|
|
string decodeHuffman(const string& encoded_string, const HuffmanNode* root) {
|
|
if (root == nullptr || encoded_string.empty()) {
|
|
return "INVALID";
|
|
}
|
|
|
|
string decoded_text;
|
|
const HuffmanNode* current_node = root;
|
|
|
|
for (char bit : encoded_string) {
|
|
if (bit == '0') {
|
|
current_node = current_node->left;
|
|
} else if (bit == '1') {
|
|
current_node = current_node->right;
|
|
} else {
|
|
return "INVALID";
|
|
}
|
|
|
|
if (!current_node) {
|
|
return "INVALID";
|
|
}
|
|
|
|
if (current_node->is_leaf) {
|
|
decoded_text.push_back(current_node->character);
|
|
current_node = root;
|
|
}
|
|
}
|
|
|
|
if (current_node != root) {
|
|
return "INVALID";
|
|
}
|
|
|
|
return decoded_text;
|
|
}
|
|
|
|
// Helper record used when printing the code table: one entry per character.
struct CharInfo {
    char character;    // the symbol itself
    std::string code;  // its Huffman code as a string of '0'/'1'
    int frequency;     // occurrence count, used as the primary sort key
    int first_index;   // index of first occurrence, used as the secondary sort key
};
|
|
|
|
// 先比频率,再比出现顺序
|
|
bool compareCharInfo(const CharInfo& a, const CharInfo& b) {
|
|
if (a.frequency != b.frequency) {
|
|
return a.frequency < b.frequency;
|
|
}
|
|
return a.first_index < b.first_index;
|
|
}
|
|
|
|
|
|
// 主流程:建树、算编码、输出信息和解码
|
|
void processHuffman() {
|
|
string text, encoded1, encoded2;
|
|
|
|
if (!getline(cin, text)) return;
|
|
if (!getline(cin, encoded1)) return;
|
|
if (!getline(cin, encoded2)) return;
|
|
|
|
if (text.empty() || text.length() < 2) return;
|
|
|
|
map<char, int> char_frequency;
|
|
map<char, int> char_first_occurrence;
|
|
|
|
HuffmanNode* root = buildHuffmanTree(text, char_frequency, char_first_occurrence);
|
|
|
|
if (root == nullptr) {
|
|
cout << "0 0" << endl;
|
|
return;
|
|
}
|
|
|
|
map<char, string> huffman_codes;
|
|
string buffer;
|
|
generateCodes(root, buffer, huffman_codes);
|
|
|
|
int original_size = static_cast<int>(text.length());
|
|
|
|
long long compressed_bits = 0;
|
|
for (const auto& pair : char_frequency) {
|
|
compressed_bits += static_cast<long long>(pair.second) * huffman_codes[pair.first].length();
|
|
}
|
|
|
|
int compressed_size = 0;
|
|
if (compressed_bits > 0) {
|
|
compressed_size = max(1, static_cast<int>(ceil(static_cast<double>(compressed_bits) / 8.0)));
|
|
}
|
|
|
|
cout << original_size << " " << compressed_size << endl;
|
|
|
|
vector<CharInfo> char_list;
|
|
char_list.reserve(huffman_codes.size());
|
|
for (const auto& pair : huffman_codes) {
|
|
char_list.push_back({pair.first, pair.second, char_frequency[pair.first], char_first_occurrence[pair.first]});
|
|
}
|
|
|
|
sort(char_list.begin(), char_list.end(), compareCharInfo);
|
|
|
|
for (const auto& info : char_list) {
|
|
cout << info.character << ":" << info.code << endl;
|
|
}
|
|
|
|
string decoded1 = decodeHuffman(encoded1, root);
|
|
cout << decoded1 << endl;
|
|
|
|
string decoded2 = decodeHuffman(encoded2, root);
|
|
cout << decoded2 << endl;
|
|
|
|
delete root;
|
|
}
|
|
|
|
// Program entry point: runs the Huffman flow once and exits with status 0.
int main() {
    processHuffman();
    return 0;
}
|