141 lines
3.4 KiB
C++
141 lines
3.4 KiB
C++
#include <bits/stdc++.h>
|
||
using namespace std;
|
||
|
||
struct Node {
|
||
char ch; // 对于非叶节点,ch = 0
|
||
int freq;
|
||
bool isLeaf;
|
||
int firstPos; // 在文本中首次出现的位置(用于 tie-break)
|
||
Node *left, *right;
|
||
int createOrder; // 创建顺序编号,用于非单节点比较
|
||
|
||
Node(char c, int f, int pos, int order)
|
||
: ch(c), freq(f), isLeaf(true), firstPos(pos),
|
||
left(NULL), right(NULL), createOrder(order) {}
|
||
|
||
Node(Node* l, Node* r, int f, int order)
|
||
: ch(0), freq(f), isLeaf(false), firstPos(INT_MAX),
|
||
left(l), right(r), createOrder(order) {}
|
||
};
|
||
|
||
/*
|
||
优先队列排序规则:
|
||
1. 权值小的优先
|
||
2. 权值相等时:
|
||
a. 单节点优先于非单节点
|
||
b. 若均为单节点 → 按 firstPos 小的优先
|
||
c. 若均为非单节点 → 按 createOrder 小的优先
|
||
*/
|
||
struct Cmp {
|
||
bool operator()(Node* a, Node* b) {
|
||
if (a->freq != b->freq) return a->freq > b->freq;
|
||
|
||
// freq 相同
|
||
if (a->isLeaf != b->isLeaf) return a->isLeaf < b->isLeaf;
|
||
|
||
if (a->isLeaf) return a->firstPos > b->firstPos;
|
||
return a->createOrder > b->createOrder;
|
||
}
|
||
};
|
||
|
||
unordered_map<char, string> codeMap;
|
||
|
||
// 生成哈夫曼编码
|
||
void dfs(Node* root, string path) {
|
||
if (!root) return;
|
||
if (root->isLeaf) {
|
||
codeMap[root->ch] = path;
|
||
return;
|
||
}
|
||
dfs(root->left, path + "0");
|
||
dfs(root->right, path + "1");
|
||
}
|
||
|
||
// 用哈夫曼树译码二进制串
|
||
string decode(Node* root, const string& s) {
|
||
string result;
|
||
Node* cur = root;
|
||
for (char c : s) {
|
||
if (c == '0') cur = cur->left;
|
||
else if (c == '1') cur = cur->right;
|
||
else return "INVALID";
|
||
|
||
if (!cur) return "INVALID";
|
||
if (cur->isLeaf) {
|
||
result.push_back(cur->ch);
|
||
cur = root;
|
||
}
|
||
}
|
||
if (cur != root) return "INVALID"; // 未在叶节点结束
|
||
return result;
|
||
}
|
||
|
||
int main() {
|
||
ios::sync_with_stdio(false);
|
||
cin.tie(NULL);
|
||
|
||
string text, code1, code2;
|
||
cin >> text >> code1 >> code2;
|
||
|
||
// 统计出现次数 & 首次出现位置
|
||
unordered_map<char, int> freq, firstPos;
|
||
for (int i = 0; i < text.size(); i++) {
|
||
char c = text[i];
|
||
freq[c]++;
|
||
if (!firstPos.count(c)) firstPos[c] = i;
|
||
}
|
||
|
||
// 创建优先队列
|
||
priority_queue<Node*, vector<Node*>, Cmp> pq;
|
||
int createId = 0;
|
||
|
||
for (auto& p : freq) {
|
||
char c = p.first;
|
||
int f = p.second;
|
||
pq.push(new Node(c, f, firstPos[c], createId++));
|
||
}
|
||
|
||
// 构建唯一哈夫曼树
|
||
while (pq.size() > 1) {
|
||
Node* a = pq.top(); pq.pop();
|
||
Node* b = pq.top(); pq.pop();
|
||
Node* parent = new Node(a, b, a->freq + b->freq, createId++);
|
||
pq.push(parent);
|
||
}
|
||
|
||
Node* root = pq.top();
|
||
|
||
// 生成编码
|
||
dfs(root, "");
|
||
|
||
// 计算压缩前后大小
|
||
int originalBytes = text.size();
|
||
long long bits = 0;
|
||
for (char c : text) bits += codeMap[c].size();
|
||
long long compressedBytes = (bits + 7) / 8;
|
||
|
||
cout << originalBytes << " " << compressedBytes << "\n";
|
||
|
||
// 按题目要求排序输出编码(按频率升序,同频率按出现顺序)
|
||
vector<tuple<int,int,char>> order;
|
||
for (auto& p : freq) {
|
||
order.emplace_back(p.second, firstPos[p.first], p.first);
|
||
}
|
||
sort(order.begin(), order.end());
|
||
|
||
for (auto& t : order) {
|
||
char c = get<2>(t);
|
||
cout << c << ":" << codeMap[c] << "\n";
|
||
}
|
||
|
||
// 译码
|
||
string out1 = decode(root, code1);
|
||
string out2 = decode(root, code2);
|
||
|
||
cout << out1 << "\n";
|
||
cout << out2 << "\n";
|
||
|
||
return 0;
|
||
}
|
||
|