Homework New

2025-11-27 13:40:37 +08:00
parent b84c3ba783
commit 4965074539
39 changed files with 2191 additions and 832 deletions
--- a/Exercise/Homework5/Huffman2.cpp
+++ b/Exercise/Homework5/Huffman2.cpp
@@ -0,0 +1,238 @@
+#include <iostream>
+#include <string>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <queue>
+#include <cmath>
+#include <limits>
+
+using namespace std;
+
// Running count of internal nodes created so far. Every internal node stores
// the current value as its creation_order and then increments it, which gives
// the heap a deterministic tie-break between internal nodes of equal weight.
int g_creation_counter = 0;

// One node of a Huffman tree.
//
// A leaf carries a character and the index at which that character first
// appears in the text. An internal node carries two children and the order in
// which it was created. A node owns its children: destroying a node destroys
// the entire subtree below it.
struct HuffmanNode {
    int weight;                  // leaf: value passed by the caller; internal: sum chosen by the caller
    char character;              // the symbol for a leaf, '\0' for an internal node
    HuffmanNode* left;           // owned child; nullptr for a leaf
    HuffmanNode* right;          // owned child; nullptr for a leaf
    bool is_leaf;                // true only for nodes built with the leaf constructor
    int first_appearance_index;  // meaningful for leaves; INT_MAX on internal nodes
    int creation_order;          // meaningful for internal nodes; INT_MAX on leaves

    // Creates a leaf for character `c` with weight `w`, first seen at
    // position `first_idx`. Does not touch g_creation_counter.
    HuffmanNode(int w, char c, int first_idx)
        : weight(w),
          character(c),
          left(nullptr),
          right(nullptr),
          is_leaf(true),
          first_appearance_index(first_idx),
          creation_order(std::numeric_limits<int>::max()) {}

    // Creates an internal node with weight `w` that takes ownership of `l`
    // and `r` (either may be nullptr). Consumes one value of
    // g_creation_counter.
    HuffmanNode(int w, HuffmanNode* l, HuffmanNode* r)
        : weight(w),
          character('\0'),
          left(l),
          right(r),
          is_leaf(false),
          first_appearance_index(std::numeric_limits<int>::max()),
          creation_order(g_creation_counter++) {}

    // The destructor deletes both children, so a memberwise copy would make
    // two nodes delete the same subtree. Copying is therefore forbidden
    // (this also suppresses the implicit move operations).
    HuffmanNode(const HuffmanNode&) = delete;
    HuffmanNode& operator=(const HuffmanNode&) = delete;

    // Recursively frees the subtree; deleting nullptr is a no-op.
    ~HuffmanNode() {
        delete left;
        delete right;
    }
};
+
+// 优先队列里用的比较器，符合题目的几条 tie-break 规则
+struct NodeComparer {
+    bool operator()(const HuffmanNode* a, const HuffmanNode* b) const {
+        if (a->weight != b->weight) {
+            return a->weight > b->weight;
+        }
+
+        if (a->is_leaf != b->is_leaf) {
+            return !a->is_leaf && b->is_leaf;
+        }
+
+        if (a->is_leaf) {
+            return a->first_appearance_index > b->first_appearance_index;
+        } else {
+            return a->creation_order > b->creation_order;
+        }
+    }
+};
+
+// 根据文本构建 Huffman 树，同时补全频率和首出现位置
+HuffmanNode* buildHuffmanTree(const string& text,
+                              map<char, int>& freq_map,
+                              map<char, int>& first_occ_map) {
+    for (size_t i = 0; i < text.size(); ++i) {
+        const char ch = text[i];
+        freq_map[ch]++;
+        if (!first_occ_map.count(ch)) {
+            first_occ_map[ch] = static_cast<int>(i);
+        }
+    }
+
+    priority_queue<HuffmanNode*, vector<HuffmanNode*>, NodeComparer> pq;
+    g_creation_counter = 0;
+
+    for (const auto& entry : freq_map) {
+        pq.push(new HuffmanNode(entry.second, entry.first, first_occ_map[entry.first]));
+    }
+
+    if (pq.empty()) return nullptr;
+    if (pq.size() == 1) return pq.top();
+
+    while (pq.size() > 1) {
+        HuffmanNode* node1 = pq.top(); pq.pop();
+        HuffmanNode* node2 = pq.top(); pq.pop();
+
+        HuffmanNode* parent = new HuffmanNode(
+            node1->weight + node2->weight,
+            node1,
+            node2
+        );
+
+        pq.push(parent);
+    }
+
+    return pq.top();
+}
+
+// 递归得到所有字符的编码
+void generateCodes(const HuffmanNode* node, string& buffer, map<char, string>& codes) {
+    if (node == nullptr) return;
+
+    if (node->is_leaf) {
+        codes[node->character] = buffer;
+        return;
+    }
+
+    buffer.push_back('0');
+    generateCodes(node->left, buffer, codes);
+    buffer.pop_back();
+
+    buffer.push_back('1');
+    generateCodes(node->right, buffer, codes);
+    buffer.pop_back();
+}
+
+// 用树解码，遇到问题就返回 INVALID
+string decodeHuffman(const string& encoded_string, const HuffmanNode* root) {
+    if (root == nullptr || encoded_string.empty()) {
+        return "INVALID";
+    }
+
+    string decoded_text;
+    const HuffmanNode* current_node = root;
+
+    for (char bit : encoded_string) {
+        if (bit == '0') {
+            current_node = current_node->left;
+        } else if (bit == '1') {
+            current_node = current_node->right;
+        } else {
+            return "INVALID";
+        }
+
+        if (!current_node) {
+            return "INVALID";
+        }
+
+        if (current_node->is_leaf) {
+            decoded_text.push_back(current_node->character);
+            current_node = root;
+        }
+    }
+
+    if (current_node != root) {
+        return "INVALID";
+    }
+
+    return decoded_text;
+}
+
// One row of the printed code table. Plain aggregate: the member order below
// is relied on by positional brace-initialisation, so keep it unchanged.
struct CharInfo {
    char character;    // the symbol this row describes
    std::string code;  // the symbol's Huffman bit string
    int frequency;     // how many times the symbol occurs in the text
    int first_index;   // position of the symbol's first occurrence
};
+
+// 先比频率，再比出现顺序
+bool compareCharInfo(const CharInfo& a, const CharInfo& b) {
+    if (a.frequency != b.frequency) {
+        return a.frequency < b.frequency;
+    }
+    return a.first_index < b.first_index;
+}
+
+
+// 主流程：建树、算编码、输出信息和解码
+void processHuffman() {
+    string text, encoded1, encoded2;
+
+    if (!getline(cin, text)) return;
+    if (!getline(cin, encoded1)) return;
+    if (!getline(cin, encoded2)) return;
+
+    if (text.empty() || text.length() < 2) return;
+
+    map<char, int> char_frequency;
+    map<char, int> char_first_occurrence;
+
+    HuffmanNode* root = buildHuffmanTree(text, char_frequency, char_first_occurrence);
+
+    if (root == nullptr) {
+        cout << "0 0" << endl;
+        return;
+    }
+
+    map<char, string> huffman_codes;
+    string buffer;
+    generateCodes(root, buffer, huffman_codes);
+
+    int original_size = static_cast<int>(text.length());
+
+    long long compressed_bits = 0;
+    for (const auto& pair : char_frequency) {
+        compressed_bits += static_cast<long long>(pair.second) * huffman_codes[pair.first].length();
+    }
+
+    int compressed_size = 0;
+    if (compressed_bits > 0) {
+        compressed_size = max(1, static_cast<int>(ceil(static_cast<double>(compressed_bits) / 8.0)));
+    }
+
+    cout << original_size << " " << compressed_size << endl;
+
+    vector<CharInfo> char_list;
+    char_list.reserve(huffman_codes.size());
+    for (const auto& pair : huffman_codes) {
+        char_list.push_back({pair.first, pair.second, char_frequency[pair.first], char_first_occurrence[pair.first]});
+    }
+
+    sort(char_list.begin(), char_list.end(), compareCharInfo);
+
+    for (const auto& info : char_list) {
+        cout << info.character << ":" << info.code << endl;
+    }
+
+    string decoded1 = decodeHuffman(encoded1, root);
+    cout << decoded1 << endl;
+
+    string decoded2 = decodeHuffman(encoded2, root);
+    cout << decoded2 << endl;
+
+    delete root;
+}
+
+int main() {
+    processHuffman();
+    return 0;
+}