// Data-Structure/Exercise/Homework5/Huffman2.cpp

#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <queue>
#include <cmath>
#include <limits>

using namespace std;

// Running count of internal nodes created so far. Each internal node stores
// the value it saw at construction, which gives equally weighted internal
// nodes a deterministic order when they are compared.
int g_creation_counter = 0;

// Huffman tree node: stores a weight, the character (leaves only) and the
// two child pointers.
//
// Ownership: a node owns its `left` and `right` subtrees and deletes them in
// its destructor, so deleting the root releases the whole tree. Because of
// that, nodes are non-copyable; a copy would share the children and delete
// them twice.
struct HuffmanNode {
    int weight;                  // frequency of the character / sum of both children
    char character;              // the symbol for a leaf, '\0' for an internal node
    HuffmanNode* left;           // owned child, nullptr for a leaf
    HuffmanNode* right;          // owned child, nullptr for a leaf
    bool is_leaf;                // true for nodes built by the leaf constructor
    int first_appearance_index;  // leaf: value passed in; internal: INT_MAX
    int creation_order;          // internal: creation sequence number; leaf: INT_MAX

    // Creates a leaf for character `c` with weight `w`; `first_idx` is stored
    // as the leaf's first-appearance index.
    HuffmanNode(int w, char c, int first_idx)
        : weight(w),
          character(c),
          left(nullptr),
          right(nullptr),
          is_leaf(true),
          first_appearance_index(first_idx),
          creation_order(std::numeric_limits<int>::max()) {}

    // Creates an internal node with weight `w` that takes ownership of `l`
    // and `r`. Consumes the next value of g_creation_counter.
    HuffmanNode(int w, HuffmanNode* l, HuffmanNode* r)
        : weight(w),
          character('\0'),
          left(l),
          right(r),
          is_leaf(false),
          first_appearance_index(std::numeric_limits<int>::max()),
          creation_order(g_creation_counter++) {}

    // Copying would duplicate the owning child pointers and lead to a double
    // delete, so it is disabled (this also suppresses the implicit moves).
    HuffmanNode(const HuffmanNode&) = delete;
    HuffmanNode& operator=(const HuffmanNode&) = delete;

    // Recursively frees both subtrees; deleting nullptr is a no-op.
    ~HuffmanNode() {
        delete left;
        delete right;
    }
};

// Comparator for the priority queue of nodes. Returns true when `a` should
// come out of the queue after `b`. The resulting service order is:
//   1. smaller weight first;
//   2. on equal weight, a leaf before an internal node;
//   3. between two leaves, the smaller first_appearance_index first;
//   4. between two internal nodes, the smaller creation_order first.
struct NodeComparer {
    bool operator()(const HuffmanNode* a, const HuffmanNode* b) const {
        if (a->weight == b->weight) {
            if (a->is_leaf == b->is_leaf) {
                // Same kind of node: fall back to the kind-specific key.
                return a->is_leaf
                           ? a->first_appearance_index > b->first_appearance_index
                           : a->creation_order > b->creation_order;
            }
            // Exactly one of them is a leaf; the internal one yields.
            return !a->is_leaf;
        }
        return a->weight > b->weight;
    }
};

// Builds a Huffman tree from `text`, filling in the frequency and
// first-occurrence maps along the way.
//
// For every character of `text`, its count in `freq_map` is incremented and,
// if the character has no entry in `first_occ_map` yet, its 0-based position
// is recorded there. Entries already present in the maps are kept. One leaf
// is created per `freq_map` entry, g_creation_counter is reset to 0, and the
// two front nodes of the queue are merged until one node remains.
//
// Returns the heap-allocated root (the caller is responsible for deleting
// it), or nullptr when there are no characters at all. With exactly one
// distinct character the root is that single leaf.
HuffmanNode* buildHuffmanTree(const string& text,
                              map<char, int>& freq_map,
                              map<char, int>& first_occ_map) {
    size_t position = 0;
    for (const char symbol : text) {
        ++freq_map[symbol];
        // insert() leaves an existing entry untouched, so only the first
        // position of each character is stored.
        first_occ_map.insert({symbol, static_cast<int>(position)});
        ++position;
    }

    g_creation_counter = 0;

    priority_queue<HuffmanNode*, vector<HuffmanNode*>, NodeComparer> heap;
    for (auto it = freq_map.begin(); it != freq_map.end(); ++it) {
        heap.push(new HuffmanNode(it->second, it->first, first_occ_map[it->first]));
    }

    if (heap.empty()) {
        return nullptr;
    }

    // A queue holding a single node skips the loop and that node is returned
    // as the root.
    while (heap.size() >= 2) {
        HuffmanNode* const first_out = heap.top();
        heap.pop();
        HuffmanNode* const second_out = heap.top();
        heap.pop();

        // The node popped first becomes the left child.
        heap.push(new HuffmanNode(first_out->weight + second_out->weight,
                                  first_out,
                                  second_out));
    }

    return heap.top();
}

// 递归得到所有字符的编码
void generateCodes(const HuffmanNode* node, string& buffer, map<char, string>& codes) {
    if (node == nullptr) return;

    if (node->is_leaf) {
        codes[node->character] = buffer;
        return;
    }

    buffer.push_back('0');
    generateCodes(node->left, buffer, codes);
    buffer.pop_back();

    buffer.push_back('1');
    generateCodes(node->right, buffer, codes);
    buffer.pop_back();
}

// 用树解码，遇到问题就返回 INVALID
string decodeHuffman(const string& encoded_string, const HuffmanNode* root) {
    if (root == nullptr || encoded_string.empty()) {
        return "INVALID";
    }

    string decoded_text;
    const HuffmanNode* current_node = root;

    for (char bit : encoded_string) {
        if (bit == '0') {
            current_node = current_node->left;
        } else if (bit == '1') {
            current_node = current_node->right;
        } else {
            return "INVALID";
        }

        if (!current_node) {
            return "INVALID";
        }

        if (current_node->is_leaf) {
            decoded_text.push_back(current_node->character);
            current_node = root;
        }
    }

    if (current_node != root) {
        return "INVALID";
    }

    return decoded_text;
}

// Helper record used when printing the code table: one entry per character.
// Kept as a plain aggregate so it can be brace-initialised in field order.
struct CharInfo {
    char character;   // the character this entry describes
    string code;      // the code string printed after "<character>:"
    int frequency;    // primary sort key (see compareCharInfo)
    int first_index;  // secondary sort key (see compareCharInfo)
};

// Ordering predicate for CharInfo: compares by frequency first (ascending)
// and, when frequencies are equal, by first_index (ascending).
bool compareCharInfo(const CharInfo& a, const CharInfo& b) {
    const bool same_frequency = (a.frequency == b.frequency);
    return same_frequency ? a.first_index < b.first_index
                          : a.frequency < b.frequency;
}


// 主流程：建树、算编码、输出信息和解码
void processHuffman() {
    string text, encoded1, encoded2;

    if (!getline(cin, text)) return;
    if (!getline(cin, encoded1)) return;
    if (!getline(cin, encoded2)) return;

    if (text.empty() || text.length() < 2) return;

    map<char, int> char_frequency;
    map<char, int> char_first_occurrence;

    HuffmanNode* root = buildHuffmanTree(text, char_frequency, char_first_occurrence);

    if (root == nullptr) {
        cout << "0 0" << endl;
        return;
    }

    map<char, string> huffman_codes;
    string buffer;
    generateCodes(root, buffer, huffman_codes);

    int original_size = static_cast<int>(text.length());

    long long compressed_bits = 0;
    for (const auto& pair : char_frequency) {
        compressed_bits += static_cast<long long>(pair.second) * huffman_codes[pair.first].length();
    }

    int compressed_size = 0;
    if (compressed_bits > 0) {
        compressed_size = max(1, static_cast<int>(ceil(static_cast<double>(compressed_bits) / 8.0)));
    }

    cout << original_size << " " << compressed_size << endl;

    vector<CharInfo> char_list;
    char_list.reserve(huffman_codes.size());
    for (const auto& pair : huffman_codes) {
        char_list.push_back({pair.first, pair.second, char_frequency[pair.first], char_first_occurrence[pair.first]});
    }

    sort(char_list.begin(), char_list.end(), compareCharInfo);

    for (const auto& info : char_list) {
        cout << info.character << ":" << info.code << endl;
    }

    string decoded1 = decodeHuffman(encoded1, root);
    cout << decoded1 << endl;

    string decoded2 = decodeHuffman(encoded2, root);
    cout << decoded2 << endl;

    delete root;
}

// Program entry point: runs the Huffman task once. Falling off the end of
// main is equivalent to returning 0.
int main() {
    processHuffman();
}