scratch/misc/huffman.cpp
2024-12-26 14:02:40 -03:00

87 lines
2.2 KiB
C++

#include <iostream>
#include <vector>
#include <queue>
#include <string>
#include <unordered_map>
#define HT_TREE_MARK '@'
struct ht {
char chr;
size_t weight;
struct ht *left{};
struct ht *right{};
ht(char _chr, size_t _w, struct ht *l = nullptr, struct ht *r = nullptr) :
chr(_chr), weight(_w), left(l), right(r) {}
};
void ht_build_codes(struct ht *tree, std::unordered_map<char, std::string> &ht_codes, std::string path="")
{
if (tree == nullptr)
return;
if (tree->chr != HT_TREE_MARK) {
std::clog << tree->chr << ": " << path << std::endl;
ht_codes[tree->chr] = path;
}
ht_build_codes(tree->left, ht_codes, path + '0');
ht_build_codes(tree->right, ht_codes, path + '1');
}
bool ht_comp_fn(const struct ht *n1, const struct ht *n2)
{
// enforce stable sorting
if (n1->weight == n2->weight)
return false;
return n1->weight > n2->weight;
};
int main()
{
std::string s = "abracadabraacacacabbaadadeea";
// count occurences (frequency, weight) of each character in the message
std::unordered_map<char, size_t> chr_occu;
for (const auto& c : s)
++chr_occu[c];
std::priority_queue<struct ht *, std::vector<struct ht *>, decltype(&ht_comp_fn)> nodes(ht_comp_fn);
// create initial nodes, from here we start building the tree
for (const auto& v : chr_occu) {
auto *node = new struct ht(v.first, v.second);
nodes.push(node);
}
auto printpq = [](decltype(nodes) n) {
while (!n.empty()) {
auto *cn = n.top();
std::clog << cn->chr << ":" << cn->weight << " ";
n.pop();
}
std::clog << std::endl;
};
while (nodes.size() > 1) { // when the nodes queue element count is 1, we already have our tree
// remove the two first (the one with the lowest weights)
auto *n1 = nodes.top();
nodes.pop();
auto *n2 = nodes.top();
nodes.pop();
// create a new tree with a weight equal to the sum of the two popped nodes
auto *node = new struct ht(HT_TREE_MARK, n1->weight + n2->weight, n1, n2);
nodes.push(node);
}
std::unordered_map<char, std::string> ht_codes;
std::string enc_msg;
enc_msg.reserve(255);
ht_build_codes(nodes.top(), ht_codes);
for (const auto& c : s)
enc_msg += ht_codes[c];
std::clog << s << " -> " << enc_msg << std::endl;
return 0;
}