哈夫曼编码:
假设要给一偏文章进行编码,文章由英文组成,对于全文来说,我们可以统计得到这篇文章各个字母出现的个数,个数大的字母意味着它在文章中出现的频率更高,对所有的字母都用相同的空间大小编码的话会产生一定的空间浪费。哈夫曼编码使得高频率出现的字母用更短码值表示,低频率出现的字母用更长的码值表示,可以缩小整篇文章的空间占用,达到压缩的目的。把字母看做编码用的各个编码数值同理。
哈夫曼树定义:
给定n个自带权值的节点构造一个二叉树,若带权路径长度最小,称这样的树为哈夫曼树或者最优二叉树。哈弗曼树是权值最小的二叉树,权值越大的节点离根节点越近。
基本概念:
路径和路径长度:在一棵树中,从一个节点往下可以到达的孩子或者孙子节点之间的通路,成为路径。通路中分支路径的个数成为路径长度。节点的权与带权路径长度:若将树中节点赋予一个有意义的数值,则这个数值成为这个节点的权。节点的带权路径长度为节点当前权值乘上路径长度。树的带权路径长度: 树的带权路径长度为所有叶节点的带权路径长度之和,几位WPL。
哈夫曼树的构造:
把n个节点看做n棵二叉树,此时每棵树只有一个节点在森林中选择两个权值最小的树进行合并,形成新的二叉树,该树的根节点的权值为合并的左右树的权值之和从森林中删去合并的两棵树,并加入合并后的那棵新树重复2、3步直到森林中只剩一棵树结束,哈夫曼树构造完成。
哈弗曼编码:
先统计带压缩文件的各个节点的权值,生成一个哈夫曼树;根据单词的各个编码字符,从节点开始,找到这个节点的parent节点在比较原节点是父节点的左孩子还是右孩子,一般规定是左孩子为0,右孩子为1,比较到根节点为止,得到的编码是最终编码的倒序,进而得到每个节点的哈夫曼编码。之后结点的个数,符号,以及编码写在哈夫曼压缩文件头部,供文件解压的时候使用;之后开始读入压缩文件,将已有的字符对应的哈夫曼编码写入待输出二进制文件完成压缩。哈夫曼解码的时候根据文件头的信息生成哈夫曼树,因为哈夫曼编码为前缀编码,之后从根节点开始将编码转化成一个个字符(哈夫曼树的叶子节点),以此类推,解压完成。
源程序代码:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define maxsize 20001
typedef struct BTnode{
int weight;
int parent;
int lchild;
int rchild;
unsigned char a;
}BTnode;
typedef struct CharNode{
char bit[maxsize];
unsigned char a;
int weight;
}CharNode;
BTnode BT[maxsize];
CharNode Dictionary[maxsize];
int root;
int node_num;
long File_Point;
void get_weight(
char* input)
{
printf(
"字符权重计算中...\n") ;
unsigned char temp, pre;
FILE *fp;
fp = fopen(input,
"rb");
if(fp == NULL)
{
printf(
"Input Error\n");
exit(
0);
}
while(!feof(fp))
{
fread(&temp,
sizeof(
unsigned char),
1, fp);
if(temp ==
'\0')
continue;
BT[temp].weight++;
BT[temp].a = temp;
temp =
'\0';
}
fclose(fp);
printf(
"计算权重已完成\n");
}
void HuffmanTree()
{
printf(
"正在构建哈夫曼树...\n");
BTnode t;
int minweight;
int lchild, rchild;
int k=
0;
for(
int i =
0; i<maxsize ;i++)
{
for(
int j =
0; j<maxsize; j++)
{
if(BT[j].weight<BT[j+
1].weight)
{
t = BT[j+
1];
BT[j+
1] = BT[j];
BT[j] = t;
}
}
}
while(BT[k].weight!=
0)
k++;
node_num=k;
root =
2*k-
2;
for(
int i=k; i<
2*k-
1; i++)
{
minweight = maxsize;
lchild = -
1;
rchild = -
1;
for(
int j=
0; j<i; j++)
{
if(BT[j].weight<minweight && BT[j].parent==-
1)
{
minweight = BT[j].weight;
lchild = j;
}
}
minweight = maxsize;
BT[lchild].parent = i;
for(
int h=
0; h<i; h++)
{
if(BT[h].weight<minweight &&BT[h].parent==-
1)
{
minweight = BT[h].weight;
rchild = h;
}
}
BT[rchild].parent = i;
BT[i].weight = BT[lchild].weight + BT[rchild].weight;
BT[i].lchild = lchild;
BT[i].rchild = rchild;
}
BT[root].parent = maxsize;
printf(
"哈夫曼树完成构建\n");
}
void coding(){
printf(
"正在进行哈弗曼编码...\n");
int ndoe =
0;
int findp, itself, point;
int end;
char temp;
for(
int node =
0; node<=root; node++)
{
if(BT[node].a != NULL)
{
Dictionary[BT[node].a].a = BT[node].a;
Dictionary[BT[node].a].weight = BT[node].weight;
point=
0;
findp = BT[node].parent;
itself = node;
while(findp <= root)
{
if(BT[findp].lchild == itself)
{
Dictionary[BT[node].a].bit[point] =
'0';
point ++;
}
else if(BT[findp].rchild == itself)
{
Dictionary[BT[node].a].bit[point] =
'1';
point ++;
}
findp = BT[findp].parent;
itself = BT[itself].parent;
}
}
}
for(
int i=
0; i<maxsize; i++)
{
if(
strlen(Dictionary[i].bit)!=
0)
{
for(
int j=
0; j<
strlen(Dictionary[i].bit); j++)
{
for(
int k=
0; k<
strlen(Dictionary[i].bit)-j-
1; k++)
{
temp = Dictionary[i].bit[k];
Dictionary[i].bit[k] = Dictionary[i].bit[k+
1];
Dictionary[i].bit[k+
1] = temp;
}
}
}
}
printf(
"哈弗曼编码已完成\n");
for(
int i=
0; i<maxsize; i++)
{
if(Dictionary[i].weight!=
0)
{
printf(
"符号:%c\t出现频率:%d\t哈夫曼编码%s\n", Dictionary[i].a, Dictionary[i].weight, Dictionary[i].bit);
}
}
}
void Output(
char* input)
{
unsigned char temp;
unsigned char output;
char out_dat[maxsize];
int num =
0;
int all =
0;
FILE *fp, *out;
printf(
"生成文件名:");
scanf(
"%s", out_dat);
strcat(out_dat,
".dat");
printf(
"生成压缩文件%s\n", out_dat);
temp =
'\0';
fp = fopen(input,
"rb");
out = fopen(out_dat,
"wb");
if(fp == NULL || out == NULL)
{
printf(
"输入文件名错误或建立新文件失败\n");
exit(
0);
}
fwrite(&node_num,
sizeof(
int),
1, out);
for(
int i=
0; i<node_num; i++)
{
fwrite(&BT[i].a,
sizeof(
unsigned char),
1, out);
fwrite(&BT[i].weight,
sizeof(
int),
1, out);
}
while(!feof(fp))
{
fread(&temp,
sizeof(
unsigned char),
1, fp);
if(temp ==
'\0')
continue;
all++;
for(
int i=
0; i<
strlen(Dictionary[temp].bit); i++)
{
output <<=
1 ;
if(Dictionary[temp].bit[i] ==
'1')
{
output +=
1;
}
num++;
if(num==
8)
{
fwrite(&output,
sizeof(
unsigned char),
1, out);
num =
0;
}
}
temp =
'\0';
}
if(num!=
0){
for(
int i=
8; i>num; i--)
{
output <<=
1;
}
fwrite(&output,
sizeof(
unsigned char),
1, out);
}
fwrite(&all,
sizeof(
int),
1, out);
printf(
"\n压缩文件成功\n");
}
void Form_Huffman(
char *input)
{
printf(
"生成霍夫曼树...\n");
FILE *fp;
BTnode t;
int num, minweight, all;
int rchild, lchild;
unsigned char a;
int weight;
fp = fopen(input,
"rb");
fseek(fp, -
4,
2);
fread(&all,
sizeof(
int),
1, fp);
fseek(fp,
0,
0);
fread(&num,
sizeof(
int),
1, fp);
for(
int i=
0; i<num; i++)
{
fread(&a,
sizeof(
unsigned char),
1, fp);
fread(&weight,
sizeof(
int),
1, fp);
BT[i].a = a;
BT[i].weight = weight;
}
root = num;
for(
int i =
0; i<num ;i++)
{
for(
int j =
0; j<num-
1; j++)
{
if(BT[j].weight<BT[j+
1].weight)
{
t = BT[j+
1];
BT[j+
1] = BT[j];
BT[j] = t;
}
}
}
for(
int i=num; i<
2*num-
1; i++)
{
minweight = maxsize;
lchild = -
1;
rchild = -
1;
for(
int j=
0; j<i; j++)
{
if(BT[j].weight<minweight && BT[j].parent==-
1)
{
minweight = BT[j].weight;
lchild = j;
}
}
minweight = maxsize;
BT[lchild].parent = i;
for(
int h=
0; h<i; h++)
{
if(BT[h].weight<minweight &&BT[h].parent==-
1)
{
minweight = BT[h].weight;
rchild = h;
}
}
BT[rchild].parent = i;
BT[i].weight = BT[lchild].weight + BT[rchild].weight;
BT[i].lchild = lchild;
BT[i].rchild = rchild;
root = i;
}
printf(
"成功重构霍夫曼树\n");
printf(
"还原源文件中...\n");
int root_head;
char Origin_Code[maxsize];
unsigned char copy, judge;
FILE *out;
printf(
"输入您想输出的文件名称:");
scanf(
"%s", Origin_Code);
out = fopen(Origin_Code,
"w");
if(out == NULL)
{
printf(
"Input Error\n");
exit(
0);
}
root_head = root;
while(!feof(fp))
{
fread(©,
sizeof(
unsigned char),
1, fp);
for(
int i=
1; i<=
8; i++)
{
judge = copy&
10000000;
copy<<=
1;
if(judge>
0)
{
root_head = BT[root_head].rchild;
}
else if(judge==
0)
{
root_head = BT[root_head].lchild;
}
if(BT[root_head].a !=NULL)
{
fwrite(&BT[root_head].a,
sizeof(
unsigned char),
1, out);
all--;
root_head = root;
if(all==
0)
break;
}
}
if(all==
0)
break;
}
printf(
"还原源文件成功\n");
}
int main()
{
int num=
0;
char File_Name[maxsize];
int choose=
0;
printf(
"=================================================================\n");
printf(
" 请输入您想要完成的功能:1、压缩文件;2、解压文件;3、退出程序\n");
printf(
"\n 注意:(1)压缩文件名带后缀;\n\t(2)压缩文件名默认.dat格式;\n\t(3)带压缩文件名带后缀;\n");
printf(
"=================================================================\n");
printf(
"输入:");
scanf(
"%d", &choose);
memset(File_Name,
0,
sizeof(File_Name));
root =
0;
for(
int i =
0; i<maxsize; i++)
{
BT[i].a = NULL;
BT[i].lchild=-
1;
BT[i].parent=-
1;
BT[i].rchild =-
1;
BT[i].weight =
0;
}
for(
int i=
0; i<maxsize; i++)
{
Dictionary[i].a = NULL;
Dictionary[i].weight=
0;
Dictionary[i].bit[
0] =
'\0';
}
switch(choose)
{
case 1:
printf(
"请输入待压缩文件名:");
scanf(
"%s", File_Name);
get_weight(File_Name);
HuffmanTree();
coding();
Output(File_Name);
break;
case 2:
printf(
"输出待解压文件名:");
scanf(
"%s", File_Name);
Form_Huffman(File_Name);
break;
case 3:
return 0;
default :
choose =
0;
printf(
"输入错误请重新输出数据\n");
break;
}
return 0;
}