using System;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.IO;
namespace LZWTest
{
/// <summary>
/// Console demo: compresses a sample string with <see cref="LZW"/> and decodes it again,
/// once from the in-memory code list and once from the packed byte stream.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the round-trip demo and waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string test = "ABABCDCDABCD";
        Console.WriteLine("测试字符串 {0}", test);

        LZW lzw = new LZW();
        List<ushort> compress = lzw.Encode(test);

        Console.WriteLine();
        Console.WriteLine("压缩列表");
        for (int i = 0; i < compress.Count; i++)
        {
            Console.Write(compress[i] + " ");
        }
        Console.WriteLine();

        // Way 1: decode straight from the list of codes.
        string str = lzw.Decode(compress);
        Console.WriteLine();
        Console.WriteLine("方式一 解压 {0}", str);

        // Way 2: decode from the byte stream produced by the encoder.
        byte[] compressBytes = lzw.GetCompressData();
        string str1 = lzw.Decode(compressBytes);
        // Print the result of the second decode (the original printed the first result
        // again, which hid any failure of the byte-stream path).
        Console.WriteLine("方式二 解压 {0}", str1);

        Console.Read();
    }
}
/// <summary>
/// Lempel-Ziv-Welch (LZW) string-table compression using fixed-width 12-bit codes.
/// <para>
/// Code layout: 0-255 are the single byte values, 256 is the clear marker, 257 is the end
/// marker and 258-4095 are assigned to byte sequences learned while encoding
/// (3838 dynamic entries).
/// </para>
/// <para>
/// The marker values follow the GIF convention for 8-bit data, but unlike GIF the code
/// width does not grow from 9 to 12 bits: every code is always stored in 12 bits.
/// </para>
/// <para>
/// Characters are reduced to their low 8 bits when encoding and decoded bytes are
/// interpreted as ASCII, so only 7-bit ASCII text round-trips unchanged.
/// </para>
/// </summary>
public sealed class LZW
{
    // Clear marker: tells the decoder to drop all learned entries.
    private const ushort ClearFlag = 256;
    // End marker: terminates the code stream.
    private const ushort EndFlag = 257;
    // First code available for learned sequences.
    private const ushort FirstDynamicCode = 258;
    // Number of bits used to store every code.
    private const int CodeBitSize = 12;
    // Number of distinct codes representable in CodeBitSize bits (valid codes: 0-4095).
    private const int MaxCodeCount = 4096;

    // Packed bit stream produced by the most recent Encode call.
    private byte[] compressData = new byte[0];

    /// <summary>
    /// Resets the encoder dictionary so it contains only the 256 single-byte entries.
    /// </summary>
    /// <param name="codeDic">Dictionary to reset.</param>
    private static void InitCodeDic(Dictionary<ZString, ushort> codeDic)
    {
        codeDic.Clear();
        for (ushort i = 0; i <= 255; i++)
        {
            codeDic.Add(new ZString((byte)i), i);
        }
    }

    /// <summary>
    /// Compresses <paramref name="str"/> into a list of LZW codes terminated by the end marker.
    /// The same codes, packed 12 bits each, are afterwards available from
    /// <see cref="GetCompressData"/>. Each emitted mapping is also traced to the console.
    /// </summary>
    /// <param name="str">Text to compress; an empty string yields only the end marker.</param>
    /// <returns>The emitted codes, including any clear markers and the final end marker.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public List<ushort> Encode(string str)
    {
        if (str == null)
            throw new ArgumentNullException("str");

        List<ushort> output = new List<ushort>();
        Dictionary<ZString, ushort> codeDic = new Dictionary<ZString, ushort>();
        InitCodeDic(codeDic);

        // Longest sequence matched so far that is known to be in the dictionary.
        ZString prefix = new ZString();
        // Next free code for a learned sequence.
        ushort mark = FirstDynamicCode;

        Console.WriteLine("建立字典");
        for (int i = 0; i < str.Length; i++)
        {
            char c = str[i];
            if (prefix.IsNullOrEmpty())
            {
                prefix.Add(c);
                continue;
            }

            ZString key = prefix + c;
            if (codeDic.ContainsKey(key))
            {
                // Keep extending the current match.
                prefix = key;
                continue;
            }

            // The match ends here: emit it, learn the extended sequence and restart from c.
            ushort code = codeDic[prefix];
            output.Add(code);
            Console.WriteLine("{0}->{1}", prefix, code);
            codeDic.Add(key, mark);
            mark++;
            prefix = new ZString(c);

            // All 12-bit codes are used up: emit the clear marker and start over.
            // The pending character stays in prefix so it is not lost across the reset.
            if (mark >= MaxCodeCount)
            {
                output.Add(ClearFlag);
                InitCodeDic(codeDic);
                mark = FirstDynamicCode;
            }
        }

        // Flush the last pending match (absent only for empty input).
        if (!prefix.IsNullOrEmpty())
        {
            ushort last = codeDic[prefix];
            output.Add(last);
            Console.WriteLine("{0}->{1}", prefix, last);
        }
        output.Add(EndFlag);

        compressData = PackCodes(output);
        return output;
    }

    /// <summary>
    /// Returns the packed bit stream produced by the most recent <see cref="Encode"/> call.
    /// </summary>
    /// <returns>
    /// The codes stored 12 bits each, least-significant bit first, zero-padded to a whole
    /// byte; an empty array if nothing has been encoded yet.
    /// </returns>
    public byte[] GetCompressData()
    {
        return compressData;
    }

    /// <summary>
    /// Decompresses a packed bit stream in the format returned by <see cref="GetCompressData"/>.
    /// </summary>
    /// <param name="bytes">Codes stored 12 bits each, least-significant bit first.</param>
    /// <returns>The decoded text (bytes interpreted as ASCII).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public string Decode(byte[] bytes)
    {
        if (bytes == null)
            throw new ArgumentNullException("bytes");
        return DecodeCodes(UnpackCodes(bytes), "重建字典出错,未找到 code={0} 对应的子串");
    }

    /// <summary>
    /// Decompresses a list of codes as returned by <see cref="Encode"/>.
    /// </summary>
    /// <param name="list">Codes to decode; decoding stops at the first end marker.</param>
    /// <returns>The decoded text (bytes interpreted as ASCII).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    public string Decode(List<ushort> list)
    {
        if (list == null)
            throw new ArgumentNullException("list");
        return DecodeCodes(list, "无法查询到 code={0} 对应的字符串");
    }

    /// <summary>
    /// Shared LZW decoder: rebuilds the dictionary on the fly while expanding the codes.
    /// </summary>
    /// <param name="codes">Code sequence to expand.</param>
    /// <param name="missingCodeFormat">
    /// Console message format (with a {0} placeholder for the code) used when a code cannot
    /// be resolved; such a code is reported and skipped.
    /// </param>
    /// <returns>The decoded text (bytes interpreted as ASCII).</returns>
    private static string DecodeCodes(IEnumerable<ushort> codes, string missingCodeFormat)
    {
        List<byte> output = new List<byte>();
        Dictionary<ushort, byte[]> codeDic = new Dictionary<ushort, byte[]>();
        // Sequence produced by the previous code; null right after start or a clear marker.
        byte[] previous = null;
        // Code the next learned sequence will receive.
        ushort mark = FirstDynamicCode;

        foreach (ushort code in codes)
        {
            if (code == EndFlag)
                break;

            if (code == ClearFlag)
            {
                codeDic.Clear();
                previous = null;
                mark = FirstDynamicCode;
                continue;
            }

            byte[] entry;
            if (code <= 255)
            {
                entry = new byte[] { (byte)code };
            }
            else if (!codeDic.TryGetValue(code, out entry))
            {
                if (previous == null || code != mark)
                {
                    Console.WriteLine(missingCodeFormat, code);
                    continue;
                }
                // The encoder used the entry it had just created, so the decoder has not
                // stored it yet: it is the previous sequence followed by its own first byte.
                entry = Append(previous, previous[0]);
            }

            output.AddRange(entry);

            // Mirror the encoder: previous sequence + first byte of the current one.
            if (previous != null && mark < MaxCodeCount)
            {
                codeDic[mark] = Append(previous, entry[0]);
                mark++;
            }
            previous = entry;
        }
        return Encoding.ASCII.GetString(output.ToArray());
    }

    /// <summary>
    /// Returns a new array holding <paramref name="head"/> followed by <paramref name="tail"/>.
    /// </summary>
    private static byte[] Append(byte[] head, byte tail)
    {
        byte[] joined = new byte[head.Length + 1];
        Array.Copy(head, joined, head.Length);
        joined[head.Length] = tail;
        return joined;
    }

    /// <summary>
    /// Packs codes into bytes, 12 bits per code, least-significant bit first.
    /// </summary>
    private static byte[] PackCodes(List<ushort> codes)
    {
        byte[] packed = new byte[(int)(((long)codes.Count * CodeBitSize + 7) / 8)];
        long bitIndex = 0;
        foreach (ushort code in codes)
        {
            for (int bit = 0; bit < CodeBitSize; bit++, bitIndex++)
            {
                if ((code & (1 << bit)) != 0)
                    packed[bitIndex >> 3] |= (byte)(1 << (int)(bitIndex & 7));
            }
        }
        return packed;
    }

    /// <summary>
    /// Inverse of <see cref="PackCodes"/>; trailing bits that do not form a whole code are ignored.
    /// </summary>
    private static List<ushort> UnpackCodes(byte[] bytes)
    {
        int codeCount = (int)((long)bytes.Length * 8 / CodeBitSize);
        List<ushort> codes = new List<ushort>(codeCount);
        long bitIndex = 0;
        for (int i = 0; i < codeCount; i++)
        {
            int code = 0;
            for (int bit = 0; bit < CodeBitSize; bit++, bitIndex++)
            {
                if ((bytes[bitIndex >> 3] & (1 << (int)(bitIndex & 7))) != 0)
                    code |= 1 << bit;
            }
            codes.Add((ushort)code);
        }
        return codes;
    }

    /// <summary>
    /// Mutable byte string used as the dictionary key of the encoder.
    /// Equality and hashing are based on the byte content, so an instance must not be
    /// modified while it is stored as a key in a hash-based collection.
    /// </summary>
    public class ZString : IEquatable<ZString>
    {
        private List<byte> list = new List<byte>();

        /// <summary>Creates an empty byte string.</summary>
        public ZString() { }
        /// <summary>Creates a byte string holding the low 8 bits of <paramref name="c"/>.</summary>
        public ZString(char c) { Add(c); }
        /// <summary>Creates a byte string from the low 8 bits of every character of <paramref name="s"/>.</summary>
        public ZString(string s) { Add(s); }
        /// <summary>Creates a byte string holding the low 8 bits of <paramref name="i"/>.</summary>
        public ZString(int i) { Add(i); }
        /// <summary>Creates a byte string holding the single byte <paramref name="b"/>.</summary>
        public ZString(byte b) { Add(b); }

        /// <summary>Number of bytes currently held.</summary>
        public int Count { get { return list.Count; } }

        /// <summary>Returns a copy of the content.</summary>
        public byte[] ToArray()
        {
            return list.ToArray();
        }

        /// <summary>Returns true when the byte string holds no bytes.</summary>
        public bool IsNullOrEmpty()
        {
            return list.Count == 0;
        }

        /// <summary>Appends all bytes of <paramref name="bytes"/>; null is ignored.</summary>
        public void Add(byte[] bytes)
        {
            if (bytes == null)
                return;
            list.AddRange(bytes);
        }

        /// <summary>Appends the low 8 bits of <paramref name="i"/>.</summary>
        public void Add(int i)
        {
            list.Add(unchecked((byte)i));
        }

        /// <summary>Appends the low 8 bits of <paramref name="c"/>.</summary>
        public void Add(char c)
        {
            list.Add(unchecked((byte)c));
        }

        /// <summary>Appends <paramref name="b"/>.</summary>
        public void Add(byte b)
        {
            list.Add(b);
        }

        /// <summary>Appends the low 8 bits of every character; null or empty input is ignored.</summary>
        public void Add(string str)
        {
            if (string.IsNullOrEmpty(str))
                return;
            for (int i = 0; i < str.Length; i++)
                Add(str[i]);
        }

        /// <summary>Removes all bytes.</summary>
        public void Clear()
        {
            list.Clear();
        }

        /// <summary>BKDR hash of the content (base 1313), truncated to 32 bits.</summary>
        public override int GetHashCode()
        {
            // Overflow is intended: the hash wraps around instead of throwing.
            unchecked
            {
                ulong hash = 0;
                const uint p = 1313;
                for (int i = 0; i < list.Count; i++)
                {
                    hash = hash * p + list[i];
                }
                return (int)hash;
            }
        }

        /// <summary>
        /// Compares the byte content. Equal hashes alone are not sufficient, because two
        /// different sequences can share a hash code.
        /// </summary>
        public bool Equals(ZString other)
        {
            if (ReferenceEquals(other, null))
                return false;
            if (ReferenceEquals(other, this))
                return true;
            if (list.Count != other.list.Count)
                return false;
            for (int i = 0; i < list.Count; i++)
            {
                if (list[i] != other.list[i])
                    return false;
            }
            return true;
        }

        /// <summary>Returns true when <paramref name="obj"/> is a ZString with the same bytes.</summary>
        public override bool Equals(object obj)
        {
            return Equals(obj as ZString);
        }

        /// <summary>Returns the content interpreted as ASCII text.</summary>
        public override string ToString()
        {
            return Encoding.ASCII.GetString(list.ToArray());
        }

        /// <summary>Returns a new byte string holding <paramref name="a"/> followed by <paramref name="b"/>.</summary>
        public static ZString operator +(ZString a, ZString b)
        {
            ZString zs = new ZString();
            zs.list.AddRange(a.list);
            zs.list.AddRange(b.list);
            return zs;
        }

        /// <summary>Returns a new byte string holding <paramref name="a"/> followed by the low 8 bits of <paramref name="b"/>.</summary>
        public static ZString operator +(ZString a, char b)
        {
            ZString zs = new ZString();
            zs.list.AddRange(a.list);
            zs.Add(b);
            return zs;
        }
    }
}
}
/// <summary>
/// Extension helpers that read integers out of a <see cref="BitArray"/>.
/// The bit at index 0 becomes the least-significant bit of the result.
/// </summary>
public static class BitArrayExtension
{
    /// <summary>
    /// Reads <paramref name="bitLength"/> bits starting at <paramref name="startIndex"/> as an int.
    /// Positions past the end of <paramref name="array"/> are read as 0.
    /// </summary>
    public static int GetInt(this BitArray array, int startIndex, int bitLength)
    {
        return CopyWindow(array, startIndex, bitLength).ToInt();
    }

    /// <summary>
    /// Converts the whole array to an int. A null array or one longer than 32 bits is
    /// reported on the console and yields 0.
    /// </summary>
    public static int ToInt(this BitArray array)
    {
        if (array == null)
        {
            Console.WriteLine("array is nothing.");
            return 0;
        }
        if (array.Length > 32)
        {
            Console.WriteLine("must be at most 32 bits long.");
            return 0;
        }

        int[] word = new int[1];
        array.CopyTo(word, 0);
        return word[0];
    }

    /// <summary>
    /// Reads <paramref name="bitLength"/> bits starting at <paramref name="startIndex"/> as a ushort.
    /// Positions past the end of <paramref name="array"/> are read as 0.
    /// </summary>
    public static ushort GetUShort(this BitArray array, int startIndex, int bitLength)
    {
        return CopyWindow(array, startIndex, bitLength).ToUShort();
    }

    /// <summary>
    /// Converts the whole array to a ushort, keeping the low 16 bits of the int conversion.
    /// A null array or one longer than 32 bits is reported on the console and yields 0.
    /// </summary>
    public static ushort ToUShort(this BitArray array)
    {
        // Same validation, messages and bit copy as ToInt; only the result is narrowed.
        return (ushort)array.ToInt();
    }

    /// <summary>
    /// Copies a window of bits into a new array, padding with false past the end of the source.
    /// </summary>
    private static BitArray CopyWindow(BitArray array, int startIndex, int bitLength)
    {
        BitArray window = new BitArray(bitLength);
        for (int offset = 0; offset < bitLength; offset++)
        {
            int position = startIndex + offset;
            window[offset] = array.Length > position && array.Get(position);
        }
        return window;
    }
}
运行效果