RLE(Run Length Encoding)压缩算法即行程长度压缩算法,也称游程长度压缩算法,是最早出现、也是最简单的无损数据压缩算法。RLE压缩算法对黑白图像和基于调色板的单调图像有很高的压缩效率,不仅常用于处理图像数据,在传真机上也得到了广泛的应用。
压缩格式
[长度标识位] [数据块]
长度标识位通常用一个字节标识,字节最高位为1表示后面的数据块是连续重复数据。字节最高位为0表示后面的数据块是非连续数据。把最高位除开,只有剩下的7位用来表示数据长度(即最大值是127)。
重复数据: 连续出现次数2次以上。(例如: 65, 65, 65, ...)
以下是RLE算法实现代码
using System;
/// <summary>
/// 行程长度压缩算法 RLE(Run Length Encoding)
/// 最简单的无损数据压缩算法
/// </summary>
public class RLE
{
/// <summary>
/// 压缩
/// </summary>
/// <param name="inbuf">原始数据</param>
/// <returns>压缩后的数据</returns>
public static byte[] Encode(byte[] inbuf)
{
int encSize = 0;
int inbufPos = 0;
int inBuffSize = inbuf.Length;
byte[] buff = new byte[inBuffSize];
while (inbufPos < inBuffSize)
{
byte count = 0;
if (IsRepetitionStart(inbuf, inbufPos))
{
count = GetRepetitionCount(inbuf, inbufPos);
//长度字节最高位设置为1
buff[encSize++] = (byte)(count | 0x80);
buff[encSize++] = inbuf[inbufPos];
inbufPos += count;
}
else
{
count = GetNonRepetitionCount(inbuf, inbufPos);
buff[encSize++] = count;
//逐个复制这些数据
for (int i = 0; i < count; i++)
{
buff[encSize++] = inbuf[inbufPos++];
}
}
}
byte[] outbuf = new byte[encSize];
Array.Copy(buff, 0, outbuf, 0, encSize);
return outbuf;
}
/// <summary>
/// 解压
/// </summary>
/// <param name="inbuf">压缩数据</param>
/// <param name="outBuffSize">输出缓冲区大小</param>
/// <returns>解压后的数据</returns>
public static byte[] Decode(byte[] inbuf, int outBuffSize = -1)
{
int i;
int decSize = 0;
int count = 0;
int inbufPos = 0;
int inbufSize = inbuf.Length;
if (-1 == outBuffSize)
outBuffSize = inbufSize * 5;
byte[] buff = new byte[outBuffSize];
while (inbufPos < inbufSize)
{
byte sign = inbuf[inbufPos++];
count = sign & 0x3F;
//输出缓冲区空间不够了
if (decSize + count > outBuffSize)
{
Console.WriteLine("错误: 输出缓冲区空间不够");
return null;
}
if ((sign & 0x80) == 0x80)//连续重复数据标志
{
for (i = 0; i < count; i++)
{
buff[decSize++] = inbuf[inbufPos];
}
inbufPos++;
}
else
{
for (i = 0; i < count; i++)
{
buff[decSize++] = inbuf[inbufPos++];
}
}
}
byte[] outbuf = new byte[decSize];
Array.Copy(buff, 0, outbuf, 0, decSize);
return outbuf;
}
/// <summary>
/// 是否连续三个字节数据相同
/// </summary>
/// <returns></returns>
static bool IsRepetitionStart(byte[] src, int srcLeft)
{
if (srcLeft + 3 >= src.Length)
return false;
int byte1 = src[srcLeft];
int byte2 = src[srcLeft + 1];
int byte3 = src[srcLeft + 2];
return (byte1 == byte2 && byte1 == byte3);
}
static byte GetRepetitionCount(byte[] src, int srcPos)
{
int endLeft = srcPos + 127;
if (endLeft >= src.Length)
endLeft = src.Length - 1;
byte count = 1;
for (int i = srcPos; i < endLeft; i++)
{
if (src[i] != src[i + 1])
break;
count++;
}
return count;
}
static byte GetNonRepetitionCount(byte[] src, int srcPos)
{
//只剩最后两字节数据了
if (src.Length - srcPos <= 3)
return 2;
int endLeft = srcPos + 127;
if (endLeft >= src.Length)
endLeft = src.Length - 1;
byte count = 0;
for (int i = srcPos; i <= endLeft; i++)
{
if (IsRepetitionStart(src, i))
break;
count++;
}
return count;
}
}
测试
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
class Program
{
static void Main(string[] args)
{
//测试数据
byte[] data = new byte[] {
10,
20, 20,
30, 30, 30,
40, 40, 40, 40,
50, 50, 50, 50, 50,
60, 60, 60, 60, 60, 60,
70, 70, 80, 80, 90
};
Console.WriteLine("原始数据");
for (int i = 0; i < data.Length; i++)
Console.Write(data[i]+", ");
Console.WriteLine();
Console.WriteLine("RLE压缩数据");
byte[] en_data = RLE.Encode(data);
for (int i = 0; i < en_data.Length; i++)
Console.Write(en_data[i] + ", ");
Console.WriteLine();
Console.WriteLine("RLE解压数据");
byte[] de_data = RLE.Decode(en_data);
for (int i = 0; i < de_data.Length; i++)
Console.Write(de_data[i] + ", ");
Console.Read();
}
}
运行效果