/*
								+----------------------------------+
								|                                  |
								|  *** Basic compression algs ***  |
								|                                  |
								|   Copyright  -tHE SWINe- 2008   |
								|                                  |
								|            Compress.h            |
								|                                  |
								+----------------------------------+
*/

/**
 *	@file Compress.h
 *	@author -tHE SWINe-
 *	@brief Simple experimental data compression framework, focused
 *		on Burrows-Wheeler methods.
 *
 *	@date 2007-02-25
 *
 *	this is the first beta version of the file. todo - rewrite TBuffer so it can realloc
 *	itself in a way std::vector can and try to avoid any (re)allocations while (un)packing
 *
 *	@date 2008-03-13
 *
 *	TBuffer was rewritten as requested, fixed some signed / unsigned mismatches for gcc
 *
 *	@date 2008-11-20
 *
 *	TBuffer unit is no longer unsigned char, but uint8_t instead, this should avoid
 *	incompatibility with some extended character encoding in newer versions of visual studio
 *
 *	@date 2009-05-23
 *
 *	removed all instances of std::vector::reserve and replaced them by stl_ut::Reserve_*
 *
 *	@date 2009-10-08
 *
 *	slightly improved CHuffmanCodec, fixed bug in canonical huffman code generation for
 *	trees where there are no symbols of length n, but there are both shorter and longer
 *	symbols (codes got shifted too much, got too long, had to be regenerated). this was
 *	hurting compression and so it had to be fixed, but the bug was also in decompression
 *	code, so this sadly breaks backward compatibility.
 *
 *	@date 2009-10-11
 *
 *	replaced stl container ::resize() by stl_ut::Resize_*() to avoid unhandled
 *	std::bad_alloc
 *
 *	optimized CBurrowsWheelerTransform::CWrapMemCmp() wrap-around memory comparator by
 *	calculating lengths of blocks that do not wrap and comparing them in shorter loops
 *
 *	added __BWT_ENABLE_THREADED_ENCODE macro
 *
 *	@date 2009-10-20
 *
 *	fixed some warnings when compiling under VC 2005, implemented "Security
 *	Enhancements in the CRT" for VC 2008. compare against MyProjects_2009-10-19_
 *
 */

#ifndef __SIMPLE_COMPRESSION_INCLUDED
#define __SIMPLE_COMPRESSION_INCLUDED

#include "Buffer.h"

/**
 *	@def __BWT_ENABLE_THREADED_ENCODE
 *
 *	@brief enables CBurrowsWheelerTransform::ThreadedEncode()
 *
 *	This enables the multi-threaded implementation of Burrows-Wheeler transform. While
 *		it may be faster than the single-threaded implementation, it's far from perfect
 *		(threads are not loaded equally). Therefore, for high-performance
 *		implementations, parallelism should be achieved another way.
 */
//#define __BWT_ENABLE_THREADED_ENCODE

/**
 *	@brief simple BWT encoder / decoder
 *
 *	Simple Burrows-Wheeler transform implementation. Uses indices instead
 *		of string copies to minimize memory usage. Also uses somewhat optimized
 *		version of memcpy, only available under windows (it is written in MASM).
 *
 *	All member functions are static, the class only serves as a scope for them.
 *
 *	@note NOTE(review): the file history mentions the CWrapMemCmp wrap-around memory
 *		comparator, so "memcpy" above possibly should read "memcmp" - confirm against
 *		the implementation.
 *
 *	@todo Create GAS port of the assembly part, so this would be equally fast in linux.
 */
class CBurrowsWheelerTransform {
private:
	// helper classes; these are only forward-declared here,
	// their definitions are not part of this header
	class CWrapMemCmp; /**< @brief wrap-around memory comparator (as named in the file history) */
	class CIota;
	class CSorter;
	class CMerger;

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note This doesn't work (is going to return false) with empty input buffer.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest);

#ifdef __BWT_ENABLE_THREADED_ENCODE

	/**
	 *	@brief parallel encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest, works in parallel. While
	 *		this may be faster than the single-threaded implementation, it's far from
	 *		perfect (threads are not loaded equally). Therefore, for high-performance
	 *		implementations, parallelism should be achieved another way.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_thread_num is number of worker threads (must be power of two).
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note Doesn't work (is going to return false) with empty input buffer.
	 *	@note This only gets compiled if __BWT_ENABLE_THREADED_ENCODE macro
	 *		is defined (not by default).
	 */
	static bool ThreadedEncode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_thread_num);

#endif //__BWT_ENABLE_THREADED_ENCODE
};

/**
 *	@brief basic move to front transformation implementation
 *
 *	Implements MTF as originally proposed, and its variant MTF-1.
 *
 *	There are two flavors of each operation: in-place (single buffer, returns void)
 *		and source / destination (two buffers, returns bool).
 */
class CMoveToFrontTransform {
public:
	/**
	 *	@brief MTF algorithm names
	 *
	 *	These are the values accepted as the n_algorithm argument (passed as int,
	 *		the enum itself is unnamed).
	 */
	enum {
		algo_MTF,	/**< original MTF */
		algo_MTF_1	/**< MTF-1 */
	};

	/**
	 *	@brief in-place decoding function
	 *
	 *	Decodes data in r_t_buffer.
	 *
	 *	@param[in,out] r_t_buffer is both source and destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *		(default algo_MTF)
	 *
	 *	@note This function is declared void, it has no return value and
	 *		so it can not report failure through one.
	 *	@note Using different algorithm than the one used when encoding yields
	 *		different results.
	 */
	static void Decode(TBuffer &r_t_buffer, int n_algorithm = algo_MTF);

	/**
	 *	@brief in-place encoding function
	 *
	 *	Encodes data in r_t_buffer.
	 *
	 *	@param[in,out] r_t_buffer is both source and destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *		(default algo_MTF)
	 *
	 *	@note This function is declared void, it has no return value and
	 *		so it can not report failure through one.
	 */
	static void Encode(TBuffer &r_t_buffer, int n_algorithm = algo_MTF);

	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *		(default algo_MTF)
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note Using different algorithm than the one used when encoding yields
	 *		different results.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm = algo_MTF);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *		(default algo_MTF)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm = algo_MTF);

private:
	/**
	 *	@brief internal encoding routine
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm (no default value here)
	 *
	 *	@note NOTE(review): presumably the worker shared by the public Encode()
	 *		overloads; whether r_t_src and r_t_dest may be the same buffer can not
	 *		be told from this header - confirm in the implementation.
	 */
	static void _Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm);

	/**
	 *	@brief internal decoding routine
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm (no default value here)
	 *
	 *	@note NOTE(review): presumably the worker shared by the public Decode()
	 *		overloads; whether r_t_src and r_t_dest may be the same buffer can not
	 *		be told from this header - confirm in the implementation.
	 */
	static void _Decode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm);
};

/**
 *	@brief simple run length coder
 *
 *	Implementation of RLE, optimized for packing MTF outputs (compression flag bit is LSB so,
 *		in theory, symbols with lower values are generated, in hope not to disturb symbol
 *		probabilities after MTF too much). It actually works with the Calgary corpus.
 */
class CRunLengthCodec {
private:
	/**
	 *	@brief RLE configuration enums
	 */
	enum {
		min_RunLength = 3	/**< @brief minimal run length setting */
	};

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest);
};

/**
 *	@brief modified dual-stream run length coder
 *
 *	Simple RLE, optimized for packing IF outputs (run lengths are stored in a second buffer).
 *		This implementation is similar to the RLE-EXP algorithm.
 */
class CModifiedRLECodec {
public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, r_t_src_runs and outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer, containing encoded symbols
	 *	@param[in] r_t_src_runs is source data buffer, containing run lengths
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note NOTE(review): r_t_src_runs is taken by non-const reference, unlike r_t_src;
	 *		whether the function modifies it can not be told from this header - confirm
	 *		in the implementation.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_src_runs, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest and r_t_dest_runs.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer, containing encoded symbols
	 *		(original contents will be lost)
	 *	@param[out] r_t_dest_runs is destination data buffer, containing run lengths
	 *		(original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, TBuffer &r_t_dest_runs);
};

/**
 *	@brief Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *		Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *		symbols), followed by bitstream. Last byte is padded with zeros.
 *
 *	@note NOTE(review): std::vector is used in this class, but this header does not
 *		include <vector> directly; presumably it comes in through "Buffer.h" - confirm.
 */
class CHuffmanCodec {
public:
	/**
	 *	@brief Huffman configuration enums
	 */
	enum {
		max_CodeBitNum = 16	/**< @brief Huffman code length limit (can be 1 to 31) */
	};

	// helper types; these are only forward-declared here,
	// their definitions are not part of this header
	struct TFrequency;
	struct TNode;
#ifdef _DEBUG
	class CFindUnsorted; // only declared in debug builds (_DEBUG defined)
#endif //_DEBUG
	class CGoUp;

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

private:
	/**
	 *	@brief binary predicate on two symbol frequency records
	 *
	 *	@param[in] r_freq_a is the first frequency record
	 *	@param[in] r_freq_b is the second frequency record
	 *
	 *	@return Returns result of the comparison.
	 *
	 *	@note NOTE(review): the comparison criterion is defined in the implementation
	 *		and can not be told from this header (presumably an ordering by symbol,
	 *		judging by the name) - confirm.
	 */
	static inline bool CompareSymbol(const TFrequency &r_freq_a, const TFrequency &r_freq_b);

	/**
	 *	@brief unary predicate on a symbol frequency record
	 *
	 *	@param[in] r_t_freq is the frequency record to be tested
	 *
	 *	@return Returns result of the test.
	 *
	 *	@note NOTE(review): presumably returns true for records with zero frequency,
	 *		judging by the name - confirm in the implementation.
	 */
	static inline bool FindZeroFreq(const TFrequency &r_t_freq);

	/**
	 *	@brief Huffman tree construction
	 *
	 *	@param[in,out] r_freq_list is list of symbol frequency records (passed
	 *		by non-const reference)
	 *	@param p_code_num is pointer to an array of integers
	 *
	 *	@return Returns a bool (presumably true on success, false on failure,
	 *		as the public functions do - confirm).
	 *
	 *	@note NOTE(review): p_code_num presumably receives numbers of codes of each
	 *		length (up to max_CodeBitNum); neither its direction nor the required
	 *		array size can be told from this header - confirm in the implementation.
	 */
	static bool CreateHuffmanTree(std::vector<TFrequency> &r_freq_list, int *p_code_num);
};

/**
 *	@brief inversion frequencies coder
 *
 *	Naive (sorted) inversion frequencies implementation.
 */
class CInversionFrequenciesCodec {
private:
	// helper classes; these are only forward-declared here, their definitions are
	// not part of this header (presumably comparators for sort_FreqAscending
	// and sort_FreqDescending - confirm)
	class CSortAsc;
	class CSortDesc;

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief symbol sorting names for encoding
	 *
	 *	These are the values accepted as the n_permutation_type argument of Encode()
	 *		(passed as int, the enum itself is unnamed).
	 */
	enum {
		sort_NoSort = 0,		/**< @brief symbols are not sorted */
		sort_FreqAscending,		/**< @brief symbols are sorted with increasing frequency before encoding */
		sort_FreqDescending		/**< @brief symbols are sorted with decreasing frequency before encoding */
	};

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *	@param[in] n_permutation_type is type of permutation, applied to symbols before encoding
	 *		(one of sort_NoSort, sort_FreqAscending or sort_FreqDescending; default sort_NoSort)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer,
		TBuffer &r_t_out_buffer, int n_permutation_type = sort_NoSort);

private:
	/**
	 *	@brief output helper
	 *
	 *	@param[in,out] r_t_out_buffer is output buffer (passed by non-const reference)
	 *	@param[in] n_value is the 32-bit unsigned value to be emitted
	 *
	 *	@return Returns a bool (presumably true on success, false on failure - confirm).
	 *
	 *	@note NOTE(review): how n_value is stored in the buffer (fixed or variable
	 *		number of bytes) can not be told from this header - confirm in
	 *		the implementation.
	 */
	static inline bool Emit(TBuffer &r_t_out_buffer, uint32_t n_value);
};

#endif //__SIMPLE_COMPRESSION_INCLUDED
