/*
								+--------------------------------+
								|                                |
								|*** Elementary combinatorics ***|
								|                                |
								|  Copyright  -tHE SWINe- 2009  |
								|                                |
								|          Combinat.cpp          |
								|                                |
								+--------------------------------+
*/

/**
 *	@file Combinat.cpp
 *	@brief elementary combinatorics methods
 *	@date 2009
 *	@author -tHE SWINe-
 */

#include "NewFix.h"
#include "CallStack.h"
#include "Combinat.h"

// Workaround for old Microsoft compilers (_MSC_VER <= 1200, i.e. Visual C++ 6.0 and older):
// 'for' is redefined so that every for-statement ends up in the else branch of a dummy
// 'if(0) {}'. The guard skips this for Metrowerks and when 'for' is already a macro.
#if defined(_MSC_VER) && !defined(__MWERKS__) && !defined(for) && _MSC_VER <= 1200
#define for if(0) {} else for
#endif // _MSC_VER && !__MWERKS__ && !for && _MSC_VER <= 1200
// msvc 'for' scoping hack

/*
 *								=== Unit tests ===
 */

#if 0

/**
 *	@brief unit tests for Combinat.cpp
 *	@note This does not test CUnorderedPartition.
 */
class CCombinatTests {
public:
	/**
	 *	@brief reports a failed test condition
	 *
	 *	Does nothing if the condition holds. Otherwise prints an error to stderr
	 *	and triggers _ASSERTE() so that a debugger can stop at the failure.
	 *
	 *	@param[in] b_expression is the value of the tested condition
	 */
	static inline void runtime_assert(bool b_expression)
	{
		if(b_expression)
			return;
		// the condition holds, nothing to report

		fprintf(stderr, "error: CCombinatTests failed\n");
		_ASSERTE(b_expression); // debugger will stop at this, can backtrack through call stack
	}

	/**
	 *	@brief default constructor; runs the tests
	 *
	 *	Runs, in this order, the CMixedRadixInteger tests, the CCombination tests
	 *	and the CPermutation / CSeekablePermutation tests. Failures are reported
	 *	through runtime_assert(); a final message is printed to stderr at the end.
	 */
	CCombinatTests()
	{
		{
			TestMixRadix(16, 16, 16);
			TestMixRadix(10, 10, 10);
			TestMixRadix(10, 1000, 100);
			TestMixRadix(3, 5, 7);
			TestMixRadix(1, 2, 4);
			{
				const size_t p_radix[] = {UINT32_MAX, UINT32_MAX, UINT32_MAX};
				CMixedRadixInteger mix_radix(p_radix, 3);
				runtime_assert(!mix_radix.n_Max_Value()); // the product of radices overflows, zero is expected
				uint64_t n_max_value;
				runtime_assert(!mix_radix.Get_Max_Value(n_max_value)); // overflow, expected to fail
			}
			// radices whose product does not fit in 64 bits
			{
				const size_t p_radix[] = {0};
				CMixedRadixInteger mix_radix(p_radix, 1);
				runtime_assert(!mix_radix.n_Value());
				runtime_assert(!mix_radix.n_Max_Value());
				uint64_t n_max_value;
				runtime_assert(mix_radix.Get_Max_Value(n_max_value) && !n_max_value);
				runtime_assert(!mix_radix[0]);
				bool b_increment = ++ mix_radix;
				runtime_assert(!b_increment && !mix_radix[0]); // can't be incremented, the digit stays zero
				bool b_decrement = -- mix_radix;
				runtime_assert(!b_decrement && !mix_radix[0]); // can't be decremented, the digit stays zero
			}
			// a single digit with zero radix: the only representable value is zero
			TestMixRadix_ZeroBase(10, 10, 10, 0);
			TestMixRadix_ZeroBase(10, 10, 10, 1);
			TestMixRadix_ZeroBase(10, 10, 10, 2);
			TestMixRadix_ZeroBase(10, 10, 10, 3);
			TestMixRadix_ZeroBase(3, 5, 7, 0);
			TestMixRadix_ZeroBase(3, 5, 7, 1);
			TestMixRadix_ZeroBase(3, 5, 7, 2);
			TestMixRadix_ZeroBase(3, 5, 7, 3);
			TestMixRadix_ZeroBase(1, 2, 4, 0);
			TestMixRadix_ZeroBase(1, 2, 4, 1);
			TestMixRadix_ZeroBase(1, 2, 4, 2);
			TestMixRadix_ZeroBase(1, 2, 4, 3);
			// the zero-radix digit is inserted at each of the four possible positions
		}
		// CMixedRadixInteger test

		{
			TestCombination(10, 3);
			TestCombination(10, 2);
			TestCombination(10, 1);
			TestCombination(10, 0);
			TestCombination(7, 4);
			TestCombination(5, 7);
			TestCombination(1, 3);
			//TestCombination(0, 3); // correctly triggers assertion in CCombination constructor
			TestCombination(0, 0); // this works correctly
		}
		// CCombination test

		{
			/*CSeekablePermutation p(UINT32_MAX, 3); // max = UINT32_MAX * (UINT32_MAX - 1) * (UINT32_MAX - 2) > UINT64_MAX
			runtime_assert(!p.n_Permutation_Num()); // overflow, return zero
			uint64_t n_num;
			runtime_assert(!p.Get_Permutation_Num(n_num));*/ // overflow, fail
			// fails to initialize
		}
		{
			CSeekablePermutation p(21, 21); // max = 21! = 51090942171709440000, 21! / 2^64 = 2.769 ..., so it is more
			runtime_assert(!p.n_Permutation_Num()); // overflow, return zero
			uint64_t n_num;
			runtime_assert(!p.Get_Permutation_Num(n_num)); // overflow, fail
		}
		{
			CSeekablePermutation p(50, 3); // max = 50 * 49 * 48
			runtime_assert(p.n_Permutation_Num() == 50 * 49 * 48); // no overflow, the exact count is expected
			uint64_t n_num;
			runtime_assert(p.Get_Permutation_Num(n_num) && n_num == 50 * 49 * 48); // no overflow, must succeed
		}
		{
			/*CPermutation p(UINT32_MAX, 3); // max = UINT32_MAX * (UINT32_MAX - 1) * (UINT32_MAX - 2) > UINT64_MAX
			runtime_assert(!p.n_Permutation_Num()); // overflow, return zero
			uint64_t n_num;
			runtime_assert(!p.Get_Permutation_Num(n_num));*/ // overflow, fail
			// fails to initialize
		}
		{
			CPermutation p(21, 21); // max = 21! = 51090942171709440000, 21! / 2^64 = 2.769 ..., so it is more
			runtime_assert(!p.n_Permutation_Num()); // overflow, return zero
			uint64_t n_num;
			runtime_assert(!p.Get_Permutation_Num(n_num)); // overflow, fail
		}
		{
			CPermutation p(5, 3); // max = 5 * 4 * 3
			runtime_assert(p.n_Permutation_Num() == 5 * 4 * 3); // no overflow, the exact count is expected
			uint64_t n_num;
			runtime_assert(p.Get_Permutation_Num(n_num) && n_num == 5 * 4 * 3); // no overflow, must succeed
		}
		// permutation counting, with and without overflow
		{
			TestPermutation(3, 3);
			TestPermutation(4, 4);
			TestPermutation(1, 1);
			TestPermutation(5, 3);
			TestPermutation(5, 4);
			TestPermutation(6, 3);
			TestPermutation(6, 4);
			TestPermutation(5, 5);
			TestPermutation(5, 0);
			TestPermutation(6, 0);
			TestPermutation(0, 0);
			TestPermutation(11, 4);
			TestPermutation(12, 4); 
			TestPermutation(11, 3);
			TestPermutation(12, 3); // big one, both CPermutation and CSeekablePermutation are tested
			TestPermutation(50, 2);
			TestPermutation(51, 2);
			TestPermutation(50, 3);
			TestPermutation(51, 3); // big one, only CSeekablePermutation is tested
		}
		// CPermutation and CSeekablePermutation test

		fprintf(stderr, "permutation unit tests finished\n");
	}

	/**
	 *	@brief tests CPermutation and CSeekablePermutation against a brute-force reference
	 *
	 *	The reference is a std::set of all the tuples generated by CCombination(m, n)
	 *	in which no element repeats. The permutation classes are then required to
	 *	enumerate the same tuples in the order of the set (digits are compared in
	 *	reverse, which is why __PERMUTATION_LSB_FIRST must be defined), and
	 *	CSeekablePermutation is required to seek to randomly chosen positions.
	 *
	 *	@param[in] m is size of the set to choose from
	 *	@param[in] n is number of the items to choose
	 *
	 *	@note CPermutation is only tested if m is less than 22 (it is constructed
	 *		with set size of min(21, m) to keep the constructor valid otherwise).
	 */
	void TestPermutation(int m, int n) // choose n items out of m
	{
		fprintf(stderr, "TestPermutation(%d, %d)\n", m, n);
#ifndef __PERMUTATION_LSB_FIRST
		runtime_assert(0); // __PERMUTATION_LSB_FIRST must be defined
#endif // __PERMUTATION_LSB_FIRST

		std::set<std::vector<int> > all_perms;
		{
			CCombination combination(m, n);
			do {
				std::vector<int> c(n), d;
				for(int i = 0; i < n; ++ i)
					c[i] = (int)combination[i]; // we're ok with the cast (it saves memory)
				d = c;
				std::sort(d.begin(), d.end());
				if(std::unique(d.begin(), d.end()) != d.end())
					continue; // one of the elements occurred more than once
				all_perms.insert(c); // a combination which is also a valid permutation
			} while(combination.Next());
		}
		uint64_t n_max = all_perms.size();
		// generate ordered set of all the permutations

		CPermutation p(min(21, m), n);
		if(m < 22) { // small one only
			runtime_assert(p.n_Set_Size() == m);
			runtime_assert(p.n_Permutation_Size() == n);
			for(int i = 0; i < n; ++ i)
				runtime_assert(p[i] == n - 1 - i && p[i] == p.p_Indices()[i]);
			p.First();
			for(int i = 0; i < n; ++ i)
				runtime_assert(p[i] == n - 1 - i && p[i] == p.p_Indices()[i]);
			// the initial state and the state after First() must both be the first permutation
			runtime_assert(n_max == p.n_Permutation_Num() || (!n && p.n_Permutation_Num() == 1)); // if there are no element, it still counts as one permutation ("nothing")
			std::set<std::vector<int> >::const_iterator p_it = all_perms.begin();
			do {
				runtime_assert(p_it != all_perms.end()); // make sure p.Next() returns false at the correct moment
				for(int i = 0; i < n; ++ i)
					runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]);
				++ p_it;
			} while(p.Next());
			runtime_assert(p_it == all_perms.end()); // make sure p.Next() returns false at the correct moment
			// the set is actually lexicographically sorted, we can use it to match permutations

			if(!all_perms.empty()) {
				-- p_it;
				for(int i = 0; i < n; ++ i)
					runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]);
			}
			// p still contains the last permutation

			for(int i = 0; i < 10; ++ i) {
				bool b_increment = p.Next(); // incrementing again still fails, does nothing
				runtime_assert(!b_increment);
				if(!all_perms.empty()) {
					for(int i = 0; i < n; ++ i)
						runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]); // it is pointing at the last solution
				}
			}
			// incrementing again still fails, does nothing
		}
		// test CPermutation

		CSeekablePermutation seekp(m, n);
		{
			runtime_assert(seekp.n_Set_Size() == m);
			runtime_assert(seekp.n_Permutation_Size() == n);
			for(int i = 0; i < n; ++ i)
				runtime_assert(seekp[i] == n - 1 - i && seekp[i] == seekp.p_Indices()[i]);
			seekp.First();
			for(int i = 0; i < n; ++ i)
				runtime_assert(seekp[i] == n - 1 - i && seekp[i] == seekp.p_Indices()[i]);
			runtime_assert(n_max == seekp.n_Permutation_Num() || (!n && seekp.n_Permutation_Num() == 1)); // if there are no element, it still counts as one permutation ("nothing")

			std::set<std::vector<int> >::const_iterator p_it = all_perms.begin();
			do {
				runtime_assert(p_it != all_perms.end()); // make sure p.Next() returns false at the correct moment
				for(int i = 0; i < n; ++ i)
					runtime_assert(seekp[i] == (*p_it)[n - 1 - i] && seekp[i] == seekp.p_Indices()[i]);
				++ p_it;
			} while(seekp.Next());
			runtime_assert(p_it == all_perms.end()); // make sure p.Next() returns false at the correct moment
			// the set is actually lexicographically sorted, we can use it to match permutations

			if(!all_perms.empty()) {
				p_it = all_perms.begin();
				for(int i = 0; i < n; ++ i)
					runtime_assert(seekp[i] == (*p_it)[n - 1 - i] && seekp[i] == seekp.p_Indices()[i]);
			}
			// unlike CPermutation, a failed Next() is expected to rewind seekp to the first permutation
			if(n_max > 1) {
				bool b_incremented = seekp.Next();
				runtime_assert(b_incremented); // incrementing again must work (if there is >1 permutation)
				runtime_assert(all_perms.size() > 1);
				p_it = all_perms.begin();;
				++ p_it;
				for(int i = 0; i < n; ++ i)
					runtime_assert(seekp[i] == (*p_it)[n - 1 - i] && seekp[i] == seekp.p_Indices()[i]);
			}
			// after the rewind, Next() must advance seekp to the second permutation
		}
		// test seekable permutation

		for(int i = 0; i < 1000; ++ i) {
			int n_index = rand() % n_max; // n_max is at least 1, the set always contains at least the empty tuple

			std::set<std::vector<int> >::const_iterator p_it = all_perms.begin();
			for(int j = 0; j < n_index; ++ j)
				++ p_it;
			// find n-th permutation

			seekp.Seek(n_index);
			// seek to the n-th permutation

			for(int i = 0; i < n; ++ i)
				runtime_assert(seekp[i] == (*p_it)[n - 1 - i] && seekp[i] == seekp.p_Indices()[i]);
			// make sure it seeked to the same one

			if(m < 22) {
				p.Set_Permutation(seekp.p_Indices());
				// copy the permutation to p

				for(int i = 0; i < n; ++ i)
					runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]);
				// make sure it seeked to the same one
			}

			for(int j = 0; j < 10; ++ j) {
				bool b_incr0; // only assigned if m < 22
				if(m < 22) {
					b_incr0 = p.Next();
					if(b_incr0) {
						runtime_assert(p_it != all_perms.end());
						++ p_it;
						for(int i = 0; i < n; ++ i)
							runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]);
						// moved to the next one
					} else {
						++ p_it; // !!
						runtime_assert(p_it == all_perms.end());
						-- p_it;
						for(int i = 0; i < n; ++ i)
							runtime_assert(p[i] == (*p_it)[n - 1 - i] && p[i] == p.p_Indices()[i]);
						// stuck at the last one
					}
				}
				bool b_incr1 = seekp.Next();
				runtime_assert(m >= 22 || b_incr0 == b_incr1); // both classes must agree on whether there is a next permutation
				if(b_incr1) {
					runtime_assert(p_it != all_perms.end());
					if(m >= 22)
						++ p_it; // for m < 22 the iterator was already advanced above
					for(int i = 0; i < n; ++ i)
						runtime_assert(seekp[i] == (*p_it)[n - 1 - i] && seekp[i] == seekp.p_Indices()[i]);
					// moved to the next one
				} else {
					runtime_assert(m >= 22 || ++ p_it == all_perms.end()); // otherwise did/checked that above
					p_it = all_perms.begin(); // don't care about above
					for(int i = 0; i < n; ++ i)
						runtime_assert(seekp[i] == n - 1 - i && seekp[i] == seekp.p_Indices()[i]);
					// back to the first one
				}
				if(!b_incr1) // b_incr0 may be uninitialized
					break;
			}
			// test incrementing after a seek
		}
		// test seeking and setting the seeked permutation
	}

	void TestCombination(int m, int n) // choose n items out of m
	{
		fprintf(stderr, "TestCombination(%d, %d)\n", m, n);

		std::vector<size_t> radix(n, m);
		runtime_assert(radix.size() == n && (radix.empty() || radix[0] == m)); // "n" long vector of "m"s

		CCombination c(m, n);
		for(int i = 0; i < n; ++ i)
			runtime_assert(!c[i]);
		c.First();
		for(int i = 0; i < n; ++ i)
			runtime_assert(!c[i]);
		CMixedRadixInteger integer((n)? &radix[0] : 0, n);
		uint64_t n_max = integer.n_Max_Value();
		runtime_assert(n_max + 1 == c.n_Combination_Num()); // must match
		for(size_t i = 0; i < n_max; ++ i) {
			for(int j = 0; j < n; ++ j)
				runtime_assert(c[j] == integer[j]);
			bool b_increment = c.Next();
			runtime_assert(b_increment);
			++ integer;
		}
		bool b_increment = c.Next();
		runtime_assert(!b_increment);
		for(int i = 0; i < n; ++ i)
			runtime_assert(!c[i]);
		b_increment = c.Next();
		if(n_max) {
			runtime_assert(b_increment);
			runtime_assert(c[0] == 1);
			for(int i = 1; i < n; ++ i)
				runtime_assert(!c[i]);
		} else {
			runtime_assert(!b_increment);
			for(int i = 0; i < n; ++ i)
				runtime_assert(!c[i]);
		}
	}

	/**
	 *	@brief tests a three-digit CMixedRadixInteger with nonzero radices
	 *
	 *	Counts through all the values while checking the digits, checks the
	 *	wrap-around of increment and decrement at both ends of the range and
	 *	finally sets 1000 random values and steps around each of them.
	 *
	 *	@param[in] r0 is radix of the least significant digit (must be positive)
	 *	@param[in] r1 is radix of the middle digit (must be positive)
	 *	@param[in] r2 is radix of the most significant digit (must be positive)
	 */
	void TestMixRadix(int r0, int r1, int r2)
	{
		fprintf(stderr, "TestMixRadix(%d, %d, %d)\n", r0, r1, r2);

		runtime_assert(r0 > 0 && r1 > 0 && r2 > 0); // this test does not handle zero radix
		{
			const size_t p_radix[] = {size_t(r0), size_t(r1), size_t(r2)}; // explicit casts, brace init does not allow narrowing
			const size_t n_value_num = p_radix[0] * p_radix[1] * p_radix[2];
			// number of the distinct values the counter can hold

			CMixedRadixInteger mix_radix(p_radix, 3);
			runtime_assert(mix_radix.n_Max_Value() == n_value_num - 1);
			runtime_assert(mix_radix.n_Value() == 0);
			for(size_t i = 0; i < n_value_num - 1; ++ i) {
				runtime_assert(mix_radix.n_Value() == i);
				runtime_assert(mix_radix[0] == i % p_radix[0]);
				runtime_assert(mix_radix[1] == (i / p_radix[0]) % p_radix[1]);
				runtime_assert(mix_radix[2] == (i / (p_radix[0] * p_radix[1])) % p_radix[2]);
				bool b_increment = ++ mix_radix;
				runtime_assert(b_increment);
			}
			runtime_assert(mix_radix.n_Value() == mix_radix.n_Max_Value());
			// count from zero all the way up to the maximum value

			bool b_increment = ++ mix_radix;
			runtime_assert(!b_increment);
			runtime_assert(!mix_radix[0] && !mix_radix[1] && !mix_radix[2]); // all zeroes
			// incrementing the maximum value fails and wraps around to zero

			b_increment = ++ mix_radix;
			runtime_assert(b_increment);
			runtime_assert(mix_radix[0] == 1 % p_radix[0] && mix_radix[1] == (1 / p_radix[0]) % p_radix[1] &&
				mix_radix[2] == (1 / (p_radix[0] * p_radix[1])) % p_radix[2]); // digits of the value one
			bool b_decrement = -- mix_radix;
			runtime_assert(b_decrement);
			runtime_assert(!mix_radix[0] && !mix_radix[1] && !mix_radix[2]); // all zeroes
			// one step up and one step back down

			b_decrement = -- mix_radix;
			runtime_assert(!b_decrement);
			runtime_assert(mix_radix.n_Value() == mix_radix.n_Max_Value());
			runtime_assert(mix_radix[0] == p_radix[0] - 1 &&
				mix_radix[1] == p_radix[1] - 1 &&
				mix_radix[2] == p_radix[2] - 1);
			// decrementing zero fails and wraps around to the maximum value

			if(!mix_radix.n_Max_Value())
				return;
			// the counter has a single value only; the modulo below would divide by zero

			for(int i = 0; i < 1000; ++ i) {
				int n = int(rand() % mix_radix.n_Max_Value()); // in [0, max - 1], never the maximum itself
				mix_radix.SetValue(n);
				runtime_assert(mix_radix.n_Value() == n);
				runtime_assert(mix_radix[0] == n % p_radix[0]);
				runtime_assert(mix_radix[1] == (n / p_radix[0]) % p_radix[1]);
				runtime_assert(mix_radix[2] == (n / (p_radix[0] * p_radix[1])) % p_radix[2]);
				bool b_increment = ++ mix_radix;
				runtime_assert(b_increment == (n < mix_radix.n_Max_Value()) && mix_radix.n_Value() == (n + 1) % (mix_radix.n_Max_Value() + 1));
				bool b_decrement = -- mix_radix;
				runtime_assert(b_decrement && mix_radix.n_Value() == n);
				b_decrement = -- mix_radix;
				runtime_assert(b_decrement == (n > 0) && mix_radix.n_Value() == (n + mix_radix.n_Max_Value()) % (mix_radix.n_Max_Value() + 1));
			}
			// set random values, step up, back down and once more down
		}
	}

	/**
	 *	@brief tests a CMixedRadixInteger with three nonzero radices and one zero radix
	 *
	 *	The same as TestMixRadix(), except that a digit with zero radix is inserted
	 *	at the given position. This digit must read zero at all times and must not
	 *	affect the values of the counter.
	 *
	 *	@param[in] r0 is radix of the least significant nonzero-radix digit (must be positive)
	 *	@param[in] r1 is radix of the middle nonzero-radix digit (must be positive)
	 *	@param[in] r2 is radix of the most significant nonzero-radix digit (must be positive)
	 *	@param[in] n_zero_base is position of the zero-radix digit (0 to 3)
	 */
	void TestMixRadix_ZeroBase(int r0, int r1, int r2, int n_zero_base)
	{
		fprintf(stderr, "TestMixRadix_ZeroBase(%d, %d, %d, %d)\n", r0, r1, r2, n_zero_base);

		runtime_assert(r0 > 0 && r1 > 0 && r2 > 0); // this test does not handle zero radix
		runtime_assert(n_zero_base >= 0 && n_zero_base <= 3); // must be a valid insert position
		{
			std::vector<size_t> radix;
			radix.push_back(r0);
			radix.push_back(r1);
			radix.push_back(r2);
			radix.insert(radix.begin() + n_zero_base, 0);
			runtime_assert(radix[n_zero_base] == 0);
			runtime_assert(radix.size() == 4);
			int n_digit0 = (!n_zero_base)? 1 : 0;
			int n_digit1 = (n_zero_base <= 1)? 2 : 1;
			int n_digit2 = (n_zero_base <= 2)? 3 : 2;
			runtime_assert(radix[n_digit0] == r0);
			runtime_assert(radix[n_digit1] == r1);
			runtime_assert(radix[n_digit2] == r2);
			// make a radix list, remember where the nonzero-radix digits ended up

			CMixedRadixInteger mix_radix(&radix[0], 4);
			runtime_assert(mix_radix.n_Max_Value() == r0 * r1 * r2 - 1);
			runtime_assert(mix_radix.n_Value() == 0);
			for(int i = 0; i < r0 * r1 * r2 - 1; ++ i) {
				runtime_assert(mix_radix.n_Value() == i);
				runtime_assert(mix_radix[n_zero_base] == 0); // should always remain 0
				runtime_assert(mix_radix[n_digit0] == i % radix[n_digit0]);
				runtime_assert(mix_radix[n_digit1] == (i / radix[n_digit0]) % radix[n_digit1]);
				runtime_assert(mix_radix[n_digit2] == (i / (radix[n_digit0] * radix[n_digit1])) % radix[n_digit2]);
				bool b_increment = ++ mix_radix;
				runtime_assert(b_increment);
			}
			runtime_assert(mix_radix.n_Value() == mix_radix.n_Max_Value());
			// count from zero all the way up to the maximum value

			bool b_increment = ++ mix_radix;
			runtime_assert(!b_increment);
			runtime_assert(!mix_radix[n_digit0] && !mix_radix[n_digit1] && !mix_radix[n_digit2] && !mix_radix[n_zero_base]); // all zeroes
			// incrementing the maximum value fails and wraps around to zero

			b_increment = ++ mix_radix;
			runtime_assert(b_increment);
			runtime_assert(mix_radix[n_digit0] == 1 % radix[n_digit0] && mix_radix[n_digit1] == (1 / radix[n_digit0]) % radix[n_digit1] &&
				mix_radix[n_digit2] == (1 / (radix[n_digit0] * radix[n_digit1])) % radix[n_digit2] && !mix_radix[n_zero_base]); // one
			bool b_decrement = -- mix_radix;
			runtime_assert(b_decrement);
			runtime_assert(!mix_radix[n_digit0] && !mix_radix[n_digit1] && !mix_radix[n_digit2] && !mix_radix[n_zero_base]); // all zeroes
			// one step up and one step back down

			b_decrement = -- mix_radix;
			runtime_assert(!b_decrement);
			runtime_assert(mix_radix.n_Value() == mix_radix.n_Max_Value());
			runtime_assert(mix_radix[n_zero_base] == 0); // should always remain 0
			runtime_assert(mix_radix[n_digit0] == radix[n_digit0] - 1 &&
				mix_radix[n_digit1] == radix[n_digit1] - 1 &&
				mix_radix[n_digit2] == radix[n_digit2] - 1);
			// decrementing zero fails and wraps around to the maximum value

			if(!mix_radix.n_Max_Value())
				return;
			// the counter has a single value only; the modulo below would divide by zero

			for(int i = 0; i < 1000; ++ i) {
				int n = int(rand() % mix_radix.n_Max_Value()); // in [0, max - 1], never the maximum itself
				mix_radix.SetValue(n);
				runtime_assert(mix_radix[n_zero_base] == 0); // should always remain 0
				runtime_assert(mix_radix[n_digit0] == n % radix[n_digit0]);
				runtime_assert(mix_radix[n_digit1] == (n / radix[n_digit0]) % radix[n_digit1]);
				runtime_assert(mix_radix[n_digit2] == (n / (radix[n_digit0] * radix[n_digit1])) % radix[n_digit2]);
				bool b_increment = ++ mix_radix;
				runtime_assert(b_increment == (n < mix_radix.n_Max_Value()) && mix_radix.n_Value() == (n + 1) % (mix_radix.n_Max_Value() + 1));
				bool b_decrement = -- mix_radix;
				runtime_assert(b_decrement && mix_radix.n_Value() == n);
				b_decrement = -- mix_radix;
				runtime_assert(b_decrement == (n > 0) && mix_radix.n_Value() == (n + mix_radix.n_Max_Value()) % (mix_radix.n_Max_Value() + 1));
			}
			// set random values, step up, back down and once more down
		}
	}
} run_tests;

#endif // 0

/*
 *								=== ~Unit tests ===
 */

/*
 *								=== global ===
 */

#if 0

/**
 *	@brief brute-force consistency test of CSeekablePermutation and CPermutation
 *
 *	For each set size in [max(1, n_min_set_size), min(21, n_max_set_size)) and for
 *	each subset size from 1 up to the set size, this checks that:
 *		- seeking CSeekablePermutation to an index gives the same permutation as
 *		  calling Next() that many times,
 *		- CPermutation initialized from a seeked permutation and advanced once gives
 *		  the same permutation as a fresh CPermutation advanced index times,
 *		- CPermutation and CSeekablePermutation enumerate the same sequence and
 *		  generate as many permutations as n_Permutation_Num() reports.
 *
 *	@param[in] n_min_set_size is the smallest set size to test (clamped to at least 1)
 *	@param[in] n_max_set_size is the upper bound of set size (exclusive, clamped to 21)
 *	@param[in] b_show_results is result printing flag (prints each generated permutation)
 *	@param[in] b_show_progress is progress printing flag (prints the tested sizes)
 *
 *	@return Returns true if all the tests passed, otherwise returns false.
 *
 *	@note The run time grows steeply with the set size.
 *	@note NOTE(review): the summary printf() only works if PRIu64 expands to a complete
 *		conversion specification including the '%' sign (the standard <cinttypes>
 *		macro does not contain it) - confirm against the definition used in this project.
 */
bool Permutation_Test(int n_min_set_size = 1, int n_max_set_size = 15,
	bool b_show_results = false, bool b_show_progress = true) // takes a long time
{
	bool b_failure = false;
	for(int n_set_size = max(1, n_min_set_size); n_set_size < min(21, n_max_set_size); ++ n_set_size) {
		for(int n_subset_size = 1; n_subset_size <= n_set_size; ++ n_subset_size) {
			if(b_show_progress) {
				if(b_show_results)
					printf("\nset size: %d, subset size: %d\n", n_set_size, n_subset_size);
				else
					printf("set size: %d, subset size: %d\n", n_set_size, n_subset_size);
			}

#if 1
			{
				CSeekablePermutation perm(n_set_size, n_subset_size);
				for(uint64_t n_index = 0, n = perm.n_Permutation_Num(); n_index < n; ++ n_index) {
					perm.Seek(n_index);
					// jump directly to the permutation

					CSeekablePermutation perm2(n_set_size, n_subset_size);
					for(uint64_t i = 0; i < n_index; ++ i) {
						if(!perm2.Next()) {
							fprintf(stderr, "error: failed to seek after blah\n");
							b_failure = true;
						}
					}
					// walk to the same permutation one step at a time

					if(memcmp(perm.p_Index_List(), perm2.p_Index_List(), n_subset_size * sizeof(size_t))) {
						fprintf(stderr, "error: seek gives different results\n");
						b_failure = true;
					}
				}
			}
			// test seeking on CSeekablePermutation
#endif

#if 1
			{
				CSeekablePermutation perm(n_set_size, n_subset_size);
				CPermutation perm_ns(n_set_size, n_subset_size);
				for(uint64_t n_index = 1, n = perm.n_Permutation_Num(); n_index < n; ++ n_index) {
					perm.Seek(n_index - 1);
					perm_ns.Set_Permutation(perm.p_Index_List());
					if(!perm_ns.Next()) {
						fprintf(stderr, "error: failed to seek after blaha\n");
						b_failure = true;
					}
					// seek and next() to make sure it recovered correctly

					CPermutation perm2(n_set_size, n_subset_size);
					for(uint64_t i = 0; i < n_index; ++ i) {
						if(!perm2.Next()) {
							fprintf(stderr, "error: failed to seek after blahb\n");
							b_failure = true;
						}
					}
					// walk to the same permutation one step at a time

					if(memcmp(perm_ns.p_Index_List(), perm2.p_Index_List(), n_subset_size * sizeof(size_t))) {
						fprintf(stderr, "error: seek gives different results\n");
						b_failure = true;
					}
				}
			}
			// test aided seeking on CPermutation
#endif

#if 1
			{
				CSeekablePermutation perm_old(n_set_size, n_subset_size); // old
				CPermutation perm(n_set_size, n_subset_size); // new
				size_t n_generated = 0;
				do {
					std::string s_perm, s_perm_old;
					for(int i = 0, n = n_subset_size; i < n; ++ i) {
						char p_s_temp[64];
						sprintf(p_s_temp, (i)? ", %d" : "perm: %d", int(perm.p_Index_List()[n_subset_size - 1 - i]));
						s_perm += p_s_temp;
						sprintf(p_s_temp, (i)? ", %d" : "perm: %d", int(perm_old.p_Index_List()[n_subset_size - 1 - i]));
						s_perm_old += p_s_temp;
					}
					// the elements are compared as size_t above; cast to int to match
					// the "%d" format (the values are less than the set size, so they fit)

					/*std::string s_perm2;
					for(int i = 0, n = perm.n_Set_Size(); i < n; ++ i) {
						char p_s_temp[64];
						sprintf(p_s_temp, (i)? ", %d" : "perm: %d", perm.p_Index_List()[n - 1 - i -
							(perm.n_Set_Size() - n_subset_size)]);
						s_perm2 += p_s_temp;
					}*/ // debug

					bool b_correct = !memcmp(perm.p_Index_List(),
						perm_old.p_Index_List(), n_subset_size * sizeof(size_t));
					if(!b_correct)
						b_failure = true;
					// compare the results

					if(b_show_results) {
						printf("%s (%s): %s\n", s_perm.c_str(), s_perm_old.c_str(),
							(b_correct)? "correct" : "!!! INCORRECT !!!");
					}

					++ n_generated;
				} while(perm.Next() && perm_old.Next());
				if(n_generated != perm.n_Permutation_Num() || n_generated != perm_old.n_Permutation_Num())
					b_failure = true;
				// both must generate exactly as many permutations as they report

				if(b_show_results) {
					printf("generated " PRIu64 " permutations (expected " PRIu64 ")\n",
						uint64_t(n_generated), perm.n_Permutation_Num());
				}
			}
			// test the results of permutation against the new permutation
#endif
		}
	}
	if(b_failure)
		fprintf(stderr, "error: some error(s) occured\n");
	else
		printf("\nok. all test passed\n\n");

	return !b_failure;
}

#endif // 0

/*
 *								=== ~global ===
 */

/*
 *								=== CCombination ===
 */

/**
 *	@brief constructor; starts at the first combination (all zeros)
 *
 *	@param[in] n_set_size is size of the set to choose from
 *	@param[in] n_combination_size is number of the items to choose
 *
 *	@note Choosing from an empty set is only allowed if nothing is chosen
 *		(this is checked by an assertion only).
 */
CCombination::CCombination(size_t n_set_size, size_t n_combination_size) // throw(std::bad_alloc)
	:m_n_size(n_set_size),
	m_combo(n_combination_size, 0)
{
	_ASSERTE(!n_combination_size || n_set_size > 0);
	// can't choose something from nothing; choosing nothing
	// from something (or from nothing) is legal, though
}

uint64_t CCombination::n_Combination_Num() const
{
	uint64_t n_count = 1;
	for(size_t i = 0, n = m_combo.size(); i < n; ++ i) {
		if(m_n_size && n_count > UINT64_MAX / m_n_size ||
		   (n_count == UINT64_MAX / m_n_size && (UINT64_MAX % m_n_size)))
			return 0;
		// make sure it won't overflow
		n_count *= m_n_size;
	}
	return n_count; // may overflow
}

/**
 *	@brief rewinds to the first combination (all positions set to zero)
 */
void CCombination::First()
{
	for(size_t n_pos = 0, n_pos_num = m_combo.size(); n_pos < n_pos_num; ++ n_pos)
		m_combo[n_pos] = 0;
	// zero all the positions
}

bool CCombination::Next()
{
	const size_t n_size = m_n_size;
	for(size_t i = 0, n = m_combo.size(); i < n; ++ i) {
		_ASSERTE(m_combo[i] < n_size);
		if(m_combo[i] + 1 < n_size) {
			++ m_combo[i];
			return true;
		} else
			m_combo[i] = 0;
	}
	return false; // rewound back to the zeros, no more combinations
}

/*
 *								=== ~CCombination ===
 */

/*
 *								=== CMixedRadixInteger ===
 */

/**
 *	@brief constructor; copies the list of radices and sets all the digits to zero
 *
 *	@param[in] p_radix_list is list of radices, one per digit (n_radix_num elements)
 *	@param[in] n_radix_num is number of the digits
 */
CMixedRadixInteger::CMixedRadixInteger(const size_t *p_radix_list, size_t n_radix_num) // throws(std::bad_alloc)
	:m_comb(n_radix_num, 0)
{
	const size_t *p_radix_end = p_radix_list + n_radix_num;
	m_base.assign(p_radix_list, p_radix_end);
	// keep a copy of the radices
}

/**
 *	@brief calculates the greatest value the counter can represent
 *
 *	That is the product of all the nonzero radices, minus one. The digits
 *	with zero radix are skipped.
 *
 *	@param[out] r_n_max_value is filled with the maximum value
 *		(only written on success)
 *
 *	@return Returns true on success, false if the product of the radices
 *		does not fit in 64 bits.
 */
bool CMixedRadixInteger::Get_Max_Value(uint64_t &r_n_max_value) const
{
	const size_t n_digit_num = m_base.size();

	uint64_t n_product = 1;
	for(size_t n_digit = 0; n_digit < n_digit_num; ++ n_digit) {
		const size_t n_radix = m_base[n_digit];
		if(n_radix) {
			if(n_product > UINT64_MAX / n_radix)
				return false;
			// make sure it won't overflow

#ifdef _DEBUG
			const uint64_t n_product_before = n_product;
#endif // _DEBUG

			n_product *= n_radix;

#ifdef _DEBUG
			_ASSERTE(n_product_before == n_product / n_radix);
#endif // _DEBUG
		}
		// the digits with zero radix do not contribute
	}
	// calculate the number of representable values

	r_n_max_value = n_product - 1;
	return true;
}

uint64_t CMixedRadixInteger::n_Value() const
{
	uint64_t n_multiplier = 1;
	uint64_t n_value = 0;

	if(!m_base.empty()) {
		for(size_t i = 0, n = m_base.size();; ++ i) {
			size_t n_comb_i = m_comb[i];
			if(n_comb_i && n_multiplier > UINT64_MAX / n_comb_i /*||
			   (n_multiplier == UINT64_MAX / n_comb_i && (UINT64_MAX % i))*/) // this would break the test
				return 0;
			// makes sure it wouldn't overflow

			uint64_t n_digit = n_multiplier * n_comb_i;
			// calculate the value of the single digit

			if(n_value > UINT64_MAX - n_digit)
				return 0;
			// makes sure it wouldn't overflow

			n_value += n_digit;
			// sum up values of all the digits

			if(i + 1 == n)
				break;

			{
#ifdef _DEBUG
				uint64_t n_prev_multiplier = n_multiplier;
#endif // _DEBUG

				size_t n_base_i = m_base[i];
				if(!n_base_i)
					continue;

				if(n_multiplier > UINT64_MAX / n_base_i /*||
				   (n_multiplier == UINT64_MAX / n_base_i && (UINT64_MAX % i))*/) // this would break the test
					return 0;
				// make sure it won't overflow

				n_multiplier *= n_base_i;

#ifdef _DEBUG
				_ASSERTE(n_prev_multiplier == n_multiplier / n_base_i);
#endif // _DEBUG
			}
			// increase multiplier for the next digit
		}
	}
	// calculate value

	return n_value;
}

/**
 *	@brief sets the digits of the counter to represent a given value
 *
 *	@param[in] n_value is the value to set (asserted to be at most n_Max_Value())
 *
 *	@return Returns true on success, false if the value is greater than
 *		the counter can represent (the digits then hold the low part of it).
 */
bool CMixedRadixInteger::SetValue(uint64_t n_value)
{
	_ASSERTE(n_value <= n_Max_Value());

	uint64_t n_remaining = n_value;
	const size_t n_digit_num = m_base.size();
	for(size_t n_digit = 0; n_digit < n_digit_num; ++ n_digit) {
		const size_t n_radix = m_base[n_digit];
		if(!n_radix) {
			m_comb[n_digit] = 0;
			continue;
		}
		// the digits with zero radix are always zero

		m_comb[n_digit] = n_remaining % n_radix;
		n_remaining /= n_radix;
		// peel off the digits, starting with the least significant one
	}

	return !n_remaining;
	// anything left over means the value was too large to represent
}

/**
 *	@brief increments the counter by one
 *
 *	@return Returns true on success, false if the counter overflowed
 *		(all the digits are then zero).
 *
 *	@note The digits with zero radix are skipped and never change.
 */
bool CMixedRadixInteger::Increment()
{
	const size_t n_digit_num = m_base.size();
	for(size_t n_digit = 0; n_digit < n_digit_num; ++ n_digit) {
		const size_t n_radix = m_base[n_digit];
		if(n_radix) {
			_ASSERTE(m_comb[n_digit] < n_radix);

			if(++ m_comb[n_digit] != n_radix)
				return true;
			// the digit did not overflow, there is no carry and we are done

			m_comb[n_digit] = 0;
			// the digit wrapped around, carry to the next one
		}
	}

	return false;
	// the carry fell off the most significant digit
}

/**
 *	@brief decrements the counter by one
 *
 *	@return Returns true on success, false if the counter underflowed (all
 *		the digits with nonzero radix are then set to their maximum values).
 *
 *	@note The digits with zero radix are skipped and never change.
 */
bool CMixedRadixInteger::Decrement()
{
	const size_t n_digit_num = m_base.size();
	for(size_t n_digit = 0; n_digit < n_digit_num; ++ n_digit) {
		const size_t n_radix = m_base[n_digit];
		if(n_radix) {
			_ASSERTE(m_comb[n_digit] < n_radix);

			if(m_comb[n_digit]) {
				-- m_comb[n_digit];
				return true;
			}
			// the digit is nonzero, there is no borrow and we are done

			m_comb[n_digit] = n_radix - 1;
			// the digit wrapped around, borrow from the next one
		}
	}

	return false;
	// the borrow fell off the most significant digit
}

/*
 *								=== ~CMixedRadixInteger ===
 */

/*
 *								=== CPermutation ===
 */

/**
 *	@brief constructor; starts at the first permutation
 *
 *	@param[in] n_set_size is size of the set to choose from (asserted to be less than 22)
 *	@param[in] n_permutation_size is number of the items to choose
 *		(asserted to be at most n_set_size)
 *
 *	@note Internally, a full permutation of the whole set is maintained and only
 *		n_permutation_size of its elements are exposed.
 */
CPermutation::CPermutation(size_t n_set_size, size_t n_permutation_size) // throws(std::bad_alloc)
	:m_n_size(n_permutation_size), m_n_off(n_set_size - n_permutation_size),
	m_perm(n_set_size)
{
	// note that m_n_off is calculated from the arguments rather than from m_n_size, so
	// that it does not depend on the order in which the members are declared in the class

	_ASSERTE(n_set_size < 22); // 22! > UINT64_MAX
	// note that the permutation actually simulates all permutations (n of n) but transparently
	// shows only (m of n). this provides fast solutions but severely limits usability.
	// we need to have an alternative algorithm for "narrow" permutations, such as CPermutation(50, 3)
	// which only has 117600 combinations, but can't be enumerated using this class

	_ASSERTE(n_permutation_size <= n_set_size);
#ifdef __PERMUTATION_LSB_FIRST
	for(size_t i = 0; i < n_set_size; ++ i)
		m_perm[i] = n_set_size - 1 - i;
#else // __PERMUTATION_LSB_FIRST
	for(size_t i = 0; i < n_set_size; ++ i)
		m_perm[i] = i;
#endif // __PERMUTATION_LSB_FIRST
	// generate the first permutation

	m_n_skip_ratio = 1;
	for(size_t i = 2, n = n_set_size - n_permutation_size + 1; i < n; ++ i) {
#ifdef _DEBUG
		uint64_t n_old_skip_ratio = m_n_skip_ratio;
#endif // _DEBUG

		_ASSERTE(i && m_n_skip_ratio < UINT64_MAX / i /*||
			(m_n_skip_ratio == UINT64_MAX / i && !(UINT64_MAX % i))*/); // this would break the test
		m_n_skip_ratio *= i;

#ifdef _DEBUG
		_ASSERTE(m_n_skip_ratio / i == n_old_skip_ratio);
#endif // _DEBUG
	}
	// calculate skip ratio, which equals (n_set_size - n_permutation_size)!
	// (in case n_permutation_size is less than n_set_size - 1, Next() must
	// be repeated m_n_skip_ratio times in order to produce correct results)
}

/**
 *	@brief sets the current permutation
 *
 *	The given indices become the visible part of the permutation; the hidden
 *	part is filled with the remaining elements of the set in ascending order.
 *
 *	@param[in] p_index_list is list of the indices (as many elements as the
 *		permutation size); each must be less than the set size and must not repeat
 *
 *	@return Returns true on success, false on failure (an index is out of range
 *		or repeats, or there is not enough memory).
 *
 *	@note On failure, the current permutation is left unchanged.
 */
bool CPermutation::Set_Permutation(const size_t *p_index_list)
{
	try {
		const size_t n_permutation_size = m_n_size;
		const size_t n_set_size = m_perm.size();
		const size_t n_off = m_n_off;

		std::set<size_t> remaining_index_set;
		for(size_t i = 0; i < n_set_size; ++ i)
			remaining_index_set.insert(i);
		// create a set of remaining indices

		for(size_t i = 0; i < n_permutation_size; ++ i) {
			if(!remaining_index_set.erase(p_index_list[i]))
				return false; // the index is out of range or was there twice - fail
		}
		// validate the whole list before modifying anything,
		// so that a failure cannot leave a half-written permutation

		_ASSERTE(remaining_index_set.size() == n_off); // n_set_size - n_permutation_size

		for(size_t i = 0; i < n_permutation_size; ++ i) {
#ifdef __PERMUTATION_LSB_FIRST
			m_perm[n_off + i] = p_index_list[i];
#else // __PERMUTATION_LSB_FIRST
			m_perm[i] = p_index_list[i];
#endif // __PERMUTATION_LSB_FIRST
		}
		// copy indices

		std::set<size_t>::const_iterator p_set_it = remaining_index_set.begin();
		for(size_t i = 0; i < n_off; ++ i, ++ p_set_it) {
#ifdef __PERMUTATION_LSB_FIRST
			m_perm[/*n_off - 1 - */i] = *p_set_it;
#else // __PERMUTATION_LSB_FIRST
			m_perm[/*n_off - 1 - */i + n_permutation_size] = *p_set_it;
#endif // __PERMUTATION_LSB_FIRST
			// note that using "i" instead of "n_off - 1 - i" gives different ordering of
			// permutations, which doesn't really matter because it's only in scope
			// of m_n_skip_ratio
		}
		// pad indices with the remaining set items

		return true;
	} catch(std::bad_alloc&) {
		return false;
	}
}

bool CPermutation::Get_Permutation_Num(uint64_t &r_n_permutation_num) const
{
	uint64_t n_permutation_num = 1;
	for(size_t i = m_perm.size() - m_n_size + 1,
	   n_set_size = m_perm.size(); i <= n_set_size; ++ i) {
#ifdef _DEBUG
		uint64_t n_prev_perm_num = n_permutation_num;
#endif // _DEBUG

		if(i && n_permutation_num > UINT64_MAX / i /*||
		   (n_permutation_num == UINT64_MAX / i && (UINT64_MAX % i))*/) // this would break the test
			return false;
		// make sure it won't overflow

		n_permutation_num *= i;

#ifdef _DEBUG
		_ASSERTE(n_prev_perm_num == n_permutation_num / i);
#endif // _DEBUG
	}
	// calculate n! / (n - k)!

	r_n_permutation_num = n_permutation_num;
	// write the result

	return true;
}

/**
 *	@brief rewinds to the first permutation
 *
 *	With __PERMUTATION_LSB_FIRST defined, the elements are stored in
 *	descending order, otherwise they are stored in ascending order.
 */
void CPermutation::First()
{
	const size_t n_element_num = m_perm.size();
	for(size_t n_pos = 0; n_pos < n_element_num; ++ n_pos) {
#ifdef __PERMUTATION_LSB_FIRST
		m_perm[n_pos] = n_element_num - 1 - n_pos;
#else // __PERMUTATION_LSB_FIRST
		m_perm[n_pos] = n_pos;
#endif // __PERMUTATION_LSB_FIRST
	}
	// write the whole set in its initial order
}

/**
 *	@brief advances to the next permutation
 *
 *	One call performs m_n_skip_ratio elementary "next lexicographic permutation"
 *	steps over the whole index array (mirrored if __PERMUTATION_LSB_FIRST is defined).
 *
 *	@return Returns true if all the steps succeeded, false if a step found
 *		no next permutation (this includes empty and single-element sets).
 */
bool CPermutation::Next()
{
	const size_t n_set_size = m_perm.size();

	for(uint64_t n_pass = 0, n_skip = m_n_skip_ratio; n_pass < n_skip; ++ n_pass) {
#ifdef __PERMUTATION_LSB_FIRST
		size_t k = 0;
		for(size_t i = 1; i < n_set_size; ++ i) {
			if(m_perm[i] < m_perm[i - 1]) {
				k = i;
				break;
			}
		}
		if(!k)
			return false; // no next permutation
		// (mirrored order) find the smallest index k such that a[k] < a[k - 1].
		// if no such index exists, the permutation is the last permutation.

		size_t l = 0;
		while(!(m_perm[k] < m_perm[l])) {
			_ASSERTE(l + 1 < k); // l can always be k - 1
			++ l;
		}
		_ASSERTE(k > l);
		// find the smallest index l such that a[k] < a[l].
		// since k - 1 is such an index, l is well defined and satisfies l < k.

		std::swap(m_perm[l], m_perm[k]);
		// swap a[k] with a[l]

		for(size_t i = 0, n = k - 1, m = k / 2; i < m; ++ i)
			std::swap(m_perm[i], m_perm[n - i]);
		// reverse the sequence from a[0] up to and including a[k - 1]
#else // __PERMUTATION_LSB_FIRST
		size_t k = n_set_size;
		for(size_t i = n_set_size; i > 1; -- i) {
			if(m_perm[i - 2] < m_perm[i - 1]) {
				k = i - 2;
				break;
			}
		}
		if(k == n_set_size)
			return false; // no next permutation
		// find the largest index k such that a[k] < a[k + 1].
		// if no such index exists, the permutation is the last permutation.
		// the loop counts from n_set_size (not n_set_size - 1) so that
		// an empty set does not wrap around and read out of bounds.

		size_t l = n_set_size - 1;
		while(!(m_perm[k] < m_perm[l])) {
			_ASSERTE(l > k + 1); // l can always be k + 1
			-- l;
		}
		_ASSERTE(k < l);
		// find the largest index l such that a[k] < a[l].
		// since k + 1 is such an index, l is well defined and satisfies k < l.

		std::swap(m_perm[l], m_perm[k]);
		// swap a[k] with a[l]

		for(size_t i = k + 1, n = n_set_size + k, m = (n_set_size + k + 1) / 2; i < m; ++ i)
			std::swap(m_perm[i], m_perm[n - i]);
		// reverse the sequence from a[k + 1] up to and including the final element
#endif // __PERMUTATION_LSB_FIRST
	}

	return true;
}

/*
 *								=== ~CPermutation ===
 */

/*
 *								=== CSeekablePermutation ===
 */

/**
 *	@brief constructor; allocates the storage and generates the first permutation
 *
 *	@param[in] n_set_size is the number of items in the set
 *	@param[in] n_permutation_size is the number of items in a permutation
 *		(must not exceed n_set_size)
 *
 *	@note The container resizes may throw std::bad_alloc.
 */
CSeekablePermutation::CSeekablePermutation(size_t n_set_size,
	size_t n_permutation_size) // throws(std::bad_alloc)
	:m_n_size(n_permutation_size), m_n_off(n_set_size - n_permutation_size)
{
	_ASSERTE(n_set_size >= n_permutation_size);

	m_count.resize(n_permutation_size, 0);
	// one counter per permutation item, all starting at zero

	m_perm.resize(n_set_size); // !!
	// the index array always covers the whole set, not just the permutation

	FinishPermutation();
	// turn the zeroed counters into the first permutation
}

bool CSeekablePermutation::Get_Permutation_Num(uint64_t &r_n_permutation_num) const
{
	const size_t n_set_size = m_perm.size();
	const size_t n_permutation_size = m_n_size;

	//_ASSERTE(n_set_size < 22); // 22! > UINT64_MAX // does not belong here, CSeekablePermutation(22, 1) has only 22 combinations
	uint64_t n_permutation_num = 1;
	for(size_t i = n_set_size - n_permutation_size + 1; i <= n_set_size; ++ i) {
#ifdef _DEBUG
		uint64_t n_prev_perm_num = n_permutation_num;
#endif // _DEBUG

		if(i && n_permutation_num > UINT64_MAX / i /*||
		   (n_permutation_num == UINT64_MAX / i && (UINT64_MAX % i))*/) // this would break the test
			return false;
		// make sure it won't overflow

		n_permutation_num *= i;

#ifdef _DEBUG
		_ASSERTE(n_prev_perm_num == n_permutation_num / i);
#endif // _DEBUG
	}
	// calculate n! / (n - k)!

	r_n_permutation_num = n_permutation_num;
	// write the result

	return true;
}

/**
 *	@brief sets the permutation with the given zero-based index
 *
 *	The index is decomposed into mixed-radix digits (bases n - k + 1, n - k + 2, ..., n,
 *	least significant first), which are stored in m_count and then converted
 *	to the permutation by FinishPermutation().
 *
 *	@param[in] n_permutation_index is zero-based index of the permutation
 *
 *	@return Returns true on success, false if the index does not fit in the counters
 *		(in which case neither the counters nor the permutation are modified).
 */
bool CSeekablePermutation::Seek(uint64_t n_permutation_index)
{
	const size_t n_set_size = m_perm.size();
	const size_t n_permutation_size = m_n_size;
	const size_t n_first_base = n_set_size - n_permutation_size + 1;

	//_ASSERTE(n_set_size < 22); // 22! > UINT64_MAX // not here!
	_ASSERTE(n_permutation_index < n_Permutation_Num());

	{
		uint64_t n_remainder = n_permutation_index;
		for(size_t i = 0, n_base = n_first_base;
		   i < n_permutation_size && n_remainder; ++ i, ++ n_base)
			n_remainder /= n_base;
		if(n_remainder)
			return false; // there would be carry (the index is too large)
	}
	// dry run first, so that a failed seek does not leave the counters
	// overwritten and out of sync with the permutation

	for(size_t i = 0, n_base = n_first_base;
	   i < n_permutation_size; ++ i, ++ n_base) {
		m_count[i] = n_permutation_index % n_base;
		n_permutation_index /= n_base;
	}
	_ASSERTE(!n_permutation_index); // checked above
	// set counter to the correct position

	FinishPermutation();
	// convert the counters to the permutation

	return true;
}

/**
 *	@brief resets to the first permutation (all the counters are zero)
 */
void CSeekablePermutation::First()
{
	for(size_t n_digit = 0, n_digit_num = m_count.size(); n_digit < n_digit_num; ++ n_digit)
		m_count[n_digit] = 0;
	// rewind all the counters

	FinishPermutation();
	// regenerate the permutation from the zeroed counters
}

bool CSeekablePermutation::Next()
{
	const size_t n_set_size = m_perm.size();
	const size_t n_permutation_size = m_n_size;

	bool b_carry = true; // causes incrementation
	for(size_t i = 0, n_base = n_set_size - n_permutation_size + 1;
	   i < n_permutation_size; ++ i, ++ n_base) {
		_ASSERTE(m_count[i] < n_base);

		if(b_carry) {
			if(++ m_count[i] == n_base)
				m_count[i] = 0;
			else {
				b_carry = false;
				break; // no more carry
			}
		}
		// carry
	}
	// increment counter

	FinishPermutation();
	// finish the permutation even if b_carry (it generates the first permutation)

	if(b_carry)
		return false;
	// counter owerflown, there will be no next permutations
	// (note the next call to Next() will return a valid permutation again)

	return true;
}

/**
 *	@brief converts the counters in m_count to the permutation in m_perm
 *
 *	The index array is reset to its initial sequence and then, for each
 *	permutation slot (most significant counter first), the item lying
 *	m_count[i] places away from the slot is moved into the slot, while
 *	the items in between shift by one place and keep their relative order.
 */
void CSeekablePermutation::FinishPermutation()
{
	const size_t n_set_size = m_perm.size();
	if(!n_set_size)
		return; // nothing to generate for an empty set
	const size_t n_permutation_size = m_n_size;

#ifdef __PERMUTATION_LSB_FIRST
	for(size_t i = 0; i < n_set_size; ++ i)
		m_perm[i] = n_set_size - 1 - i;
	// generate a list of indices (descending)

	for(size_t n_step = 0; n_step < n_permutation_size; ++ n_step) {
		const size_t n_slot = n_set_size - 1 - n_step; // slots are filled from the back
		const size_t n_shift = m_count[n_permutation_size - 1 - n_step];
		if(!n_shift)
			continue; // the item is already in place

		const size_t n_picked = m_perm[n_slot - n_shift];
		for(size_t n_dest = n_slot - n_shift; n_dest < n_slot; ++ n_dest)
			m_perm[n_dest] = m_perm[n_dest + 1];
		m_perm[n_slot] = n_picked;
		// move the picked item to the slot, shift the items in between down by one
	}
	// generate the final permutation
#else // __PERMUTATION_LSB_FIRST
	for(size_t i = 0; i < n_set_size; ++ i)
		m_perm[i] = i;
	// generate a list of indices (ascending)

	for(size_t n_slot = 0; n_slot < n_permutation_size; ++ n_slot) {
		const size_t n_shift = m_count[n_permutation_size - 1 - n_slot];
		if(!n_shift)
			continue; // the item is already in place

		const size_t n_picked = m_perm[n_slot + n_shift];
		for(size_t n_dest = n_slot + n_shift; n_dest > n_slot; -- n_dest)
			m_perm[n_dest] = m_perm[n_dest - 1];
		m_perm[n_slot] = n_picked;
		// move the picked item to the slot, shift the items in between up by one
	}
	// generate the final permutation
#endif // __PERMUTATION_LSB_FIRST
}

/*
 *								=== ~CSeekablePermutation ===
 */

/*
 *								=== CUnorderedPartition ===
 */

// bin 0 is choosing out of n_block_size numbers
// bin 1 is choosing out of n_block_size - bin 0 numbers
// bin n is n_block_size - sum(bin 0 ... bin n - 1) (only one possibility)
// combinations add up

// for n_block_size = 10, n_bins = 1
// bin 0 is 10 (1 combination)

// for n_block_size = 10, n_bins = 2
// bin 0 is 0 - 10 (11 combinations), bin 1 is the rest

// for n_block_size = 10, n_bins = 3
// bin 0 is 0 - 10 (11 combinations), bins 1 and 2 contain the rest
// for bin 0 = 0, bins 1 and 2 have 11 combinations
// for bin 0 = 1, bins 1 and 2 have 10 combinations
// ...
// for bin 0 = 10, bins 1 and 2 have 1 combination
// that totals 11 + 10 + 9 + 8 + ... + 1 = 66 combinations

// for m-1 counters, values of histogram are i0, i1 - i0, i2 - i1, ... , im-1 - im-2, n - im-1
// if we want values of histograms in descending (non-increasing) order:
// i0 >= i1 - i0 >= i2 - i1 >= ...  >= im-1 - im-2 >= n - im-1
// 2 * i0 >= i1, therefore i0 >= i1 / 2
// 2 * i1 - i0 >= i2
// if we want values of histograms in ascending (non-decreasing) order:
// i0 <= i1 - i0 <= i2 - i1 <= ...  <= im-1 - im-2 <= n - im-1
// i1 >= 2 * i0, therefore i0 <= i1 / 2
// i2 >= 2 * i1 - i0
// i0 <= n - im-1
// for the first iteration:
//		i1 = 2 * i0
//		i2 = 2 * i1 - i0 = 4 * i0 - i0 = 3 * i0
//		i3 = 2 * i2 - i1 = 6 * i0 - 2 * i0 = 4 * i0
//		i4 = 2 * i3 - i2 = 8 * i0 - 3 * i0 = 5 * i0
//		i5 = 2 * i4 - i3 = 10 * i0 - 4 * i0 = 6 * i0
// so apparently ik = (k + 1) * i0 (k being the counter index, not the item count n), which also applies to i0 :)
// and i0 <= n - im-1, i0 <= n - m * i0, (m + 1) * i0 <= n, i0 <= n / (m + 1)
//															~~~~~~~~~~~~~~~~~

// for the last iteration:
// im-1 - im-2 <= n - im-1, im-1 <= (n - im-2) / 2
// im-3 - im-2 <= n - im-1, im-3 <= (n - im-2) / 2 + im-2, which is loose (not a real upper bound)

// incorrect:
// and the upper bound:
// i1 - i0 <= i2 - i1
// 2 * i1 - i0 <= i2
// 2 * i1 <= i2 + i0
// i1 <= (i2 + i0) / 2, i1 <= (2 * i0 + i0) / 2, i1 <= 3 / 2 * i0
// i0 <= (i1 + 0) / 2 (correct, see above)
// in <= (n + 2) * i0 / 2, or 
// ~~~~~~~~~~~~~~~~~~~~~~
// im-1 <= (m-1 + 2) * i0 / 2, im-1 <= (m + 1) * i0 / 2, im-1 <= (m + 1) * n / (m + 1) / 2, im-1 <= n / 2 (dubious)
// im-1 <= (im-2 + n) / 2 (whoa, not sure)

// 2 4 4 ~ 2, 6
// 3 3 4 ~ 3, 6

// worst cases seem to be powers of two, in 8 bins:
// 256 256 128 128 128 64 32 32
// 9   9   8   8   8   7  6  6 = 61 bits
// in case the bins are distributed in a "good" way, could even make it in 64 bits alone (2x speed)

/**
 *	@brief resets to the first partition
 *
 *	All the counters and all the bin values are zeroed,
 *	and then the last bin receives all m_n_item_num items.
 */
void CUnorderedPartition::First()
{
	std::fill(m_counters.begin(), m_counters.end(), 0);
	std::fill(m_bin_values.begin(), m_bin_values.end(), 0);
	if(!m_bin_values.empty())
		m_bin_values.back() = m_n_item_num;
	// back() on an empty container is undefined behavior; with no bins there is nothing to fill
}

/**
 *	@brief advances to the next partition
 *
 *	@return Returns the result of _b_Next() called with its default argument.
 */
bool CUnorderedPartition::Next()
{
	const bool b_have_next = _b_Next(); // this is inline
	return b_have_next;
}

/**
 *	@brief advances the counters to the next partition and updates the bin values
 *
 *	The counters (m_counters) hold running sums of the bins, so that bin 0 equals
 *	counter 0, bin i equals counter i minus counter i - 1 and the last bin equals
 *	m_n_item_num minus the last counter. Only states where the bin values are
 *	in non-decreasing order are generated.
 *
 *	@param[in] n_min_depth is index of the lowest counter that may be incremented
 *
 *	@return Returns true if the next partition was generated, false if there is none
 *		(there are fewer than two bins or the counters at n_min_depth and above
 *		are exhausted).
 *
 *	@note If the slow path fails, the counters have already been modified
 *		while the bin values have not been updated.
 */
inline bool CUnorderedPartition::_b_Next(size_t n_min_depth /*= 0*/)
{
	if(m_n_bin_num < 2)
		return false;
	// small tasks have no next combinations

	size_t *p_i = &m_counters[0];
	size_t *p_histogram = &m_bin_values[0];
	const size_t n_bins = m_n_bin_num, n_block_size = m_n_item_num;
	const size_t n_last = n_bins - 2;
	// local aliases; n_last is index of the last counter

	{
		const size_t n_prev = (n_last > 0)? p_i[n_last - 1] : 0; // can change to compile-time constant
		const size_t n_next = n_block_size;
		// get prev and next counter (the last counter has no successor,
		// the number of items acts as its upper neighbor)

		if(p_i[n_last] < n_next && p_i[n_last] + 1 - n_prev <= n_next - p_i[n_last] - 1) {
			// the first test keeps the unsigned subtraction on the right from wrapping
			// around when the last counter already equals the number of items (which
			// happens with zero items); a bogus partition would be accepted otherwise

			++ p_i[n_last]; // increment the last counter

			if(n_last > 0) {
				_ASSERTE(p_i[n_last] >= p_i[n_last - 1]);
				p_histogram[n_last] = p_i[n_last] - p_i[n_last - 1];
			} else
				p_histogram[0] = p_i[0];
			p_histogram[n_bins - 1] = n_block_size - p_i[n_last];
			// quickly update the histogram (only the last two bins changed)

#ifdef _DEBUG
			size_t n_sum = 0;
			for(size_t i = 0; i < n_bins; ++ i) {
				_ASSERTE(!i || p_histogram[i] >= p_histogram[i - 1]); // sorted order
				n_sum += p_histogram[i];
			}
			_ASSERTE(n_sum == n_block_size);
#endif // _DEBUG
			// make sure that the bins are sorted and they sum up to block size

			return true; // there is a next iter
		}
		// see if incrementing keeps the last two bins in order
	}
	// a simple case: the last counter is incremented

	if(n_min_depth >= n_last)
		return false; // does not handle short histogram (1 or 2 bins)
	size_t n_depth = n_last - 1;
	for(;;) {
		++ p_i[n_depth];
		// try to increment the counter

		for(size_t j = n_depth; j < n_last; ++ j) {
			p_i[j + 1] = 2 * p_i[j] - ((j)? p_i[j - 1] : 0); // i1 >= 2 * i0, i2 >= 2 * i1 - i0, ...
			// applies lower bound here
		}
		// set all the higher counters up (to their minimum)

		bool b_failed;
		{
			const size_t n_prev = (n_last > 0)? p_i[n_last - 1] : 0; // can change to compile-time constant
			const size_t n_next = n_block_size;
			// get prev and next counter

			_ASSERTE(p_i[n_last] >= n_prev); // this one will not underflow
			b_failed = p_i[n_last] > n_next || (p_i[n_last] - n_prev > n_next - p_i[n_last]);
		}
		// see if the last failed (only the last can, the previous ones are set so they don't)

		if(b_failed) { // not a valid combination, need to increment even lower counter
			if(n_depth == n_min_depth) // we already processed the last combination, end
				return false;
			-- n_depth; // go one lower
		} else
			break; // we successfully incremented
	}
	// increment a lower counter, or if it overflew, go down and increment something else
	// if all overflew, we're done

	{
		if(!n_depth) {
			p_histogram[0] = p_i[0];
			++ n_depth;
		}
		// bin 0 has no predecessor to subtract

		for(size_t i = n_depth; i < n_bins - 1; ++ i) {
			_ASSERTE(p_i[i] >= p_i[i - 1]);
			p_histogram[i] = p_i[i] - p_i[i - 1];
		}
		p_histogram[n_bins - 1] = n_block_size - p_i[n_last];
		// calculate the state of the histogram (only the bins from n_depth up changed)

#ifdef _DEBUG
		size_t n_sum = 0;
		for(size_t i = 0; i < n_bins; ++ i) {
			_ASSERTE(!i || p_histogram[i] >= p_histogram[i - 1]); // sorted order
			n_sum += p_histogram[i];
		}
		_ASSERTE(n_sum == n_block_size);
#endif // _DEBUG
		// make sure that the bins are sorted and they sum up to block size
	}
	// update the histogram

	return true;
}

/**
 *	@brief recursively counts the partitions of n_item_num items to n_bin_num bins
 *
 *	The result is 0 if there are fewer items than bins and 1 if there is a single
 *	bin or exactly one item per bin. Otherwise n_bin_num items are taken away and
 *	the counts for the remaining items in 1, 2, ... up to min(remaining, n_bin_num)
 *	bins are summed. This matches the recurrence for the number of partitions
 *	of an integer to exactly n_bin_num positive parts.
 *
 *	@param[in] n_item_num is number of items
 *	@param[in] n_bin_num is number of bins
 *
 *	@return Returns the number of partitions (the sum is not checked for overflow).
 */
uint64_t CUnorderedPartition::n_UP2_Size(size_t n_item_num, size_t n_bin_num)
{
	if(n_bin_num > n_item_num)
		return 0; // more bins than items
	if(n_bin_num == n_item_num || n_bin_num == 1)
		return 1; // a single way to do it

	const size_t n_remaining_num = n_item_num - n_bin_num;
	const size_t n_max_bin_num = std::min(n_remaining_num, n_bin_num);
	// if fewer items remain than there are bins, the larger bin counts would add zero

	uint64_t n_total = 0;
	size_t n_used_bin_num = 1;
	while(n_used_bin_num <= n_max_bin_num) {
		n_total += n_UP2_Size(n_remaining_num, n_used_bin_num);
		++ n_used_bin_num;
	}
	return n_total;
}

/**
 *	@brief sums n_UP2_Size() over 1, 2, ... up to n_bin_num bins
 *
 *	@param[in] n_item_num is number of items
 *	@param[in] n_bin_num is the maximum number of bins (must not exceed INT_MAX,
 *		the loop counter is a signed integer because of the OpenMP loop)
 *
 *	@return Returns the sum of the partial counts (not checked for overflow).
 */
uint64_t CUnorderedPartition::n_UP_Size(size_t n_item_num, size_t n_bin_num)
{
	_ASSERTE(n_bin_num <= INT_MAX);
	const int n_max_bin_num = int(n_bin_num);

	uint64_t n_total = 0;
	#pragma	omp parallel for schedule(dynamic, 1) reduction(+: n_total) // desperate attempt to arrive at the solution quickly
	for(int n_used_bin_num = 1; n_used_bin_num <= n_max_bin_num; ++ n_used_bin_num)
		n_total += n_UP2_Size(n_item_num, n_used_bin_num);
	// the partial counts are independent and may be evaluated in parallel

	return n_total;
}

/*
 *								=== ~CUnorderedPartition ===
 */
