#include "codeblock.h"
#include "transform.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "system.h"
#include "threads.h"

/// Storage strategy: a code-block is created by every codeblock_acquire()
/// call and destroyed again by codeblock_release().
// #define CONFIG_CODEBLOCK_TEMPORARY

/// Storage strategy: codeblock_create_pool() creates a single code-block and
/// codeblock_acquire() hands out the one referenced by worker->codeblock.
#define CONFIG_CODEBLOCK_PERSISTENT

/// Storage strategy: an array of transform->cb_pool code-blocks is created;
/// codeblock_process_pool() processes the code-blocks in groups.
// #define CONFIG_CODEBLOCK_PERSISTENT_POOL

/// When defined, codeblock_process() invokes transform->codeblock_callback
/// (if it is set) for every code-block that carries data.
#define CONFIG_CODEBLOCK_CALLBACK

/**
 * Fill in a code-block structure for the given transform.
 *
 * The position (tr) is reset to vec2_zero, cb_exp and n are copied from the
 * transform, and the four band buffers (LL, HL, LH, HH) are obtained from
 * imageptr_create() using transform->cb_size1.
 *
 * The results of imageptr_create() are not checked here; callers in this
 * file test the band pointers afterwards.
 */
static
void codeblock_init(
	struct codeblock_t *codeblock,
	const struct transform_t *transform
)
{
	assert( codeblock );
	assert( transform );

	// plain state taken over from the transform
	codeblock->tr = vec2_zero;
	codeblock->n = transform->n;
	codeblock->cb_exp = transform->cb_exp;

	// one buffer per band
	codeblock->llband = imageptr_create(transform->cb_size1);
	codeblock->hlband = imageptr_create(transform->cb_size1);
	codeblock->lhband = imageptr_create(transform->cb_size1);
	codeblock->hhband = imageptr_create(transform->cb_size1);
}

/**
 * Bind a code-block to the position `tr`.
 *
 * The position is always stored into codeblock->tr. When
 * CONFIG_CODEBLOCK_PARAMETERS is defined, the function additionally derives
 * from `transform` the value r, the code-block index and seq, and an
 * index/seq pair for each of the LL, HL, LH and HH bands. Without that macro
 * `transform` is not used.
 *
 * NOTE(review): the vec2_* and convert_tc_to_next_tb_* helpers are defined
 * outside this file; the formulas in the comments below are the original
 * author's description of what they compute.
 *
 * @param codeblock  code-block to update (must not be NULL)
 * @param transform  transform providing N, tc0, tc1 and cb_exp
 * @param tr         position assigned to the code-block
 */
static
void codeblock_setup(
	struct codeblock_t *codeblock,
	const struct transform_t *transform,
	const struct vec2_t tr
)
{
	assert( codeblock );

#ifdef CONFIG_CODEBLOCK_PARAMETERS
	assert( transform );

	// NOTE(review): presumably the level counted from the opposite end (N - n) -- confirm
	codeblock->r = transform->N - codeblock->n;

	// next resolution
	{
		// (floor_cb(2*tr) - floor_cb(tc0)) / cb_size2
		codeblock->index = vec2_ceil_div_pow2_1(vec2_rshift(
			vec2_sub(
				vec2_floor_codeblock(vec2_mul2(tr), transform->cb_exp),
				vec2_floor_codeblock(transform->tc0, transform->cb_exp)
			),
			transform->cb_exp));

		// (floor_cb(tc1-1) - floor_cb(tc0)) / cb_size2 + 1
		const struct vec2_t total = vec2_add(vec2_scalar(1), vec2_ceil_div_pow2_1(vec2_rshift(
			vec2_sub(
				vec2_floor_codeblock(vec2_sub(transform->tc1, vec2_scalar(1)), transform->cb_exp),
				vec2_floor_codeblock(transform->tc0, transform->cb_exp)
			),
			transform->cb_exp)));

		// row-major linearization of index, using total.x as the row length
		codeblock->seq = codeblock->index.y * total.x + codeblock->index.x;
	}

	// LL band
	{
		// NOTE: this could be probably encapsulated by 'vec2_floor_codeblock1'
		const struct vec2_t tc0_next_ll = convert_tc_to_next_tb_ll(transform->tc0);
		// NOTE: this is already/always on the codeblock coordinates
		const struct vec2_t tcc_next_ll = tr;
		const struct vec2_t tc1_next_ll = vec2_ceil_codeblock1(convert_tc_to_next_tb_ll(transform->tc1), transform->cb_exp);

		// index of this code-block within the band, relative to the converted tc0
		codeblock->llband_index = vec2_ceil_div_pow2v(
			vec2_sub(tcc_next_ll, tc0_next_ll),
			transform->cb_exp
		);

		// extent between the converted tc0 and tc1, in the same units as the index
		struct vec2_t lltotal = vec2_ceil_div_pow2v(
			vec2_sub(tc1_next_ll, tc0_next_ll),
			transform->cb_exp
		);

		// row-major linearization of the band index
		codeblock->llband_seq = codeblock->llband_index.y * lltotal.x + codeblock->llband_index.x;
	}

	// HL band (same scheme as the LL band, using the HL conversion)
	{
		const struct vec2_t tc0_next_hl = convert_tc_to_next_tb_hl(transform->tc0);
		const struct vec2_t tcc_next_hl = tr;
		const struct vec2_t tc1_next_hl = vec2_ceil_codeblock1(convert_tc_to_next_tb_hl(transform->tc1), transform->cb_exp);

		codeblock->hlband_index = vec2_ceil_div_pow2v(
			vec2_sub(tcc_next_hl, tc0_next_hl),
			transform->cb_exp
		);

		struct vec2_t hltotal = vec2_ceil_div_pow2v(
			vec2_sub(tc1_next_hl, tc0_next_hl),
			transform->cb_exp
		);

		codeblock->hlband_seq = codeblock->hlband_index.y * hltotal.x + codeblock->hlband_index.x;
	}

	// LH band (same scheme as the LL band, using the LH conversion)
	{
		const struct vec2_t tc0_next_lh = convert_tc_to_next_tb_lh(transform->tc0);
		const struct vec2_t tcc_next_lh = tr;
		const struct vec2_t tc1_next_lh = vec2_ceil_codeblock1(convert_tc_to_next_tb_lh(transform->tc1), transform->cb_exp);

		codeblock->lhband_index = vec2_ceil_div_pow2v(
			vec2_sub(tcc_next_lh, tc0_next_lh),
			transform->cb_exp
		);

		struct vec2_t lhtotal = vec2_ceil_div_pow2v(
			vec2_sub(tc1_next_lh, tc0_next_lh),
			transform->cb_exp
		);

		codeblock->lhband_seq = codeblock->lhband_index.y * lhtotal.x + codeblock->lhband_index.x;
	}

	// HH band (same scheme as the LL band, using the HH conversion)
	{
		const struct vec2_t tc0_next_hh = convert_tc_to_next_tb_hh(transform->tc0);
		const struct vec2_t tcc_next_hh = tr;
		const struct vec2_t tc1_next_hh = vec2_ceil_codeblock1(convert_tc_to_next_tb_hh(transform->tc1), transform->cb_exp);

		codeblock->hhband_index = vec2_ceil_div_pow2v(
			vec2_sub(tcc_next_hh, tc0_next_hh),
			transform->cb_exp
		);

		struct vec2_t hhtotal = vec2_ceil_div_pow2v(
			vec2_sub(tc1_next_hh, tc0_next_hh),
			transform->cb_exp
		);

		codeblock->hhband_seq = codeblock->hhband_index.y * hhtotal.x + codeblock->hhband_index.x;
	}
#else
	// the transform is only needed for the optional parameters above
	UNUSED(transform);
#endif

	// remember the position this code-block currently represents
	codeblock->tr = tr;
}

/**
 * Release the four band buffers owned by a code-block.
 *
 * The code-block structure itself is not freed; see codeblock_destroy().
 *
 * @param transform  transform whose cb_size1 is passed to imageptr_destroy()
 *                   (must not be NULL)
 * @param codeblock  code-block whose bands are released (must not be NULL)
 */
static
void codeblock_free(
	const struct transform_t *transform,
	struct codeblock_t *codeblock
)
{
	// transform is dereferenced below, so check it like the sibling functions do
	assert( transform );
	assert( codeblock );

	imageptr_destroy(codeblock->llband, transform->cb_size1);
	imageptr_destroy(codeblock->hlband, transform->cb_size1);
	imageptr_destroy(codeblock->lhband, transform->cb_size1);
	imageptr_destroy(codeblock->hhband, transform->cb_size1);
}

struct codeblock_t *codeblock_create(
	const struct transform_t *transform
)
{
	assert( transform );

	struct codeblock_t *codeblock = malloc(sizeof(struct codeblock_t));

	if( !codeblock )
		return NULL;

	codeblock_init(codeblock, transform);

	if( !codeblock->llband || !codeblock->hlband || !codeblock->lhband || !codeblock->hhband )
		return NULL;

	return codeblock;
}

/**
 * Create the code-block storage used by one worker, according to the
 * configured storage strategy.
 *
 * - CONFIG_CODEBLOCK_TEMPORARY: nothing is allocated; a non-NULL placeholder
 *   (VALID) is returned.
 * - CONFIG_CODEBLOCK_PERSISTENT: a single code-block is created.
 * - CONFIG_CODEBLOCK_PERSISTENT_POOL: an array of transform->cb_pool
 *   code-blocks is created.
 *
 * @param transform  transform the storage belongs to
 * @return the storage, or NULL on allocation failure. In the pool
 *         configuration a failure releases the array together with every
 *         band buffer created so far, so nothing is leaked.
 */
struct codeblock_t *codeblock_create_pool(
	const struct transform_t *transform
)
{
#ifdef CONFIG_CODEBLOCK_TEMPORARY
	UNUSED(transform);

	// return some pointer distinct from NULL
	return VALID;
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT
	return codeblock_create(transform);
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT_POOL
	assert( transform );
	assert( transform->cb_pool > 0 );

	struct codeblock_t *codeblock = malloc( sizeof *codeblock * transform->cb_pool );

	if( !codeblock )
		return NULL;

	for(int id = 0; id < transform->cb_pool; id++)
	{
		codeblock_init(codeblock + id, transform);

		if( !codeblock[id].llband || !codeblock[id].hlband || !codeblock[id].lhband || !codeblock[id].hhband )
		{
			// Roll back: entries 0..id have been initialized (entry id only
			// partially), so release every band that exists, then the array.
			for(int k = 0; k <= id; k++)
			{
				if( codeblock[k].llband )
					imageptr_destroy(codeblock[k].llband, transform->cb_size1);
				if( codeblock[k].hlband )
					imageptr_destroy(codeblock[k].hlband, transform->cb_size1);
				if( codeblock[k].lhband )
					imageptr_destroy(codeblock[k].lhband, transform->cb_size1);
				if( codeblock[k].hhband )
					imageptr_destroy(codeblock[k].hhband, transform->cb_size1);
			}

			free( codeblock );

			return NULL;
		}
	}

	return codeblock;
#endif
}

/**
 * Destroy a code-block obtained from codeblock_create().
 *
 * The band buffers are released first (codeblock_free), then the structure
 * itself is returned to the allocator.
 *
 * @param transform  transform forwarded to codeblock_free()
 * @param codeblock  code-block to destroy (must not be NULL)
 */
void codeblock_destroy(
	const struct transform_t *transform,
	struct codeblock_t *codeblock
)
{
	assert( codeblock );

	// bands first, while the structure holding their pointers is still alive
	codeblock_free(transform, codeblock);

	// then the structure itself
	free(codeblock);
}

/**
 * Release the storage returned by codeblock_create_pool().
 *
 * What is released depends on the configured storage strategy:
 * the VALID placeholder needs no cleanup, the persistent code-block is
 * destroyed as a single object, and the persistent pool releases the bands
 * of each of its transform->cb_pool entries before freeing the array.
 *
 * @param transform  transform the storage was created for
 * @param codeblock  storage returned by codeblock_create_pool()
 */
void codeblock_destroy_pool(
	const struct transform_t *transform,
	struct codeblock_t *codeblock
)
{
#ifdef CONFIG_CODEBLOCK_TEMPORARY
	// only the placeholder was handed out, there is nothing to free
	assert( VALID == codeblock );

	UNUSED(transform);
	UNUSED(codeblock);
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT
	UNUSED(transform);

	// a single code-block
	codeblock_destroy(transform, codeblock);
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT_POOL
	assert( transform );
	assert( transform->cb_pool > 0 );
	assert( codeblock );

	// release the bands of every pool entry ...
	int entry = 0;

	while( entry < transform->cb_pool )
	{
		codeblock_free(transform, &codeblock[entry]);
		entry++;
	}

	// ... and then the array that held them
	free(codeblock);
#endif
}

/**
 * Obtain a code-block bound to the position `tr`.
 *
 * - CONFIG_CODEBLOCK_TEMPORARY: a new code-block is created for every call;
 *   NULL is returned when that allocation fails.
 * - CONFIG_CODEBLOCK_PERSISTENT: the code-block referenced by the current
 *   worker is reused.
 * - CONFIG_CODEBLOCK_PERSISTENT_POOL: one slot of the worker's pool is
 *   selected from the x position relative to the start of the calling
 *   thread's segment, modulo transform->cb_pool.
 *
 * In every configuration the selected code-block is passed through
 * codeblock_setup() before it is returned.
 *
 * @param transform  transform being processed
 * @param tr         position the code-block should represent
 * @return the code-block, or NULL on allocation failure (temporary
 *         configuration only)
 */
struct codeblock_t *codeblock_acquire(
	const struct transform_t *transform,
	const struct vec2_t tr
)
{
#ifdef CONFIG_CODEBLOCK_TEMPORARY
	struct codeblock_t *codeblock = codeblock_create(transform);

	// codeblock_create() reports allocation failure with NULL; do not hand
	// that to codeblock_setup(), which dereferences its argument
	if( !codeblock )
		return NULL;

	codeblock_setup(codeblock, transform, tr);

	return codeblock;
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT
	const struct worker_t *worker = transform_get_worker_const(transform);

	assert( worker );

	struct codeblock_t *codeblock = worker->codeblock;

	codeblock_setup(codeblock, transform, tr);

	return codeblock;
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT_POOL
	assert( transform );

	// width of the strip assigned to each thread, rounded up by ceil_codeblock()
	const int segment_x = ceil_codeblock(ceil_div(transform->super.x, transform->threads), transform->cb_exp.x);

	assert( segment_x * transform->threads >= transform->super.x && "threads does not cover entire frame" );

	const int t = threads_get_thread_id();

	// start of this thread's segment
	const int tr0_x = ceil_div_pow2_1(                         transform->tc0_cb.x + (t+0)*segment_x) ;
#if 0
	const int tr1_x = ceil_div_pow2_1(min(transform->tc1_cb.x, transform->tc0_cb.x + (t+1)*segment_x));
#endif
// 	dprintf("codeblock_acquire(%i): tr = [%i | %i | %i)\n", t, tr0_x, tr.x, tr1_x);

#if 0
	const int cid0 = 0;
	const int cid1 = ((tr1_x - tr0_x) >> transform->cb_exp.x);
#endif
	// code-block id within the segment
	const int cid  = ((tr .x - tr0_x) >> transform->cb_exp.x);

// 	dprintf("codeblock_acquire(%i): cid = [%i | %i | %i)\n", t, cid0, cid, cid1);

	const struct worker_t *worker = transform_get_worker_const(transform);

	assert( worker );
	assert( worker->codeblock );
	assert( transform->cb_pool > 0 && "code-block pool size needs to be > 0" );

	// pool slots are reused cyclically
	struct codeblock_t *codeblock = worker->codeblock + cid%transform->cb_pool;

	codeblock_setup(codeblock, transform, tr);

	return codeblock;
#endif
}

/**
 * Give back a code-block obtained from codeblock_acquire().
 *
 * Only the temporary configuration owns the code-block per call and
 * therefore destroys it here; in the other configurations this is a no-op.
 *
 * @param transform  transform the code-block belongs to
 * @param codeblock  code-block returned by codeblock_acquire()
 */
void codeblock_release(
	const struct transform_t *transform,
	struct codeblock_t *codeblock
)
{
#ifndef CONFIG_CODEBLOCK_TEMPORARY
	// persistent storage outlives the call, nothing to do
	UNUSED(transform);
	UNUSED(codeblock);
#else
	codeblock_destroy(transform, codeblock);
#endif
}

/**
 * Process a single code-block.
 *
 * Code-blocks whose local_size is not positive in both dimensions are
 * skipped. For the others, the parameters are logged when
 * CONFIG_CODEBLOCK_PARAMETERS is defined, and the user callback stored in
 * the transform is invoked when CONFIG_CODEBLOCK_CALLBACK is defined and the
 * callback is set.
 *
 * @param transform  transform providing codeblock_callback / codeblock_params
 * @param codeblock  code-block to process (must not be NULL)
 */
void codeblock_process(
	const struct transform_t *transform,
	const struct codeblock_t *codeblock
)
{
	assert( codeblock );

	// skip code-blocks that carry no useful data
	if( codeblock->local_size.x <= 0 || codeblock->local_size.y <= 0 )
		return;

#ifdef CONFIG_CODEBLOCK_PARAMETERS
	dprintf("[ CODEBLOCK ] n=%i r=%i index=(%i,%i) seq=%i size=(%i,%i)\n",
		codeblock->n, codeblock->r,
		codeblock->index.x, codeblock->index.y,
		codeblock->seq,
		codeblock->local_size.x, codeblock->local_size.y
	);
#endif

	// TODO: process the code-block using EBCOT

#ifdef CONFIG_CODEBLOCK_CALLBACK
	assert( transform );

	// hand the code-block over to the user, if a callback was registered
	if( transform->codeblock_callback )
		transform->codeblock_callback(transform->codeblock_params, codeblock);
#else
	UNUSED(transform);
#endif

#if 0
	// debugging aid: dump the used viewport of each band into a PGM file

	// NOTE: codeblock->llband of size cb_size1
	const struct vec2_t local_c0_ll = convert_tc_to_next_tb_ll(codeblock->local_c0);
	const struct vec2_t local_c1_ll = convert_tc_to_next_tb_ll(codeblock->local_c1);
	const struct vec2_t local_size_ll = vec2_sub(local_c1_ll, local_c0_ll);
	imageptr_log_dump_viewport_fmt(
		codeblock->llband,
		local_c0_ll,
		local_size_ll,
		"codeblocks/cb_n%i_x%i_y%i_LL_viewport.pgm",
		codeblock->n,
		codeblock->tr.x,
		codeblock->tr.y
	);

	// NOTE: codeblock->hlband of size cb_size1
	const struct vec2_t local_c0_hl = convert_tc_to_next_tb_hl(codeblock->local_c0);
	const struct vec2_t local_c1_hl = convert_tc_to_next_tb_hl(codeblock->local_c1);
	const struct vec2_t local_size_hl = vec2_sub(local_c1_hl, local_c0_hl);
	imageptr_log_dump_viewport_fmt(
		codeblock->hlband,
		local_c0_hl,
		local_size_hl,
		"codeblocks/cb_n%i_x%i_y%i_HL_viewport.pgm",
		codeblock->n,
		codeblock->tr.x,
		codeblock->tr.y
	);

	// NOTE: codeblock->lhband of size cb_size1
	const struct vec2_t local_c0_lh = convert_tc_to_next_tb_lh(codeblock->local_c0);
	const struct vec2_t local_c1_lh = convert_tc_to_next_tb_lh(codeblock->local_c1);
	const struct vec2_t local_size_lh = vec2_sub(local_c1_lh, local_c0_lh);
	imageptr_log_dump_viewport_fmt(
		codeblock->lhband,
		local_c0_lh,
		local_size_lh,
		"codeblocks/cb_n%i_x%i_y%i_LH_viewport.pgm",
		codeblock->n,
		codeblock->tr.x,
		codeblock->tr.y
	);

	// NOTE: codeblock->hhband of size cb_size1
	const struct vec2_t local_c0_hh =  convert_tc_to_next_tb_hh(codeblock->local_c0);
	const struct vec2_t local_c1_hh =  convert_tc_to_next_tb_hh(codeblock->local_c1);
	const struct vec2_t local_size_hh = vec2_sub(local_c1_hh, local_c0_hh);
	imageptr_log_dump_viewport_fmt(
		codeblock->hhband,
		local_c0_hh,
		local_size_hh,
		"codeblocks/cb_n%i_x%i_y%i_HH_viewport.pgm",
		codeblock->n,
		codeblock->tr.x,
		codeblock->tr.y
	);
#endif
}

/**
 * Process a code-block according to the configured storage strategy.
 *
 * In the temporary and persistent configurations the code-block is processed
 * right away. In the pool configuration processing is deferred: nothing
 * happens until the given code-block occupies the last slot of the pool or
 * is the last code-block of the calling thread's segment; at that point all
 * pool slots from 0 up to the slot of the given code-block are processed.
 *
 * @param transform  transform being processed
 * @param codeblock  code-block that has just been filled
 */
void codeblock_process_pool(
	const struct transform_t *transform,
	const struct codeblock_t *codeblock
)
{
#ifdef CONFIG_CODEBLOCK_TEMPORARY
	codeblock_process(transform, codeblock);
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT
	codeblock_process(transform, codeblock);
#endif
#ifdef CONFIG_CODEBLOCK_PERSISTENT_POOL
	assert( codeblock );
	assert( transform );
	assert( transform->cb_pool > 0 );

	const int block_x = codeblock->tr.x;

	// width of the strip assigned to each thread
	const int strip = ceil_codeblock(ceil_div(transform->super.x, transform->threads), transform->cb_exp.x);

	assert( strip * transform->threads >= transform->super.x && "threads does not cover entire frame" );

	const int thread = threads_get_thread_id();

	// [seg_begin, seg_end) is the segment handled by the calling thread
	const int seg_begin = ceil_div_pow2_1(transform->tc0_cb.x + (thread+0)*strip);
	const int seg_end = ceil_div_pow2_1(min(transform->tc1_cb.x, transform->tc0_cb.x + (thread+1)*strip));

	// number of code-blocks in the segment, and the id of the current one
	const int count = (seg_end - seg_begin) >> transform->cb_exp.x;
	const int current = (block_x - seg_begin) >> transform->cb_exp.x;

	// pool slot occupied by the current code-block
	const int slot = current%transform->cb_pool;

	const int pool_is_full = ( slot == transform->cb_pool-1 );
	const int segment_is_done = ( current == count-1 );

	// keep collecting until the pool is full or the segment ends
	if( !pool_is_full && !segment_is_done )
		return;

	const struct worker_t *worker = transform_get_worker_const(transform);
	assert( worker );
	assert( worker->codeblock );

	// In both accepted cases the last occupied slot equals `slot`:
	// a full pool means slot == cb_pool-1, otherwise the segment ended at `slot`.
	// NOTE: process the codeblocks (possibly in parallel)
	for(int id = 0; id <= slot; id++)
	{
		const struct codeblock_t *entry = worker->codeblock + id;

		dprintf("process codeblock n=%i tr=(%i,%i)\n", worker->codeblock[id].n, worker->codeblock[id].tr.x, worker->codeblock[id].tr.y);

		codeblock_process(transform, entry);
	}
#endif
}
