#ifndef __CL_KERNEL_INTEGER_INCLUDED
#define __CL_KERNEL_INTEGER_INCLUDED

/**
 *	@file gpgpu/kernel_utils/Integer.h
 *	@date 2016
 *	@author -tHE SWINe-
 *	@brief fixed-width integer types definitions, limits and misc integer functions
 */

#include "StaticAssert.h"

typedef signed char int8_t; /**< @brief signed 8-bit integer type */
typedef signed short int16_t; /**< @brief signed 16-bit integer type */
typedef signed int int32_t; /**< @brief signed 32-bit integer type */
typedef signed long int64_t; /**< @brief signed 64-bit integer type */
typedef unsigned char uint8_t; /**< @brief unsigned 8-bit integer type */
typedef unsigned short uint16_t; /**< @brief unsigned 16-bit integer type */
typedef unsigned int uint32_t; /**< @brief unsigned 32-bit integer type */
typedef unsigned long uint64_t; /**< @brief unsigned 64-bit integer type */
STATIC_ASSERT(8 * sizeof(int8_t) == 8, INT8_T_MUST_BE_A_8_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(int16_t) == 16, INT16_T_MUST_BE_A_16_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(int32_t) == 32, INT32_T_MUST_BE_A_32_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(int64_t) == 64, INT64_T_MUST_BE_A_64_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(uint8_t) == 8, UINT8_T_MUST_BE_A_8_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(uint16_t) == 16, UINT16_T_MUST_BE_A_16_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(uint32_t) == 32, UINT32_T_MUST_BE_A_32_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(uint64_t) == 64, UINT64_T_MUST_BE_A_64_BIT_TYPE);
#ifdef NVIDIA
typedef signed long long int128_t; /**< @brief signed 128-bit integer type */
typedef unsigned long long uint128_t; /**< @brief unsigned 128-bit integer type */
typedef int128_t intmax_t; /**< @brief signed maximum size (128-bit) integer type */
typedef uint128_t uintmax_t; /**< @brief unsigned maximum size (128-bit) integer type */
STATIC_ASSERT(8 * sizeof(int128_t) == 128, INT128_T_MUST_BE_A_128_BIT_TYPE);
STATIC_ASSERT(8 * sizeof(uint128_t) == 128, UINT128_T_MUST_BE_A_128_BIT_TYPE);
#else // NVIDIA
typedef int64_t intmax_t; /**< @brief signed maximum size (64-bit) integer type */
typedef uint64_t uintmax_t; /**< @brief unsigned maximum size (64-bit) integer type */
#endif // NVIDIA

#ifndef UINT8_MAX
/**
 *	@brief largest value representable by uint8_t (255)
 */
#define UINT8_MAX  ((uint8_t)~0)
#endif

#ifndef UINT16_MAX
/**
 *	@brief largest value representable by uint16_t (65535)
 */
#define UINT16_MAX ((uint16_t)~0)
#endif

#ifndef UINT32_MAX
/**
 *	@brief largest value representable by uint32_t (4294967295)
 */
#define UINT32_MAX ((uint32_t)~0)
#endif

#ifndef UINT64_MAX
/**
 *	@brief largest value representable by uint64_t (18446744073709551615)
 */
#define UINT64_MAX ((uint64_t)~0)
#endif

#ifndef INT8_MAX
/**
 *	@brief largest value representable by int8_t (127),
 *		derived as the upper half of the unsigned range
 */
#define INT8_MAX  ((int8_t)(UINT8_MAX >> 1))
#endif

#ifndef INT16_MAX
/**
 *	@brief largest value representable by int16_t (32767),
 *		derived as the upper half of the unsigned range
 */
#define INT16_MAX ((int16_t)(UINT16_MAX >> 1))
#endif

#ifndef INT32_MAX
/**
 *	@brief largest value representable by int32_t (2147483647),
 *		derived as the upper half of the unsigned range
 */
#define INT32_MAX ((int32_t)(UINT32_MAX >> 1))
#endif

#ifndef INT64_MAX
/**
 *	@brief largest value representable by int64_t (9223372036854775807),
 *		derived as the upper half of the unsigned range
 */
#define INT64_MAX ((int64_t)(UINT64_MAX >> 1))
#endif

#ifndef INT8_MIN
/**
 *	@brief smallest value representable by int8_t (-128), one below the negated maximum
 */
#define INT8_MIN  ((int8_t)(-1 - INT8_MAX))
#endif

#ifndef INT16_MIN
/**
 *	@brief smallest value representable by int16_t (-32768), one below the negated maximum
 */
#define INT16_MIN ((int16_t)(-1 - INT16_MAX))
#endif

#ifndef INT32_MIN
/**
 *	@brief smallest value representable by int32_t (-2147483648), one below the negated maximum
 */
#define INT32_MIN ((int32_t)(-1 - INT32_MAX))
#endif

#ifndef INT64_MIN
/**
 *	@brief smallest value representable by int64_t (-9223372036854775808),
 *		one below the negated maximum
 */
#define INT64_MIN ((int64_t)(-1 - INT64_MAX))
#endif

#ifndef SIZE_MAX
/**
 *	@brief largest value representable by size_t (all bits set;
 *		equals UINT32_MAX or UINT64_MAX depending on the width of size_t)
 */
#define SIZE_MAX  (~(size_t)0)
#endif
// calc limits

// === bit twiddles for OpenCL ===

/**
 *	@brief rounds a number up to the nearest multiple of an arbitrary alignment
 *
 *	@param[in] n_x is value to be aligned
 *	@param[in] n_alignment is alignment (used as a divisor, so it must not be zero)
 *
 *	@return Returns such y, so that y >= n_x and y < n_x + n_alignment and y % n_alignment = 0.
 *
 *	@note n_alignment is expanded several times, do not pass expressions with side effects.
 */
#define n_Align_Up(n_x,n_alignment) ((((n_x) + (n_alignment) - 1) / (n_alignment)) * (n_alignment))

/**
 *	@brief aligns number up to the next multiple of given (power-of-two) alignment
 *
 *	@param[in] n_x is value to be aligned
 *	@param[in] n_alignment is alignment (must be a power of two)
 *
 *	@return Returns such y, so that y >= n_x and y < n_x + n_alignment and y % n_alignment = 0.
 *
 *	@note The remainder is subtracted rather than masked off with <tt>~(n_alignment - 1)</tt>.
 *		The inverted mask would be computed in the type of n_alignment and, if that type
 *		is unsigned and narrower than the type of n_x, it would also clear the upper bits
 *		of the result.
 *	@note Both arguments are expanded several times, do not pass expressions with side effects.
 */
#define n_Align_Up_POT(n_x,n_alignment) \
	(((n_x) + (n_alignment) - 1) - (((n_x) + (n_alignment) - 1) & ((n_alignment) - 1)))

/**
 *	@def n_Sum_EvenOddBits_32
 *	@brief sums even and odd bits in a 32-bit integer constant
 *	@param[in] x is 32-bit integer constant
 *	@return Returns a number with bit pairs, each containing the count
 *		of ones in the corresponding bit pair of the original number.
 */
#define n_Sum_EvenOddBits_32(x) ((x) - (((x) >> 1) & 0x55555555U))

/**
 *	@def n_Sum_BitPairs_32
 *	@brief sums bit pairs in a 32-bit integer constant
 *	@param[in] x is 32-bit integer constant
 *	@return Returns a number with nibbles, each containing a sum
 *		of values of two corresponding bit pairs in the original number.
 */
#define n_Sum_BitPairs_32(x) (((x) & 0x33333333U) + (((x) >> 2) & 0x33333333U))

/**
 *	@def n_Sum_Nibbles_32
 *	@brief sums nibbles in a 32-bit integer constant
 *	@param[in] x is 32-bit integer constant
 *	@return Returns a number with each byte containing a sum
 *		of values of its two nibbles in the original number.
 */
#define n_Sum_Nibbles_32(x) (((x) + ((x) >> 4)) & 0x0f0f0f0fU)

/**
 *	@def n_Sum_Bytes_32
 *	@brief sums values of bytes of a 32-bit integer constant
 *	@param[in] x is 32-bit integer constant
 *	@return Returns a sum of values of the four low bytes of the original number (modulo 256).
 *	@note The result is masked to eight bits; without the mask, an operand of a type wider
 *		than 32 bits would leave the upper bytes of the product in the result.
 */
#define n_Sum_Bytes_32(x) ((((x) * 0x01010101U) >> 24) & 0xffU)

/**
 *	@def n_SetBit_Num_32
 *	@brief counts set bits in a 32-bit integer constant
 *	@param[in] x is 32-bit integer constant
 *	@return Returns number of bits that are set (in the low 32 bits of x).
 */
#define n_SetBit_Num_32(x) n_Sum_Bytes_32(n_Sum_Nibbles_32(n_Sum_BitPairs_32(n_Sum_EvenOddBits_32(x))))

/**
 *	@def n_SetBit_Num
 *	@brief counts set bits in a 32-bit integer constant (same as n_SetBit_Num_32())
 *	@param[in] x is 32-bit integer constant
 *	@return Returns number of bits that are set (in the low 32 bits of x).
 */
#define n_SetBit_Num(x) n_SetBit_Num_32(x)

/**
 *	@def n_Sum_EvenOddBits_64
 *	@brief sums even and odd bits in a 64-bit integer constant
 *	@param[in] x is 64-bit integer constant
 *	@return Returns a number with bit pairs, each containing the count
 *		of ones in the corresponding bit pair of the original number.
 */
#define n_Sum_EvenOddBits_64(x) ((x) - (((x) >> 1) & 0x5555555555555555UL))

/**
 *	@def n_Sum_BitPairs_64
 *	@brief sums bit pairs in a 64-bit integer constant
 *	@param[in] x is 64-bit integer constant
 *	@return Returns a number with nibbles, each containing a sum
 *		of values of two corresponding bit pairs in the original number.
 */
#define n_Sum_BitPairs_64(x) (((x) & 0x3333333333333333UL) + (((x) >> 2) & 0x3333333333333333UL))

/**
 *	@def n_Sum_Nibbles_64
 *	@brief sums nibbles in a 64-bit integer constant
 *	@param[in] x is 64-bit integer constant
 *	@return Returns a number with each byte containing a sum
 *		of values of its two nibbles in the original number.
 */
#define n_Sum_Nibbles_64(x) (((x) + ((x) >> 4)) & 0x0f0f0f0f0f0f0f0fUL)

/**
 *	@def n_Sum_Bytes_64
 *	@brief sums values of bytes of a 64-bit integer constant
 *	@param[in] x is 64-bit integer constant
 *	@return Returns a sum of values of the eight bytes of the original number.
 */
#define n_Sum_Bytes_64(x) (((x) * 0x0101010101010101UL) >> 56)

/**
 *	@def n_SetBit_Num_64
 *	@brief counts set bits in a 64-bit integer constant, using 64-bit operations
 *	@param[in] x is 64-bit integer constant
 *	@return Returns number of bits that are set.
 */
#define n_SetBit_Num_64(x) n_Sum_Bytes_64(n_Sum_Nibbles_64(n_Sum_BitPairs_64(n_Sum_EvenOddBits_64(x))))

/**
 *	@def n_SetBit_Num_64_B
 *	@brief counts set bits in a 64-bit integer constant, using conversion to a pair of 32-bit integers
 *	@param[in] x is 64-bit integer constant
 *	@return Returns number of bits that are set.
 *	@note The argument is converted to uint64_t before the upper half is extracted,
 *		so that the shift by 32 is always performed on a 64-bit value.
 */
#define n_SetBit_Num_64_B(x) (n_SetBit_Num_32((uint32_t)(x)) + \
	n_SetBit_Num_32((uint32_t)((uint64_t)(x) >> 32)))

/**
 *	@brief tests whether a number has at most one bit set; usable in constant expressions
 *	@param[in] n_x is number being tested. note that it must be positive
 *		(zero is also reported as a power of two)
 *	@return Returns nonzero if n_x is power of two, otherwise returns zero.
 */
#define b_Is_POT(n_x) ((((n_x) - 1U) & (n_x)) == 0)

/**
 *	@brief copies every set bit to the three next lower bit positions
 *	@param[in] x is integer input to be right-filled
 *	@return Returns x combined (bitwise or) with x shifted right by 1, 2 and 3 bits.
 */
#define n_RightFill_4(x) ((x) | ((x) >> 1U) | ((x) >> 2U) | ((x) >> 3U))

/**
 *	@brief copies every set bit to the seven next lower bit positions
 *	@param[in] x is integer input to be right-filled
 *	@return Returns n_RightFill_4(x) combined with itself shifted right by 4 bits.
 */
#define n_RightFill_8(x) ((n_RightFill_4(x) >> 4U) | n_RightFill_4(x))

/**
 *	@brief copies every set bit to the fifteen next lower bit positions
 *	@param[in] x is integer input to be right-filled
 *	@return Returns n_RightFill_8(x) combined with itself shifted right by 8 bits.
 */
#define n_RightFill_16(x) ((n_RightFill_8(x) >> 8U) | n_RightFill_8(x))

/**
 *	@brief sets all bits below the highest set bit of a 32-bit input
 *	@param[in] x is 32-bit integer input
 *	@return Returns the input number with the set bits duplicated towards LSB.
 */
#define n_RightFill_32(x) ((n_RightFill_16(x) >> 16U) | n_RightFill_16(x))

/**
 *	@brief sets all bits below the highest set bit of a 64-bit input
 *	@param[in] x is 64-bit integer input
 *	@return Returns the input number with the set bits duplicated towards LSB.
 */
#define n_RightFill_64(x) ((n_RightFill_32(x) >> 32U) | n_RightFill_32(x))

/**
 *	@brief rounds a 32-bit number up to a power of two
 *
 *	@param[in] x is 32-bit integer constant
 *
 *	@return Returns power of two greater or equal to the input.
 *
 *	@note The calculation is carried out in unsigned arithmetic (x is decremented by 1U,
 *		right-filled and incremented by 1U). For a 32-bit x that is zero or greater
 *		than the largest representable power of two, the final increment wraps
 *		around and the result is zero.
 */
#define n_Make_POT(x) (1U + n_RightFill_32((x) - 1U))

/**
 *	@brief rounds a 64-bit number up to a power of two
 *
 *	@param[in] x is 64-bit integer constant
 *
 *	@return Returns power of two greater or equal to the input.
 *
 *	@note The calculation is carried out in unsigned long arithmetic (x is decremented
 *		by 1UL, right-filled and incremented by 1UL). For a 64-bit x that is zero
 *		or greater than the largest representable power of two, the final increment
 *		wraps around and the result is zero.
 */
#define n_Make_POT_64(x) (1UL + n_RightFill_64((x) - 1UL))

/**
 *	@brief rounds a 32-bit number down to a power of two
 *	@param[in] x is 32-bit integer constant
 *	@return Returns power of two lower or equal to the input,
 *		or zero if the input is not greater than zero.
 */
#define n_Make_Lower_POT(x) (((x) > 0)? n_RightFill_32((x) >> 1U) + 1U : 0U)

/**
 *	@brief rounds a 64-bit number down to a power of two
 *	@param[in] x is 64-bit integer constant
 *	@return Returns power of two lower or equal to the input,
 *		or zero if the input is not greater than zero.
 */
#define n_Make_Lower_POT_64(x) (((x) > 0)? n_RightFill_64((x) >> 1U) + 1U : 0U)

/**
 *	@brief calculates base-2 logarithm (round down)
 *	@param[in] x is 32-bit integer constant
 *	@return Returns base-2 logarithm of the input, rounded down;
 *		returns zero if the input is not greater than zero.
 *	@note The logarithm is obtained by counting the bits below
 *		the largest power of two that is not greater than x.
 */
#define n_Log2(x) (((x) > 0)? n_SetBit_Num(n_Make_Lower_POT(x) - 1U) : 0)

/**
 *	@brief calculates base-2 logarithm (round down)
 *	@param[in] x is 64-bit integer constant
 *	@return Returns base-2 logarithm of the input, rounded down;
 *		returns zero if the input is not greater than zero.
 *	@note The logarithm is obtained by counting the bits below
 *		the largest power of two that is not greater than x.
 */
#define n_Log2_64(x) (((x) > 0)? n_SetBit_Num_64(n_Make_Lower_POT_64(x) - 1UL) : 0)

/**
 *	@brief calculates base two logarithm of a given 32-bit number (round up)
 *
 *	@param[in] n_x is input value, which must not be negative
 *
 *	@return Returns ceil(log2(n_x)), calculated as n_Log2() of n_x rounded up to a power of two.
 *
 *	@note This does not return the number of bits required to store the number
 *		(it underestimates the powers of two by 1 bit), see n_Bit_Width() instead.
 *	@note On range of 1 to 0.5 * 10^9, <tt>n_Log2_Ceil(x)</tt> returns the same values
 *		as <tt>int(ceil(log(double(x)) / log(2.0)))</tt>.
 */
#define n_Log2_Ceil(n_x) n_Log2(n_Make_POT(n_x))

/**
 *	@brief calculates base two logarithm of a given 64-bit number (round up)
 *
 *	@param[in] n_x is input value, which must not be negative
 *
 *	@return Returns ceil(log2(n_x)), calculated as n_Log2_64() of n_x rounded up to a power of two.
 *
 *	@note This does not return the number of bits required to store the number
 *		(it underestimates the powers of two by 1 bit), see n_Bit_Width_64() instead.
 *	@note On range of 1 to 0.5 * 10^9, <tt>n_Log2_Ceil_64(x)</tt> returns the same values
 *		as <tt>int(ceil(log(double(x)) / log(2.0)))</tt>.
 */
#define n_Log2_Ceil_64(n_x) n_Log2_64(n_Make_POT_64(n_x))

/**
 *	@brief calculates number of leading zeros in a 32-bit number, usable at compile-time
 *	@param[in] n_x is the input number
 *	@return Returns number of leading zeros in n_x (32 if n_x is zero).
 *	@note The value is right-filled (all bits below the highest set bit become ones),
 *		inverted, and the remaining ones, which are the former leading zeros, are counted.
 *	@note An identical definition of this macro appears once more later in this file;
 *		the two replacement lists need to be kept the same.
 */
#define n_LeadingZero_Num(n_x) (n_SetBit_Num(~n_RightFill_32(n_x)))

/**
 *	@brief calculates number of leading zeros in a 64-bit number, usable at compile-time
 *	@param[in] n_x is the input number
 *	@return Returns number of leading zeros in n_x (64 if n_x is a 64-bit zero).
 *	@note The value is right-filled (all bits below the highest set bit become ones),
 *		inverted, and the remaining ones, which are the former leading zeros, are counted.
 *	@note An identical definition of this macro appears once more later in this file;
 *		the two replacement lists need to be kept the same.
 */
#define n_LeadingZero_Num_64(n_x) (n_SetBit_Num_64(~n_RightFill_64(n_x)))

/**
 *	@brief calculates mask with a specified right-most bits set
 *	@param[in] n_bit_num is number of bits to set (must be non-negative and less than or equal to 32)
 *	@return Returns (unsigned) mask with n_bit_num right-most bits set.
 *	@note The mask is assembled from its highest bit and from all the bits below it,
 *		so that no shift by the full 32 bits is needed. Unsigned arithmetic is used;
 *		with a signed one, <tt>1 << 31</tt> and the following decrement would overflow
 *		for n_bit_num equal to 32.
 */
#define n_Mask(n_bit_num) \
	(((n_bit_num) > 0)? ((1U << ((n_bit_num) - 1)) | \
	((1U << ((n_bit_num) - 1)) - 1U)) : 0U)

/**
 *	@brief calculates 32-bit mask with a specified right-most bits set
 *	@param[in] n_bit_num is number of bits to set (must be non-negative and less than or equal to 32)
 *	@return Returns mask with n_bit_num right-most bits set.
 *	@note Zero bits are handled by a conditional, so that the shift by the full
 *		32 bits (which ISO C leaves undefined) is never evaluated.
 */
#define n_Mask_32(n_bit_num) (((n_bit_num) > 0)? (0xffffffffU >> (32 - (n_bit_num))) : 0U)

/**
 *	@brief calculates 64-bit mask with a specified right-most bits set
 *	@param[in] n_bit_num is number of bits to set (must be non-negative and less than or equal to 64)
 *	@return Returns mask with n_bit_num right-most bits set.
 *	@note Zero bits are handled by a conditional, so that the shift by the full
 *		64 bits (which ISO C leaves undefined) is never evaluated.
 */
#define n_Mask_64(n_bit_num) (((n_bit_num) > 0)? (0xffffffffffffffffUL >> (64 - (n_bit_num))) : 0UL)

/**
 *	@brief calculates number of leading zeros in a 32-bit number
 *	@param[in] n_x is the input number
 *	@return Returns number of leading zeros in n_x (32 if n_x is zero).
 *	@note This repeats, token for token, a definition of the same macro that appears
 *		earlier in this file; the two replacement lists need to be kept the same.
 */
#define n_LeadingZero_Num(n_x) (n_SetBit_Num(~n_RightFill_32(n_x)))

/**
 *	@brief calculates number of leading zeros in a 64-bit number
 *	@param[in] n_x is the input number
 *	@return Returns number of leading zeros in n_x (64 if n_x is a 64-bit zero).
 *	@note This repeats, token for token, a definition of the same macro that appears
 *		earlier in this file; the two replacement lists need to be kept the same.
 */
#define n_LeadingZero_Num_64(n_x) (n_SetBit_Num_64(~n_RightFill_64(n_x)))

/**
 *	@brief calculates number of trailing zeros
 *	@param[in] n_x is the input number
 *	@return Returns number of trailing zeros in n_x; for zero input
 *		returns the size of the type of n_x in bits.
 */
#define n_TrailingZero_Num(n_x) (!(n_x)? 8 * sizeof(n_x) : n_SetBit_Num(((n_x) - 1U) & ~(n_x)))
// the bits that are set in n_x - 1 but not in n_x are exactly the trailing zeros of n_x;
// zero is handled separately as the decrement would underflow, potentially
// setting higher bits (the type of n_x is not known so it can't be cast)

/**
 *	@brief calculates number of trailing zeros, using 64-bit operations
 *	@param[in] n_x is the input number
 *	@return Returns number of trailing zeros in n_x; for zero input
 *		returns the size of the type of n_x in bits.
 */
#define n_TrailingZero_Num_64(n_x) (!(n_x)? 8 * sizeof(n_x) : n_SetBit_Num_64(((n_x) - 1UL) & ~(n_x)))
// the bits that are set in n_x - 1 but not in n_x are exactly the trailing zeros of n_x;
// zero is handled separately as the decrement would underflow, potentially
// setting higher bits (the type of n_x is not known so it can't be cast)

/**
 *	@brief calculates the number of bits the nonzero bits in a number span over
 *	@param[in] n_x is the input number
 *	@return Returns the bandwidth of n_x.
 *	@note This is equivalent to shifting right until the LSB is one and then
 *		taking bit width, with the exception that bandwidth of zero is zero.
 *	@note The right-filled value (ones from the highest set bit down) is combined
 *		by exclusive or with the mask of trailing zeros, which leaves ones exactly
 *		between the lowest and the highest set bit; those are then counted.
 *	@note Every use of n_x is parenthesized, so that an argument such as
 *		<tt>a | b</tt> is not regrouped by operator precedence.
 */
#define n_Bit_Bandwidth(n_x) ((n_x)? n_SetBit_Num( \
	n_RightFill_32(n_x) ^ ((((n_x) - 1U) | (n_x)) ^ (n_x))) : 0)

/**
 *	@brief calculates the number of bits the nonzero bits in a 64-bit number span over
 *	@param[in] n_x is the input number
 *	@return Returns the bandwidth of n_x.
 *	@note This is equivalent to shifting right until the LSB is one and then
 *		taking bit width, with the exception that bandwidth of zero is zero.
 *	@note The right-filled value (ones from the highest set bit down) is combined
 *		by exclusive or with the mask of trailing zeros, which leaves ones exactly
 *		between the lowest and the highest set bit; those are then counted.
 *	@note Every use of n_x is parenthesized, so that an argument such as
 *		<tt>a | b</tt> is not regrouped by operator precedence.
 */
#define n_Bit_Bandwidth_64(n_x) ((n_x)? n_SetBit_Num_64( \
	n_RightFill_64(n_x) ^ ((((n_x) - 1UL) | (n_x)) ^ (n_x))) : 0)

/**
 *	@brief calculates the number of bits needed to store the input number
 *	@param[in] n_x is the input number
 *	@return Returns number of bits required to store n_x (zero is reported to need one bit).
 *	@note For nonzero input, this counts the ones of the right-filled value,
 *		which is the position of the highest set bit plus one.
 */
#define n_Bit_Width(n_x) (!(n_x)? 1 : n_SetBit_Num(n_RightFill_32(n_x)))

/**
 *	@brief calculates the number of bits needed to store the 64-bit input number
 *	@param[in] n_x is the input number
 *	@return Returns number of bits required to store n_x (zero is reported to need one bit).
 *	@note For nonzero input, this counts the ones of the right-filled value,
 *		which is the position of the highest set bit plus one.
 */
#define n_Bit_Width_64(n_x) (!(n_x)? 1 : n_SetBit_Num_64(n_RightFill_64(n_x)))

// === ~bit twiddles for OpenCL ===

#endif // !__CL_KERNEL_INTEGER_INCLUDED
