/*
								+----------------------------------+
								|                                  |
								|       ***  Vector math  ***      |
								|                                  |
								|   Copyright  -tHE SWINe- 2005   |
								|                                  |
								|             Vector.h             |
								|                                  |
								+----------------------------------+
*/

/*
 *	passed code revision at 2006-05-16
 *
 *	optimized some parts of code
 *	all integers storing just true or false were replaced with bool
 *
 *	passed code revision at 2006-07-02
 *
 *	added swizzle operators to vector classes
 *	added reflect / refract / fresnel functions to Vector3<T> class
 *	renamed Vector3<T>::Len to Vector3<T>::f_Length, Vector3<T>::Len2 to Vector3<T>::f_Length2
 *	added some Matrix3f methods, renamed Matrix3f::Init() to Matrix3f::Identity()
 *	renamed Planef to Plane3f
 *	renamed Pi to f_pi
 *	general cleanup of code
 *
 *	2006-07-17
 *
 *	renamed CPolygon::r_Vertex(int) to CPolygon::r_t_Vertex(int)
 *	renamed CPolygon::r_Normal() to CPolygon::r_t_Normal()
 *
 *	2006-07-20
 *
 *	added CPolygon::b_MT_RayHit_CullFrontfaces and CPolygon::b_MT_RayHit_CullBackFaces
 *
 *	2006-07-24
 *
 *	renamed TVertexStruct to TVertex3f
 *	added class CVectorMath with ray distances and bounding box / line (ray) intersection tests
 *
 *	2007-01-23
 *
 *	fixed std::vector handling in template CPolygon<TVertex3f> (not-enough-memory checks)
 *	remade TVertex3f base class and CPolygon<TVertex3f> a bit (added + and * operators,
 *	renamed Lerp() to t_Lerp())
 *
 *	2007-03-07
 *
 *	added complete swizzling functions set to vectors
 *
 *	2007-06-04
 *
 *	added missing const keyword to input swizzle functions
 *
 *	2007-08-03
 *
 *	added quaternion class with basic quaternion operations
 *
 *	2007-09-17
 *
 *	fixed swizzle*_in macros
 *
 *	2007-10-27
 *
 *	added convenience vector / constant operators and component-wise vector * vector
 *	and vector / vector operators to all vector templates
 *	added Vector4f Matrix4f::operator *(const Vector4f &) function for natural matrix-vertex
 *	multiplication as alternative to v_Transform_Pos() and v_Transform_Dir()
 *
 *	2007-11-26
 *
 *	added Matrix4f::f_Subdet, Matrix4f::f_Determinant, Matrix4f::FullInvert and
 *	Matrix4f::t_FullInverse
 *	added convenience vector + constant and vector - constant operators to all vector templates
 *
 *	2008-05-19
 *
 *	fixed typos in the word 'environment'
 *
 *	2008-08-21
 *
 *	fixed Matrix4f::Scale(), added non-Vector3f variants of Matrix4f::Scale(),
 *	Matrix4f::Translate() and Matrix4f::Rotate()
 *
 *	2008-08-23
 *
 *	documented Matrix4f class, added functions for generating transformation
 *	matrices (not applying transformation on the matrix), namely Matrix4f::Translation(),
 *	Matrix4f::Scaling(), Matrix4f::RotationX(), Matrix4f::RotationY(), Matrix4f::RotationZ(),
 *	and Matrix4f::Rotation(). added component-wise matrix by scalar multiplication (overloads
 *	of Matrix4f::operator *() and Matrix4f::operator *=()) and finally added transposition
 *	function Matrix4f::Transposition()
 *
 *	2009-05-04
 *
 *	fixed mixed windows / linux line endings
 *
 *	2009-05-12
 *
 *	fixed out-of-bounds index in TObjectOrientedBox::b_Intersect_Ray()
 *
 *	2009-05-23
 *
 *	removed all instances of std::vector::reserve and replaced them by stl_ut::Reserve_*
 *
 *	2009-12-18
 *
 *	removed Plane3f::v_Intersect_Ray(), Plane3f::f_Intersect_Ray_t() in favor of
 *	Plane3f::Intersect_Ray(), Plane3f::Intersect_Ray_t(), the new functions do not throw
 *	exceptions, which is considered better solution here.
 *
 *	added Matrix4f::ProductOf(), Matrix4f::FastInverseTo(), Matrix4f::FullInverseTo()
 *	and Matrix4f::TransposeTo(), which are intended to reduce unnecessary matrix copying
 *	when using overloaded operators (overloaded operators remains unchanged)
 *
 *	renamed Matrix4f::Invert(), Matrix4f::t_Inverse() to Matrix4f::FastInvert(),
 *	Matrix4f::t_FastInverse() respectively
 *
 *	renamed Quat4f::q_*() to Quat4f::t_*() to satisfy naming convention (t_ for struct)
 *
 *	added connecting constructors for vectors, meaning it's possible to write
 *	<tt>Vector3f(Vector2f(1, 2), float(3))</tt> with the same effect, as <tt>Vector3f(1, 2, 3)</tt>.
 *	all combinations are possible,
 *
 */

#ifndef __VECTOR2_INCLUDED
#define __VECTOR2_INCLUDED

#include "NewFix.h"
#include "CallStack.h"

#include <math.h>
#include <vector>

// On MSVC builds (excluding Metrowerks, and only if 'for' is not already a macro), every
// 'for' is rewritten to "if(0) {} else for". The loop then lives inside the else branch,
// so a variable declared in the for-init statement is scoped to that branch and does not
// leak into the enclosing block (presumably a workaround for old MSVC for-scoping rules).
#if defined(_MSC_VER) && !defined(__MWERKS__) && !defined(for)
#define for if(0) {} else for
#endif
// msvc 'for' hack

// Global floating-point constants. They are only declared here; the definitions live in
// another translation unit. The trailing comments note the values they are expected to
// carry. f_epsilon is used below as the default tolerance of the Plane3f classification
// functions.
extern const float f_pi; // 3.1415926535897932384626433832795028841971691075f
extern const float f_epsilon; // 0.01f
extern const float f_edge_epsilon; // 0.05f

/*
 *								=== Vector2 ===
 */

/*
 *	swizzle helper macros, meant to be expanded inside the vector templates
 *	(they refer to the template parameter T and to the members of the enclosing struct)
 *
 *	swizzleN_in(components..., name)
 *		- generates member function void name(const VectorN<T> &r_v_vec), which copies
 *		  x, y (, z (, w)) of r_v_vec to the listed members of the enclosing vector,
 *		  in the order given
 *
 *	swizzleN_out(components..., v_name)
 *		- generates const member function VectorN<T> v_name(), which returns a new vector
 *		  built from the listed members of the enclosing vector, in the order given
 */

// write two components from a Vector2<T>
#define swizzle2_in(a,b,ab) inline void ab(const Vector2<T> &r_v_vec) \
	{ \
		a = r_v_vec.x; \
		b = r_v_vec.y; \
	}
// write three components from a Vector3<T>
#define swizzle3_in(a,b,c,abc) inline void abc(const Vector3<T> &r_v_vec) \
	{ \
		a = r_v_vec.x; \
		b = r_v_vec.y; \
		c = r_v_vec.z; \
	}
// write four components from a Vector4<T>
#define swizzle4_in(a,b,c,d,abcd) inline void abcd(const Vector4<T> &r_v_vec) \
	{ \
		a = r_v_vec.x; \
		b = r_v_vec.y; \
		c = r_v_vec.z; \
		d = r_v_vec.w; \
	}
// read two components as a Vector2<T>
#define swizzle2_out(a,b,v_ab) inline Vector2<T> v_ab() const \
	{ \
		return Vector2<T>(a, b); \
	}
// read three components as a Vector3<T>
#define swizzle3_out(a,b,c,v_abc) inline Vector3<T> v_abc() const \
	{ \
		return Vector3<T>(a, b, c); \
	}
// read four components as a Vector4<T>
#define swizzle4_out(a,b,c,d,v_abcd) inline Vector4<T> v_abcd() const \
	{ \
		return Vector4<T>(a, b, c, d); \
	}

/*
 *	template <class T> struct Vector2
 *		- two-component vector with public members x and y
 *		- arithmetic operators work component-wise; the variants taking a single T
 *		  apply the same scalar to both components
 *		- compound assignment operators modify this vector and return its copy
 */
template <class T> struct Vector2 {
	T x, y;

	/*
	 *	inline Vector2()
	 *		- default constructor; leaves x and y uninitialized
	 */
	inline Vector2() { }

	/*
	 *	inline Vector2(T t_x, T t_y)
	 *		- initializes components to t_x and t_y
	 */
	inline Vector2(T t_x, T t_y) :x(t_x), y(t_y) { }

	// returns euclidean length (result of sqrt() cast back to T)
	inline T f_Length() const { return (T)sqrt(x * x + y * y); }

	// returns squared euclidean length (no square root involved)
	inline T f_Length2() const { return x * x + y * y; }

	/*
	 *	inline T operator [](int n_index) const
	 *		- returns copy of component n_index (0 = x, anything else = y in the fallback path)
	 *		- n_index is not range-checked
	 */
	inline T operator [](int n_index) const
	{
		if(&x + 1 == &y) // should optimize away in release mode
			return *(&x + n_index); // members are contiguous, index them as an array
		else {
			if(n_index == 0)
				return x;
			else
				return y;
		}
	}

	/*
	 *	inline T &operator [](int n_index)
	 *		- returns reference to component n_index (0 = x, anything else = y in the
	 *		  fallback path)
	 *		- n_index is not range-checked
	 */
	inline T &operator [](int n_index)
	{
		if(&x + 1 == &y) // should optimize away in release mode
			return *(&x + n_index); // members are contiguous, index them as an array
		else {
			if(n_index == 0)
				return x;
			else
				return y;
		}
	}

	/*
	 *	inline void Normalize()
	 *		- scales the vector to unit length
	 *		- vector of zero length is left unchanged
	 */
	inline void Normalize()
	{
		T t = f_Length();
		
		if(t != 0) {
			x /= t;
			y /= t;
		}
	}

	/*
	 *	inline void Normalize(T t_len)
	 *		- scales the vector to length t_len
	 *		- vector of zero length is left unchanged
	 *		- the scale factor is t_len / length (the same formula Vector3 and Vector4 use);
	 *		  dividing the components by length / t_len instead would divide by zero
	 *		  for integer T whenever length < t_len
	 */
	inline void Normalize(T t_len)
	{
		T t = f_Length();

		if(t != 0) {
			t = t_len / t;
			x *= t;
			y *= t;
		}
	}

	// exact component-wise comparison (no epsilon)
	inline bool operator ==(const Vector2<T> &r_v_vec) const
	{
		return x == r_v_vec.x && y == r_v_vec.y;
	}

	// component-wise vector + vector
	inline Vector2<T> operator +(const Vector2<T> &r_v_vec) const
	{
		return Vector2<T>(x + r_v_vec.x, y + r_v_vec.y);
	}

	// component-wise vector - vector
	inline Vector2<T> operator -(const Vector2<T> &r_v_vec) const
	{
		return Vector2<T>(x - r_v_vec.x, y - r_v_vec.y);
	}

	// component-wise vector * vector (not a dot product)
	inline Vector2<T> operator *(const Vector2<T> &r_v_vec) const
	{
		return Vector2<T>(x * r_v_vec.x, y * r_v_vec.y);
	}

	// component-wise vector / vector
	inline Vector2<T> operator /(const Vector2<T> &r_v_vec) const
	{
		return Vector2<T>(x / r_v_vec.x, y / r_v_vec.y);
	}

	// negation of both components
	inline Vector2<T> operator -() const
	{
		return Vector2<T>(-x, -y);
	}

	// vector * scalar
	inline Vector2<T> operator *(T t) const
	{
		return Vector2<T>(x * t, y * t);
	}

	// vector / scalar
	inline Vector2<T> operator /(T t) const
	{
		return Vector2<T>(x / t, y / t);
	}

	// adds scalar to both components
	inline Vector2<T> operator +(T t) const
	{
		return Vector2<T>(x + t, y + t);
	}

	// subtracts scalar from both components
	inline Vector2<T> operator -(T t) const
	{
		return Vector2<T>(x - t, y - t);
	}

	// in-place component-wise addition; returns copy of the result
	inline Vector2<T> operator +=(const Vector2<T> &r_v_vec)
	{
		x += r_v_vec.x;
		y += r_v_vec.y;
		return *this;
	}

	// in-place component-wise subtraction; returns copy of the result
	inline Vector2<T> operator -=(const Vector2<T> &r_v_vec)
	{
		x -= r_v_vec.x;
		y -= r_v_vec.y;
		return *this;
	}

	// in-place component-wise multiplication; returns copy of the result
	inline Vector2<T> operator *=(const Vector2<T> &r_v_vec)
	{
		x *= r_v_vec.x;
		y *= r_v_vec.y;
		return *this;
	}

	// in-place component-wise division; returns copy of the result
	inline Vector2<T> operator /=(const Vector2<T> &r_v_vec)
	{
		x /= r_v_vec.x;
		y /= r_v_vec.y;
		return *this;
	}

	// in-place multiplication by scalar; returns copy of the result
	inline Vector2<T> operator *=(T t)
	{
		x *= t;
		y *= t;
		return *this;
	}

	// in-place division by scalar; returns copy of the result
	inline Vector2<T> operator /=(T t)
	{
		x /= t;
		y /= t;
		return *this;
	}

	// in-place addition of scalar to both components; returns copy of the result
	inline Vector2<T> operator +=(T t)
	{
		x += t;
		y += t;
		return *this;
	}

	// in-place subtraction of scalar from both components; returns copy of the result
	inline Vector2<T> operator -=(T t)
	{
		x -= t;
		y -= t;
		return *this;
	}

	// dot product
	inline T f_Dot(const Vector2<T> &r_v_vec) const
	{
		return x * r_v_vec.x + y * r_v_vec.y;
	}

	// returns vector (-y, x), which is perpendicular to this one
	inline Vector2<T> v_Orthogonal() const
	{
		return Vector2<T>(-y, x);
	}

	// output swizzles, written out explicitly (same functions swizzle2_out would generate)
	inline Vector2<T> v_xx() const { return Vector2<T>(x, x); }
	inline Vector2<T> v_xy() const { return Vector2<T>(x, y); }
	inline Vector2<T> v_yx() const { return Vector2<T>(y, x); }
	inline Vector2<T> v_yy() const { return Vector2<T>(y, y); }
};

typedef Vector2<float> Vector2f;
typedef Vector2<int> Vector2i;

/*
 *								=== ~Vector2 ===
 */

/*
 *								=== Vector3 ===
 */

template <class T> struct Vector3 {
	T x, y, z;

	inline Vector3()
	{}

	inline Vector3(T t_x, T t_y, T t_z)
		:x(t_x), y(t_y), z(t_z)
	{}

	inline Vector3(Vector2<T> &r_v_vec, T t_z)
		:x(r_v_vec.x), y(r_v_vec.y), z(t_z)
	{}

	inline Vector3(T t_x, Vector2<T> &r_v_vec)
		:x(t_x), y(r_v_vec.x), z(r_v_vec.y)
	{}

	inline T f_Length() const
	{
		return (T)sqrt(x * x + y * y + z * z);
	}

	inline T f_Length2() const
	{
		return x * x + y * y + z * z;
	}

	inline T operator [](int n_index) const
	{
		if(&x + 1 == &y && &x + 2 == &z) // should optimize away in release mode
			return *(&x + n_index);
		else {
			if(n_index == 0)
				return x;
			else if(n_index == 1)
				return y;
			else
				return z;
		}
	}

	inline T &operator [](int n_index)
	{
		if(&x + 1 == &y && &x + 2 == &z) // should optimize away in release mode
			return *(&x + n_index);
		else {
			if(n_index == 0)
				return x;
			else if(n_index == 1)
				return y;
			else
				return z;
		}
	}

	inline void Normalize()
	{
		T t = f_Length();
		
		if(t != 0) {
			t = 1 / t;
			x *= t;
			y *= t;
			z *= t;
		}
	}

	inline void Normalize(T t_len)
	{
		T t = f_Length();

		if(t != 0) {
			t = t_len / t;
			x *= t;
			y *= t;
			z *= t;
		}
	}

	inline bool operator ==(const Vector3<T> &r_v_vec) const
	{
		return x == r_v_vec.x && y == r_v_vec.y && z == r_v_vec.z;
	}

	inline Vector3<T> operator +(const Vector3<T> &r_v_vec) const
	{
		return Vector3<T>(x + r_v_vec.x, y + r_v_vec.y, z + r_v_vec.z);
	}

	inline Vector3<T> operator -(const Vector3<T> &r_v_vec) const
	{
		return Vector3<T>(x - r_v_vec.x, y - r_v_vec.y, z - r_v_vec.z);
	}

	inline Vector3<T> operator *(const Vector3<T> &r_v_vec) const
	{
		return Vector3<T>(x * r_v_vec.x, y * r_v_vec.y, z * r_v_vec.z);
	}

	inline Vector3<T> operator /(const Vector3<T> &r_v_vec) const
	{
		return Vector3<T>(x / r_v_vec.x, y / r_v_vec.y, z / r_v_vec.z);
	}

	inline Vector3<T> operator -() const
	{
		return Vector3<T>(-x, -y, -z);
	}

	inline Vector3<T> operator *(T t) const
	{
		return Vector3<T>(x * t, y * t, z * t);
	}

	inline Vector3<T> operator /(T t) const
	{
		t = 1 / t;
		return Vector3<T>(x * t, y * t, z * t);
	}

	inline Vector3<T> operator +(T t) const
	{
		return Vector3<T>(x + t, y + t, z + t);
	}

	inline Vector3<T> operator -(T t) const
	{
		return Vector3<T>(x - t, y - t, z - t);
	}

	inline Vector3<T> operator +=(const Vector3<T> &r_v_vec)
	{
		x += r_v_vec.x;
		y += r_v_vec.y;
		z += r_v_vec.z;
		return *this;
	}

	inline Vector3<T> operator -=(const Vector3<T> &r_v_vec)
	{
		x -= r_v_vec.x;
		y -= r_v_vec.y;
		z -= r_v_vec.z;
		return *this;
	}

	inline Vector3<T> operator *=(const Vector3<T> &r_v_vec)
	{
		x *= r_v_vec.x;
		y *= r_v_vec.y;
		z *= r_v_vec.z;
		return *this;
	}

	inline Vector3<T> operator /=(const Vector3<T> &r_v_vec)
	{
		x /= r_v_vec.x;
		y /= r_v_vec.y;
		z /= r_v_vec.z;
		return *this;
	}

	inline Vector3<T> operator *=(T t)
	{
		x *= t;
		y *= t;
		z *= t;
		return *this;
	}

	inline Vector3<T> operator /=(T t)
	{
		t = 1 / t;
		x *= t;
		y *= t;
		z *= t;
		return *this;
	}

	inline Vector3<T> operator +=(T t)
	{
		x += t;
		y += t;
		z += t;
		return *this;
	}

	inline Vector3<T> operator -=(T t)
	{
		x -= t;
		y -= t;
		z -= t;
		return *this;
	}

	inline T f_Dot(const Vector3<T> &r_v_vec) const // dot product
	{
		return x * r_v_vec.x + y * r_v_vec.y + z * r_v_vec.z;
	}

	inline Vector3<T> v_Cross(const Vector3<T> &r_v_vec) const // cross product
	{
		return Vector3<T>(r_v_vec.y * z - r_v_vec.z * y, r_v_vec.z * x - r_v_vec.x * z, r_v_vec.x * y - r_v_vec.y * x);
	}

	inline Vector3<T> Cross(const Vector3<T> &r_v_vec) // cross product
	{
		T tx, ty;

		tx = r_v_vec.y * z - r_v_vec.z * y;
		ty = r_v_vec.z * x - r_v_vec.x * z;
		z = r_v_vec.x * y - r_v_vec.y * x;
		x = tx;
		y = ty;
		return *this;
	}

	float f_TE(const Vector3<T> &r_v_normal, const Vector3<T> &r_v_transmission,
		float f_n1_to_mi1, float f_n2_to_mi2) const
	{
		/*
		Fresnel law for transmission of light,
		polarized prependicular to surface of a dielectric

		n1, n2 are dielectric constants
		mi1, mi2 are magnetic permeabilities

		t_|_ = (2 * n1 / mi1 * cos(psi_i)) / (n1 / mi1 * cos(psi_i) + n2 / mi2 * cos(psi_t))
		*/

		float f_cos_psi_i = f_Dot(r_v_normal);
		float f_cos_psi_t = -r_v_normal.f_Dot(r_v_transmission);

		return 2 * f_n1_to_mi1 * f_cos_psi_i /
			(f_n1_to_mi1 * f_cos_psi_i + f_n2_to_mi2 * f_cos_psi_t);
	}

	float f_TM(const Vector3<T> &r_v_normal, const Vector3<T> &r_v_transmission,
		float f_n1_to_mi1, float f_n2_to_mi2) const
	{
		/*
		Fresnel law for transmission of light,
		polarized parallel to surface of a dielectric

		n1, n2 are dielectric constants
		mi1, mi2 are magnetic permeabilities

		t|| = (2 * n1 / mi1 * cos(psi_i)) / (n1 / mi1 * cos(psi_t) + n2 / mi2 * cos(psi_i))
		*/

		float f_cos_psi_i = f_Dot(r_v_normal);
		float f_cos_psi_t = -r_v_normal.f_Dot(r_v_transmission);

		return 2 * f_n1_to_mi1 * f_cos_psi_i /
			(f_n1_to_mi1 * f_cos_psi_t + f_n2_to_mi2 * f_cos_psi_i);
	}

	/*
	 *	float f_Fresnel_T(const Vector3<T> &r_v_normal, const Vector3<T> &r_v_transmission,
	 *		float f_n1_to_mi1, float f_n2_to_mi2)
	 *		- calculates transmission for ray that goes trough surface with normal r_v_normal
	 *		  and refracts in direction r_v_transmission (reflectance is 1 - transmission)
	 *		- f_n1_to_mi1 = n1 / mi1 = dielectric constant / magnetic permeability
	 *		- todo - find out if it actually works, look for some common constants
	 */
	float f_Fresnel_T(const Vector3<T> &r_v_normal, const Vector3<T> &r_v_transmission,
		float f_n1_to_mi1, float f_n2_to_mi2) const
	{
		// Fresnel law for transmission of unpolarized light t = .5 * (f_TM^2 + f_TE^2)

		float f_cos_psi_i = f_Dot(r_v_normal);
		float f_cos_psi_t = -r_v_normal.f_Dot(r_v_transmission);

		float f_te = 2 * f_n1_to_mi1 * f_cos_psi_i /
			(f_n1_to_mi1 * f_cos_psi_i + f_n2_to_mi2 * f_cos_psi_t);
		float f_tm = 2 * f_n1_to_mi1 * f_cos_psi_i /
			(f_n1_to_mi1 * f_cos_psi_t + f_n2_to_mi2 * f_cos_psi_i);

		return .5f * (f_te * f_te + f_tm * f_tm);
	}

	/*
	 *	inline Vector3<T> v_Refraction(const Vector3<T> &r_v_normal, float f_eta)
	 *		- calcualte refraction of vector coming trough surface with normal r_v_normal
	 *		- f_eta = index of refraction of first environment to index of refraction of second
	 *		- n(vacuum) = 1, n(air) = 1.00029, n(water) = 1.33,
	 *		  n(glass) = 1.52 - 1.62, n(diamond) = 2.417, n(plastic) = 1.46 - 1.55
	 */
	inline Vector3<T> v_Refraction(const Vector3<T> &r_v_normal, float f_eta) const
	{
		Vector3<T> v_tangent = v_Orthogonal(r_v_normal);
		float f_sin2_psi2 = f_eta * f_eta * (1.0f -
			r_v_normal.f_Dot(*this) * r_v_normal.f_Dot(*this));
		return (r_v_normal * -(float)sqrt(1.0f - f_sin2_psi2)) +
			   (v_tangent * (float)sqrt(f_sin2_psi2));
		// v_transmitted = cos(psi2) * (-r_v_normal) + sin(psi2) * v_tangent
	}

	inline Vector3<T> Refract(const Vector3<T> &r_v_normal, float f_eta)
	{
		return (*this = v_Refraction(r_v_normal, f_eta));
	}

	inline Vector3<T> v_Reflection(const Vector3<T> &r_v_normal) const
	{
		return (*this) - ((r_v_normal * f_Dot(r_v_normal)) * 2.0f);
	}

	inline Vector3<T> Reflect(const Vector3<T> &r_v_normal)
	{
		return ((*this) -= ((r_v_normal * f_Dot(r_v_normal)) * 2.0f));
	}

	inline Vector3<T> v_Orthogonal(const Vector3<T> &r_v_normal) const
	{
		Vector3<T> v_tangent;
		v_tangent = *this - r_v_normal * f_Dot(r_v_normal);
		v_tangent.Normalize();
		return v_tangent;
	}

	inline Vector3<T> Orthogonalize(const Vector3<T> &r_v_normal)
	{
		*this -= r_v_normal * f_Dot(r_v_normal);
		Normalize();
		return *this;
	}

	swizzle2_out(x, y, v_xy)
	swizzle2_out(x, z, v_xz)
	swizzle2_out(y, x, v_yx)
	swizzle2_out(y, z, v_yz)
	swizzle2_out(z, x, v_zx)
	swizzle2_out(z, y, v_zy)
	swizzle2_out(x, x, v_xx)
	swizzle2_out(y, y, v_yy)
	swizzle2_out(z, z, v_zz)

	swizzle2_in(x, y, xy)
	swizzle2_in(x, z, xz)
	swizzle2_in(y, z, yz)

	swizzle3_out(x, x, x, v_xxx)
	swizzle3_out(x, x, y, v_xxy)
	swizzle3_out(x, x, z, v_xxz)
	swizzle3_out(x, y, x, v_xyx)
	swizzle3_out(x, y, y, v_xyy)
	swizzle3_out(x, y, z, v_xyz)
	swizzle3_out(x, z, x, v_xzx)
	swizzle3_out(x, z, y, v_xzy)
	swizzle3_out(x, z, z, v_xzz)
	swizzle3_out(y, x, x, v_yxx)
	swizzle3_out(y, x, y, v_yxy)
	swizzle3_out(y, x, z, v_yxz)
	swizzle3_out(y, y, x, v_yyx)
	swizzle3_out(y, y, y, v_yyy)
	swizzle3_out(y, y, z, v_yyz)
	swizzle3_out(y, z, x, v_yzx)
	swizzle3_out(y, z, y, v_yzy)
	swizzle3_out(y, z, z, v_yzz)
	swizzle3_out(z, x, x, v_zxx)
	swizzle3_out(z, x, y, v_zxy)
	swizzle3_out(z, x, z, v_zxz)
	swizzle3_out(z, y, x, v_zyx)
	swizzle3_out(z, y, y, v_zyy)
	swizzle3_out(z, y, z, v_zyz)
	swizzle3_out(z, z, x, v_zzx)
	swizzle3_out(z, z, y, v_zzy)
	swizzle3_out(z, z, z, v_zzz)
};

typedef Vector3<float> Vector3f;
typedef Vector3<int> Vector3i;

/*
 *								=== ~Vector3 ===
 */

/*
 *								=== Vector4 ===
 */

template <class T> struct Vector4 {
	T x, y, z, w;

	inline Vector4()
	{}

	inline Vector4(T t_x, T t_y, T t_z, T t_w)
		:x(t_x), y(t_y), z(t_z), w(t_w)
	{}

	inline Vector4(Vector2<T> &r_t_vec, T t_z, T t_w)
		:x(r_t_vec.x), y(r_t_vec.y), z(t_z), w(t_w)
	{}

	inline Vector4(T t_x, Vector2<T> &r_t_vec, T t_w)
		:x(t_x), y(r_t_vec.x), z(r_t_vec.y), w(t_w)
	{}

	inline Vector4(T t_x, T t_y, Vector2<T> &r_t_vec)
		:x(t_x), y(t_y), z(r_t_vec.x), w(r_t_vec.y)
	{}

	inline Vector4(Vector2<T> &r_t_vec, Vector2<T> &r_t_vec2)
		:x(r_t_vec.x), y(r_t_vec.y), z(r_t_vec2.x), w(r_t_vec2.y)
	{}

	inline Vector4(Vector3<T> &r_t_vec, T t_w)
		:x(r_t_vec.x), y(r_t_vec.y), z(r_t_vec.z), w(t_w)
	{}

	inline Vector4(T t_x, Vector3<T> &r_t_vec)
		:x(t_x), y(r_t_vec.x), z(r_t_vec.y), w(r_t_vec.z)
	{}

	inline T f_Length() const
	{
		return (T)sqrt(x * x + y * y + z * z + w * w);
	}

	inline T f_Length2() const
	{
		return x * x + y * y + z * z + w * w;
	}

	inline T operator [](int n_index) const
	{
		if(&x + 1 == &y && &x + 2 == &z && &x + 3 == &w) // should optimize away in release mode
			return *(&x + n_index);
		else {
			if(n_index == 0)
				return x;
			else if(n_index == 1)
				return y;
			else if(n_index == 2)
				return z;
			else
				return w;
		}
	}

	inline T &operator [](int n_index)
	{
		if(&x + 1 == &y && &x + 2 == &z && &x + 3 == &w) // should optimize away in release mode
			return *(&x + n_index);
		else {
			if(n_index == 0)
				return x;
			else if(n_index == 1)
				return y;
			else if(n_index == 2)
				return z;
			else
				return w;
		}
	}

	inline void Normalize()
	{
		T t = f_Length();
		
		if(t != 0) {
			t = 1 / t;
			x *= t;
			y *= t;
			z *= t;
			w *= t;
		}
	}

	inline void Normalize(T t_len)
	{
		T t = f_Length();

		if(t != 0) {
			t = t_len / t;
			x *= t;
			y *= t;
			z *= t;
			y *= t;
		}
	}

	inline bool operator ==(const Vector4<T> &r_v_vec) const
	{
		return x == r_v_vec.x && y == r_v_vec.y && z == r_v_vec.z && w == r_v_vec.w;
	}

	inline Vector4<T> operator +(const Vector4<T> &r_v_vec) const
	{
		return Vector4<T>(x + r_v_vec.x, y + r_v_vec.y, z + r_v_vec.z, w + r_v_vec.w);
	}

	inline Vector4<T> operator -(const Vector4<T> &r_v_vec) const
	{
		return Vector4<T>(x - r_v_vec.x, y - r_v_vec.y, z - r_v_vec.z, w - r_v_vec.w);
	}

	inline Vector4<T> operator *(const Vector4<T> &r_v_vec) const
	{
		return Vector4<T>(x * r_v_vec.x, y * r_v_vec.y, z * r_v_vec.z, w * r_v_vec.w);
	}

	inline Vector4<T> operator /(const Vector4<T> &r_v_vec) const
	{
		return Vector4<T>(x / r_v_vec.x, y / r_v_vec.y, z / r_v_vec.z, w / r_v_vec.w);
	}

	inline Vector4<T> operator -() const
	{
		return Vector4<T>(-x, -y, -z, -w);
	}

	inline Vector4<T> operator *(T t) const
	{
		return Vector4<T>(x * t, y * t, z * t, w * t);
	}

	inline Vector4<T> operator /(T t) const
	{
		t = 1 / t;
		return Vector4<T>(x * t, y * t, z * t, w * t);
	}

	inline Vector4<T> operator +(T t) const
	{
		return Vector4<T>(x + t, y + t, z + t, w + t);
	}

	inline Vector4<T> operator -(T t) const
	{
		return Vector4<T>(x - t, y - t, z - t, w - t);
	}

	inline Vector4<T> operator +=(const Vector4<T> &r_v_vec)
	{
		x += r_v_vec.x;
		y += r_v_vec.y;
		z += r_v_vec.z;
		w += r_v_vec.w;
		return *this;
	}

	inline Vector4<T> operator -=(const Vector4<T> &r_v_vec)
	{
		x -= r_v_vec.x;
		y -= r_v_vec.y;
		z -= r_v_vec.z;
		w -= r_v_vec.w;
		return *this;
	}

	inline Vector4<T> operator *=(const Vector4<T> &r_v_vec)
	{
		x *= r_v_vec.x;
		y *= r_v_vec.y;
		z *= r_v_vec.z;
		w *= r_v_vec.w;
		return *this;
	}

	inline Vector4<T> operator /=(const Vector4<T> &r_v_vec)
	{
		x /= r_v_vec.x;
		y /= r_v_vec.y;
		z /= r_v_vec.z;
		w /= r_v_vec.w;
		return *this;
	}

	inline Vector4<T> operator *=(T t)
	{
		x *= t;
		y *= t;
		z *= t;
		w *= t;
		return *this;
	}

	inline Vector4<T> operator /=(T t)
	{
		t = 1 / t;
		x *= t;
		y *= t;
		z *= t;
		w *= t;
		return *this;
	}

	inline Vector4<T> operator +=(T t)
	{
		x += t;
		y += t;
		z += t;
		w += t;
		return *this;
	}

	inline Vector4<T> operator -=(T t)
	{
		x -= t;
		y -= t;
		z -= t;
		w -= t;
		return *this;
	}

	inline T f_Dot(const Vector4<T> &r_v_vec) const
	{
		return x * r_v_vec.x + y * r_v_vec.y + z * r_v_vec.z + w * r_v_vec.w;
	}

	swizzle4_out(x, x, x, x, v_xxxx)
	swizzle4_out(x, x, x, y, v_xxxy)
	swizzle4_out(x, x, x, z, v_xxxz)
	swizzle4_out(x, x, x, w, v_xxxw)
	swizzle4_out(x, x, y, x, v_xxyx)
	swizzle4_out(x, x, y, y, v_xxyy)
	swizzle4_out(x, x, y, z, v_xxyz)
	swizzle4_out(x, x, y, w, v_xxyw)
	swizzle4_out(x, x, z, x, v_xxzx)
	swizzle4_out(x, x, z, y, v_xxzy)
	swizzle4_out(x, x, z, z, v_xxzz)
	swizzle4_out(x, x, z, w, v_xxzw)
	swizzle4_out(x, x, w, x, v_xxwx)
	swizzle4_out(x, x, w, y, v_xxwy)
	swizzle4_out(x, x, w, z, v_xxwz)
	swizzle4_out(x, x, w, w, v_xxww)
	swizzle4_out(x, y, x, x, v_xyxx)
	swizzle4_out(x, y, x, y, v_xyxy)
	swizzle4_out(x, y, x, z, v_xyxz)
	swizzle4_out(x, y, x, w, v_xyxw)
	swizzle4_out(x, y, y, x, v_xyyx)
	swizzle4_out(x, y, y, y, v_xyyy)
	swizzle4_out(x, y, y, z, v_xyyz)
	swizzle4_out(x, y, y, w, v_xyyw)
	swizzle4_out(x, y, z, x, v_xyzx)
	swizzle4_out(x, y, z, y, v_xyzy)
	swizzle4_out(x, y, z, z, v_xyzz)
	swizzle4_out(x, y, z, w, v_xyzw)
	swizzle4_out(x, y, w, x, v_xywx)
	swizzle4_out(x, y, w, y, v_xywy)
	swizzle4_out(x, y, w, z, v_xywz)
	swizzle4_out(x, y, w, w, v_xyww)
	swizzle4_out(x, z, x, x, v_xzxx)
	swizzle4_out(x, z, x, y, v_xzxy)
	swizzle4_out(x, z, x, z, v_xzxz)
	swizzle4_out(x, z, x, w, v_xzxw)
	swizzle4_out(x, z, y, x, v_xzyx)
	swizzle4_out(x, z, y, y, v_xzyy)
	swizzle4_out(x, z, y, z, v_xzyz)
	swizzle4_out(x, z, y, w, v_xzyw)
	swizzle4_out(x, z, z, x, v_xzzx)
	swizzle4_out(x, z, z, y, v_xzzy)
	swizzle4_out(x, z, z, z, v_xzzz)
	swizzle4_out(x, z, z, w, v_xzzw)
	swizzle4_out(x, z, w, x, v_xzwx)
	swizzle4_out(x, z, w, y, v_xzwy)
	swizzle4_out(x, z, w, z, v_xzwz)
	swizzle4_out(x, z, w, w, v_xzww)
	swizzle4_out(x, w, x, x, v_xwxx)
	swizzle4_out(x, w, x, y, v_xwxy)
	swizzle4_out(x, w, x, z, v_xwxz)
	swizzle4_out(x, w, x, w, v_xwxw)
	swizzle4_out(x, w, y, x, v_xwyx)
	swizzle4_out(x, w, y, y, v_xwyy)
	swizzle4_out(x, w, y, z, v_xwyz)
	swizzle4_out(x, w, y, w, v_xwyw)
	swizzle4_out(x, w, z, x, v_xwzx)
	swizzle4_out(x, w, z, y, v_xwzy)
	swizzle4_out(x, w, z, z, v_xwzz)
	swizzle4_out(x, w, z, w, v_xwzw)
	swizzle4_out(x, w, w, x, v_xwwx)
	swizzle4_out(x, w, w, y, v_xwwy)
	swizzle4_out(x, w, w, z, v_xwwz)
	swizzle4_out(x, w, w, w, v_xwww)
	swizzle4_out(y, x, x, x, v_yxxx)
	swizzle4_out(y, x, x, y, v_yxxy)
	swizzle4_out(y, x, x, z, v_yxxz)
	swizzle4_out(y, x, x, w, v_yxxw)
	swizzle4_out(y, x, y, x, v_yxyx)
	swizzle4_out(y, x, y, y, v_yxyy)
	swizzle4_out(y, x, y, z, v_yxyz)
	swizzle4_out(y, x, y, w, v_yxyw)
	swizzle4_out(y, x, z, x, v_yxzx)
	swizzle4_out(y, x, z, y, v_yxzy)
	swizzle4_out(y, x, z, z, v_yxzz)
	swizzle4_out(y, x, z, w, v_yxzw)
	swizzle4_out(y, x, w, x, v_yxwx)
	swizzle4_out(y, x, w, y, v_yxwy)
	swizzle4_out(y, x, w, z, v_yxwz)
	swizzle4_out(y, x, w, w, v_yxww)
	swizzle4_out(y, y, x, x, v_yyxx)
	swizzle4_out(y, y, x, y, v_yyxy)
	swizzle4_out(y, y, x, z, v_yyxz)
	swizzle4_out(y, y, x, w, v_yyxw)
	swizzle4_out(y, y, y, x, v_yyyx)
	swizzle4_out(y, y, y, y, v_yyyy)
	swizzle4_out(y, y, y, z, v_yyyz)
	swizzle4_out(y, y, y, w, v_yyyw)
	swizzle4_out(y, y, z, x, v_yyzx)
	swizzle4_out(y, y, z, y, v_yyzy)
	swizzle4_out(y, y, z, z, v_yyzz)
	swizzle4_out(y, y, z, w, v_yyzw)
	swizzle4_out(y, y, w, x, v_yywx)
	swizzle4_out(y, y, w, y, v_yywy)
	swizzle4_out(y, y, w, z, v_yywz)
	swizzle4_out(y, y, w, w, v_yyww)
	swizzle4_out(y, z, x, x, v_yzxx)
	swizzle4_out(y, z, x, y, v_yzxy)
	swizzle4_out(y, z, x, z, v_yzxz)
	swizzle4_out(y, z, x, w, v_yzxw)
	swizzle4_out(y, z, y, x, v_yzyx)
	swizzle4_out(y, z, y, y, v_yzyy)
	swizzle4_out(y, z, y, z, v_yzyz)
	swizzle4_out(y, z, y, w, v_yzyw)
	swizzle4_out(y, z, z, x, v_yzzx)
	swizzle4_out(y, z, z, y, v_yzzy)
	swizzle4_out(y, z, z, z, v_yzzz)
	swizzle4_out(y, z, z, w, v_yzzw)
	swizzle4_out(y, z, w, x, v_yzwx)
	swizzle4_out(y, z, w, y, v_yzwy)
	swizzle4_out(y, z, w, z, v_yzwz)
	swizzle4_out(y, z, w, w, v_yzww)
	swizzle4_out(y, w, x, x, v_ywxx)
	swizzle4_out(y, w, x, y, v_ywxy)
	swizzle4_out(y, w, x, z, v_ywxz)
	swizzle4_out(y, w, x, w, v_ywxw)
	swizzle4_out(y, w, y, x, v_ywyx)
	swizzle4_out(y, w, y, y, v_ywyy)
	swizzle4_out(y, w, y, z, v_ywyz)
	swizzle4_out(y, w, y, w, v_ywyw)
	swizzle4_out(y, w, z, x, v_ywzx)
	swizzle4_out(y, w, z, y, v_ywzy)
	swizzle4_out(y, w, z, z, v_ywzz)
	swizzle4_out(y, w, z, w, v_ywzw)
	swizzle4_out(y, w, w, x, v_ywwx)
	swizzle4_out(y, w, w, y, v_ywwy)
	swizzle4_out(y, w, w, z, v_ywwz)
	swizzle4_out(y, w, w, w, v_ywww)
	swizzle4_out(z, x, x, x, v_zxxx)
	swizzle4_out(z, x, x, y, v_zxxy)
	swizzle4_out(z, x, x, z, v_zxxz)
	swizzle4_out(z, x, x, w, v_zxxw)
	swizzle4_out(z, x, y, x, v_zxyx)
	swizzle4_out(z, x, y, y, v_zxyy)
	swizzle4_out(z, x, y, z, v_zxyz)
	swizzle4_out(z, x, y, w, v_zxyw)
	swizzle4_out(z, x, z, x, v_zxzx)
	swizzle4_out(z, x, z, y, v_zxzy)
	swizzle4_out(z, x, z, z, v_zxzz)
	swizzle4_out(z, x, z, w, v_zxzw)
	swizzle4_out(z, x, w, x, v_zxwx)
	swizzle4_out(z, x, w, y, v_zxwy)
	swizzle4_out(z, x, w, z, v_zxwz)
	swizzle4_out(z, x, w, w, v_zxww)
	swizzle4_out(z, y, x, x, v_zyxx)
	swizzle4_out(z, y, x, y, v_zyxy)
	swizzle4_out(z, y, x, z, v_zyxz)
	swizzle4_out(z, y, x, w, v_zyxw)
	swizzle4_out(z, y, y, x, v_zyyx)
	swizzle4_out(z, y, y, y, v_zyyy)
	swizzle4_out(z, y, y, z, v_zyyz)
	swizzle4_out(z, y, y, w, v_zyyw)
	swizzle4_out(z, y, z, x, v_zyzx)
	swizzle4_out(z, y, z, y, v_zyzy)
	swizzle4_out(z, y, z, z, v_zyzz)
	swizzle4_out(z, y, z, w, v_zyzw)
	swizzle4_out(z, y, w, x, v_zywx)
	swizzle4_out(z, y, w, y, v_zywy)
	swizzle4_out(z, y, w, z, v_zywz)
	swizzle4_out(z, y, w, w, v_zyww)
	swizzle4_out(z, z, x, x, v_zzxx)
	swizzle4_out(z, z, x, y, v_zzxy)
	swizzle4_out(z, z, x, z, v_zzxz)
	swizzle4_out(z, z, x, w, v_zzxw)
	swizzle4_out(z, z, y, x, v_zzyx)
	swizzle4_out(z, z, y, y, v_zzyy)
	swizzle4_out(z, z, y, z, v_zzyz)
	swizzle4_out(z, z, y, w, v_zzyw)
	swizzle4_out(z, z, z, x, v_zzzx)
	swizzle4_out(z, z, z, y, v_zzzy)
	swizzle4_out(z, z, z, z, v_zzzz)
	swizzle4_out(z, z, z, w, v_zzzw)
	swizzle4_out(z, z, w, x, v_zzwx)
	swizzle4_out(z, z, w, y, v_zzwy)
	swizzle4_out(z, z, w, z, v_zzwz)
	swizzle4_out(z, z, w, w, v_zzww)
	swizzle4_out(z, w, x, x, v_zwxx)
	swizzle4_out(z, w, x, y, v_zwxy)
	swizzle4_out(z, w, x, z, v_zwxz)
	swizzle4_out(z, w, x, w, v_zwxw)
	swizzle4_out(z, w, y, x, v_zwyx)
	swizzle4_out(z, w, y, y, v_zwyy)
	swizzle4_out(z, w, y, z, v_zwyz)
	swizzle4_out(z, w, y, w, v_zwyw)
	swizzle4_out(z, w, z, x, v_zwzx)
	swizzle4_out(z, w, z, y, v_zwzy)
	swizzle4_out(z, w, z, z, v_zwzz)
	swizzle4_out(z, w, z, w, v_zwzw)
	swizzle4_out(z, w, w, x, v_zwwx)
	swizzle4_out(z, w, w, y, v_zwwy)
	swizzle4_out(z, w, w, z, v_zwwz)
	swizzle4_out(z, w, w, w, v_zwww)
	swizzle4_out(w, x, x, x, v_wxxx)
	swizzle4_out(w, x, x, y, v_wxxy)
	swizzle4_out(w, x, x, z, v_wxxz)
	swizzle4_out(w, x, x, w, v_wxxw)
	swizzle4_out(w, x, y, x, v_wxyx)
	swizzle4_out(w, x, y, y, v_wxyy)
	swizzle4_out(w, x, y, z, v_wxyz)
	swizzle4_out(w, x, y, w, v_wxyw)
	swizzle4_out(w, x, z, x, v_wxzx)
	swizzle4_out(w, x, z, y, v_wxzy)
	swizzle4_out(w, x, z, z, v_wxzz)
	swizzle4_out(w, x, z, w, v_wxzw)
	swizzle4_out(w, x, w, x, v_wxwx)
	swizzle4_out(w, x, w, y, v_wxwy)
	swizzle4_out(w, x, w, z, v_wxwz)
	swizzle4_out(w, x, w, w, v_wxww)
	swizzle4_out(w, y, x, x, v_wyxx)
	swizzle4_out(w, y, x, y, v_wyxy)
	swizzle4_out(w, y, x, z, v_wyxz)
	swizzle4_out(w, y, x, w, v_wyxw)
	swizzle4_out(w, y, y, x, v_wyyx)
	swizzle4_out(w, y, y, y, v_wyyy)
	swizzle4_out(w, y, y, z, v_wyyz)
	swizzle4_out(w, y, y, w, v_wyyw)
	swizzle4_out(w, y, z, x, v_wyzx)
	swizzle4_out(w, y, z, y, v_wyzy)
	swizzle4_out(w, y, z, z, v_wyzz)
	swizzle4_out(w, y, z, w, v_wyzw)
	swizzle4_out(w, y, w, x, v_wywx)
	swizzle4_out(w, y, w, y, v_wywy)
	swizzle4_out(w, y, w, z, v_wywz)
	swizzle4_out(w, y, w, w, v_wyww)
	swizzle4_out(w, z, x, x, v_wzxx)
	swizzle4_out(w, z, x, y, v_wzxy)
	swizzle4_out(w, z, x, z, v_wzxz)
	swizzle4_out(w, z, x, w, v_wzxw)
	swizzle4_out(w, z, y, x, v_wzyx)
	swizzle4_out(w, z, y, y, v_wzyy)
	swizzle4_out(w, z, y, z, v_wzyz)
	swizzle4_out(w, z, y, w, v_wzyw)
	swizzle4_out(w, z, z, x, v_wzzx)
	swizzle4_out(w, z, z, y, v_wzzy)
	swizzle4_out(w, z, z, z, v_wzzz)
	swizzle4_out(w, z, z, w, v_wzzw)
	swizzle4_out(w, z, w, x, v_wzwx)
	swizzle4_out(w, z, w, y, v_wzwy)
	swizzle4_out(w, z, w, z, v_wzwz)
	swizzle4_out(w, z, w, w, v_wzww)
	swizzle4_out(w, w, x, x, v_wwxx)
	swizzle4_out(w, w, x, y, v_wwxy)
	swizzle4_out(w, w, x, z, v_wwxz)
	swizzle4_out(w, w, x, w, v_wwxw)
	swizzle4_out(w, w, y, x, v_wwyx)
	swizzle4_out(w, w, y, y, v_wwyy)
	swizzle4_out(w, w, y, z, v_wwyz)
	swizzle4_out(w, w, y, w, v_wwyw)
	swizzle4_out(w, w, z, x, v_wwzx)
	swizzle4_out(w, w, z, y, v_wwzy)
	swizzle4_out(w, w, z, z, v_wwzz)
	swizzle4_out(w, w, z, w, v_wwzw)
	swizzle4_out(w, w, w, x, v_wwwx)
	swizzle4_out(w, w, w, y, v_wwwy)
	swizzle4_out(w, w, w, z, v_wwwz)
	swizzle4_out(w, w, w, w, v_wwww)

	swizzle3_out(x, x, x, v_xxx)
	swizzle3_out(x, x, y, v_xxy)
	swizzle3_out(x, x, z, v_xxz)
	swizzle3_out(x, x, w, v_xxw)
	swizzle3_out(x, y, x, v_xyx)
	swizzle3_out(x, y, y, v_xyy)
	swizzle3_out(x, y, z, v_xyz)
	swizzle3_out(x, y, w, v_xyw)
	swizzle3_out(x, z, x, v_xzx)
	swizzle3_out(x, z, y, v_xzy)
	swizzle3_out(x, z, z, v_xzz)
	swizzle3_out(x, z, w, v_xzw)
	swizzle3_out(x, w, x, v_xwx)
	swizzle3_out(x, w, y, v_xwy)
	swizzle3_out(x, w, z, v_xwz)
	swizzle3_out(x, w, w, v_xww)
	swizzle3_out(y, x, x, v_yxx)
	swizzle3_out(y, x, y, v_yxy)
	swizzle3_out(y, x, z, v_yxz)
	swizzle3_out(y, x, w, v_yxw)
	swizzle3_out(y, y, x, v_yyx)
	swizzle3_out(y, y, y, v_yyy)
	swizzle3_out(y, y, z, v_yyz)
	swizzle3_out(y, y, w, v_yyw)
	swizzle3_out(y, z, x, v_yzx)
	swizzle3_out(y, z, y, v_yzy)
	swizzle3_out(y, z, z, v_yzz)
	swizzle3_out(y, z, w, v_yzw)
	swizzle3_out(y, w, x, v_ywx)
	swizzle3_out(y, w, y, v_ywy)
	swizzle3_out(y, w, z, v_ywz)
	swizzle3_out(y, w, w, v_yww)
	swizzle3_out(z, x, x, v_zxx)
	swizzle3_out(z, x, y, v_zxy)
	swizzle3_out(z, x, z, v_zxz)
	swizzle3_out(z, x, w, v_zxw)
	swizzle3_out(z, y, x, v_zyx)
	swizzle3_out(z, y, y, v_zyy)
	swizzle3_out(z, y, z, v_zyz)
	swizzle3_out(z, y, w, v_zyw)
	swizzle3_out(z, z, x, v_zzx)
	swizzle3_out(z, z, y, v_zzy)
	swizzle3_out(z, z, z, v_zzz)
	swizzle3_out(z, z, w, v_zzw)
	swizzle3_out(z, w, x, v_zwx)
	swizzle3_out(z, w, y, v_zwy)
	swizzle3_out(z, w, z, v_zwz)
	swizzle3_out(z, w, w, v_zww)
	swizzle3_out(w, x, x, v_wxx)
	swizzle3_out(w, x, y, v_wxy)
	swizzle3_out(w, x, z, v_wxz)
	swizzle3_out(w, x, w, v_wxw)
	swizzle3_out(w, y, x, v_wyx)
	swizzle3_out(w, y, y, v_wyy)
	swizzle3_out(w, y, z, v_wyz)
	swizzle3_out(w, y, w, v_wyw)
	swizzle3_out(w, z, x, v_wzx)
	swizzle3_out(w, z, y, v_wzy)
	swizzle3_out(w, z, z, v_wzz)
	swizzle3_out(w, z, w, v_wzw)
	swizzle3_out(w, w, x, v_wwx)
	swizzle3_out(w, w, y, v_wwy)
	swizzle3_out(w, w, z, v_wwz)
	swizzle3_out(w, w, w, v_www)

	swizzle2_out(x, x, v_xx)
	swizzle2_out(x, y, v_xy)
	swizzle2_out(x, z, v_xz)
	swizzle2_out(x, w, v_xw)
	swizzle2_out(y, x, v_yx)
	swizzle2_out(y, y, v_yy)
	swizzle2_out(y, z, v_yz)
	swizzle2_out(y, w, v_yw)
	swizzle2_out(z, x, v_zx)
	swizzle2_out(z, y, v_zy)
	swizzle2_out(z, z, v_zz)
	swizzle2_out(z, w, v_zw)
	swizzle2_out(w, x, v_wx)
	swizzle2_out(w, y, v_wy)
	swizzle2_out(w, z, v_wz)
	swizzle2_out(w, w, v_ww)

	swizzle2_in(x, y, xy)
	swizzle2_in(x, z, xz)
	swizzle2_in(x, w, xw)
	swizzle2_in(y, z, yz)
	swizzle2_in(y, w, yw)
	swizzle2_in(z, w, zw)

	swizzle3_in(x, y, z, xyz)
	swizzle3_in(x, y, w, xyw)
	swizzle3_in(x, z, w, xzw)
	swizzle3_in(y, z, w, yzw)
};

typedef Vector4<int> Vector4i;
typedef Vector4<float> Vector4f;
typedef Vector4<float> Color4f;

/*
 *								=== ~Vector4 ===
 */

/*
 *								=== Plane3f ===
 */

/*
 *	enum EPlanePos
 *		- result of classifying a point, triangle or polygon against a plane
 *		- NOTE(review): which half-space counts as "front" (presumably the one
 *		  the plane normal points into) is decided by the Plane3f implementation,
 *		  which is not visible here - confirm
 */
enum EPlanePos {
	plane_Front, // everything lies in the front half-space
	plane_Back, // everything lies in the back half-space
	plane_Onplane, // nothing lies in front of or behind the plane (within epsilon)
	plane_Split // parts lie in front of the plane and parts behind it
};

/*
 *	struct Plane3f
 *		- plane in 3D space, stored as a normal vector and a scalar distance term
 *		- NOTE(review): the sign convention of f_dist and whether v_normal is kept
 *		  unit-length are defined by the implementation, which is not visible
 *		  in this header - confirm before relying on either
 */
struct Plane3f {
	Vector3f v_normal; // plane normal
	float f_dist; // scalar term of the plane equation

	/*
	 *	inline Plane3f::Plane3f()
	 *		- default constructor; leaves v_normal and f_dist uninitialized
	 */
	inline Plane3f() {}

	/*
	 *	Plane3f::Plane3f(const Vector3f &r_v_norm, float _f_dist)
	 *		- creates plane from normal r_v_norm and distance term _f_dist
	 */
	Plane3f(const Vector3f &r_v_norm, float _f_dist);

	/*
	 *	Plane3f::Plane3f(const Vector3f &r_v_pos, const Vector3f &r_v_norm)
	 *		- creates plane from point r_v_pos and normal r_v_norm
	 *		- note the argument order: position first, normal second
	 */
	Plane3f(const Vector3f &r_v_pos, const Vector3f &r_v_norm);

	/*
	 *	Plane3f::Plane3f(const Vector3f &r_v_u, const Vector3f &r_v_v, const Vector3f &r_v_pos)
	 *		- NOTE(review): presumably creates plane passing through r_v_pos and
	 *		  spanned by direction vectors r_v_u and r_v_v - confirm against implementation
	 */
	Plane3f(const Vector3f &r_v_u, const Vector3f &r_v_v, const Vector3f &r_v_pos);

	/*
	 *	EPlanePos Plane3f::n_Tri_Pos(const Vector3f &r_v_a, const Vector3f &r_v_b,
	 *		const Vector3f &r_v_c, float f_epsilon_ex = f_epsilon) const
	 *		- classifies triangle (r_v_a, r_v_b, r_v_c) against this plane
	 *		- f_epsilon_ex is explicit epsilon (defaults to global f_epsilon)
	 */
	EPlanePos n_Tri_Pos(const Vector3f &r_v_a,
		const Vector3f &r_v_b, const Vector3f &r_v_c, float f_epsilon_ex = f_epsilon) const;

	/*
	 *	EPlanePos Plane3f::n_Vector_Pos(const Vector3f &r_v_vec, float f_epsilon_ex = f_epsilon) const
	 *		- classifies point r_v_vec against this plane
	 *		- f_epsilon_ex is explicit epsilon (defaults to global f_epsilon)
	 */
	EPlanePos n_Vector_Pos(const Vector3f &r_v_vec, float f_epsilon_ex = f_epsilon) const;

	/*
	 *	float Plane3f::f_Vector_Dist(const Vector3f &r_v_vec) const
	 *		- NOTE(review): presumably signed distance of point r_v_vec
	 *		  from this plane - confirm against implementation
	 */
	float f_Vector_Dist(const Vector3f &r_v_vec) const;

	/*
	 *	float Plane3f::f_Vector_Dot(const Vector3f &r_v_vec) const
	 *		- NOTE(review): presumably dot product of r_v_vec with plane
	 *		  normal (no distance term) - confirm against implementation
	 */
	float f_Vector_Dot(const Vector3f &r_v_vec) const;

	/*
	 *	bool Plane3f::Intersect_Ray(Vector3f &r_v_intersection,
	 *		const Vector3f &r_v_org, const Vector3f &r_v_dir) const
	 *		- intersects ray with origin r_v_org and direction r_v_dir with this plane,
	 *		  the intersection point is written to r_v_intersection
	 *		- NOTE(review): presumably returns false (leaving output unspecified)
	 *		  in case ray is parallel to the plane - confirm against implementation
	 */
	bool Intersect_Ray(Vector3f &r_v_intersection, const Vector3f &r_v_org, const Vector3f &r_v_dir) const;

	/*
	 *	bool Plane3f::Intersect_Ray_t(float &r_f_intersection_t,
	 *		const Vector3f &r_v_org, const Vector3f &r_v_dir) const
	 *		- same as Intersect_Ray(), but writes ray parameter
	 *		  to r_f_intersection_t instead of intersection position
	 *		- NOTE(review): presumably intersection = r_v_org + r_v_dir * t - confirm
	 */
	bool Intersect_Ray_t(float &r_f_intersection_t, const Vector3f &r_v_org, const Vector3f &r_v_dir) const;

	/*
	 *	bool Plane3f::operator ==(const Plane3f &r_t_plane) const
	 *		- plane comparison
	 *		- NOTE(review): whether the comparison is exact or epsilon-based
	 *		  is not visible here - confirm against implementation
	 */
	bool operator ==(const Plane3f &r_t_plane) const;
};

/*
 *								=== ~Plane3f ===
 */

/*
 *								=== Matrix4f ===
 */

/*
 *	struct Matrix4f
 *		- 4x4 column-major order matrix class (suitable for use with OpenGL)
 */
struct Matrix4f {
protected:
	float f[4][4]; // f[column][row]; 16 contiguous floats

public:
	/*
	 *	void Matrix4f::Identity()
	 *		- creates unit matrix (identity transformation)
	 *		- note this is not done automatically by constructor (there's none)
	 */
	void Identity();

	/*
	 *	void Matrix4f::Translation(const Vector3f &r_v_translate)
	 *		- creates translation matrix; r_v_translate is translation vector
	 */
	void Translation(const Vector3f &r_v_translate);

	/*
	 *	void Matrix4f::Translation(float f_translate_x, float f_translate_y, float f_translate_z)
	 *		- creates translation matrix;
	 *		  (f_translate_x, f_translate_y, f_translate_z) is translation vector
	 */
	void Translation(float f_translate_x, float f_translate_y, float f_translate_z);

	/*
	 *	void Matrix4f::Scaling(float f_scale)
	 *		- creates scaling matrix; f_scale is scaling factor (same for x, y and z)
	 */
	void Scaling(float f_scale);

	/*
	 *	void Matrix4f::Scaling(float f_scale_x, float f_scale_y, float f_scale_z)
	 *		- creates scaling matrix; f_scale_x, f_scale_y and f_scale_z are
	 *		  scaling factors for x, y and z, respectively
	 */
	void Scaling(float f_scale_x, float f_scale_y, float f_scale_z);

	/*
	 *	void Matrix4f::Scaling(const Vector3f &r_v_scale)
	 *		- creates scaling matrix; r_v_scale contains scaling factors for x, y and z
	 */
	void Scaling(const Vector3f &r_v_scale);

	/*
	 *	void Matrix4f::RotationX(float f_angle)
	 *		- creates matrix for rotation around x-axis; f_angle is angle in radians
	 */
	void RotationX(float f_angle);

	/*
	 *	void Matrix4f::RotationY(float f_angle)
	 *		- creates matrix for rotation around y-axis; f_angle is angle in radians
	 */
	void RotationY(float f_angle);

	/*
	 *	void Matrix4f::RotationZ(float f_angle)
	 *		- creates matrix for rotation around z-axis; f_angle is angle in radians
	 */
	void RotationZ(float f_angle);

	/*
	 *	void Matrix4f::Rotation(float f_angle, float f_axis_x, float f_axis_y, float f_axis_z)
	 *		- creates matrix for rotation around axis given
	 *		  by (f_axis_x, f_axis_y, f_axis_z), f_angle is angle in radians
	 */
	void Rotation(float f_angle, float f_axis_x, float f_axis_y, float f_axis_z);

	/*
	 *	void Matrix4f::Rotation(float f_angle, const Vector3f &r_v_axis)
	 *		- creates matrix for rotation around axis given by r_v_axis,
	 *		  f_angle is angle in radians
	 */
	void Rotation(float f_angle, const Vector3f &r_v_axis);

	/*
	 *	void Matrix4f::Translate(float f_translate_x, float f_translate_y, float f_translate_z)
	 *		- applies translation on this matrix; translation vector
	 *		  is given by (f_translate_x, f_translate_y, f_translate_z)
	 */
	void Translate(float f_translate_x, float f_translate_y, float f_translate_z);

	/*
	 *	void Matrix4f::Translate(const Vector3f &r_v_translate)
	 *		- applies translation on this matrix; r_v_translate is translation vector
	 */
	void Translate(const Vector3f &r_v_translate);

	/*
	 *	void Matrix4f::Scale(float f_scale)
	 *		- applies scaling on this matrix; f_scale is scaling factor (for all x, y and z)
	 */
	void Scale(float f_scale);

	/*
	 *	void Matrix4f::Scale(float f_scale_x, float f_scale_y, float f_scale_z)
	 *		- applies scaling on this matrix; f_scale_x, f_scale_y, f_scale_z
	 *		  are scaling factors for x, y and z, respectively
	 */
	void Scale(float f_scale_x, float f_scale_y, float f_scale_z);

	/*
	 *	void Matrix4f::Scale(const Vector3f &r_v_scale)
	 *		- applies scaling on this matrix; r_v_scale contains scaling factors for x, y and z
	 */
	void Scale(const Vector3f &r_v_scale);

	/*
	 *	void Matrix4f::RotateX(float f_angle)
	 *		- applies rotation f_angle radians around x-axis to this matrix
	 */
	void RotateX(float f_angle);

	/*
	 *	void Matrix4f::RotateY(float f_angle)
	 *		- applies rotation f_angle radians around y-axis to this matrix
	 */
	void RotateY(float f_angle);

	/*
	 *	void Matrix4f::RotateZ(float f_angle)
	 *		- applies rotation f_angle radians around z-axis to this matrix
	 */
	void RotateZ(float f_angle);

	/*
	 *	void Matrix4f::Rotate(float f_angle, float f_axis_x, float f_axis_y, float f_axis_z)
	 *		- applies rotation f_angle radians around axis given
	 *		  by (f_axis_x, f_axis_y, f_axis_z) to this matrix
	 */
	void Rotate(float f_angle, float f_axis_x, float f_axis_y, float f_axis_z);

	/*
	 *	void Matrix4f::Rotate(float f_angle, const Vector3f &r_v_axis)
	 *		- applies rotation f_angle radians around axis given by r_v_axis to this matrix
	 */
	void Rotate(float f_angle, const Vector3f &r_v_axis);

	/*
	 *	void Matrix4f::ProductOf(const Matrix4f &r_t_mat1, float f_factor)
	 *		- sets this matrix to r_t_mat1 multiplied by f_factor
	 *		  (this is what operator *(float) returns)
	 *		- NOTE(review): it is not visible here whether r_t_mat1 may
	 *		  be this matrix; use operator *=(float) for in-place scaling
	 */
	void ProductOf(const Matrix4f &r_t_mat1, float f_factor);

	/*
	 *	void Matrix4f::ProductOf(const Matrix4f &r_t_mat1, const Matrix4f &r_t_mat2)
	 *		- sets this matrix to r_t_mat1 * r_t_mat2
	 *		  (this is what operator *(const Matrix4f&) returns)
	 *		- NOTE(review): it is not visible here whether either operand may
	 *		  be this matrix; use operator *=(const Matrix4f&) for in-place product
	 */
	void ProductOf(const Matrix4f &r_t_mat1, const Matrix4f &r_t_mat2);

	/*
	 *	Matrix4f Matrix4f::operator *(float f_factor) const
	 *		- returns element-wise multiplication of matrix and f_factor
	 */
	inline Matrix4f operator *(float f_factor) const
	{
		Matrix4f t_mult;
		t_mult.ProductOf(*this, f_factor);
		return t_mult;
	}

	/*
	 *	Matrix4f &Matrix4f::operator *=(float f_factor)
	 *		- element-wise multiplies this matrix by
	 *		  f_factor and returns reference to this
	 *		- note this is faster, than <tt>*this = *this * f_factor;</tt>.
	 */
	Matrix4f &operator *=(float f_factor);

	/*
	 *	Matrix4f Matrix4f::operator *(const Matrix4f &r_t_mat) const
	 *		- returns multiplication of this matrix and r_t_mat
	 */
	inline Matrix4f operator *(const Matrix4f &r_t_mat) const
	{
		Matrix4f t_mult;
		t_mult.ProductOf(*this, r_t_mat);
		return t_mult;
	}

	/*
	 *	Matrix4f &Matrix4f::operator *=(const Matrix4f &r_t_mat)
	 *		- multiplies this matrix by r_t_mat and returns reference to this
	 *		- note this is better optimized, than just <tt>*this = *this * r_t_mat;</tt>.
	 */
	Matrix4f &operator *=(const Matrix4f &r_t_mat);

	/*
	 *	Vector4f Matrix4f::operator *(const Vector4f &r_v_vec) const
	 *		- vector-matrix multiplication
	 *		- returns this * r_v_vec
	 */
	Vector4f operator *(const Vector4f &r_v_vec) const;

	/*
	 *	Vector3f Matrix4f::v_Transform_Pos(const Vector3f &r_v_vec) const
	 *		- transforms position r_v_vec by this matrix
	 *		- equivalent to multiplying this matrix by Vector4f(r_v_vec, 1)
	 */
	Vector3f v_Transform_Pos(const Vector3f &r_v_vec) const;

	/*
	 *	Vector3f Matrix4f::v_Transform_Dir(const Vector3f &r_v_vec) const
	 *		- transforms direction r_v_vec by this matrix
	 *		- equivalent to multiplying this matrix by Vector4f(r_v_vec, 0)
	 */
	Vector3f v_Transform_Dir(const Vector3f &r_v_vec) const;

	/*
	 *	float Matrix4f::f_Subdet(int n_col, int n_row) const
	 *		- returns determinant of this matrix with column n_col and row
	 *		  n_row left out (so it calculates 3x3 matrix determinant)
	 *		- note the result is not multiplied by (-1)^(n_col + n_row)
	 */
	float f_Subdet(int n_col, int n_row) const;

	/*
	 *	float Matrix4f::f_Determinant() const
	 *		- returns determinant of this matrix
	 *		- note it uses subdeterminants, it is optimized for matrices
	 *		  having zeros in the last row (common transformation matrices)
	 */
	float f_Determinant() const;

	/*
	 *	void Matrix4f::FastInvert()
	 *		- inverts this matrix (uses adjugate matrix method)
	 *		- note this is optimized for matrices with bottom row equal to 0 0 0 1
	 *		  (common transformation matrices), this will give faulty output for
	 *		  other matrices; use FullInvert() instead
	 */
	inline void FastInvert()
	{
		Matrix4f t_inverse;
		FastInverseTo(t_inverse);
		*this = t_inverse;
	}

	/*
	 *	void Matrix4f::FastInverseTo(Matrix4f &r_dest) const
	 *		- writes inverse of this matrix to r_dest (this matrix is not changed)
	 *		- the same restrictions as for FastInvert() apply
	 *		- r_dest should be a different object than this matrix
	 *		  (the in-place wrappers in this struct always go through a temporary)
	 */
	void FastInverseTo(Matrix4f &r_dest) const;

	/*
	 *	void Matrix4f::FastInverseOf(const Matrix4f &r_src)
	 *		- sets this matrix to inverse of r_src
	 *		- the same restrictions as for FastInvert() apply
	 *		- r_src may be this matrix (equivalent to calling FastInvert())
	 */
	inline void FastInverseOf(const Matrix4f &r_src)
	{
		if(&r_src == this) {
			FastInvert(); // source and destination alias, must go through a temporary
			return;
		}
		r_src.FastInverseTo(*this);
	}

	/*
	 *	Matrix4f Matrix4f::t_FastInverse() const
	 *		- returns inverse of this matrix (uses adjugate matrix method)
	 *		- note this is optimized for matrices with bottom row equal to 0 0 0 1
	 *		  (common transformation matrices), this will give faulty output for
	 *		  other matrices; use t_FullInverse() instead
	 */
	inline Matrix4f t_FastInverse() const
	{
		Matrix4f t_inverse;
		FastInverseTo(t_inverse);
		return t_inverse;
	}

	/*
	 *	void Matrix4f::FullInvert()
	 *		- inverts this matrix (uses adjugate matrix method)
	 *		- note full here means unoptimized, FastInvert() can be used to invert
	 *		  matrices with bottom row equal to 0 0 0 1 (common transformation
	 *		  matrices) more optimally
	 */
	inline void FullInvert()
	{
		Matrix4f t_inverse;
		FullInverseTo(t_inverse);
		*this = t_inverse;
	}

	/*
	 *	void Matrix4f::FullInvertNoTranspose()
	 *		- replaces this matrix by the result of FullInverseNoTransposeTo()
	 *		- NOTE(review): presumably the inverse with the final transposition left
	 *		  out (i.e. transposed inverse) - confirm against implementation
	 */
	inline void FullInvertNoTranspose()
	{
		Matrix4f t_inverse;
		FullInverseNoTransposeTo(t_inverse);
		*this = t_inverse;
	}

	/*
	 *	Matrix4f Matrix4f::t_FullInverse() const
	 *		- returns inverse of this matrix (uses adjugate matrix method)
	 *		- note full here means unoptimized, t_FastInverse() can be used to invert
	 *		  matrices with bottom row equal to 0 0 0 1 (common transformation
	 *		  matrices) more optimally
	 */
	inline Matrix4f t_FullInverse() const
	{
		Matrix4f t_inverse;
		FullInverseTo(t_inverse);
		return t_inverse;
	}

	/*
	 *	Matrix4f Matrix4f::t_FullInverseNoTranspose() const
	 *		- returns the result of FullInverseNoTransposeTo(), this matrix is not changed
	 */
	inline Matrix4f t_FullInverseNoTranspose() const
	{
		Matrix4f t_inverse;
		FullInverseNoTransposeTo(t_inverse);
		return t_inverse;
	}

	/*
	 *	void Matrix4f::FullInverseTo(Matrix4f &r_dest) const
	 *		- writes inverse of this matrix to r_dest (this matrix is not changed)
	 *		- r_dest should be a different object than this matrix
	 */
	void FullInverseTo(Matrix4f &r_dest) const;

	/*
	 *	void Matrix4f::FullInverseNoTransposeTo(Matrix4f &r_dest) const
	 *		- "no transpose" variant of FullInverseTo(), writes result to r_dest
	 *		- NOTE(review): exact result (presumably transposed inverse)
	 *		  is not visible in this header - confirm against implementation
	 *		- r_dest should be a different object than this matrix
	 */
	void FullInverseNoTransposeTo(Matrix4f &r_dest) const;

	/*
	 *	void Matrix4f::FullInverseOf(const Matrix4f &r_src)
	 *		- sets this matrix to inverse of r_src
	 *		- r_src may be this matrix (equivalent to calling FullInvert())
	 */
	inline void FullInverseOf(const Matrix4f &r_src)
	{
		if(&r_src == this) {
			FullInvert(); // source and destination alias, must go through a temporary
			return;
		}
		r_src.FullInverseTo(*this);
	}

	/*
	 *	void Matrix4f::FullInverseNoTransposeOf(const Matrix4f &r_src)
	 *		- sets this matrix to the result of r_src.FullInverseNoTransposeTo()
	 *		- r_src may be this matrix (equivalent to calling FullInvertNoTranspose())
	 */
	inline void FullInverseNoTransposeOf(const Matrix4f &r_src)
	{
		if(&r_src == this) {
			FullInvertNoTranspose(); // source and destination alias, must go through a temporary
			return;
		}
		r_src.FullInverseNoTransposeTo(*this);
	}

	/*
	 *	void Matrix4f::Transpose()
	 *		- transposes this matrix
	 *		- note this is better optimized, than just <tt>*this = t_Transpose();</tt>.
	 */
	void Transpose();

	/*
	 *	void Matrix4f::TransposeTo(Matrix4f &r_dest) const
	 *		- writes transposition of this matrix to r_dest (this matrix is not changed)
	 *		- r_dest should be a different object than this matrix; use Transpose() in-place
	 */
	void TransposeTo(Matrix4f &r_dest) const;

	/*
	 *	void Matrix4f::TransposeOf(const Matrix4f &r_src)
	 *		- sets this matrix to transposition of r_src
	 *		- r_src may be this matrix (equivalent to calling Transpose())
	 */
	inline void TransposeOf(const Matrix4f &r_src)
	{
		if(&r_src == this) {
			Transpose(); // source and destination alias, use the in-place version
			return;
		}
		r_src.TransposeTo(*this);
	}

	/*
	 *	Matrix4f Matrix4f::t_Transpose() const
	 *		- returns transposition of this matrix
	 */
	inline Matrix4f t_Transpose() const
	{
		Matrix4f t_transpose;
		TransposeTo(t_transpose);
		return t_transpose;
	}

	/*
	 *	inline float *Matrix4f::p_Data()
	 *		- returns pointer to the first of 16 contiguous matrix elements
	 *		  (element [0][0], followed by the rest of the first column)
	 */
	inline float *p_Data()
	{
		return &f[0][0];
	}

	/*
	 *	inline const float *Matrix4f::p_Data() const
	 *		- returns const pointer to the first of 16 contiguous matrix elements
	 */
	inline const float *p_Data() const
	{
		return &f[0][0];
	}

	/*
	 *	inline float *Matrix4f::operator [](int n_index)
	 *		- returns pointer to n_index-th column of this matrix (math notation)
	 *		- doesn't check array bounds (valid n_index is 0 to 3)
	 */
	inline float *operator [](int n_index) { return f[n_index]; }

	/*
	 *	inline const float *Matrix4f::operator [](int n_index) const
	 *		- returns const pointer to n_index-th column of this matrix (math notation)
	 *		- doesn't check array bounds (valid n_index is 0 to 3)
	 */
	inline const float *operator [](int n_index) const { return f[n_index]; }

	/*
	 *	inline Vector3f Matrix4f::v_Right() const
	 *		- returns local x-axis vector
	 */
	inline Vector3f v_Right() const { return Vector3f(f[0][0], f[0][1], f[0][2]); }

	/*
	 *	inline Vector3f Matrix4f::v_Up() const
	 *		- returns local y-axis vector
	 */
	inline Vector3f v_Up() const { return Vector3f(f[1][0], f[1][1], f[1][2]); }

	/*
	 *	inline Vector3f Matrix4f::v_Dir() const
	 *		- returns local z-axis vector
	 */
	inline Vector3f v_Dir() const { return Vector3f(f[2][0], f[2][1], f[2][2]); }

	/*
	 *	inline Vector3f Matrix4f::v_Offset() const
	 *		- returns offset vector
	 */
	inline Vector3f v_Offset() const { return Vector3f(f[3][0], f[3][1], f[3][2]); }

	/*
	 *	inline void Matrix4f::Right(const Vector3f &r_v_vec)
	 *		- sets x-axis vector r_v_vec
	 *		- the fourth element of the column is left untouched
	 */
	inline void Right(const Vector3f &r_v_vec)
	{
		f[0][0] = r_v_vec.x;
		f[0][1] = r_v_vec.y;
		f[0][2] = r_v_vec.z;
	}

	/*
	 *	inline void Matrix4f::Up(const Vector3f &r_v_vec)
	 *		- sets y-axis vector r_v_vec
	 *		- the fourth element of the column is left untouched
	 */
	inline void Up(const Vector3f &r_v_vec)
	{
		f[1][0] = r_v_vec.x;
		f[1][1] = r_v_vec.y;
		f[1][2] = r_v_vec.z;
	}

	/*
	 *	inline void Matrix4f::Dir(const Vector3f &r_v_vec)
	 *		- sets z-axis vector r_v_vec
	 *		- the fourth element of the column is left untouched
	 */
	inline void Dir(const Vector3f &r_v_vec)
	{
		f[2][0] = r_v_vec.x;
		f[2][1] = r_v_vec.y;
		f[2][2] = r_v_vec.z;
	}

	/*
	 *	inline void Matrix4f::Offset(const Vector3f &r_v_vec)
	 *		- sets offset vector r_v_vec
	 *		- note again, this *sets* offset, does *not* add vector to offset
	 *		- the fourth element of the column is left untouched
	 */
	inline void Offset(const Vector3f &r_v_vec)
	{
		f[3][0] = r_v_vec.x;
		f[3][1] = r_v_vec.y;
		f[3][2] = r_v_vec.z;
	}
};

/*
 *								=== ~Matrix4f ===
 */

/*
 *								=== Quat ===
 */

/*
 *	template <class T> class Quat
 *		- quaternion with vector part (x, y, z) and real part w
 *		- rotation-related functions (matrix / axis-angle conversions, Exp, Log,
 *		  Slerp, Squad) do not normalize anything, they expect unit quaternions
 */
template <class T>
class Quat {
public:
	T x, y, z, w;

	/*
	 *	inline Quat<T>::Quat()
	 *		- default constructor; leaves all components uninitialized
	 */
	inline Quat()
	{}

	/*
	 *	inline Quat<T>::Quat(T real)
	 *		- creates real quaternion (0, 0, 0, real)
	 *		- note this is not explicit, scalars implicitly convert to quaternions
	 */
	inline Quat(T real)
		:x(0), y(0), z(0), w(real)
	{}

	/*
	 *	inline Quat<T>::Quat(T _x, T _y, T _z, T _w)
	 *		- creates quaternion from its four components
	 */
	inline Quat(T _x, T _y, T _z, T _w)
		:x(_x), y(_y), z(_z), w(_w)
	{}

	/*
	 *	inline Quat<T>::Quat(const Quat<T> &r_t_quat)
	 *		- copy-constructor
	 */
	inline Quat(const Quat<T> &r_t_quat)
		:x(r_t_quat.x), y(r_t_quat.y), z(r_t_quat.z), w(r_t_quat.w)
	{}

	/*
	 *	Quat<T>::Quat(const Vector3<T> &r_v_axis, T f_angle)
	 *		- creates quaternion for rotation f_angle radians around r_v_axis
	 *		- r_v_axis is not normalized here, the result is unit quaternion
	 *		  only if r_v_axis is unit length
	 */
	Quat(const Vector3<T> &r_v_axis, T f_angle)
	{
		f_angle *= (T).5;
		w = (T)cos(f_angle);
		T f_sin = (T)sin(f_angle);
		x = r_v_axis.x * f_sin;
		y = r_v_axis.y * f_sin;
		z = r_v_axis.z * f_sin;
	}

	/*
	 *	Quat<T>::Quat(const Matrix4f &r_t_rot)
	 *		- creates quaternion from rotation matrix r_t_rot
	 *		- only the upper-left 3x3 part of the matrix is read
	 *		- the component calculated using square root is chosen based on matrix
	 *		  trace and the greatest diagonal element, the remaining three components
	 *		  are calculated from sums / differences of off-diagonal elements
	 *		- NOTE(review): element signs used here are transposed with respect to
	 *		  t_ToMatrix(), Quat<T>(q.t_ToMatrix()) gives the conjugate rotation of q;
	 *		  one of the two needs to be changed once the intended convention is confirmed
	 */
	Quat(const Matrix4f &r_t_rot)
	{
		const float *p_matrix = &r_t_rot[0][0];
		if(p_matrix[0 * 4 + 0] + p_matrix[1 * 4 + 1] + p_matrix[2 * 4 + 2] > 0) {
			T f_t = p_matrix[0 * 4 + 0] + p_matrix[1 * 4 + 1] + p_matrix[2 * 4 + 2] + 1;
			T f_s = (T).5 / (T)sqrt(f_t);
			w = f_s * f_t;
			z = (p_matrix[0 * 4 + 1] - p_matrix[1 * 4 + 0]) * f_s;
			y = (p_matrix[2 * 4 + 0] - p_matrix[0 * 4 + 2]) * f_s;
			x = (p_matrix[1 * 4 + 2] - p_matrix[2 * 4 + 1]) * f_s;
		} else if(p_matrix[0 * 4 + 0] > p_matrix[1 * 4 + 1] &&
		   p_matrix[0 * 4 + 0] > p_matrix[2 * 4 + 2]) {
			T f_t = p_matrix[0 * 4 + 0] - p_matrix[1 * 4 + 1] - p_matrix[2 * 4 + 2] + 1;
			T f_s = (T).5 / (T)sqrt(f_t);
			x = f_s * f_t;
			y = (p_matrix[0 * 4 + 1] + p_matrix[1 * 4 + 0]) * f_s;
			z = (p_matrix[2 * 4 + 0] + p_matrix[0 * 4 + 2]) * f_s;
			w = (p_matrix[1 * 4 + 2] - p_matrix[2 * 4 + 1]) * f_s;
		} else if(p_matrix[1 * 4 + 1] > p_matrix[2 * 4 + 2]) {
			T f_t = -p_matrix[0 * 4 + 0] + p_matrix[1 * 4 + 1] - p_matrix[2 * 4 + 2] + 1;
			T f_s = (T).5 / (T)sqrt(f_t);
			y = f_s * f_t;
			x = (p_matrix[0 * 4 + 1] + p_matrix[1 * 4 + 0]) * f_s;
			w = (p_matrix[2 * 4 + 0] - p_matrix[0 * 4 + 2]) * f_s;
			z = (p_matrix[1 * 4 + 2] + p_matrix[2 * 4 + 1]) * f_s;
		} else {
			T f_t = -p_matrix[0 * 4 + 0] - p_matrix[1 * 4 + 1] + p_matrix[2 * 4 + 2] + 1;
			T f_s = (T).5 / (T)sqrt(f_t);
			z = f_s * f_t;
			w = (p_matrix[0 * 4 + 1] - p_matrix[1 * 4 + 0]) * f_s;
			x = (p_matrix[2 * 4 + 0] + p_matrix[0 * 4 + 2]) * f_s;
			y = (p_matrix[1 * 4 + 2] + p_matrix[2 * 4 + 1]) * f_s;
		}
	}

	/*
	 *	Matrix4f Quat<T>::t_ToMatrix() const
	 *		- returns rotation matrix for this quaternion (assumes unit length)
	 *		- all 16 elements are written; the offset and the fourth element
	 *		  of each axis column are zero, element [3][3] is one
	 *		- NOTE(review): off-diagonal signs are transposed with respect to
	 *		  Quat(const Matrix4f&), see the note at that constructor
	 */
	Matrix4f t_ToMatrix() const
	{
		Matrix4f t_matrix;

		float *p_matrix = &t_matrix[0][0];
		p_matrix[3 * 4 + 0] = 0;
		p_matrix[3 * 4 + 1] = 0;
		p_matrix[3 * 4 + 2] = 0;
		// zero offset

		p_matrix[0 * 4 + 3] = 0; // fourth element of each axis column is zero for pure rotation
		p_matrix[1 * 4 + 3] = 0;
		p_matrix[2 * 4 + 3] = 0;
		p_matrix[3 * 4 + 3] = 1;
		// unit w

		T x2 = x + x;
		T y2 = y + y;
		T z2 = z + z;
		{
			T xx2 = x * x2;
			T yy2 = y * y2;
			T zz2 = z * z2;
			p_matrix[0 * 4 + 0] = 1.0f - yy2 - zz2;
			p_matrix[1 * 4 + 1] = 1.0f - xx2 - zz2;
			p_matrix[2 * 4 + 2] = 1.0f - xx2 - yy2;
		}
		{
			T yz2 = y * z2;
			T wx2 = w * x2;
			p_matrix[1 * 4 + 2] = yz2 - wx2;
			p_matrix[2 * 4 + 1] = yz2 + wx2;
		}
		{
			T xy2 = x * y2;
			T wz2 = w * z2;
			p_matrix[0 * 4 + 1] = xy2 - wz2;
			p_matrix[1 * 4 + 0] = xy2 + wz2;
		}
		{
			T xz2 = x * z2;
			T wy2 = w * y2;
			p_matrix[2 * 4 + 0] = xz2 - wy2;
			p_matrix[0 * 4 + 2] = xz2 + wy2;
		}

		return t_matrix;
	}

	/*
	 *	void Quat<T>::ToAxisAngle(Vector3<T> &r_t_axis, float &r_f_angle) const
	 *		- converts this (unit) quaternion to rotation axis and angle in radians
	 *		- r_t_axis is the normalized vector part, r_f_angle is 2 * acos(w)
	 *		- in case the vector part is (near) zero, there is no rotation;
	 *		  angle is set to zero and axis to (1, 0, 0)
	 */
	void ToAxisAngle(Vector3<T> &r_t_axis, float &r_f_angle) const
	{
		T f_length2 = x * x + y * y + z * z;
		if(f_length2 > f_epsilon) {
			r_f_angle = 2 * (T)acos(w);
			T f_inv_length = 1 / (T)sqrt(f_length2); // normalize the vector part
			r_t_axis.x = x * f_inv_length;
			r_t_axis.y = y * f_inv_length;
			r_t_axis.z = z * f_inv_length;
		} else {
			r_f_angle = 0; // fmod(angle, 2 * pi) = 0
			r_t_axis.x = 1; // no rotation so axis doesn't matter
			r_t_axis.y = 0;
			r_t_axis.z = 0;
		}
	}

	/*
	 *	inline Quat<T> &Quat<T>::operator =(const Quat<T> &r_t_quat)
	 *		- copies r_t_quat to this and returns reference to this
	 */
	inline Quat<T> &operator =(const Quat<T> &r_t_quat)
	{
		x = r_t_quat.x;
		y = r_t_quat.y;
		z = r_t_quat.z;
		w = r_t_quat.w;
		return *this;
	}

	/*
	 *	inline Quat<T> Quat<T>::operator -() const
	 *		- returns quaternion with all four components negated
	 */
	inline Quat<T> operator -() const
	{
		return Quat<T>(-x, -y, -z, -w);
	}

	/*
	 *	inline Quat<T> Quat<T>::operator +(const Quat<T> &r_t_quat) const
	 *		- returns component-wise sum of this and r_t_quat
	 */
	inline Quat<T> operator +(const Quat<T> &r_t_quat) const
	{
		return Quat<T>(x + r_t_quat.x, y + r_t_quat.y, z + r_t_quat.z, w + r_t_quat.w);
	}

	/*
	 *	inline Quat<T> &Quat<T>::operator +=(const Quat<T> &r_t_quat)
	 *		- adds r_t_quat to this (component-wise) and returns reference to this
	 */
	inline Quat<T> &operator +=(const Quat<T> &r_t_quat)
	{
		x += r_t_quat.x;
		y += r_t_quat.y;
		z += r_t_quat.z;
		w += r_t_quat.w;
		return *this;
	}

	/*
	 *	inline Quat<T> Quat<T>::operator -(const Quat<T> &r_t_quat) const
	 *		- returns component-wise difference of this and r_t_quat
	 */
	inline Quat<T> operator -(const Quat<T> &r_t_quat) const
	{
		return Quat<T>(x - r_t_quat.x, y - r_t_quat.y, z - r_t_quat.z, w - r_t_quat.w);
	}

	/*
	 *	inline Quat<T> &Quat<T>::operator -=(const Quat<T> &r_t_quat)
	 *		- subtracts r_t_quat from this (component-wise) and returns reference to this
	 */
	inline Quat<T> &operator -=(const Quat<T> &r_t_quat)
	{
		x -= r_t_quat.x;
		y -= r_t_quat.y;
		z -= r_t_quat.z;
		w -= r_t_quat.w;
		return *this;
	}

	/*
	 *	inline Quat<T> Quat<T>::operator *(T f_scale) const
	 *		- returns this quaternion with all components multiplied by f_scale
	 */
	inline Quat<T> operator *(T f_scale) const
	{
		return Quat<T>(x * f_scale, y * f_scale, z * f_scale, w * f_scale);
	}

	/*
	 *	inline Quat<T> &Quat<T>::operator *=(T f_scale)
	 *		- multiplies all components by f_scale and returns reference to this
	 *		- must not be const, it modifies this quaternion
	 */
	inline Quat<T> &operator *=(T f_scale)
	{
		x *= f_scale;
		y *= f_scale;
		z *= f_scale;
		w *= f_scale;
		return *this;
	}

	/*
	 *	Quat<T> Quat<T>::operator *(const Quat<T> &r_t_quat) const
	 *		- returns quaternion (Hamilton) product this * r_t_quat
	 *		- note quaternion product is not commutative
	 */
	Quat<T> operator *(const Quat<T> &r_t_quat) const
	{
		return Quat<T>(
			w * r_t_quat.x + x * r_t_quat.w + y * r_t_quat.z - z * r_t_quat.y,
			w * r_t_quat.y - x * r_t_quat.z + y * r_t_quat.w + z * r_t_quat.x,
			w * r_t_quat.z + x * r_t_quat.y - y * r_t_quat.x + z * r_t_quat.w,
			w * r_t_quat.w - x * r_t_quat.x - y * r_t_quat.y - z * r_t_quat.z);
	}

	/*
	 *	Quat<T> &Quat<T>::operator *=(const Quat<T> &r_t_quat)
	 *		- multiplies this by r_t_quat (this = this * r_t_quat)
	 *		  and returns reference to this
	 */
	Quat<T> &operator *=(const Quat<T> &r_t_quat)
	{
		T _x = w * r_t_quat.x + x * r_t_quat.w + y * r_t_quat.z - z * r_t_quat.y;
		T _y = w * r_t_quat.y - x * r_t_quat.z + y * r_t_quat.w + z * r_t_quat.x;
		T _z = w * r_t_quat.z + x * r_t_quat.y - y * r_t_quat.x + z * r_t_quat.w;
		w = w * r_t_quat.w - x * r_t_quat.x - y * r_t_quat.y - z * r_t_quat.z;
		// new x, y, z were calculated from the old values, w can be overwritten now

		x = _x;
		y = _y;
		z = _z;
		return *this;
	}

	/*
	 *	inline Quat<T> Quat<T>::t_Conjugate() const
	 *		- returns conjugate quaternion (negated vector part, the same real part)
	 */
	inline Quat<T> t_Conjugate() const
	{
		return Quat<T>(-x, -y, -z, w);
	}

	/*
	 *	inline T Quat<T>::f_Length2() const
	 *		- returns squared length (norm) of this quaternion
	 */
	inline T f_Length2() const
	{
		return x * x + y * y + z * z + w * w;
	}

	/*
	 *	inline T Quat<T>::f_Length() const
	 *		- returns length (norm) of this quaternion
	 */
	inline T f_Length() const
	{
		return (T)sqrt(x * x + y * y + z * z + w * w);
	}

	/*
	 *	inline void Quat<T>::Normalise()
	 *		- scales this quaternion to unit length
	 *		- doesn't check for zero length (division by zero)
	 */
	inline void Normalise()
	{
		T f_inv_length = 1 / f_Length();
		*this *= f_inv_length;
	}

	/*
	 *	inline Quat<T> Quat<T>::t_Inverse() const
	 *		- returns inverse quaternion (conjugate divided by squared length)
	 *		- doesn't check for zero length (division by zero)
	 */
	inline Quat<T> t_Inverse() const
	{
		return t_Conjugate() * (1 / f_Length2());
	}

	/*
	 *	Quat<T> Quat<T>::t_Exp() const
	 *		- returns exponential of this quaternion; exp(v * fi) = cos(fi) + v * sin(fi)
	 *		  where fi is length of the vector part and v is its direction
	 *		- the real part of this quaternion is ignored (it is supposed to be zero)
	 */
	Quat<T> t_Exp() const
	{
		//_ASSERTE(w == 0);

		T f_angle = (T)sqrt(x * x + y * y + z * z);
		T f_cos = (T)cos(f_angle);
		if(f_angle > f_epsilon) {
			T f_scale = (T)sin(f_angle) / f_angle;
			return Quat<T>(x * f_scale, y * f_scale, z * f_scale, f_cos);
		}
		// the test must be on angle and not on sine, sine is zero for angle = pi
		// as well and the vector part needs to be scaled to zero in such case

		return Quat<T>(x, y, z, f_cos); // lim(sin(a) / a) a -> 0 = 1
	}

	/*
	 *	Quat<T> Quat<T>::Exp()
	 *		- in-place version of t_Exp(); replaces this
	 *		  quaternion by its exponential and returns its copy
	 */
	Quat<T> Exp()
	{
		//_ASSERTE(w == 0);

		T f_angle = (T)sqrt(x * x + y * y + z * z);
		w = (T)cos(f_angle);
		if(f_angle > f_epsilon) {
			T f_scale = (T)sin(f_angle) / f_angle;
			x *= f_scale;
			y *= f_scale;
			z *= f_scale;
		} // no else because lim(sin(a) / a) a -> 0 = 1
		return *this;
	}

	/*
	 *	Quat<T> Quat<T>::t_Log() const
	 *		- returns logarithm of this (unit) quaternion;
	 *		  log(q) = log(cos(fi) + v * sin(fi)) = log(exp(v * fi)) = v * fi
	 *		- the result has zero real part
	 *		- in case w is outside the (-1, 1) interval or sine of the angle is
	 *		  near zero, the vector part is returned without scaling
	 */
	Quat<T> t_Log() const
	{
		if(w > -1 && w < 1) { // acos needs w in [-1, 1]; for w = +- 1 sine is zero and scale is not defined
			T f_angle = (T)acos(w);
			T f_sin = (T)sin(f_angle);
			T f_scale = (f_sin < -f_epsilon || f_sin > f_epsilon)?
				f_angle / f_sin : 1; // lim(a / sin(a)) a -> 0 = 1
			return Quat<T>(x * f_scale, y * f_scale, z * f_scale, 0);
		} else
			return Quat<T>(x, y, z, 0);
	}

	/*
	 *	Quat<T> Quat<T>::Log()
	 *		- in-place version of t_Log(); replaces this
	 *		  quaternion by its logarithm and returns its copy
	 */
	Quat<T> Log()
	{
		if(w > -1 && w < 1) { // acos needs w in [-1, 1]; for w = +- 1 sine is zero and scale is not defined
			T f_angle = (T)acos(w);
			T f_sin = (T)sin(f_angle);
			T f_scale = (f_sin < -f_epsilon || f_sin > f_epsilon)?
				f_angle / f_sin : 1; // lim(a / sin(a)) a -> 0 = 1
			x *= f_scale;
			y *= f_scale;
			z *= f_scale;
		}
		w = 0;
		return *this;
	}

	/*
	 *	inline T Quat<T>::f_Dot(const Quat<T> &r_t_quat) const
	 *		- returns four-component dot product of this and r_t_quat
	 */
	inline T f_Dot(const Quat<T> &r_t_quat) const
	{
		return x * r_t_quat.x + y * r_t_quat.y + z * r_t_quat.z + w * r_t_quat.w;
	}

	/*
	 *	void Quat<T>::Align(Vector3<T> v_original, Vector3<T> v_align_to)
	 *		- sets this quaternion to rotation, aligning v_original with v_align_to
	 *		- assumes input vectors are unit length
	 *		- in case the vectors are (nearly) opposite, the rotation axis is not
	 *		  determined by them; vector perpendicular to v_original is used
	 */
	void Align(Vector3<T> v_original, Vector3<T> v_align_to)
	{
		T f_cos_half_angle; // cos(angle / 2) where angle is rotation angle
		Vector3<T> v_axis; // sin(angle / 2) * rotation_axis

		Vector3<T> v_bisect = v_original + v_align_to;
		if(v_bisect.f_Length() > f_epsilon) {
			v_bisect.Normalize();
			f_cos_half_angle = v_bisect.f_Dot(v_original);
			v_axis = v_original.v_Cross(v_bisect); // cross-product has length of sin(angle / 2)
		} else {
			T f_half_angle = (T).5 * (T)acos(v_original.f_Dot(v_align_to)); // half angle will be near pi / 2
			f_cos_half_angle = (T)cos(f_half_angle); // cos will be near 0.0

			//v_axis = v_original.v_Cross(v_align_to);
			// cross product would yield zero vector (angle is pi, sin(pi) = 0)

			if(fabs(v_original.x) >= fabs(v_original.y)) {
				T f_inv_length = ((T)1.0) / v_original.v_xz().f_Length();
				v_axis = Vector3<T>(-v_original.z * f_inv_length, 0, v_original.x * f_inv_length);
			} else {
				T f_inv_length = ((T)1.0) / v_original.v_zy().f_Length();
				v_axis = Vector3<T>(0, v_original.z * f_inv_length, -v_original.y * f_inv_length);
			}
			// select greatest component and create perpendicular vector in 2D plane to be used as axis of rotation

			v_axis *= (T)sin(f_half_angle); // sin will be near 1.0
		}

		w = f_cos_half_angle;
		x = v_axis.x;
		y = v_axis.y;
		z = v_axis.z;
	}

	/*
	 *	Quat<T> Quat<T>::Slerp(T f_t, const Quat<T> &r_t_quat_q) const
	 *		- returns spherical linear interpolation between this (f_t = 0)
	 *		  and r_t_quat_q (f_t = 1); both are supposed to be unit quaternions
	 *		- returns copy of this in case the angle between quaternions is near zero;
	 *		  the same happens if rounding pushes the dot product outside of [-1, 1]
	 *		  (acos gives NaN then and both comparisons below fail)
	 *		- this quaternion is not modified
	 */
	Quat<T> Slerp(T f_t, const Quat<T> &r_t_quat_q) const
	{
		T f_cos = f_Dot(r_t_quat_q);
		T f_angle = (T)acos(f_cos);
		if(f_angle < -f_epsilon || f_angle > f_epsilon) {
			T f_sin = (T)sin(f_angle);
			T f_inv_sin = 1 / f_sin;
			T f_weight_p = (T)sin((1 - f_t) * f_angle) * f_inv_sin;
			T f_weight_q = (T)sin(f_t * f_angle) * f_inv_sin;
			return *this * f_weight_p + r_t_quat_q * f_weight_q;
		} else
			return *this; // zero rotation
	}

	/*
	 *	static Quat<T> Quat<T>::Squad(T f_t, const Quat<T> &r_t_quat_p,
	 *		const Quat<T> &r_t_quat_p0, const Quat<T> &r_t_quat_q0, const Quat<T> &r_t_quat_q)
	 *		- spherical cubic interpolation between r_t_quat_p (f_t = 0) and
	 *		  r_t_quat_q (f_t = 1); r_t_quat_p0 and r_t_quat_q0 are the inner control quaternions
	 *		- calculated as slerp between slerp(p, q) and slerp(p0, q0), using weight 2t(1 - t)
	 */
	static Quat<T> Squad(T f_t, const Quat<T> &r_t_quat_p, const Quat<T> &r_t_quat_p0,
		const Quat<T> &r_t_quat_q0, const Quat<T> &r_t_quat_q)
	{
		Quat<T> t_slerp_p = r_t_quat_p.Slerp(f_t, r_t_quat_q);
		Quat<T> t_slerp_q = r_t_quat_p0.Slerp(f_t, r_t_quat_q0);
		return t_slerp_p.Slerp(2 * f_t * (1 - f_t), t_slerp_q);
	}
};

/*
 *	convenience names for single and double precision quaternions
 */
typedef Quat<float> Quatf;
typedef Quat<double> Quatd;

/*
 *								=== ~Quat ===
 */

/*
 *								=== TVertex3f ===
 */

struct TVertex3f : public Vector3f {
	TVertex3f();
	TVertex3f(const Vector3f &r_v_vec)
		:Vector3f(r_v_vec)
	{}

	inline const Vector3f &v_Pos() const
	{
		return *this;
	}

	inline Vector3f &v_Pos()
	{
		return *this;
	}

	inline TVertex3f operator *(float f_scalar) const
	{
		return *this * f_scalar;
	}

	inline TVertex3f operator +(const TVertex3f &r_vertex) const
	{
		return *this + r_vertex;
	}

	TVertex3f t_Lerp(float f_t, const TVertex3f &r_t_right) const
	{
		return *this + (r_t_right - *this) * f_t;
	}
};

/*
 *								=== ~TVertex3f ===
 */

/*
 *								=== CPolygon ===
 */

template<class TVertStruct>
class CPolygon {
protected:
	std::vector<TVertStruct> m_vertex_list;
	Plane3f m_t_normal;

public:
	/*
	 *	void Delete()
	 *		- delete all polygon vertices
	 */
	void Delete()
	{
		m_vertex_list.clear();
	}

	/*
	 *	inline bool Add_Vertex(const TVertStruct &r_t_vertex)
	 *		- add single vertex past the last vertex in the array
	 */
	inline bool Add_Vertex(const TVertStruct &r_t_vertex)
	{
		return Add_Vertex(0, &r_t_vertex, 1);
	}

	/*
	 *	inline bool Add_Vertex(int n_insert_before, const TVertStruct &r_t_vertex)
	 *		- add single vertex before the n_insert_before-th vertex in the array
	 */
	inline bool Add_Vertex(int n_insert_before, const TVertStruct &r_t_vertex)
	{
		return Add_Vertex(n_insert_before, &r_t_vertex, 1);
	}

	/*
	 *	inline bool Add_Vertex(const TVertStruct *p_vertex, int n_count)
	 *		- add array of vertices past the last vertex in the array
	 */
	inline bool Add_Vertex(const TVertStruct *p_vertex, int n_count)
	{
		return Add_Vertex(0, p_vertex, n_count);
	}

	/*
	 *	bool Add_Vertex(int n_insert_before, const TVertStruct *p_vertex, int n_count)
	 *		- add array of vertices before the n_insert_before-th vertex in the array
	 */
	bool Add_Vertex(int n_insert_before, const TVertStruct *p_vertex, int n_count)
	{
		_ASSERTE(n_count > 0);
		_ASSERTE(n_insert_before >= 0 && n_insert_before < m_vertex_list.size());
		if(!stl_ut::Reserve_NMore(m_vertex_list, n_count))
			return false;
		m_vertex_list.insert(m_vertex_list.begin() + n_insert_before,
			p_vertex, p_vertex + n_count);
		return true;
	}

	/*
	 *	void Delete_Vertices(int n_index, int n_count)
	 *		- delete n_count vertices, beggining with vertex n_index
	 */
	void Delete_Vertices(int n_index, int n_count)
	{
		m_vertex_list.erase(m_vertex_list.begin() + n_index,
			m_vertex_list.begin() + (n_index + n_count));
	}

	/*
	 *	inline int n_Vertex_Num() const
	 *		- return number of vertices
	 */
	inline int n_Vertex_Num() const
	{
		return m_vertex_list.size();
	}

	/*
	 *	inline TVertStruct &r_t_Vertex(int n_index) const
	 *		- vertex access function
	 *		- doesn't check array bounds
	 */
	inline TVertStruct &r_t_Vertex(int n_index)
	{
		return m_vertex_list[n_index];
	}

	/*
	 *	inline TVertStruct &t_Vertex(int n_index) const
	 *		- vertex access function
	 *		- doesn't check array bounds
	 */
	inline const TVertStruct &t_Vertex(int n_index) const
	{
		return m_vertex_list[n_index];
	}

	/*
	 *	const Vector3f v_Center() const
	 *		- return position of center of polygon
	 *		- if polygon has no vertices, return the O vector
	 */
	const Vector3f v_Center() const
	{
		if(!m_vertex_list.size())
			return Vector3f(0, 0, 0);
		Vector3f v_center(0, 0, 0);
		for(int i = 0; i < m_vertex_list.size(); ++ i)
			v_center += m_vertex_list[i].v_Pos();
		v_center *= 1.0f / (float)m_vertex_list.size();

		return v_center;
	}

	/*
	 *	const Vector3f v_NearestPoint(const Vector3f &r_v_point) const
	 *		- return point, lying on polygon, nearest to <r_v_point>
	 *		- must have valid normal and >= 3 vertices
	 */
	const Vector3f v_NearestPoint(const Vector3f &r_v_point) const // todo - rewrite using std::for_each
	{
		Vector3f v_nearest;
		try {
			v_nearest = m_t_normal.v_Intersect_Ray(r_v_point, m_t_normal.v_normal);
		} catch(Plane3f::CRayIsParallel_Exception) {
			_ASSERTE(0); // normal should be never coplanar
		}
		// try point, projected perpendicularily

		if(m_vertex_list.size() < 3)
			return v_nearest;

		float f_distance;
		if(b_Contain_Point(v_nearest))
			f_distance = (v_nearest - r_v_point).f_Length2();
		else
			f_distance = -1;

		Vector3f v_prev = m_vertex_list[m_vertex_list.size() - 1].v_Pos();
		for(int i = 0; i < m_vertex_list.size(); v_prev = m_vertex_list[i ++].v_Pos()) {
			Vector3f v_cur = m_vertex_list[i].v_Pos();
			Vector3f v_edge = v_prev - v_cur, v_to_cur = r_v_point - v_cur;
			float f_len = v_edge.f_Length();
			v_edge *= 1.0 / f_len;
			float t = v_edge.f_Dot(v_to_cur); // projected to edge
			t = (t < 0)? 0 : ((t > f_len)? f_len : t); // clamp
			Vector3f v_near = v_cur + v_edge * t;

			if((v_near - r_v_point).f_Length2() < f_distance || f_distance < 0) {
				v_nearest = v_near;
				f_distance = (v_nearest - r_v_point).f_Length2();
			}
		}
		// try to find on edges ...

		return v_nearest;
	}

	/*
	 *	bool Calc_Normal()
	 *		- calculate normal, return false if there wasn't trinity
	 *		  of vertices to construct plane from
	 *		- it has to be called explicitly, it is never called by CPolygon itself
	 *		- normal state is saved onto disk, though
	 */
	bool Calc_Normal(float f_epsilon_ex = f_epsilon, float f_edge_epsilon_ex = f_edge_epsilon)
	{
		Vector3f v_u, v_v;
		Vector3f v_norm;

		if(m_vertex_list.size() < 3)
			return false;

		for(typename std::vector<TVertStruct>::const_iterator p_vertex_0 = m_vertex_list.begin(),
		   p_vertex_a = m_vertex_list.begin() + 1; p_vertex_a < m_vertex_list.end();
		   p_vertex_0 = p_vertex_a ++) {
			if((v_u = (*p_vertex_0).v_Pos() - (*p_vertex_a).v_Pos()).f_Length() >
			   f_edge_epsilon_ex) {
				for(typename std::vector<TVertStruct>::const_iterator p_vertex_b =
				   m_vertex_list.begin() + 1; p_vertex_b < m_vertex_list.end(); p_vertex_b ++) {
					if(p_vertex_a == p_vertex_b || p_vertex_b == p_vertex_0)
						continue;
					if((v_v = (*p_vertex_a).v_Pos() - (*p_vertex_b).v_Pos()).f_Length() >
					   f_edge_epsilon_ex) {
						v_norm = v_u.v_Cross(v_v);
						if(v_norm.f_Length() < f_epsilon_ex)
							continue;
						v_norm.Normalize();

						m_t_normal = Plane3f(m_vertex_list[0].v_Pos(), v_norm);
						return true;
					}
				}
			}
		}
		// seek such a vertices that does have distance between them and they aren't colinear

		return false;
	}

	/*
	 *	inline Plane3f &r_t_Normal()
	 *		- return reference to a normal
	 *		- when you are going to split polygons, it's better to
	 *		  overwrite normals because once polygons are too small, you
	 *		  can't safely compute them again (that's why normals are saved into file)
	 */
	inline Plane3f &r_t_Normal()
	{
		return m_t_normal;
	}

	/*
	 *	inline const Plane3f &t_Normal() const
	 *		- return normal
	 */
	inline const Plane3f &t_Normal() const
	{
		return m_t_normal;
	}

	/*
	 *	EPlanePos n_Plane_Pos(const Plane3f &t_plane, float f_epsilon_ex) const
	 *		- check position of polygon against plane, explicit epsilon
	 *		- possible return values are plane_Back, plane_Front, plane_Onplane or plane_Split
	 */
	EPlanePos n_Plane_Pos(const Plane3f &t_plane, float f_epsilon_ex = f_epsilon) const
	{
		bool b_front, b_back;

		for(int i = 0; i < m_vertex_list.size(); ++ i) {
			switch(t_plane.n_Vector_Pos(m_vertex_list[i].v_Pos(), f_epsilon_ex)) {
			case plane_Front:
				b_front = true;
				break;
			case plane_Back:
				b_back = true;
				break;
			}
		}
		if(!b_back) {
			if(!b_front)
				return plane_Onplane;
			return plane_Front;
		} else {
			if(!b_front)
				return plane_Back;
			return plane_Split;
		}
	}

	/*
	 *	bool Cut(const Plane3f &t_plane, EPlanePos n_desired_half, float f_epsilon_ex)
	 *		- seems to be working for non-convex polygons as well
	 *		- cuts polygon against plane so the rest lies completely
	 *		  in desired halfspace, defined by plane
	 *		- to perform actual cutting, n_desired_half must be one of plane_Back or plane_Front
	 *		  otherwise actual plane position is compared against n_desired_half
	 *		  and in case it differs, polygon is erased (otherwise polygon is kept as-is)
	 *		  i.e. for example in case plane_Split, only polygon that is split by plane is kept
	 *		- polygon, lying completely onplane is cut away! (in case cutting is performed at all)
	 *		- explicit epsilon
	 */
	bool Cut(const Plane3f &t_plane, EPlanePos n_desired_half, float f_epsilon_ex = f_epsilon)
	{
		// clips the vertex list in place against t_plane; the only failure path
		// (return false) is stl_ut::Reserve_1More() failing before a vertex is inserted

		if(m_vertex_list.size() < 3)
			return true;
		// it must be polygon (fewer than three vertices are left untouched, reported as success)

		EPlanePos n_side, n_prev_side;
		TVertStruct v_cur, v_prev;
		EPlanePos n_other_side;

		if(n_desired_half == plane_Front)
			n_other_side = plane_Back;
		else if(n_desired_half == plane_Back)
			n_other_side = plane_Front;
		else {
			// n_desired_half is neither plane_Front nor plane_Back: no cutting takes place,
			// the polygon is only classified and Delete() is called if the
			// classification differs from the requested one
			if(n_Plane_Pos(t_plane, f_epsilon_ex) != n_desired_half)
				Delete();
			return true;
		}
		// other half (the side that is going to be cut away)

		bool b_onplane_edges = false;
		// set when an edge with both endpoints onplane is met; triggers the second pass below

		v_prev = m_vertex_list.back();
		n_prev_side = t_plane.n_Vector_Pos(v_prev.v_Pos(), f_epsilon_ex);
		// start with the closing edge (last vertex -> first vertex)

		// walk all edges [v_prev, v_cur]; note that v_prev / n_prev_side are advanced in the
		// increment expression, so they always describe the original (unclipped) previous vertex,
		// even if that vertex has been erased or replaced in the list in the meantime
		for(int i = 0; i < m_vertex_list.size(); v_prev = v_cur, n_prev_side = n_side, ++ i) {
			v_cur = m_vertex_list[i];
			n_side = t_plane.n_Vector_Pos(v_cur.v_Pos(), f_epsilon_ex);
			// classify the current vertex
			if(n_prev_side == n_other_side) { // prev vertex was on the side to be cut ...
				if(n_side == n_desired_half) { // current vertex is on our side ... calc intersection & insert current vertex
					m_vertex_list[i] = v_cur;
					// (v_cur was just read from this very slot, so this stores the same value back)
					if(!stl_ut::Reserve_1More(m_vertex_list))
						return false;
					// make sure there is space for one more vertex before inserting

					// insert the edge / plane intersection in front of the current vertex; the ray
					// goes from v_cur towards v_prev and its parameter is used to interpolate
					// between the two vertices. the post-increment makes i point to the (shifted)
					// current vertex again.
					// NOTE(review): a raw element pointer is passed where the container's insert()
					// takes an iterator - presumably this only compiles with STL implementations
					// where the iterator is a plain pointer; confirm against the targeted compilers
					m_vertex_list.insert(&m_vertex_list[i ++], v_cur.t_Lerp(t_plane.f_Intersect_Ray_t(v_cur.v_Pos(), v_prev.v_Pos() - v_cur.v_Pos()), v_prev));
				} else if(n_side != plane_Onplane) // current vertex is not on our side either -> it's about to be erased
					m_vertex_list.erase(m_vertex_list.begin() + i --);
				// (post-decrement: the loop increment then lands on the vertex that moved into this slot)
				// otherwise current vertex is onplane and we're going to keep it as is
			} else { // prev vertex was on our side ...
				if(n_side == n_other_side) { // now, current vertex is on the other side
					if(n_prev_side != plane_Onplane) // prev vertex was on our side
						m_vertex_list[i] = v_cur.t_Lerp(t_plane.f_Intersect_Ray_t(v_cur.v_Pos(), v_prev.v_Pos() - v_cur.v_Pos()), v_prev);
					// (the current vertex is replaced by the edge / plane intersection)
					else // prev vertex was onplane -> erase cur. vertex
						m_vertex_list.erase(m_vertex_list.begin() + i --);
				}
				// otherwise cur vertex is on our side or on the edge so we'll keep it as is
			}
			if(n_side == plane_Onplane && n_prev_side == plane_Onplane)
				b_onplane_edges = true;
			// remember there was an edge lying on the plane
		}
		// cut polygon so only the part, lying on <n_desired_half> side of plane remains

		if(b_onplane_edges) {
			v_prev = m_vertex_list.back();
			n_prev_side = t_plane.n_Vector_Pos(v_prev.v_Pos(), f_epsilon_ex);
			// second pass over the already clipped vertex list, again starting with the closing edge
			for(int i = 0; i < m_vertex_list.size(); v_prev = v_cur, n_prev_side = n_side, ++ i) {
				v_cur = m_vertex_list[i];
				n_side = t_plane.n_Vector_Pos(v_cur.v_Pos(), f_epsilon_ex);

				if(n_side == plane_Onplane && n_prev_side == plane_Onplane) {
					// directions of the onplane edge entering v_cur and of the edge leaving it
					// (the successor index wraps around to the first vertex)
					Vector3f v_dir[2] = {v_cur.v_Pos() - v_prev.v_Pos(),
						m_vertex_list[(i + 1) % m_vertex_list.size()].v_Pos() - v_cur.v_Pos()};

					v_dir[0].Normalize();
					v_dir[1].Normalize();

					// dot product of the unit directions close to -1 means the outgoing edge
					// runs straight back over the incoming one; drop the vertex in between
					if(fabsf(v_dir[0].f_Dot(v_dir[1]) + 1.0f) < f_epsilon_ex) // ugly overlap
						m_vertex_list.erase(m_vertex_list.begin() + i --);
				}
			}
		}
		// check for onplane edges - there may occur edges, overlapping themselves
		// - works well for non-convex polygon situations
		// - works well for case of polygons whose all points are onplane (deletes them)

		if(m_vertex_list.size() < 3)
			m_vertex_list.clear();
		// what is left is not a polygon anymore, remove the leftover vertices

		return true;
	}

	/*
	 *	bool Split(const Plane3f &t_plane,
	 *		CPolygon<TVertStruct> &r_new_poly, EPlanePos n_desired_half, float f_epsilon_ex)
	 *		- cut <this> polygon to the n_desired_half halfspace, defined by plane,
	 *		  and cut r_new_poly (which receives a copy of <this>) to the other one
	 *		- if n_desired_half is one of plane_Onplane or plane_Split, <this> polygon
	 *		  will be copied to r_new_poly and erased in case it lies in different
	 *		  position against plane than specified. otherwise r_new_poly is erased
	 *		- return false if there's not enough memory
	 */
	bool Split(const Plane3f &t_plane, CPolygon<TVertStruct> &r_new_poly,
		EPlanePos n_desired_half, float f_epsilon_ex = f_epsilon) // todo - rewrite cut for two polygons
	{
		EPlanePos n_other_half;

		if(n_desired_half == plane_Front)
			n_other_half = plane_Back;
		else if(n_desired_half == plane_Back)
			n_other_half = plane_Front;
		else {
			// no actual splitting for the remaining positions: the polygon
			// is either kept in <this> or handed over to r_new_poly as a whole
			if(n_Plane_Pos(t_plane, f_epsilon_ex) != n_desired_half) {
				if(!(r_new_poly = *this))
					return false;
				// copy, check if there was enough memory

				Delete();
			} else
				r_new_poly.Delete();
			// keep polygon, lying in the right position

			return true;
		}
		// other half

		if(!(r_new_poly = *this))
			return false;
		// copy, check if there was enough memory

		return r_new_poly.Cut(t_plane, n_other_half, f_epsilon_ex) &&
			Cut(t_plane, n_desired_half, f_epsilon_ex);
		// make two copies of current polygon and cut each of them to its halfspace;
		// the caller's epsilon has to be passed on to both cuts (it used to be dropped, so the
		// cuts silently ran with the default epsilon and could disagree with the caller's
		// classification of the same vertices)
	}

	class CPointOutside {
	protected:
		Vector3f m_v_prev;
		Vector3f m_v_normal;
		Vector3f m_v_point;

	public:
		CPointOutside(const Vector3f &r_v_point,
			const Vector3f &r_v_prev, const Vector3f &r_v_normal)
			:m_v_point(r_v_point), m_v_prev(r_v_prev), m_v_normal(r_v_normal)
		{}

		inline bool operator ()(const TVertStruct &r_vertex)
		{
			Vector3f v_cur = r_vertex.v_Pos();
			Plane3f t_edge(m_v_normal, v_cur - m_v_prev, v_cur);
			m_v_prev = v_cur;
			return t_edge.n_Vector_Pos(m_v_point) == plane_Back;
		}
	};

	/*
	 *	bool b_Contain_Point(const Vector3f &r_v_point) const
	 *		- method with edge and normal collinear planes (work with convex polygons only)
	 *		- need to have calculated normal
	 */
	bool b_Contain_Point(const Vector3f &r_v_point) const
	{
		if(m_vertex_list.empty())
			return false;
		// a polygon without vertices contains nothing; this also guards
		// m_vertex_list.back() below, which must not be called on an empty list

		return std::find_if(m_vertex_list.begin(), m_vertex_list.end(), CPointOutside(r_v_point,
			m_vertex_list.back().v_Pos(), m_t_normal.v_normal)) == m_vertex_list.end();
		// walk the edges in order (starting with the closing edge last -> first vertex);
		// the point is inside if the predicate finds no edge plane having the point behind it
	}
	
	template <const bool b_cull_back = false>
	class CTriDecomposerRayHit {
	protected:
		Vector3f m_v_vert0;
		Vector3f m_v_vert1;

		const Vector3f &m_r_v_org, &m_r_v_dir;
		float &m_r_f_t, &m_r_f_u, &m_r_f_v;

	public:
		inline CTriDecomposerRayHit(Vector3f v_first, Vector3f v_prev,
			const Vector3f &r_v_org, const Vector3f &r_v_dir,
			float &r_f_t, float &r_f_u, float &r_f_v)
			:m_v_vert0(v_first), m_v_vert1(v_prev), m_r_v_org(r_v_org), m_r_v_dir(r_v_dir),
			m_r_f_t(r_f_t), m_r_f_u(r_f_u), m_r_f_v(r_f_v)
		{}

		inline bool operator ()(const TVertStruct &r_vertex)
		{
			Vector3f v_vert2 = r_vertex.v_Pos();
			Vector3f v_edge1 = m_v_vert1 - m_v_vert0,
					 v_edge2 = v_vert2 - m_v_vert0;
			Vector3f v_p = m_r_v_dir.v_Cross(v_edge2);
			float f_det = v_edge1.f_Dot(v_p);

			m_v_vert1 = v_vert2; // shift vertices

			if(b_cull_back) {
				if(f_det < f_epsilon)
					return false;
			} else {
				if(f_det > -f_epsilon && f_det < f_epsilon)
					return false;
			}
			f_det = 1.0f / f_det;
			Vector3f v_t = m_r_v_org - m_v_vert0;
			m_r_f_u = v_t.f_Dot(v_p) * f_det;
			if(m_r_f_u < 0.0f || m_r_f_u > 1.0f)
				return false;
			Vector3f v_q = v_t.v_Cross(v_edge1);
			m_r_f_v = m_r_v_dir.f_Dot(v_q) * f_det;
			if(m_r_f_v < 0.0f || m_r_f_u + m_r_f_v > 1.0f)
				return false;
			m_r_f_t = v_edge2.f_Dot(v_q) * f_det;
			return true;
		}
	};

	/*
	 *	inline bool b_MT_RayHit(const Vector3f &r_v_org, const Vector3f &r_v_dir,
	 *		float &r_f_t, float &r_f_u, float &r_f_v) const
	 *		- Moller-Trumbore ray-triangle hit test 
	 *		- v_org and v_dir are ray source and direction,
	 *		- r_f_t is time of ray to hit
	 *		- r_f_u and r_f_v are barycentric coordinates of intersection
	 *		- return true if ray hits polygon, otherwise false
	 *		- hard to say if it's actually faster or slower than the plane check,
	 *		  it definitely suffers from polygon to triangle decomposition
	 */
	inline bool b_MT_RayHit(const Vector3f &r_v_org, const Vector3f &r_v_dir,
		float &r_f_t, float &r_f_u, float &r_f_v) const
	{
		if(m_vertex_list.size() < 3)
			return false;
		return std::find_if(m_vertex_list.begin() + 2, m_vertex_list.end(),
			CTriDecomposerRayHit<false>(m_vertex_list[0].v_Pos(), m_vertex_list[1].v_Pos(),
			r_v_org, r_v_dir, r_f_t, r_f_u, r_f_v)) != m_vertex_list.end();
		// clearer and possibly faster version with std::find_if

		/*for(int i = 1; i < m_vertex_list.size() - 1; ++ i) {
			const TVertStruct *p_vert0 = &m_vertex_list[0];
			const TVertStruct *p_vert1 = &m_vertex_list[i];
			const TVertStruct *p_vert2 = &m_vertex_list[(i + 1) % m_vertex_list.size()];

			Vector3f v_edge1 = p_vert1->v_Pos() - p_vert0->v_Pos(),
					 v_edge2 = p_vert2->v_Pos() - p_vert0->v_Pos();
			Vector3f v_p = r_v_dir.v_Cross(v_edge2);
			float f_det = v_edge1.f_Dot(v_p);

			if(f_det > -f_epsilon && f_det < f_epsilon)
				continue;
			f_det = 1.0f / f_det;

			Vector3f v_t = r_v_org - p_vert0->v_Pos();

			r_f_u = v_t.f_Dot(v_p) * f_det;
			if(r_f_u < 0.0f || r_f_u > 1.0f)
				continue;

			Vector3f v_q = v_t.v_Cross(v_edge1);

			r_f_v = r_v_dir.f_Dot(v_q) * f_det;
			if(r_f_v < 0.0f || r_f_u + r_f_v > 1.0f)
				continue;

			r_f_t = v_edge2.f_Dot(v_q) * f_det;

			return true;
		}
		return false;*/ // todo - perform speed test by raytracing some real mesh
	}

	/*
	 *	bool b_MT_RayHit_CullBackfaces(const Vector3f &r_v_org, const Vector3f &r_v_dir,
	 *		float &r_f_t, float &r_f_u, float &r_f_v) const
	 *		- the same as b_MT_RayHit, but in case the ray is coming from behind the polygon
	 *		  (dot product of direction of ray and polygon normal is positive),
	 *		  intersection isn't detected even if it does occur
	 */
	bool b_MT_RayHit_CullBackfaces(const Vector3f &r_v_org, const Vector3f &r_v_dir,
		float &r_f_t, float &r_f_u, float &r_f_v) const
	{
		if(m_vertex_list.size() < 3)
			return false;
		return std::find_if(m_vertex_list.begin() + 2, m_vertex_list.end(),
			CTriDecomposerRayHit<true>(m_vertex_list[0].v_Pos(), m_vertex_list[1].v_Pos(),
			r_v_org, r_v_dir, r_f_t, r_f_u, r_f_v)) != m_vertex_list.end();
		// clearer and possibly faster version with std::find_if
	}

	bool b_Collide(const CPolygon<TVertStruct> &r_polygon) const
	{
		if(r_polygon.m_vertex_list.size() < 3 || m_vertex_list.size() < 3) // todo
			return false;
		{
			const CPolygon<TVertStruct> &r_a = *this;
			const CPolygon<TVertStruct> &r_b = r_polygon;

			Vector3f v_edge_a = r_a.m_vertex_list[r_a.m_vertex_list.size() - 1].v_Pos(),
			   v_edge_b = r_a.m_vertex_list[0].v_Pos();
			for(int i = 0;;) {
				float t, u, v;
				if(r_b.b_MT_RayHit(v_edge_a, v_edge_b - v_edge_a, t, u, v) && t >= 0.0f && t <= 1.0f)
					return true;
				v_edge_a = v_edge_b;
				if(++ i < r_a.m_vertex_list.size())
					v_edge_b = r_a.m_vertex_list[i].v_Pos();
				else
					break;
			}
		}
		{
			const CPolygon<TVertStruct> &r_a = r_polygon;
			const CPolygon<TVertStruct> &r_b = *this;

			Vector3f v_edge_a = r_a.m_vertex_list[r_a.m_vertex_list.size() - 1].v_Pos(),
			   v_edge_b = r_a.m_vertex_list[0].v_Pos();
			for(int i = 0;;) {
				float t, u, v;
				if(r_b.b_MT_RayHit(v_edge_a, v_edge_b - v_edge_a, t, u, v) && t >= 0.0f && t <= 1.0f)
					return true;
				v_edge_a = v_edge_b;
				if(++ i < r_a.m_vertex_list.size())
					v_edge_b = r_a.m_vertex_list[i].v_Pos();
				else
					break;
			}
		}
		// see if this polygon is intersected by any of r_polygon's edges
		// (being touched by vertex alsou counts as collision)
		// it needs to check edge intersection this->r_polygon AND vice-versa
		return false;
	}

	/*
	 *	bool Write(FILE *p_file) const
	 *		- writes polygon to p_file as raw memory images: the normal plane (Plane3f),
	 *		  number of vertices (int) and the vertices themselves (TVertStruct each)
	 *		- returns true only if everything was written completely
	 *		- polygon with no vertices is written successfully (just plane and zero count)
	 */
	bool Write(FILE *p_file) const
	{
		const int n_temp = int(m_vertex_list.size());

		if(fwrite(&m_t_normal, sizeof(Plane3f), 1, p_file) != 1 ||
		   fwrite(&n_temp, sizeof(int), 1, p_file) != 1)
			return false;
		// fwrite returns number of complete elements written; with element size
		// and count in the right order, a partial write is detected as failure

		if(m_vertex_list.empty())
			return true;
		// nothing more to write (writing zero elements would return 0, which
		// used to be misreported as an error)

		return fwrite(&m_vertex_list[0], sizeof(TVertStruct),
			m_vertex_list.size(), p_file) == m_vertex_list.size();
		// address of the first element is taken explicitly
		// (a container iterator isn't guaranteed to be a pointer)
	}

	/*
	 *	bool Read(FILE *p_file)
	 *		- reads polygon in the layout produced by Write(): the normal plane (Plane3f),
	 *		  number of vertices (int) and the vertices themselves (TVertStruct each)
	 *		- returns false on incomplete read, on negative vertex count
	 *		  or if there is not enough memory for the vertices
	 *		- when reading of vertices fails, the vertex list is left empty
	 */
	bool Read(FILE *p_file)
	{
		if(fread(&m_t_normal, sizeof(Plane3f), 1, p_file) != 1)
			return false;
		// fread returns number of complete elements; a truncated plane is an error

		int n_temp;
		if(fread(&n_temp, sizeof(int), 1, p_file) != 1)
			return false;

		m_vertex_list.clear();

		if(n_temp < 0)
			return false;
		// the count comes from the file; negative value can only mean damaged data

		if(!stl_ut::Reserve_N(m_vertex_list, n_temp))
			return false;
		// make sure there is space for all the vertices

		for(int i = 0; i < n_temp; ++ i) {
			TVertStruct v_tmp;
			if(fread(&v_tmp, sizeof(TVertStruct), 1, p_file) != 1) {
				m_vertex_list.clear();
				// don't leave truncated polygon behind
				return false;
			}
			m_vertex_list.push_back(v_tmp);
		}
		// read the vertices one by one

		return true;
	}

	/*
	 *	bool operator = (const CPolygon<TVertStruct> &r_poly)
	 *		- return value is true if everything went ok
	 */
	bool operator = (const CPolygon<TVertStruct> &r_poly)
	{
		if(this == &r_poly)
			return true;
		// self-assignment: nothing to do (without this check the vertex list
		// would be cleared before being copied from itself, losing all the vertices)

		m_t_normal = r_poly.m_t_normal;

		m_vertex_list.clear();
		if(!stl_ut::Reserve_N(m_vertex_list, r_poly.m_vertex_list.size()))
			return false;
		// make sure there is enough space, so the copy below doesn't need to allocate

		m_vertex_list.insert(m_vertex_list.end(),
			r_poly.m_vertex_list.begin(), r_poly.m_vertex_list.end());
		// copy the vertices

		return true;
	}

	/*
	 *	void Reverb()
	 *		- reverse the order of vertices. doesn't need any extra memory so it can't fail
	 */
	void Reverb()
	{
		std::reverse(m_vertex_list.begin(), m_vertex_list.end());
		// swap the vertices, mirrored around the middle of the list
		// (the middle vertex of an odd-sized list stays where it is)
	}

	/*
	 *	bool b_Overlap(const CPolygon<TVertStruct> &r_poly) const
	 *		- return true if polygons overlap, otherwise false
	 */
	bool b_Overlap(const CPolygon<TVertStruct> &r_poly) const
	{
		const size_t n_their_vertex_num = r_poly.m_vertex_list.size();
		for(size_t n_vertex = 0; n_vertex < n_their_vertex_num; ++ n_vertex) {
			const bool b_inside_this =
				b_Contain_Point(r_poly.m_vertex_list[n_vertex].v_Pos());
			if(b_inside_this)
				return true;
		}
		// is any vertex of r_poly contained in this polygon?

		const size_t n_my_vertex_num = m_vertex_list.size();
		for(size_t n_vertex = 0; n_vertex < n_my_vertex_num; ++ n_vertex) {
			const bool b_inside_other =
				r_poly.b_Contain_Point(m_vertex_list[n_vertex].v_Pos());
			if(b_inside_other)
				return true;
		}
		// is any vertex of this polygon contained in r_poly?

		return false;
		// no vertex of either polygon was found inside the other one
	}


	/*
	 *	bool b_IsTiny(float f_epsilon_ex, float f_edge_epsilon_ex)
	 *		- return true if polygon is considered to be tiny
	 *		  (either can't calculate normal or its area is under threshold)
	 *		- explicit epsilon
	 */
	bool b_IsTiny(float f_epsilon_ex = f_epsilon, float f_edge_epsilon_ex = f_edge_epsilon)
	{
		Plane3f t_tmp_normal = m_t_normal;
		bool b_have_normal = Calc_Normal(f_epsilon_ex, f_edge_epsilon_ex);
		m_t_normal = t_tmp_normal;
		// try to calculate normal with the given epsilons; the stored normal is saved
		// and put back, so the test leaves it as it was
		// (presumably Calc_Normal() overwrites m_t_normal - verify against its definition)

		return !b_have_normal;
		// if there are three vertices, far enough to calculate the plane, polygon isn't tiny
		// (the result used to be inverted: true was returned for polygons WITH a normal)
	}
};

/*
 *								=== ~CPolygon ===
 */

/*
 *								=== CVectorMath ===
 */

class CVectorMath {
public:
	/*
	 *	static Vector3f v_Project(const Vector3f &r_v_vec, const Vector3f &r_v_ray_org,
	 *		const Vector3f &r_v_ray_dir, float f_ray_dir_length2 = 1.0f)
	 *		- project point r_v_vec onto ray coming through point r_v_ray_org
	 *		  in direction r_v_ray_dir
	 *		- f_ray_dir_length2 is squared length of r_v_ray_dir (or 1 in case it's unit length)
	 *		  it must not be zero
	 */
	static Vector3f v_Project(const Vector3f &r_v_vec, const Vector3f &r_v_ray_org,
		const Vector3f &r_v_ray_dir, float f_ray_dir_length2 = 1.0f)
	{
		return r_v_ray_org + r_v_ray_dir *
			(r_v_ray_dir.f_Dot(r_v_vec - r_v_ray_org) / f_ray_dir_length2);
	}

	/*
	 *	static Vector3f v_Nearest_Point(const Vector3f &r_v_ray_org, const Vector3f &r_v_ray_dir,
	 *		const Vector3f &r_v_ray2_org, const Vector3f &r_v_ray2_dir)
	 *		- returns nearest point, lying on ray [r_v_ray_org, r_v_ray_dir] to ray
	 *		  [r_v_ray2_org, r_v_ray2_dir]
	 *		- ray directions don't have to be unit length
	 *		- for parallel rays, every point on the first ray is equally near
	 *		  to the second one; r_v_ray_org is returned in such a case
	 */
	static Vector3f v_Nearest_Point(const Vector3f &r_v_ray_org, const Vector3f &r_v_ray_dir,
		const Vector3f &r_v_ray2_org, const Vector3f &r_v_ray2_dir)
	{
		Plane3f t_coplane(r_v_ray2_org, r_v_ray2_dir.v_Cross(r_v_ray2_dir.v_Cross(r_v_ray_dir)));
		// plane the second ray lies on, which also contains
		// the direction perpendicular to both of the rays

		Vector3f v_intersection;
		if(t_coplane.Intersect_Ray(v_intersection, r_v_ray_org, r_v_ray_dir)) {
			return v_intersection;
			// intersection of the plane with the first ray is the nearest point to the second ray
			// (nearest points are connected by line, perpendicular to both of the rays)
		}

		return r_v_ray_org;
		// rays are parallel, any point of the first ray will do (this used to return
		// the offset between the rays, which is not a point on the first ray at all)
	}

	/*
	 * static inline float f_Det3(float f_00, float f_10, float f_20,
	 *							  float f_01, float f_11, float f_21,
	 *							  float f_02, float f_12, float f_22)
	 *		- calculates determinant of 3rd-order matrix
	 */
	static inline float f_Det3(float f_00, float f_10, float f_20,
							   float f_01, float f_11, float f_21,
							   float f_02, float f_12, float f_22)
	{
		return (f_00 * f_11 * f_22) + (f_10 * f_21 * f_02) + (f_01 * f_12 * f_20) -
			   (f_20 * f_11 * f_02) - (f_10 * f_01 * f_22) - (f_00 * f_21 * f_12);
	}

	/*
	 * static inline float f_Det2(float f_00, float f_10,
	 *							  float f_01, float f_11)
	 *		- calculates determinant of 2nd-order matrix
	 */
	static inline float f_Det2(float f_00, float f_10,
							   float f_01, float f_11)
	{
		return (f_00 * f_11) - (f_01 * f_10);
	}

	/*
	 *	static inline float f_Sqr(float f_x)
	 *		- calculates 2nd power of x
	 */
	static inline float f_Sqr(float f_x)
	{
		return f_x * f_x;
	}

	/*
	 *	static float f_Min_Distance(const Vector3f &r_v_ray_org, const Vector3f &r_v_ray_dir,
	 *		const Vector3f &r_v_ray2_org, const Vector3f &r_v_ray2_dir)
	 *		- returns minimal distance of ray [r_v_ray_org, r_v_ray_dir] to ray
	 *		  [r_v_ray2_org, r_v_ray2_dir]
	 *		- ray directions don't have to be unit length
	 *		- works for parallel rays as well (distance of origin of one ray
	 *		  to the other ray is returned in such a case)
	 */
	static float f_Min_Distance(const Vector3f &r_v_ray_org, const Vector3f &r_v_ray_dir,
		const Vector3f &r_v_ray2_org, const Vector3f &r_v_ray2_dir)
	{
		Vector3f v_org_dist = r_v_ray_org - r_v_ray2_org;

		float f_cross_length2 =
			f_Sqr(f_Det2(r_v_ray_dir.y, r_v_ray_dir.z,
						 r_v_ray2_dir.y, r_v_ray2_dir.z)) +
			f_Sqr(f_Det2(r_v_ray_dir.z, r_v_ray_dir.x,
						 r_v_ray2_dir.z, r_v_ray2_dir.x)) +
			f_Sqr(f_Det2(r_v_ray_dir.x, r_v_ray_dir.y,
						 r_v_ray2_dir.x, r_v_ray2_dir.y));
		// squared length of cross product of the ray directions

		if(f_cross_length2 > 0.0f) {
			return (float)fabs(f_Det3(v_org_dist.x, v_org_dist.y, v_org_dist.z,
									  r_v_ray_dir.x, r_v_ray_dir.y, r_v_ray_dir.z,
									  r_v_ray2_dir.x, r_v_ray2_dir.y, r_v_ray2_dir.z)) /
				(float)sqrt(f_cross_length2);
			// formula from Hans-Jochen Bartsch's math book (results are almost identical
			// to those, obtained by measuring distance between nearest points on rays)
		}

		const Vector3f &r_v_line_dir = (r_v_ray2_dir.f_Length2() > 0.0f)?
			r_v_ray2_dir : r_v_ray_dir;
		float f_dir_length2 = r_v_line_dir.f_Length2();
		if(f_dir_length2 > 0.0f)
			return v_org_dist.v_Cross(r_v_line_dir).f_Length() / (float)sqrt(f_dir_length2);
		// the cross product is zero (parallel rays or zero direction), the formula above
		// would divide by zero; use distance of ray origin to the other ray instead

		return v_org_dist.f_Length();
		// both directions are zero, all that is left is distance of the origins
	}

	/*
	 *	struct TAxisAlignedBox
	 *		- bounding box, aligned with coordinate axes
	 *		- both intersection tests measure v_size from the box center,
	 *		  i.e. the box spans v_center +- v_size
	 */
	struct TAxisAlignedBox {
		Vector3f v_center; // box center
		Vector3f v_size; // box size along coordinate axes (measured from the center)

		TAxisAlignedBox()
		{ }

		TAxisAlignedBox(const Vector3f &r_v_center, const Vector3f &r_v_size)
			:v_center(r_v_center), v_size(r_v_size)
		{ }

		/*
		 *	static bool b_Line_Hits_Extents(const Vector3f &r_v_org, const Vector3f &r_v_dir,
		 *		const Vector3f &r_v_extent)
		 *		- slab test of line [r_v_org, r_v_dir] against box, centered at origin
		 *		  and reaching r_v_extent to both sides along every axis
		 *		- the line is infinite to both sides, r_v_dir doesn't have to be unit length
		 *		- shared by b_Intersect_Ray() of both axis aligned and object oriented boxes
		 */
		static bool b_Line_Hits_Extents(const Vector3f &r_v_org, const Vector3f &r_v_dir,
			const Vector3f &r_v_extent)
		{
			bool b_have_interval = false;
			float f_t_min = 0, f_t_max = 0;
			// interval of line parameter, inside of all the slabs processed so far
			// (valid only after b_have_interval is set)

			for(int i = 0; i < 3; ++ i) {
				float f_org = r_v_org[i], f_dir = r_v_dir[i];
				float f_extent = (float)fabs(r_v_extent[i]);

				if(f_dir == 0.0f) {
					if((float)fabs(f_org) > f_extent)
						return false;
					continue;
					// line runs along this slab, it's either inside for all
					// parameter values or it misses the box completely
				}

				float f_t_near = (-f_extent - f_org) / f_dir;
				float f_t_far = (f_extent - f_org) / f_dir;
				if(f_t_near > f_t_far) {
					float f_swap = f_t_near;
					f_t_near = f_t_far;
					f_t_far = f_swap;
				}
				// parameters where the line enters and leaves the slab

				if(!b_have_interval) {
					f_t_min = f_t_near;
					f_t_max = f_t_far;
					b_have_interval = true;
				} else {
					if(f_t_near > f_t_min)
						f_t_min = f_t_near;
					if(f_t_far < f_t_max)
						f_t_max = f_t_far;
					if(f_t_min > f_t_max)
						return false;
					// the line leaves one slab before it enters another
				}
			}

			return true;
		}

		/*
		 *	bool b_Intersect_Ray(const Vector3f &r_v_org, const Vector3f &r_v_dir) const
		 *		- returns true if ray [r_v_org, r_v_dir] intersects the box. otherwise false
		 *		- the ray is treated as line, infinite to both sides
		 *		  (the same way as the rays in v_Project() or f_Min_Distance())
		 */
		bool b_Intersect_Ray(const Vector3f &r_v_org, const Vector3f &r_v_dir) const
		{
			return b_Line_Hits_Extents(r_v_org - v_center, r_v_dir, v_size);
			// exact slab test in coordinates relative to box center (the former
			// approximation measured along the ray instead of across it and
			// compared the wrong way round, missing even rays through box center)
		}

		/*
		 *	bool b_Intersect_Segment(const Vector3f &r_v_org, const Vector3f &r_v_end) const
		 *		- returns true if segment [r_v_org, r_v_end] intersects the box. otherwise false
		 */
		bool b_Intersect_Segment(const Vector3f &r_v_org, const Vector3f &r_v_end) const
		{
			Vector3f v_half_dir = (r_v_end - r_v_org) * .5f;
			Vector3f v_seg_center = r_v_org + v_half_dir;
			Vector3f v_diff = v_center - v_seg_center;
			Vector3f v_dir_dot_axis;
			for(int i = 0; i < 3; ++ i) {
				v_dir_dot_axis[i] = (float)fabs(v_half_dir[i]);
				if((float)fabs(v_diff[i]) > v_size[i] + v_dir_dot_axis[i])
					return false;
				// separated along this axis: distance of the centers is greater
				// than box extent and half of the segment extent together
				// (the terms used to be on the wrong sides of the comparison)
			}
			// see if segment, projected to box axis miss the box

			const int n_index0[] = {1, 0, 0};
			const int n_index1[] = {2, 2, 1};
			// indices of the two remaining axes for each axis

			Vector3f v_tangent = v_half_dir.v_Cross(v_diff);
			for(int i = 0; i < 3; ++ i) {
				if((float)fabs(v_tangent[i]) > v_size[n_index0[i]] * v_dir_dot_axis[n_index1[i]] +
				   v_size[n_index1[i]] * v_dir_dot_axis[n_index0[i]])
					return false;
			}
			// see for each axial projection if segment miss the box orthogonally

			return true;
		}
	};

	/*
	 *	struct TObjectOrientedBox
	 *		- extension of TAxisAlignedBox, box is now aligned to some arbitrary axes
	 *		  (but they still ought to form right-handed euclidean coordinate system)
	 */
	struct TObjectOrientedBox : public TAxisAlignedBox {
		Vector3f v_axis[3]; // normalized axes, box size is along those axes

		TObjectOrientedBox()
		{ }

		TObjectOrientedBox(const Vector3f &r_v_center, const Vector3f &r_v_size,
			const Vector3f _v_axis[3])
			:TAxisAlignedBox(r_v_center, r_v_size)
		{
			for(int i = 0; i < 3; ++ i) {
				v_axis[i] = _v_axis[i];
				v_axis[i].Normalize();
			}
			// copy the axes by assignment (not by copying raw memory) and normalize them
		}

		TObjectOrientedBox(const Vector3f &r_v_center, const Vector3f &r_v_size,
			const Matrix4f &r_t_matrix)
			:TAxisAlignedBox(r_v_center, r_v_size)
		{
			v_axis[0] = r_t_matrix.v_Right();
			v_axis[1] = r_t_matrix.v_Up();
			v_axis[2] = r_t_matrix.v_Dir();
			for(int i = 0; i < 3; ++ i)
				v_axis[i].Normalize();
			// take the axes from the matrix and normalize them
		}

		TObjectOrientedBox(const TAxisAlignedBox &r_t_box)
			:TAxisAlignedBox(r_t_box.v_center, r_t_box.v_size)
		{
			v_axis[0] = Vector3f(1, 0, 0);
			v_axis[1] = Vector3f(0, 1, 0);
			v_axis[2] = Vector3f(0, 0, 1);
			// axis aligned box is object oriented box with coordinate axes
		}

		/*
		 *	bool TObjectOrientedBox::b_Intersect_Ray(const Vector3f &r_v_org, const Vector3f &r_v_dir) const
		 *		- returns true if ray [r_v_org, r_v_dir] intersects the box. otherwise false
		 *		- the ray is treated as line, infinite to both sides
		 */
		bool b_Intersect_Ray(const Vector3f &r_v_org, const Vector3f &r_v_dir) const
		{
			Vector3f v_rel_org = r_v_org - v_center;
			Vector3f v_local_org(v_axis[0].f_Dot(v_rel_org),
				v_axis[1].f_Dot(v_rel_org), v_axis[2].f_Dot(v_rel_org));
			Vector3f v_local_dir(v_axis[0].f_Dot(r_v_dir),
				v_axis[1].f_Dot(r_v_dir), v_axis[2].f_Dot(r_v_dir));
			// express the ray in coordinate frame of the box

			return b_Line_Hits_Extents(v_local_org, v_local_dir, v_size);
			// the box is axis aligned in its own frame, the slab test can be used
		}

		/*
		 *	bool b_Intersect_Segment(const Vector3f &r_v_org, const Vector3f &r_v_end) const
		 *		- returns true if segment [r_v_org, r_v_end] intersects the box. otherwise false
		 */
		bool b_Intersect_Segment(const Vector3f &r_v_org, const Vector3f &r_v_end) const
		{
			Vector3f v_half_dir = (r_v_end - r_v_org) * .5f;
			Vector3f v_seg_center = r_v_org + v_half_dir;
			Vector3f v_diff = v_center - v_seg_center;
			Vector3f v_dir_dot_axis;
			for(int i = 0; i < 3; ++ i) {
				v_dir_dot_axis[i] = (float)fabs(v_half_dir.f_Dot(v_axis[i]));
				if((float)fabs(v_diff.f_Dot(v_axis[i])) > v_size[i] + v_dir_dot_axis[i])
					return false;
				// separated along this box axis (the terms used to be
				// on the wrong sides of the comparison)
			}
			// see if segment, projected to box axis miss the box

			const int n_index0[] = {1, 0, 0};
			const int n_index1[] = {2, 2, 1};
			// indices of the two remaining axes for each axis

			Vector3f v_tangent = v_half_dir.v_Cross(v_diff);
			for(int i = 0; i < 3; ++ i) {
				if((float)fabs(v_tangent.f_Dot(v_axis[i])) >
				   v_size[n_index0[i]] * v_dir_dot_axis[n_index1[i]] +
				   v_size[n_index1[i]] * v_dir_dot_axis[n_index0[i]])
					return false;
			}
			// see for each axial projection if segment miss the box orthogonally

			return true;
		}
	};
};

/*
 *								=== ~CVectorMath ===
 */

#endif // __VECTOR2_INCLUDED
