/*
								+----------------------------------+
								|                                  |
								|   ***  Boolean operations  ***   |
								|                                  |
								|   Copyright  -tHE SWINe- 2013   |
								|                                  |
								|           BooleanOps.h           |
								|                                  |
								+----------------------------------+
*/

#pragma once
#ifndef __BOOLEAN_OPERATIONS_INCLUDED
#define __BOOLEAN_OPERATIONS_INCLUDED

/**
 *	@file lml/BooleanOps.h
 *	@date 2007
 *	@author -tHE SWINe-
 *	@brief boolean operations on polygonal meshes
 *
 *	@todo - doc, make .cpp, think about using that uniform grid, think about "robust geometric predicates"
 *	@todo - try to see if there is a better way to cut the polygons; plane cutting is going to cause
 *		a lot of t-junctions in the mesh, it would be much better to calculate cut multilines inside the polygon
 *		and retriangulate only the affected edges (it would also lead to lower polygon count in most cases).
 *		the robust polygon split does not help here, as for small polygons it simply does not split them,
 *		and the polygons get incorrectly saved or thrown away completely, depending on which side is the center
 *		of each triangle, creating holes in the mesh.
 */

#include "PolyMesh.h"

/**
 *	@def __USE_VOLRAST
 *	@brief if defined, the uniform grid is filled using the fast
 *		volume rasterizer (not always precise at the edges)
 */
//#define __USE_VOLRAST

// todo - move these to BooleanOps.cpp
/*
 *	Separating-axis helper macros for the triangle / box overlap test, used by
 *	CPolygonHash::CUniformGrid::b_TriangleCell_Intersect().
 *
 *	Each macro projects two triangle vertices onto one axis, projects the box
 *	half-size onto the same axis ("rad") and makes the *enclosing function*
 *	return false if the two intervals do not overlap.
 *
 *	Requirements at the point of use:
 *		- variables v0, v1, v2 (triangle vertices relative to the box center)
 *		  and v_boxhalfsize must be in scope, having .x, .y and .z members
 *		- the enclosing function must return bool
 *
 *	a, b are the edge components and fa, fb their absolute values (see the
 *	call sites). All arguments are parenthesized, so passing expressions is safe.
 *	The locals are named f_min / f_max rather than min / max so that they do
 *	not shadow the min() / max() helpers used elsewhere in this file.
 */
#define axistest_x01(a, b, fa, fb) \
	do { \
		float p0 = (a) * v0.y - (b) * v0.z; \
		float p2 = (a) * v2.y - (b) * v2.z, f_min, f_max; \
		if(p0 < p2) { f_min = p0; f_max = p2; } else { f_min = p2; f_max = p0; } \
		float rad = (fa) * v_boxhalfsize.y + (fb) * v_boxhalfsize.z; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

#define axistest_x2(a, b, fa, fb) \
	do { \
		float p0 = (a) * v0.y - (b) * v0.z; \
		float p1 = (a) * v1.y - (b) * v1.z, f_min, f_max; \
		if(p0 < p1) { f_min = p0; f_max = p1; } else { f_min = p1; f_max = p0; } \
		float rad = (fa) * v_boxhalfsize.y + (fb) * v_boxhalfsize.z; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

#define axistest_y02(a, b, fa, fb) \
	do { \
		float p0 = -(a) * v0.x + (b) * v0.z; \
		float p2 = -(a) * v2.x + (b) * v2.z, f_min, f_max; \
		if(p0 < p2) { f_min = p0; f_max = p2; } else { f_min = p2; f_max = p0; } \
		float rad = (fa) * v_boxhalfsize.x + (fb) * v_boxhalfsize.z; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

#define axistest_y1(a, b, fa, fb) \
	do { \
		float p0 = -(a) * v0.x + (b) * v0.z; \
		float p1 = -(a) * v1.x + (b) * v1.z, f_min, f_max; \
		if(p0 < p1) { f_min = p0; f_max = p1; } else { f_min = p1; f_max = p0; } \
		float rad = (fa) * v_boxhalfsize.x + (fb) * v_boxhalfsize.z; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

#define axistest_z12(a, b, fa, fb) \
	do { \
		float p1 = (a) * v1.x - (b) * v1.y; \
		float p2 = (a) * v2.x - (b) * v2.y, f_min, f_max; \
		if(p2 < p1) { f_min = p2; f_max = p1; } else { f_min = p1; f_max = p2; } \
		float rad = (fa) * v_boxhalfsize.x + (fb) * v_boxhalfsize.y; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

#define axistest_z0(a, b, fa, fb) \
	do { \
		float p0 = (a) * v0.x - (b) * v0.y; \
		float p1 = (a) * v1.x - (b) * v1.y, f_min, f_max; \
		if(p0 < p1) { f_min = p0; f_max = p1; } else { f_min = p1; f_max = p0; } \
		float rad = (fa) * v_boxhalfsize.x + (fb) * v_boxhalfsize.y; \
		if(f_min > rad || f_max < -rad) \
			return false; \
	} while(0)

class CPolygonHash {
public:
	typedef CPolyMesh::_TyPolygon _TyPolygon; /**< @brief polygon type */
	typedef CPolyMesh::_TyVertex _TyVertex; /**< @brief vertex type */

	/**
	 *	@brief a simple "volume" rasterizer
	 */
	class CVolRast {
	public:
		/**
		 *	@brief rasterizes a 2D polygon, filling all the touched pixels
		 *
		 *	The polygon outline is walked from the vertex with the smallest y
		 *	simultaneously in both directions: the "left" edge chain steps through
		 *	the vertex array backwards (index - 1) and the "right" chain forwards
		 *	(index + 1), until the vertex with the largest y is reached. Edge
		 *	x-coordinates are tracked in 16:16 fixed point.
		 *
		 *	@tparam CDestOp is destination operator, taking care of "filling pixels";
		 *		it must implement function operator with integer x and y coordinates
		 *
		 *	@param[in] p_vertex is pointer to the vertices of the polygon to be rasterized
		 *	@param[in] n_vertex_num is number of vertices of the polygon to be rasterized
		 *	@param[in] op is destination operator, used to fill pixels
		 *
		 *	@return Returns the value of op, after filling all the pixels.
		 *
		 *	@note This function throws std::bad_alloc (this function does not allocate
		 *		by itself; presumably this refers to exceptions thrown by op).
		 *	@note NOTE(review): the two-chain walk appears to assume a convex polygon with
		 *		a winding such that the backward chain is on the left - confirm with callers.
		 *	@note NOTE(review): the +-0x10000 sentinels and the 16:16 format suggest that
		 *		coordinates are expected to stay well inside (-32768, 32768) - confirm.
		 */
		template <class CDestOp>
		static CDestOp Rasterize2D(const Vector2f *p_vertex, size_t n_vertex_num, CDestOp op) // throw(std::bad_alloc)
		{
			if(!n_vertex_num)
				return op;
			// simple case (but necessary, min-max requires at least 1 vertex)

			int n_min_y = 0, n_max_y = 0;
			float f_min_y = p_vertex[0].y, f_max_y = p_vertex[0].y;
			for(size_t i = 1; i < n_vertex_num; i ++) {
				if(p_vertex[i].y > f_max_y) {
					f_max_y = p_vertex[i].y;
					n_max_y = i;
				}
				if(p_vertex[i].y < f_min_y) {
					f_min_y = p_vertex[i].y;
					n_min_y = i;
				}
			}
			// find min and max y-vertex (n_min_y, n_max_y are vertex *indices*,
			// f_min_y, f_max_y are the corresponding coordinates)

			int n_left_top_x = 0x10000;
			int n_right_top_x = -0x10000;
			int n_left_bottom_x = 0x10000;
			int n_right_bottom_x = -0x10000;
			// sentinels for the min / max searches below
			{
				int n_min_y_coord = (int)ceil(f_min_y),
					n_max_y_coord = (int)ceil(f_max_y);
				for(size_t i = 0; i < n_vertex_num; i ++) {
					int n_y_coord = (int)ceil(p_vertex[i].y);
					int n_x_coord = (int)p_vertex[i].x;
					if(n_min_y_coord == n_y_coord) {
						if(n_left_top_x > n_x_coord)
							n_left_top_x = n_x_coord;
						if(n_right_top_x < n_x_coord)
							n_right_top_x = n_x_coord;
					}
					if(n_max_y_coord == n_y_coord) {
						if(n_left_bottom_x > n_x_coord)
							n_left_bottom_x = n_x_coord;
						if(n_right_bottom_x < n_x_coord)
							n_right_bottom_x = n_x_coord;
					}
				}
			}
			// find min and max x-vertex on top and bottom scanline (x is truncated
			// to integer, a vertex belongs to a scanline by the ceiling of its y)

			int n_left_count = 0;
			int n_right_count = 0;
			int n_left_edge = n_min_y;
			int n_right_edge = n_min_y;
			// edge states (remaining scanlines of the current edge and the index of
			// its end vertex; both chains start at the top vertex with no edge loaded)

			int n_left_dxdy, p_left_x[2] = {0, n_left_top_x << 16};
			int n_right_dxdy, p_right_x[2] = {0, n_right_top_x << 16};
			// 16:16 edge equations (element 0 is x at the top of the current
			// scanline, element 1 is x at its bottom; the slopes are set when
			// the first edge is loaded in the first loop iteration)

			int n_dest_y = ((int)ceil(f_min_y) - 1);
			// shift raster pointer to the first polygon scanline

			for(;;) {
				p_left_x[0] = p_left_x[1];
				p_right_x[0] = p_right_x[1];
				// remember left and right x at the top of current scanline

				if(!n_left_count) {
					int n_prev_edge;
					// previous edge index

					int n_cur_y = (int)ceil(p_vertex[n_left_edge].y);
					// store vertex y so it can determine scanline length

					int n_current_x = (int)(p_vertex[n_left_edge].x * 0x10000);
					// use vertex x as current x, projection of prev edge to next scanline is not accurate

					do {
						if(n_left_edge == n_max_y) {
							for(int l = min(p_left_x[0] >> 16, n_left_bottom_x),
							   r = max(p_right_x[0] >> 16, n_right_bottom_x); l <= r; l ++)
								op(l, n_dest_y);
							return op;
						}
						// we're done, fill the last (bottom) scanline
						// (this is the only exit from the scanline loop)

						n_prev_edge = n_left_edge;
						n_left_edge = (n_left_edge + n_vertex_num - 1) % n_vertex_num;
						// get next left edge index (the left chain goes backwards)

						int n_y = (int)ceil(p_vertex[n_left_edge].y);
						n_left_count = n_y - n_cur_y;
						// calc scanline count

						n_current_x = min(n_current_x, (int)(p_vertex[n_prev_edge].x * 0x10000));
						// update current x so it's position of left-most vertex at this scanline
					} while(!n_left_count);
					// find edge with non-zero length

					float f_dxdy = (p_vertex[n_left_edge].x - p_vertex[n_prev_edge].x) /
						(p_vertex[n_left_edge].y - p_vertex[n_prev_edge].y);
					n_left_dxdy = (int)(f_dxdy * 0x10000);
					p_left_x[0] = min(p_left_x[0], n_current_x);
					p_left_x[1] = (int)((p_vertex[n_prev_edge].x +
						(ceil(p_vertex[n_prev_edge].y) - p_vertex[n_prev_edge].y) * f_dxdy) * 0x10000);
					// calculate new edge equation, x positions at bottom and top of the current scanline
				} else
					p_left_x[1] += n_left_dxdy;
				-- n_left_count;
				if(!n_right_count) {
					int n_prev_edge;
					// previous edge index

					int n_cur_y = (int)ceil(p_vertex[n_right_edge].y);
					// store vertex y so it can determine scanline length

					int n_current_x = (int)(p_vertex[n_right_edge].x * 0x10000);
					// use vertex x as current x, projection of prev edge to next scanline is not accurate

					do {
						_ASSERTE(n_right_edge != n_max_y);
						// shouldn't happen, left scanline update code would already return

						n_prev_edge = n_right_edge;
						n_right_edge = (n_right_edge + 1) % n_vertex_num;
						// get next right edge index (the right chain goes forwards)

						int n_y = (int)ceil(p_vertex[n_right_edge].y);
						n_right_count = n_y - n_cur_y;
						// calc scanline count

						n_current_x = max(n_current_x, (int)(p_vertex[n_prev_edge].x * 0x10000));
						// update current x so it's position of right-most vertex at this scanline
					} while(!n_right_count);
					// find edge with non-zero length

					float f_dxdy = (p_vertex[n_right_edge].x - p_vertex[n_prev_edge].x) /
						(p_vertex[n_right_edge].y - p_vertex[n_prev_edge].y);
					n_right_dxdy = (int)(f_dxdy * 0x10000);
					p_right_x[0] = max(p_right_x[0], n_current_x);
					p_right_x[1] = (int)((p_vertex[n_prev_edge].x +
						(ceil(p_vertex[n_prev_edge].y) - p_vertex[n_prev_edge].y) * f_dxdy) * 0x10000);
					// calculate new edge equation, x positions at bottom and top of the current scanline
				} else
					p_right_x[1] += n_right_dxdy;
				-- n_right_count;
				// update edges, calculate positions at the bottom of the scanline

				for(int l = p_left_x[n_left_dxdy < 0] >> 16,
				   r = p_right_x[n_right_dxdy > 0] >> 16; l <= r; l ++)
					op(l, n_dest_y);
				// fill a single scanline (the slope sign selects whichever of the
				// top / bottom x is further out, so that the span is conservative)

				++ n_dest_y;
				// move to the next scanline
			}

			return op;
			// not reached (the loop above has no break and only exits via return)
		}
	};

	class CUniformGrid {
	protected:
		Vector3f m_v_min; /**< @brief minimal coordinate of the bounding box */
		Vector3f m_v_max; /**< @brief maximal coordinate of the bounding box */
		Matrix4f m_t_transform; /**< @brief transformation from worldspace to the uniform grid indices */
		Vector3<size_t> m_v_table_size; /**< @brief number of cells along each respective axis (all zeros mark a failed construction, see b_Status()) */
		Vector3f m_v_cell_size; /**< @brief size of a single cell along each respective axis (inverse of the worldspace to grid scale) */

		std::map<size_t, std::vector<_TyPolygon*> > m_poly_hash; /**< @brief a "sparse" list of lists of polygons (one vector per non-empty cell, keyed by n_GridCell_Index()) */
		static std::vector<_TyPolygon*> m_empty; /**< @brief empty list of polygons shared by all empty cells (only declared here; the definition is not visible in this part of the file) */
		// todo - implement "dense" map as well?

		/**
		 *	@brief destination operator for CVolRast::Rasterize2D()
		 *
		 *	For every rasterized (x, y) column it intersects the polygon plane with two
		 *	rays along the remaining grid axis and inserts the polygon into all the
		 *	cells of that column between the two intersections.
		 */
		class CMapWriter {
		protected:
			std::map<size_t, std::vector<_TyPolygon*> > &m_r_poly_hash; /**< @brief the sparse cell map being written to */
			_TyPolygon *m_p_fill; /**< @brief the polygon to be inserted in the touched cells */
			Vector3<size_t> m_v_table_size; /**< @brief number of grid cells along each axis */
			int m_n_dim_x, m_n_dim_y, m_n_dim_z; /**< @brief grid axes corresponding to raster x, raster y and to the ray direction */
			Plane3f m_t_plane; /**< @brief plane of the polygon, intersected with the rays */
			Vector3f m_v_off_min; /**< @brief offset from the cell origin to the ray origin for the lower bound */
			Vector3f m_v_off_max; /**< @brief offset from the cell origin to the ray origin for the upper bound */

		public:
			/**
			 *	@brief constructor; only stores the arguments
			 *
			 *	@param[in,out] r_poly_hash is the sparse cell map to write to (held by reference)
			 *	@param[in] p_fill is the polygon to be inserted
			 *	@param[in] v_table_size is number of grid cells along each axis
			 *	@param[in] n_dim_x is index of the grid axis mapped to raster x
			 *	@param[in] n_dim_y is index of the grid axis mapped to raster y
			 *	@param[in] n_dim_z is index of the grid axis along which the rays are shot
			 *	@param[in] t_plane is plane of the polygon
			 *	@param[in] v_off_min is ray origin offset for the lower bound
			 *	@param[in] v_off_max is ray origin offset for the upper bound
			 */
			inline CMapWriter(std::map<size_t, std::vector<_TyPolygon*> > &r_poly_hash,
				_TyPolygon *p_fill, Vector3<size_t> v_table_size, int n_dim_x, int n_dim_y,
				int n_dim_z, Plane3f t_plane, Vector3f v_off_min, Vector3f v_off_max)
				:m_r_poly_hash(r_poly_hash), m_p_fill(p_fill), m_v_table_size(v_table_size),
				m_n_dim_x(n_dim_x), m_n_dim_y(n_dim_y), m_n_dim_z(n_dim_z),
				m_t_plane(t_plane), m_v_off_min(v_off_min), m_v_off_max(v_off_max)
			{}

			/**
			 *	@brief inserts the polygon in one column of grid cells
			 *
			 *	@param[in] x is raster x coordinate (grid index along axis m_n_dim_x)
			 *	@param[in] y is raster y coordinate (grid index along axis m_n_dim_y)
			 *
			 *	@note Columns outside of the grid are silently ignored.
			 *	@note This may throw std::bad_alloc (it inserts to a map of vectors).
			 */
			void operator ()(int x, int y)
			{
				// rasterized coordinates (x, y) are real coordinates[n_dim], rasterized z is unknown
				// rast = real[perm] | perm = 2 0 1, rast = z x y
				// real[perm] = rast | rast = z x y, real[2] = z, real[0] = x, real[1] = y

				Vector3f v_cube_org;
				v_cube_org[m_n_dim_x] = float(x);
				v_cube_org[m_n_dim_y] = float(y);
				v_cube_org[m_n_dim_z] = 0;
				// calculate the origin of the cube in question

				if(v_cube_org.x < 0 || v_cube_org.y < 0 || v_cube_org.z < 0 ||
				   unsigned(v_cube_org.x) >= m_v_table_size.x ||
				   unsigned(v_cube_org.y) >= m_v_table_size.y ||
				   unsigned(v_cube_org.z) >= m_v_table_size.z)
					return;
				// polygons outside of the grid are to be ignored

				Vector3f v_z_ray(0, 0, 0);
				v_z_ray[m_n_dim_z] = 1;
				// create a ray for testing "z" in the raster

				const int n_table_size_z = int(m_v_table_size[m_n_dim_z]);
				// clamping is done in signed arithmetic: converting a negative float
				// to size_t is undefined and used to make the upper bound clamp to
				// the full table size, filling the whole column with false positives

#if 0
				float f_intersect_min, f_intersect_max;
				for(int i = 0; i < 4; ++ i) {
					Vector3f v_corner(v_cube_org);
					v_corner[m_n_dim_x] += i & 1;
					v_corner[m_n_dim_y] += i >> 1;
					// get one of four cube corners in the z-plane
					// note that the z-dimension is irrelevant as that is where we shoot the ray

					Vector3f v_intersect;
					bool b_have_isect = m_t_plane.Intersect_Ray(v_intersect, v_corner, v_z_ray);
					_ASSERTE(b_have_isect); // must have one, that is why we chose this plane
					// get the intersection

					if(!i || f_intersect_min > v_intersect[m_n_dim_z])
						f_intersect_min = v_intersect[m_n_dim_z];
					if(!i || f_intersect_max < v_intersect[m_n_dim_z])
						f_intersect_max = v_intersect[m_n_dim_z];
					// find min/max
				}
				// find minimum / maximum intersection across the cube

				int n_min_z_ref = max(0, int(floor(f_intersect_min)));
				int n_max_z_ref = min(n_table_size_z, int(floor(f_intersect_max)) + 1);
				// find minimal and maximal projection in the plane // t_odo - this can be optimized by doing the same in the main loop
#endif // 0
				// this now only serves for doublechecking

				Vector3f v_intersect_min;
				bool b_have_isect = m_t_plane.Intersect_Ray(v_intersect_min,
					v_cube_org + m_v_off_min, v_z_ray);
				_ASSERTE(b_have_isect); // must have one

				Vector3f v_intersect_max;
				bool b_have_isect2 = m_t_plane.Intersect_Ray(v_intersect_max,
					v_cube_org + m_v_off_max, v_z_ray);
				_ASSERTE(b_have_isect2); // must have one
				// find minimal and maximal projection of the polygon plane

				const int n_min_z = max(0, int(floor(v_intersect_min[m_n_dim_z])));
				const int n_max_z = min(n_table_size_z,
					int(floor(v_intersect_max[m_n_dim_z])) + 1);
#if 0
				_ASSERTE(n_min_z == n_min_z_ref);
				_ASSERTE(n_max_z == n_max_z_ref);
#endif // 0
				// find minimal and maximal projection in the plane (a half-open range
				// of cell indices, empty if the plane passes completely outside the grid)

				// note that this is flawed at the edges, where the polygon may
				// not be overlapping under the given point. using a nearest point
				// at the polygon near the intersection would give better results,
				// but would be slower

				Vector3<size_t> v_index;
				v_index[m_n_dim_x] = x;
				v_index[m_n_dim_y] = y;
				// x and y are known to be nonnegative here (checked above)
				for(int z = n_min_z; z < n_max_z; ++ z) {
					v_index[m_n_dim_z] = z;
					size_t n_index = v_index.x + m_v_table_size.x *
						(v_index.y + m_v_table_size.y * v_index.z);
					// calculate grid index

					std::vector<_TyPolygon*> &r_cell = m_r_poly_hash[n_index];
					// look the cell up only once (creates it if it does not exist yet)

					_ASSERTE(std::find(r_cell.begin(), r_cell.end(), m_p_fill) == r_cell.end());
					// make sure that this polygon is not already present (should not be)

					r_cell.push_back(m_p_fill);
					// fill the polygon in the grid
				}
				// fill the entries in the uniform grid
			}
		};

	public:
		/**
		 *	@brief constructor; builds the uniform grid and inserts all the polygons of a mesh
		 *
		 *	The grid resolution is chosen so that the cells are approximately cubic and
		 *	their number is approximately the number of polygons divided by n_polygons_per_node.
		 *	Pointers to the polygons of r_mesh are stored in the grid (no copies are made).
		 *
		 *	@param[in] r_mesh is the mesh to be hashed (face normals must be valid)
		 *	@param[in] v_min is minimal coordinate of the bounding box (polygons outside of the box are ignored)
		 *	@param[in] v_max is maximal coordinate of the bounding box (polygons outside of the box are ignored)
		 *	@param[in] n_polygons_per_node is desired number of polygons per node
		 *		(to determine uniform grid size; zero is handled as one)
		 *
		 *	@note Use b_Status() to determine whether constructor was successful
		 *		(it fails if there is not enough memory).
		 *	@note NOTE(review): a bounding box with zero extent along some axis makes the
		 *		scale calculation below divide by zero - callers should pass a box
		 *		with nonzero volume.
		 */
		CUniformGrid(const CPolyMesh &r_mesh, Vector3f v_min,
			Vector3f v_max, size_t n_polygons_per_node = 10)
			:m_v_min(v_min), m_v_max(v_max)
		{
			const size_t n_per_node = max(size_t(1), n_polygons_per_node);
			// avoid integer division by zero

			size_t n_node_num = max(size_t(1), r_mesh.n_Polygon_Num() / n_per_node);
			Vector3f v_cell_size = v_max - v_min;
			float f_cell_side = std::max((float)pow(v_cell_size.x * v_cell_size.y * v_cell_size.z /
				n_node_num, 1.0f / 3), 1e-2f);
			float f_inv_cell_side = 1 / f_cell_side;
			m_v_table_size.x = max(size_t(1), size_t(v_cell_size.x * f_inv_cell_side + .5f));
			m_v_table_size.y = max(size_t(1), size_t(v_cell_size.y * f_inv_cell_side + .5f));
			m_v_table_size.z = max(size_t(1), size_t(v_cell_size.z * f_inv_cell_side + .5f));
			// calculate grid dimensions (note that v_cell_size is the extent of the
			// whole bounding box here, f_cell_side is the ideal side of a cubic cell)

			Vector3f v_scale(Vector3f(float(m_v_table_size.x),
				float(m_v_table_size.y), float(m_v_table_size.z)) / v_cell_size);
			// scaling of the cells (1 / cell size in each dimension)
			// note it will be stored on the diagonal of m_t_transform

			for(int n = 0; n < 3; ++ n)
				m_v_cell_size[n] = 1 / v_scale[n];
			// calculate cell sizes as well

			m_t_transform.Identity();
			m_t_transform.Scale(v_scale);
			m_t_transform.Translate(-v_min);
			// calculate a transformation from worldspace to gridspace

			_ASSERTE((m_t_transform.v_Transform_Pos(v_min) - Vector3f(0, 0, 0)).f_Length() < f_epsilon);
			_ASSERTE((m_t_transform.v_Transform_Pos(v_max) - Vector3f(float(m_v_table_size.x),
				float(m_v_table_size.y), float(m_v_table_size.z))).f_Length() < f_epsilon);
			// make sure that the transform works

			try {
#ifdef __USE_VOLRAST
				std::vector<Vector3f> poly_verts;
				std::vector<Vector2f> poly_verts2;
#endif // __USE_VOLRAST
				for(size_t i = 0, n = r_mesh.n_Polygon_Num(); i < n; ++ i) {
					const _TyPolygon &r_poly = r_mesh.r_Polygon(i);
					if(r_poly.n_Vertex_Num() < 3)
						continue; // will not work with degenerates
					// get a polygon

#ifdef __USE_VOLRAST
					int n_dim_x, n_dim_y, n_dim_z;
					Vector3f v_normal = r_poly.t_Normal().v_normal;
					if(fabs(v_normal.x) > fabs(v_normal.y) && fabs(v_normal.x) >= fabs(v_normal.z)) {
						n_dim_x = 1;
						n_dim_y = 2;
						n_dim_z = 0;
						// x is the greatest
					} else if(fabs(v_normal.y) > fabs(v_normal.x) && fabs(v_normal.y) >= fabs(v_normal.z)) {
						n_dim_x = 0;
						n_dim_y = 2;
						n_dim_z = 1;
						// y is the greatest
					} else /*if(fabs(v_normal.z) > fabs(v_normal.x) && fabs(v_normal.z) >= fabs(v_normal.y))*/ {
						n_dim_x = 0;
						n_dim_y = 1;
						n_dim_z = 2;
						// z is the greatest
					}
					// choose "good" dimensions for 2D rasterization (note that this will somehow work even
					// without the face normals, it will just be slower)

					poly_verts.resize(r_poly.n_Vertex_Num());
					poly_verts2.resize(r_poly.n_Vertex_Num());
					for(size_t j = 0, m = r_poly.n_Vertex_Num(); j < m; ++ j) {
						poly_verts[j] = m_t_transform.v_Transform_Pos((Vector3f)r_poly.t_Vertex(j));
						poly_verts2[j] = Vector2f(poly_verts[j][n_dim_x], poly_verts[j][n_dim_y]);
					}
					// transform polygon vertices to gridspace and project them to the chosen 2D plane

					Plane3f t_plane = m_t_transform.t_Transform_Plane(r_poly.t_Normal());
					t_plane.Normalize();
					// here we do need the plane

					float f_inv_dot = 1 / t_plane.v_normal[n_dim_z];
					// dot of plane normal with grid dimension

#if 0
					int n_min_vertex = -1, n_max_vertex = -1;
					float f_min_vertex_dist, f_max_vertex_dist;
					for(int j = 0; j < 8; ++ j) {
						int n_x = (j >> n_dim_x) & 1;
						int n_y = (j >> n_dim_y) & 1;
						int n_z = (j >> n_dim_z) & 1;
						Vector3f v_corner((n_x)? float(m_v_table_size[n_dim_x]) : 0,
							(n_y)? float(m_v_table_size[n_dim_y]) : 0,
							(n_z)? float(m_v_table_size[n_dim_z]) : 0);
						// get corners of the bounding box, in box-space

						if(!n_z) {
							float f_dist = t_plane.f_Vector_Dist(v_corner) * f_inv_dot;
							if(n_min_vertex == -1 || f_min_vertex_dist < f_dist) {
								f_min_vertex_dist = f_dist;
								n_min_vertex = n_x | (n_y << 1);
							}
						} else {
							float f_dist = t_plane.f_Vector_Dist(v_corner) * f_inv_dot;
							if(n_max_vertex == -1 || f_max_vertex_dist < f_dist) {
								f_max_vertex_dist = f_dist;
								n_max_vertex = n_x | (n_y << 1);
							}
						}
						// note that using plane distance is actually not correct,
						// it should be plane / ray intersection, but the distance
						// by a ray is equivalent (just scaled by ray / normal vector dot)

						// todo - what will happen if the triangle is completely outside?
					}
					// this does not really work, the principal idea is flawed
#else // 0
					Vector3f v_z_ray(0, 0, 0);
					v_z_ray[n_dim_z] = 1;
					// create a ray for testing "z" in the raster

					int n_min_vertex, n_max_vertex;
					float f_intersect_min, f_intersect_max;
					for(int j = 0; j < 4; ++ j) {
						Vector3f v_corner(0, 0, 0);
						v_corner[n_dim_x] += (j & 1) * m_v_table_size[n_dim_x];
						v_corner[n_dim_y] += (j >> 1) * m_v_table_size[n_dim_y];
						// get one of four cube corners in the z-plane
						// note that the z-dimension is irrelevant as that is where we shoot the ray

						Vector3f v_intersect;
						bool b_have_isect = t_plane.Intersect_Ray(v_intersect, v_corner, v_z_ray);
						_ASSERTE(b_have_isect); // must have one, that is why we chose this plane
						// get the intersection

						if(!j || f_intersect_min > v_intersect[n_dim_z]) {
							f_intersect_min = v_intersect[n_dim_z];
							n_min_vertex = j;
						}
						if(!j || f_intersect_max < v_intersect[n_dim_z]) {
							f_intersect_max = v_intersect[n_dim_z];
							n_max_vertex = j;
						}
						// find min/max
					}
					// find minimum / maximum intersection across the cube
#endif // 0
					// find out which cube vertices to test against

					Vector3f v_off_min(0, 0, 0);
					if(n_min_vertex & 1)
						v_off_min[n_dim_x] += 1;
					if(n_min_vertex & 2)
						v_off_min[n_dim_y] += 1;
					Vector3f v_off_max(0, 0, 0);
					if(n_max_vertex & 1)
						v_off_max[n_dim_x] += 1;
					if(n_max_vertex & 2)
						v_off_max[n_dim_y] += 1;
					// calculate worldspace offsets to grid vertices, containing min / max "height"

					CMapWriter writer(m_poly_hash, const_cast<_TyPolygon*>(&r_poly), m_v_table_size,
						n_dim_x, n_dim_y, n_dim_z, t_plane, v_off_min, v_off_max);
					CVolRast::Rasterize2D(&poly_verts2[0], poly_verts2.size(), writer);
					// write polygon entries in the map
#else // __USE_VOLRAST
					CPolygon2<Vector3f> transformed_poly;
					for(size_t j = 0, m = r_poly.n_Vertex_Num(); j < m; ++ j) {
						transformed_poly.Insert_Vertex(m_t_transform.v_Transform_Pos(
							(Vector3f)r_poly.t_Vertex(j)));
					}
					transformed_poly.Calc_Normal(); // !!
					// transform the original polygon to the grid space

					Vector3<size_t> v_min_cell, v_max_cell;
					{
						Vector3f v_min = transformed_poly.t_Vertex(0);
						Vector3f v_max = v_min;
						for(size_t k = 1, o = r_poly.n_Vertex_Num(); k < o; ++ k) {
							Vector3f v = transformed_poly.t_Vertex(k);
							for(int j = 0; j < 3; ++ j) {
								if(v_min[j] > v[j])
									v_min[j] = v[j];
								if(v_max[j] < v[j])
									v_max[j] = v[j];
							}
						}
						for(int j = 0; j < 3; ++ j) {
							v_min_cell[j] = min(size_t(max(0, int(floor(v_min[j])))), m_v_table_size[j]);
							v_max_cell[j] = min(size_t(max(0, int(floor(v_max[j])) + 1)), m_v_table_size[j]);
						}
					}
					// find polygon bounding box, in grid space (a half-open range
					// of cell indices, clamped to the grid; note that the local
					// v_min, v_max shadow the constructor arguments in this scope)

					for(size_t x = v_min_cell.x; x < v_max_cell.x; ++ x) {
						for(size_t y = v_min_cell.y; y < v_max_cell.y; ++ y) {
							for(size_t z = v_min_cell.z; z < v_max_cell.z; ++ z) {
								Vector3f v_cell_min = Vector3f(float(x), float(y), float(z));
								Vector3f v_cell_max = v_cell_min + 1;
								// calculate a grid cell

								if(b_PolygonCell_Intersect(transformed_poly, v_cell_min, v_cell_max)) {
									std::vector<_TyPolygon*> &r_cell = m_poly_hash[n_GridCell_Index(x, y, z)];
									r_cell.push_back(const_cast<_TyPolygon*>(&r_poly));
								}
								// insert the polygon
							}
						}
					}
					// use a dumb, but most likely correct algorithm
#endif // __USE_VOLRAST

#if 0 // only for debugging
#ifdef __USE_VOLRAST
					CPolygon2<Vector3f> transformed_poly;
					for(size_t j = 0, m = r_poly.n_Vertex_Num(); j < m; ++ j) {
						transformed_poly.Insert_Vertex(m_t_transform.v_Transform_Pos(
							(Vector3f)r_poly.t_Vertex(j)));
					}
					transformed_poly.Calc_Normal(); // !!
					// transform the original polygon to the grid space (if we did not do it already)
#endif // __USE_VOLRAST

					size_t n_false_positive_num = 0;
					size_t n_false_negative_num = 0;
					for(size_t x = 0; x < m_v_table_size.x; ++ x) {
						for(size_t y = 0; y < m_v_table_size.y; ++ y) {
							for(size_t z = 0; z < m_v_table_size.z; ++ z) {
								Vector3f v_cell_min = Vector3f(float(x), float(y), float(z));
								Vector3f v_cell_max = v_cell_min + 1;
								// calculate a grid cell

								const std::vector<_TyPolygon*> &r_cell = r_GridCell(x, y, z);
								bool b_found = std::find(r_cell.begin(),
									r_cell.end(), &r_poly) != r_cell.end();
								if(b_PolygonCell_Intersect(transformed_poly, v_cell_min, v_cell_max)) {
									if(!b_found)
										++ n_false_negative_num;
								} else {
									if(b_found)
										++ n_false_positive_num;
								}
							}
						}
					}
					if(n_false_positive_num) {
						fprintf(stderr, "warning: polygon " PRIsize " had " PRIsize
							" false positives\n", i, n_false_positive_num);
					}
					if(n_false_negative_num) {
						fprintf(stderr, "error: polygon " PRIsize " had " PRIsize
							" false negatives (misses)\n", i, n_false_negative_num);
					}
					// debug - test if the polygon was filled in the grid correctly
#endif // 0
				}
			} catch(std::bad_alloc&) {
				m_v_table_size = Vector3<size_t>(0, 0, 0); // mark failure
			}
		}

		/**
		 *	@brief gets the list of polygons stored in a grid cell
		 *
		 *	@param[in] x is zero-based index of the cell along the x axis
		 *	@param[in] y is zero-based index of the cell along the y axis
		 *	@param[in] z is zero-based index of the cell along the z axis
		 *
		 *	@return Returns reference to the list of polygons in the selected cell,
		 *		or to the shared empty list if the cell is outside of the grid
		 *		or if nothing was stored in it.
		 *
		 *	@note The shared empty list is returned by a non-const reference;
		 *		callers must not modify it.
		 */
		std::vector<_TyPolygon*> &r_GridCell(size_t x, size_t y, size_t z)
		{
			const bool b_inside_grid = x < m_v_table_size.x &&
				y < m_v_table_size.y && z < m_v_table_size.z;
			if(b_inside_grid) {
				std::map<size_t, std::vector<_TyPolygon*> >::iterator p_found_it =
					m_poly_hash.find(n_GridCell_Index(x, y, z));
				if(p_found_it != m_poly_hash.end())
					return p_found_it->second;
				// the cell exists in the sparse map
			}
			return m_empty;
			// out of bounds or an empty cell
		}

		/**
		 *	@brief gets the list of polygons stored in a grid cell (const version)
		 *
		 *	@param[in] x is zero-based index of the cell along the x axis
		 *	@param[in] y is zero-based index of the cell along the y axis
		 *	@param[in] z is zero-based index of the cell along the z axis
		 *
		 *	@return Returns const reference to the list of polygons in the selected
		 *		cell, or to the shared empty list if the cell is outside of the grid
		 *		or if nothing was stored in it.
		 */
		const std::vector<_TyPolygon*> &r_GridCell(size_t x, size_t y, size_t z) const
		{
			const bool b_inside_grid = x < m_v_table_size.x &&
				y < m_v_table_size.y && z < m_v_table_size.z;
			if(b_inside_grid) {
				std::map<size_t, std::vector<_TyPolygon*> >::const_iterator p_found_it =
					m_poly_hash.find(n_GridCell_Index(x, y, z));
				if(p_found_it != m_poly_hash.end())
					return p_found_it->second;
				// the cell exists in the sparse map
			}
			return m_empty;
			// out of bounds or an empty cell
		}

		/**
		 *	@brief determines whether the constructor succeeded
		 *	@return Returns true if the grid has nonzero size (the constructor
		 *		zeroes the grid size on failure), otherwise returns false.
		 */
		inline bool b_Status() const
		{
			const bool b_have_x = m_v_table_size.x != 0;
			_ASSERTE(b_have_x == (m_v_table_size.y != 0) &&
				b_have_x == (m_v_table_size.z != 0));
			// the dimensions are either all zero (failure) or all nonzero
			return b_have_x;
		}

		/**
		 *	@brief gets the grid resolution
		 *	@return Returns the number of grid cells along each axis
		 *		(all zeros if the constructor failed, see b_Status()).
		 */
		inline Vector3<size_t> v_Table_Size() const
		{
			return m_v_table_size;
		}

		/**
		 *	@brief calculates the minimal corner of a grid cell
		 *
		 *	@param[in] x is index of the cell along the x axis
		 *	@param[in] y is index of the cell along the y axis
		 *	@param[in] z is index of the cell along the z axis
		 *
		 *	@return Returns the bounding box minimum, offset by the given
		 *		number of cell sizes along each axis.
		 *
		 *	@note The indices are not checked against the grid size.
		 */
		Vector3f v_GridCell_Min(int x, int y, int z) const
		{
			Vector3f v_offset(x * m_v_cell_size.x,
				y * m_v_cell_size.y, z * m_v_cell_size.z);
			// offset of the cell from the corner of the bounding box

			return m_v_min + v_offset;
		}

		/**
		 *	@brief gets the size of a grid cell
		 *	@return Returns the size of a single grid cell along each axis
		 *		(all the cells have the same size).
		 */
		inline Vector3f v_GridCell_Size() const
		{
			return m_v_cell_size;
		}

		inline size_t n_GridCell_Index(int x, int y, int z) const
		{
			return x + m_v_table_size.x * (y + m_v_table_size.y * z);
		}

		template <class CPolygonType>
		static bool b_PolygonCell_Intersect(const CPolygonType &r_poly,
			Vector3f v_cell_min, Vector3f v_cell_max)
		{
			if(r_poly.n_Vertex_Num() < 3)
				return false;
			// can't handle degenerate cases

			Vector3f v_boxcenter = (v_cell_min + v_cell_max) * .5f;
			Vector3f v_boxhalfsize = v_cell_max - v_boxcenter;
			// calculate box center and half of its size

			Vector3f v_vert0 = (Vector3f)r_poly.t_Vertex(0) - v_boxcenter;
			Vector3f v_vert1 = (Vector3f)r_poly.t_Vertex(1) - v_boxcenter;
			for(size_t i = 2, n = r_poly.n_Vertex_Num(); i < n; ++ i) {
				Vector3f v_vert2 = (Vector3f)r_poly.t_Vertex(i) - v_boxcenter;
				// move everything so that the boxcenter is in (0,0,0)

				if(b_TriangleCell_Intersect(v_vert0, v_vert1, v_vert2, v_boxcenter,
				   v_boxhalfsize, v_cell_min, v_cell_max))
					return true;
				// test against a triangle

				v_vert1 = v_vert2;
			}
			// decompose to triangles, test separately

			return false;
			// none of the triangles intersected, no intersection occurs
		}

		// note that r_poly can be arbitrary polygon, different from those passed to the ctor
		void Find_PolygonIntersectors(const _TyPolygon &r_poly,
			std::vector<_TyPolygon*> &r_isect_list) // throw(std::bad_alloc)
		{
			r_isect_list.clear();
			const size_t n = r_poly.n_Vertex_Num();
			if(n < 3)
				return;

			CPolygon2<Vector3f> transformed_poly;
			for(size_t i = 0; i < n; ++ i) {
				transformed_poly.Insert_Vertex(m_t_transform.v_Transform_Pos(
					(Vector3f)r_poly.t_Vertex(i)));
			}
			transformed_poly.Calc_Normal(); // !!
			// get polygon vertices

			Vector3<size_t> v_min_cell, v_max_cell;
			{
				Vector3f v_min = transformed_poly.t_Vertex(0);
				Vector3f v_max = v_min;
				for(size_t i = 1; i < n; ++ i) {
					Vector3f v = transformed_poly.t_Vertex(i);
					for(int j = 0; j < 3; ++ j) {
						if(v_min[j] > v[j])
							v_min[j] = v[j];
						if(v_max[j] < v[j])
							v_max[j] = v[j];
					}
				}
				for(int j = 0; j < 3; ++ j) {
					v_min_cell[j] = min(size_t(max(0, int(floor(v_min[j])))), m_v_table_size[j]);
					v_max_cell[j] = min(size_t(max(0, int(floor(v_max[j])) + 1)), m_v_table_size[j]);
				}
			}
			// find polygon bounding box, in grid space

			size_t n_cell_num = 0;
			for(size_t x = v_min_cell.x; x < v_max_cell.x; ++ x) {
				for(size_t y = v_min_cell.y; y < v_max_cell.y; ++ y) {
					for(size_t z = v_min_cell.z; z < v_max_cell.z; ++ z) {
						Vector3f v_cell_min = Vector3f(float(x), float(y), float(z));
						Vector3f v_cell_max = v_cell_min + 1; // worldspace again
						// calculate a grid cell

						if(b_PolygonCell_Intersect(transformed_poly, v_cell_min, v_cell_max)) {
							const std::vector<_TyPolygon*> &r_cell = r_GridCell(x, y, z);
							r_isect_list.insert(r_isect_list.end(), r_cell.begin(), r_cell.end());
							++ n_cell_num;
						}
					}
				}
			}
			// add all the intersecting polygons

			if(n_cell_num > 1) {
				std::sort(r_isect_list.begin(), r_isect_list.end());
				r_isect_list.erase(std::unique(r_isect_list.begin(),
					r_isect_list.end()), r_isect_list.end());
			}
			// remove duplicate entries, if any
		}

	protected:
		/**
		 *	@brief triangle / axis-aligned box overlap test, using separating axes
		 *
		 *	Tests nine axes given by cross products of the triangle edges with the
		 *	coordinate axes, then the three coordinate axes and finally the triangle
		 *	plane; the first separating axis found ends the test.
		 *
		 *	@param[in] v0 is the first triangle vertex, relative to the box center
		 *	@param[in] v1 is the second triangle vertex, relative to the box center
		 *	@param[in] v2 is the third triangle vertex, relative to the box center
		 *	@param[in] v_boxcenter is center of the box (unused)
		 *	@param[in] v_boxhalfsize is half of the box size along each axis
		 *	@param[in] v_cell_min is minimal corner of the box (unused)
		 *	@param[in] v_cell_max is maximal corner of the box (unused)
		 *
		 *	@return Returns true if the triangle overlaps the box, otherwise returns false.
		 *
		 *	@note The axistest_* macros read v0, v1, v2 and v_boxhalfsize by name
		 *		and return false from this function if they find a separating axis.
		 *	@note Code from http://fileadmin.cs.lth.se/cs/Personal/Tomas_Akenine-Moller/code/
		 *		(Bug-fixed code).
		 */
		static bool b_TriangleCell_Intersect(Vector3f v0, Vector3f v1, Vector3f v2,
			Vector3f v_boxcenter, Vector3f v_boxhalfsize,
			Vector3f v_cell_min, Vector3f v_cell_max)
		{
			Vector3f e0 = v1 - v0;
			Vector3f e1 = v2 - v1;
			Vector3f e2 = v0 - v2;
			// the three edges of the triangle

			{
				float f_abs_x = fabs(e0.x), f_abs_y = fabs(e0.y), f_abs_z = fabs(e0.z);
				axistest_x01(e0.z, e0.y, f_abs_z, f_abs_y);
				axistest_y02(e0.z, e0.x, f_abs_z, f_abs_x);
				axistest_z12(e0.y, e0.x, f_abs_y, f_abs_x);
			}
			// axes given by the first edge

			{
				float f_abs_x = fabs(e1.x), f_abs_y = fabs(e1.y), f_abs_z = fabs(e1.z);
				axistest_x01(e1.z, e1.y, f_abs_z, f_abs_y);
				axistest_y02(e1.z, e1.x, f_abs_z, f_abs_x);
				axistest_z0(e1.y, e1.x, f_abs_y, f_abs_x);
			}
			// axes given by the second edge

			{
				float f_abs_x = fabs(e2.x), f_abs_y = fabs(e2.y), f_abs_z = fabs(e2.z);
				axistest_x2(e2.z, e2.y, f_abs_z, f_abs_y);
				axistest_y1(e2.z, e2.x, f_abs_z, f_abs_x);
				axistest_z12(e2.y, e2.x, f_abs_y, f_abs_x);
			}
			// axes given by the third edge

			for(int n_axis = 0; n_axis < 3; ++ n_axis) {
				float f_tri_min, f_tri_max;
				MinMax3(v0[n_axis], v1[n_axis], v2[n_axis], f_tri_min, f_tri_max);
				if(f_tri_min > v_boxhalfsize[n_axis] || f_tri_max < -v_boxhalfsize[n_axis])
					return false;
			}
			// the coordinate axes; this amounts to testing the bounding
			// box of the triangle against the box

			Vector3f v_normal = e0.v_Cross(e1);
			Vector3f v_corner_near, v_corner_far;
			for(int n_axis = 0; n_axis < 3; ++ n_axis) {
				float f_half = v_boxhalfsize[n_axis];
				float f_vertex = v0[n_axis];
				if(v_normal[n_axis] > 0) {
					v_corner_near[n_axis] = -f_half - f_vertex;
					v_corner_far[n_axis] = f_half - f_vertex;
				} else {
					v_corner_near[n_axis] = f_half - f_vertex;
					v_corner_far[n_axis] = -f_half - f_vertex;
				}
			}
			// the two opposite box corners that are the extremes along
			// the triangle normal, relative to the first vertex

			if(v_normal.f_Dot(v_corner_near) > 0)
				return false;
			// the whole box is in front of the triangle plane

			return v_normal.f_Dot(v_corner_far) >= 0;
			// the box overlaps the plane unless it is all behind it
		}

		/**
		 *	@brief finds minimum and maximum of three numbers
		 *
		 *	@param[in] f_x0 is the first number
		 *	@param[in] f_x1 is the second number
		 *	@param[in] f_x2 is the third number
		 *	@param[out] r_f_min is filled with the smallest of the three numbers
		 *	@param[out] r_f_max is filled with the greatest of the three numbers
		 */
		static inline void MinMax3(float f_x0, float f_x1, float f_x2,
			float &r_f_min, float &r_f_max)
		{
			r_f_min = r_f_max = f_x0;
			// start with the first number

			const float p_other[2] = {f_x1, f_x2};
			for(int i = 0; i < 2; ++ i) {
				if(p_other[i] < r_f_min)
					r_f_min = p_other[i];
				if(p_other[i] > r_f_max)
					r_f_max = p_other[i];
			}
			// refine using the remaining two, in order
		}
	};
};

/**
 *	@brief simple boolean operations implementation
 */
class CBooleanOps {
public:
	typedef CPolyMesh::_TyPolygon _TyPolygon; /**< @brief polygon type */
	typedef CPolyMesh::_TyVertex _TyVertex; /**< @brief vertex type */

	/**
	 *	@brief boolean operation mode flags and their named combinations
	 *
	 *	The *_Mask values are individual bits which BooleanOp() decodes to its
	 *	boolean arguments; the remaining values are preset combinations of those bits.
	 */
	enum {
		bmode_KeepA_Mask = 1, /**< @brief keep the parts of mesh A which are outside of mesh B */
		bmode_KeepB_Mask = 2, /**< @brief keep the parts of mesh B which are outside of mesh A */
		bmode_KeepAinB_Mask = 4, /**< @brief keep the parts of mesh A which are inside of mesh B */
		bmode_FlipAinB_Mask = 8, /**< @brief flip the parts of mesh A which are inside of mesh B */
		bmode_KeepBinA_Mask = 16, /**< @brief keep the parts of mesh B which are inside of mesh A */
		bmode_FlipBinA_Mask = 32, /**< @brief flip the parts of mesh B which are inside of mesh A */

		bmode_Subtract_B_from_A = bmode_KeepA_Mask | bmode_KeepBinA_Mask | bmode_FlipBinA_Mask, /**< @brief A outside B, plus flipped B inside A */
		bmode_Union = bmode_KeepA_Mask | bmode_KeepB_Mask, /**< @brief A outside B, plus B outside A */
		bmode_Intersection = bmode_KeepAinB_Mask | bmode_KeepBinA_Mask, /**< @brief A inside B, plus B inside A */
		// all six bits set: both meshes are kept whole, the inside parts are flipped
		bmode_Nonequivalence = bmode_KeepA_Mask | bmode_KeepB_Mask | bmode_KeepAinB_Mask |
			bmode_KeepBinA_Mask | bmode_FlipAinB_Mask | bmode_FlipBinA_Mask
	};

public:
	/**
	 *	@brief performs a boolean operation selected by mode flags
	 *
	 *	Decodes the individual bmode_*_Mask bits of n_mode and forwards
	 *	to the overload which takes the flags as separate booleans.
	 *
	 *	@param[out] r_dest is the destination mesh
	 *	@param[in] n_mode is any one (or a combination) of bmode_*
	 *	@param[in] r_mesh_a is the first operand
	 *	@param[in] r_mesh_b is the second operand
	 *	@param[in] n_raytrace_retry_num is forwarded unchanged
	 *	@param[in] f_raytrace_retry_wiggle is forwarded unchanged
	 *
	 *	@return Returns the result of the forwarded call.
	 *
	 *	@note The vertices are exploded in the meshes.
	 */
	static inline bool BooleanOp(CPolyMesh &r_dest, int n_mode,
		const CPolyMesh &r_mesh_a, const CPolyMesh &r_mesh_b,
		int n_raytrace_retry_num = 4, float f_raytrace_retry_wiggle = .1f)
	{
		const bool b_keep_a_outside_b = (n_mode & bmode_KeepA_Mask) != 0;
		const bool b_keep_b_outside_a = (n_mode & bmode_KeepB_Mask) != 0;
		const bool b_keep_a_inside_b = (n_mode & bmode_KeepAinB_Mask) != 0;
		const bool b_flip_a_inside_b = (n_mode & bmode_FlipAinB_Mask) != 0;
		const bool b_keep_b_inside_a = (n_mode & bmode_KeepBinA_Mask) != 0;
		const bool b_flip_b_inside_a = (n_mode & bmode_FlipBinA_Mask) != 0;
		// decode the mode bits, one flag per mask

		return BooleanOp(r_dest, r_mesh_a, r_mesh_b,
			b_keep_a_outside_b, b_keep_b_outside_a,
			b_keep_a_inside_b, b_flip_a_inside_b,
			b_keep_b_inside_a, b_flip_b_inside_a,
			n_raytrace_retry_num, f_raytrace_retry_wiggle);
	}

	/**
	 *	@brief performs a boolean operation on two meshes, controlled by explicit keep / flip flags
	 *
	 *	The meshes which need it are cut by the polygons of the other mesh. Each polygon
	 *	of a cut mesh is then classified by shooting a ray from its center along its normal
	 *	at the other (original) mesh: if the nearest hit polygon is hit from the back, the polygon
	 *	is treated as being inside the other mesh, otherwise as being outside. The polygons are
	 *	kept, flipped or deleted according to the flags and the results are merged to r_dest.
	 *
	 *	@param[out] r_dest is the destination mesh (DeleteVertices() is called on it first;
	 *		must not be the same object as r_mesh_a or r_mesh_b)
	 *	@param[in] r_mesh_a is the first operand
	 *	@param[in] r_mesh_b is the second operand (must not be the same object as r_mesh_a)
	 *	@param[in] b_keep_a_outside_b is set to keep polygons of A classified as outside of B
	 *	@param[in] b_keep_b_outside_a is set to keep polygons of B classified as outside of A
	 *	@param[in] b_keep_a_inside_b is set to keep polygons of A classified as inside of B
	 *	@param[in] b_flip_a_inside_b is set to swap vertex order and negate vertex normals
	 *		of the kept polygons of A inside of B
	 *	@param[in] b_keep_b_inside_a is set to keep polygons of B classified as inside of A
	 *	@param[in] b_flip_b_inside_a is set to swap vertex order and negate vertex normals
	 *		of the kept polygons of B inside of A
	 *	@param[in] n_raytrace_retry_num is the maximum number of rays shot per polygon
	 *		(values below 1 are treated as 1)
	 *	@param[in] f_raytrace_retry_wiggle is the magnitude of the random offset added to each
	 *		component of the ray direction before a retry
	 *
	 *	@return Returns true on success, false if copying, intersecting, exploding
	 *		or merging of the meshes fails.
	 */
	static bool BooleanOp(CPolyMesh &r_dest,
		const CPolyMesh &r_mesh_a, const CPolyMesh &r_mesh_b, bool b_keep_a_outside_b,
		bool b_keep_b_outside_a, bool b_keep_a_inside_b, bool b_flip_a_inside_b,
		bool b_keep_b_inside_a, bool b_flip_b_inside_a,
		int n_raytrace_retry_num = 4, float f_raytrace_retry_wiggle = .1f)
	{
		_ASSERTE(&r_dest != &r_mesh_a && &r_dest != &r_mesh_b);
		_ASSERTE(&r_mesh_a != &r_mesh_b);
		// dest can't be the same as any of the meshes

		if(n_raytrace_retry_num < 1)
			n_raytrace_retry_num = 1;
		// must raytrace at least once, otherwise every polygon would be classified
		// as outside without ever shooting a ray (same as in Remove_HiddenSurfaces())

		r_dest.DeleteVertices();
		// delete dest, don't forget

		bool b_need_intersect_a = (b_keep_a_outside_b != b_keep_a_inside_b) ||
			(b_keep_a_inside_b && b_flip_a_inside_b);
		bool b_need_intersect_b = (b_keep_b_outside_a != b_keep_b_inside_a) ||
			(b_keep_b_inside_a && b_flip_b_inside_a);
		// determine which mesh needs to be intersected by the other (a mesh needs it if its
		// inside and outside parts are treated differently, or if its inside part gets flipped)

		CPolyMesh temp; // to keep copy of b; copy of a is kept in r_dest
		CPolyMesh &r_mesh_a_isect = (b_need_intersect_a)? r_dest : const_cast<CPolyMesh&>(r_mesh_a);
		CPolyMesh &r_mesh_b_isect = (b_need_intersect_b)? ((b_need_intersect_a)? temp : r_dest) :
			const_cast<CPolyMesh&>(r_mesh_b);
		// choose how the meshes will be intersected (they might not be, then the references
		// point to the original meshes with const-ness stripped)

		if((b_need_intersect_a && !r_mesh_a_isect.Copy(r_mesh_a)) ||
		   (b_need_intersect_b && !r_mesh_b_isect.Copy(r_mesh_b)))
			return false;
		// copy meshes for intersection

		if(!Intersect_Meshes(r_mesh_a_isect, r_mesh_a, b_need_intersect_a, r_mesh_b_isect,
		   b_need_intersect_a, b_need_intersect_b))
			return false;
		// perform all the mesh intersections

		uint32_t n_rand = 12345;
		// state of the linear congruential generator used to wiggle the rays

		if(b_need_intersect_a) {
			/*Vector3f v_min, v_max;
			r_mesh_b.BoundingBox(v_min, v_max);
			v_min -= f_epsilon;
			v_max += f_epsilon;
			CPolygonHash::CUniformGrid grid(r_mesh_b, v_min, v_max);
			if(!grid.b_Status())
				return false;*/
			// build uniform grid in order to accelerate raytracing // todo - implement

			if(b_keep_a_inside_b && b_flip_a_inside_b && b_keep_a_outside_b) {
				if(!r_mesh_a_isect.ExplodeVertices())
					return false;
			}
			// in case vertices of only some polygons will be flipped, need to explode vertices

			for(size_t i = 0, n = r_mesh_a_isect.n_Polygon_Num(); i < n; ++ i) {
				_TyPolygon &r_poly = r_mesh_a_isect.r_Polygon(i);

				Vector3f v_center = r_poly.v_Center();
				Vector3f v_normal = r_poly.t_Normal().v_normal;
				float f_nearest = 0; // only read once n_nearest is valid
				size_t n_nearest = -1;
				bool b_from_back = false, b_sure = false; // only read once n_nearest is valid
				// NOTE(review): n_nearest / f_nearest are not reset between the retries, so a retry
				// only accepts hits nearer than the previous one - confirm this is intended
				for(int n_retry = 0; n_retry < n_raytrace_retry_num; ++ n_retry) {
					for(size_t j = 0, m = r_mesh_b.n_Polygon_Num(); j < m; ++ j) { // todo - use uniform grid for raytracing as well
						const _TyPolygon &r_poly2 = r_mesh_b.r_Polygon(j);
						float t;
						if(r_poly2.b_RayHit(v_center, v_normal, t, f_epsilon) &&
						   (n_nearest == size_t(-1) || t < f_nearest) && t >= f_epsilon * .5f) {
							f_nearest = t;
							n_nearest = j;
							b_from_back = r_poly2.t_Normal().v_normal.f_Dot(v_normal) > 0;
							b_sure = fabs(r_poly2.t_Normal().v_normal.f_Dot(v_normal)) > f_epsilon;
							if(!b_sure)
								break;
						}
					}
					// shoot a ray from a polygon, see if it gets to another polygon

					if(n_nearest == size_t(-1) || !b_sure) {
						bool b_break_out = false;
						for(;;) {
							v_normal.x += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							v_normal.y += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							v_normal.z += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							if(v_normal.f_Dot(r_poly.t_Normal().v_normal) < .1f) {
								if(fabs(v_normal.f_Length() - 1) > f_epsilon) {
									if(!r_poly.Calc_Normal(f_epsilon)) {
										b_break_out = true; // avoid infinite loops
										break;
									}
								}
								v_normal = r_poly.t_Normal().v_normal; // deviated too far; restart
							} else
								break;
						}
						if(b_break_out)
							break;
					} else
						break;
					// in case the ray missed all the polygons, try shooting again, under a slightly
					// different direction
				}

				if(n_nearest == size_t(-1) || !b_from_back) {
					if(!b_keep_a_outside_b) {
						r_mesh_a_isect.DeletePolygons(i, i + 1);
						-- i;
						-- n; // !!
					}
					// the polygon is at the outside, or it faces another (suspectably) outside polygon
				} else {
					if(!b_keep_a_inside_b) {
						r_mesh_a_isect.DeletePolygons(i, i + 1);
						-- i;
						-- n; // !!
					} else if(b_flip_a_inside_b) {
						r_poly.Swap_VertexOrder(); // note that the normals will not be correct now

						_ASSERTE(b_keep_a_inside_b && b_flip_a_inside_b); // should be set
						if(b_keep_a_outside_b)
							r_poly.for_each_vertex(Flip_RefVertexNormals); // the vertices are exploded, can go ahead
					}
					// the polygon is inside b
				}
			}

			if(b_keep_a_inside_b && b_flip_a_inside_b) {
				if(!b_keep_a_outside_b) {
					r_mesh_a_isect.r_Vertex_Pool().ForEach(0, size_t(-1), Flip_VertexNormals);
					// everything is flipped, can flip globally
				} else {
					// already handled that
				}
			}
			// need to flip vertex normals as well
		}

		if(b_need_intersect_b) {
			/*Vector3f v_min, v_max;
			r_mesh_a.BoundingBox(v_min, v_max);
			v_min -= f_epsilon;
			v_max += f_epsilon;
			CPolygonHash::CUniformGrid grid(r_mesh_a, v_min, v_max);
			if(!grid.b_Status())
				return false;*/
			// build uniform grid in order to accelerate raytracing // todo - implement

			if(b_keep_b_inside_a && b_flip_b_inside_a && b_keep_b_outside_a) {
				if(!r_mesh_b_isect.ExplodeVertices())
					return false;
			}
			// in case vertices of only some polygons will be flipped, need to explode vertices

			for(size_t i = 0, n = r_mesh_b_isect.n_Polygon_Num(); i < n; ++ i) {
				_TyPolygon &r_poly = r_mesh_b_isect.r_Polygon(i);

				Vector3f v_center = r_poly.v_Center();
				Vector3f v_normal = r_poly.t_Normal().v_normal;

				float f_nearest = 0; // only read once n_nearest is valid
				size_t n_nearest = -1;
				bool b_from_back = false, b_sure = false; // only read once n_nearest is valid
				for(int n_retry = 0; n_retry < n_raytrace_retry_num; ++ n_retry) {
					for(size_t j = 0, m = r_mesh_a.n_Polygon_Num(); j < m; ++ j) { // todo - use uniform grid for raytracing as well
						const _TyPolygon &r_poly2 = r_mesh_a.r_Polygon(j);
						float t;
						if(r_poly2.b_RayHit(v_center, v_normal, t, f_epsilon) &&
						   (n_nearest == size_t(-1) || t < f_nearest) && t >= f_epsilon * .5f) {
							f_nearest = t;
							n_nearest = j;
							b_from_back = r_poly2.t_Normal().v_normal.f_Dot(v_normal) > 0;
							b_sure = fabs(r_poly2.t_Normal().v_normal.f_Dot(v_normal)) > f_epsilon;
							if(!b_sure)
								break;
						}
					}
					// shoot a ray from a polygon, see if it gets to another polygon

					// todo - instead of wiggling the ray, see if there is another polygon that we could hit, that seems to be much more stable
					// todo - try to raytrace some images, see if the mesh has holes

					if(n_nearest == size_t(-1) || !b_sure) {
						bool b_break_out = false;
						for(;;) {
							v_normal.x += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							v_normal.y += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							v_normal.z += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
							n_rand = 1664525 * n_rand + 1013904223;
							if(v_normal.f_Dot(r_poly.t_Normal().v_normal) < .1f) {
								if(fabs(v_normal.f_Length() - 1) > f_epsilon) {
									if(!r_poly.Calc_Normal(f_epsilon)) {
										b_break_out = true; // avoid infinite loops
										break;
									}
								}
								v_normal = r_poly.t_Normal().v_normal; // deviated too far; restart
							} else
								break;
						}
						if(b_break_out)
							break;
					} else
						break;
					// in case the ray missed all the polygons, try shooting again, under a slightly
					// different direction
				}

				if(n_nearest == size_t(-1) || !b_from_back) {
					if(!b_keep_b_outside_a)
						r_poly.Delete();
					// the polygon is at the outside, or it faces another (suspectably) outside polygon
				} else {
					if(!b_keep_b_inside_a)
						r_poly.Delete();
					else if(b_flip_b_inside_a) {
						r_poly.Swap_VertexOrder(); // note that the normals will not be correct now

						_ASSERTE(b_keep_b_inside_a && b_flip_b_inside_a); // should be set
						if(b_keep_b_outside_a)
							r_poly.for_each_vertex(Flip_RefVertexNormals); // the vertices are exploded, can go ahead
					}
					// the polygon is inside a
				}
			}

			if(b_keep_b_inside_a && b_flip_b_inside_a) {
				if(!b_keep_b_outside_a) {
					r_mesh_b_isect.r_Vertex_Pool().ForEach(0, size_t(-1), Flip_VertexNormals);
					// everything is flipped, can flip globally
				} else {
					// already handled that
				}
			}
			// need to flip vertex normals as well
		}

		// r_mesh_a_isect now contains only the part that are needed, or if !b_need_intersect_a, all of a
		// r_mesh_b_isect now contains only the part that are needed, or if !b_need_intersect_b, all of b
		// note that the vertices could be optimized at this point, since they were exploded,
		// but chaining many bool operations would then become expensive

		if(b_need_intersect_a) {
			_ASSERTE(&r_mesh_a_isect == &r_dest); // should be already there
			if(b_need_intersect_b)
				return r_dest.Merge(r_mesh_b_isect); // just add what is needed and that's it
			else {
				// need to decide where to take b's polygons from

				_ASSERTE(b_keep_b_outside_a == b_keep_b_inside_a); // this must be true since !b_need_intersect_b
				_ASSERTE(!b_keep_b_inside_a || !b_flip_b_inside_a); // this also must be true

				if(b_keep_b_outside_a && b_keep_b_inside_a && !b_flip_b_inside_a)
					return r_dest.Merge(r_mesh_b); // we wanted to keep all of b, no flips required
				else /*if(!b_keep_b_outside_a && !b_keep_b_inside_a)*/ {
					_ASSERTE(!b_keep_b_outside_a && !b_keep_b_inside_a);
					return true; // do not want anything from b, we're finished
				}
			}
		} else if(b_need_intersect_b) {
			_ASSERTE(!b_need_intersect_a); // otherwise would take the top branch
			_ASSERTE(&r_mesh_b_isect == &r_dest); // should be already there

			_ASSERTE(b_keep_a_outside_b == b_keep_a_inside_b); // this must be true since !b_need_intersect_a
			_ASSERTE(!b_keep_a_inside_b || !b_flip_a_inside_b); // this also must be true

			if(b_keep_a_outside_b && b_keep_a_inside_b && !b_flip_a_inside_b)
				return r_dest.Merge(r_mesh_a); // we wanted to keep all of a, no flips required
			else /*if(!b_keep_a_outside_b && !b_keep_a_inside_b)*/ {
				_ASSERTE(!b_keep_a_outside_b && !b_keep_a_inside_b);
				return true; // do not want anything from a, we're finished
			}
		} else {
			// not a, nor b are intersected.

			_ASSERTE(&r_mesh_a_isect != &r_dest && &r_mesh_b_isect != &r_dest);
			_ASSERTE(r_dest.r_Polygon_Pool().b_Empty() && r_dest.r_Vertex_Pool().b_Empty());
			// dest should be blank

			if(b_keep_a_outside_b && b_keep_a_inside_b && !b_flip_a_inside_b) {
				if(!r_dest.Merge(r_mesh_a)) // we wanted to keep all of a, no flips required
					return false;
			} else /*if(!b_keep_a_outside_b && !b_keep_a_inside_b)*/
				_ASSERTE(!b_keep_a_outside_b && !b_keep_a_inside_b);
			// put mesh a (the braces are required, otherwise the else would pair
			// with the inner if and the assertion would fire on a successful merge)

			if(b_keep_b_outside_a && b_keep_b_inside_a && !b_flip_b_inside_a) {
				if(!r_dest.Merge(r_mesh_b)) // we wanted to keep all of b, no flips required
					return false;
			} else /*if(!b_keep_b_outside_a && !b_keep_b_inside_a)*/
				_ASSERTE(!b_keep_b_outside_a && !b_keep_b_inside_a);
			// put mesh b
		}
		// perform the final merging of the meshes

		return true;
	}

	/**
	 *	@brief cuts the polygons of two meshes by each other
	 *
	 *	If both meshes are to be cut, a copy of the uncut mesh A is made first and
	 *	handed to the actual implementation, so that B can be cut by the original A
	 *	(which is faster, as A itself gets fragmented by being cut by B). If the copy
	 *	can not be made, the implementation is called without it.
	 *
	 *	@param[in,out] r_mesh_a is the first mesh
	 *	@param[in,out] r_mesh_b is the second mesh
	 *	@param[in] b_intersect_a_with_b is set if mesh A should be cut by mesh B
	 *	@param[in] b_intersect_b_with_a is set if mesh B should be cut by mesh A
	 *
	 *	@return Returns the result of the actual implementation.
	 */
	static bool Intersect_Meshes(CPolyMesh &r_mesh_a, CPolyMesh &r_mesh_b,
		bool b_intersect_a_with_b = true, bool b_intersect_b_with_a = true)
	{
		CPolyMesh original_a;
		const bool b_cut_both = b_intersect_a_with_b && b_intersect_b_with_a;
		const bool b_original_available = b_cut_both && original_a.Copy(r_mesh_a);
		// the copy is only attempted when both meshes get cut

		return Intersect_Meshes(r_mesh_a, original_a, b_original_available,
			r_mesh_b, b_intersect_a_with_b, b_intersect_b_with_a);
		// the rest is up to the actual implementation
	}

	/**
	 *	@brief removes surfaces, which are inside a (closed) polygonal mesh
	 *
	 *	Polygons with less than three vertices are removed first. Then a ray is shot from
	 *	the center of each polygon along its normal; a polygon whose ray hits the back side
	 *	of the nearest other polygon is considered hidden. In the first pass, only the hidden
	 *	polygons which do not overlap anything are deleted and (unless b_no_subdivide is set)
	 *	the remaining polygons are cut by the polygons they overlap. In the second pass,
	 *	all the hidden polygons are deleted.
	 *
	 *	@param[in,out] r_mesh is the mesh to have the inside surfaces removed
	 *	@param[in] b_no_subdivide is no subdivision flag (can be enabled to handle
	 *		simple cases where no surfaces need to be subdivided)
	 *	@param[in] n_raytrace_retry_num is the maximum number of rays shot per polygon
	 *		(values below 1 are treated as 1)
	 *	@param[in] f_raytrace_retry_wiggle is the magnitude of the random offset added to each
	 *		component of the ray direction before a retry
	 *
	 *	@return Returns true on success, false on failure (the uniform grid could not
	 *		be built, the mesh could not be copied or there was not enough memory).
	 *
	 *	@todo add all the epsilons
	 */
	static bool Remove_HiddenSurfaces(CPolyMesh &r_mesh, bool b_no_subdivide = false,
		int n_raytrace_retry_num = 4, float f_raytrace_retry_wiggle = .1f)
	{
		if(n_raytrace_retry_num < 1)
			n_raytrace_retry_num = 1;
		// must raytrace at least once

		for(size_t i = 0, n = r_mesh.n_Polygon_Num(); i < n; ++ i) {
			_TyPolygon &r_poly = r_mesh.r_Polygon(i);
			if(r_poly.n_Vertex_Num() < 3) {
				r_mesh.DeletePolygons(i, i + 1);
				-- i;
				-- n; // !!
				continue;
			}
			_ASSERTE(fabs(r_poly.t_Normal().v_normal.f_Length() - 1) < f_epsilon);
		}
		// get rid of degenerate polygons, for starters

		Vector3f v_min, v_max;
		r_mesh.BoundingBox(v_min, v_max);
		// get mesh bounding box

		v_min -= f_epsilon;
		v_max += f_epsilon;
		// make sure that the side polygons are not cut off

		uint32_t n_rand = 12345;
		// state of the linear congruential generator used to wiggle the rays

		try {
			for(size_t n_pass = 0; n_pass < 2; ++ n_pass) {
				CPolygonHash::CUniformGrid grid(r_mesh, v_min, v_max);
				if(!grid.b_Status())
					return false;
				// build a uniform grid with polygons

				for(size_t i = 0, n = r_mesh.n_Polygon_Num(); i < n; ++ i) {
					_TyPolygon &r_poly = r_mesh.r_Polygon(i);
					if(r_poly.b_Empty())
						continue;
					// skip the polygons "deleted" in the previous pass; there is nothing left
					// to classify and their center / normal do not describe a surface anymore

					Vector3f v_center = r_poly.v_Center();
					Vector3f v_normal = r_poly.t_Normal().v_normal;
					float f_nearest = 0; // only read once n_nearest is valid
					size_t n_nearest = -1;
					bool b_from_back = false; // only read once n_nearest is valid
					for(int n_retry = 0; n_retry < n_raytrace_retry_num; ++ n_retry) {
						for(size_t j = 0; j < n; ++ j) { // todo - use uniform grid for raytracing as well
							if(i == j)
								continue;
							_TyPolygon &r_poly2 = r_mesh.r_Polygon(j);
							float t;
							if(r_poly2.b_RayHit(v_center, v_normal, t, f_epsilon) &&
							   (n_nearest == size_t(-1) || t < f_nearest) && t >= f_epsilon * .5f &&
							   fabs(r_poly2.t_Normal().v_normal.f_Dot(v_normal)) > f_epsilon) {
								f_nearest = t;
								n_nearest = j;
								b_from_back = r_poly2.t_Normal().v_normal.f_Dot(v_normal) > 0;
							}
						}
						// shoot a ray from a polygon, see if it gets to another polygon

						if(n_nearest == size_t(-1)) {
							for(;;) {
								v_normal.x += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
								n_rand = 1664525 * n_rand + 1013904223;
								v_normal.y += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
								n_rand = 1664525 * n_rand + 1013904223;
								v_normal.z += (n_rand / float(UINT32_MAX) * 2 - 1) * f_raytrace_retry_wiggle;
								n_rand = 1664525 * n_rand + 1013904223;
								if(v_normal.f_Dot(r_poly.t_Normal().v_normal) < .1f)
									v_normal = r_poly.t_Normal().v_normal; // deviated too far; restart
								else
									break;
							}
						} else
							break;
						// in case the ray missed all the polygons, try shooting again, under a slightly
						// different direction
					}

					if(n_nearest == size_t(-1) || !b_from_back)
						continue;
					// the polygon is at the outside, or it faces another (suspectably) outside polygon

					bool b_intersects = false;
					if(!n_pass) {
						std::vector<_TyPolygon*> intersectors;
						grid.Find_PolygonIntersectors(r_poly, intersectors);
						// the polygon is most likely inside, but we need to see if it intersects something first

						for(size_t j = 0, m = intersectors.size(); j < m; ++ j) {
							const _TyPolygon &r_poly2 = *intersectors[j];
							if(&r_poly2 == &r_poly)
								continue;
							if(r_poly2.b_Overlap_SepAxes(r_poly, (b_no_subdivide)? -f_epsilon : f_epsilon)) {
								b_intersects = true;
								break;
							}
						}
						// the intersectors are eliminated at the end of the first pass
					}
					if(!b_intersects) {
						r_poly.Delete(); // do this instead
						/*r_mesh.DeletePolygons(i, i + 1); // do not remove, will invalidate the hash
						-- i;
						-- n;*/ // !!
					}
					// if the polygon does not intersect anything, it can be safely
					// removed as it is completely inside the object
				}
				// first, remove all the polygons that are completely inside

				if(n_pass || b_no_subdivide)
					break;
				// don't do the below cutting twice, it is not required

				for(size_t i = 0, n = r_mesh.n_Polygon_Num(); i < n; ++ i) {
					_TyPolygon &r_poly = r_mesh.r_Polygon(i);

					std::vector<_TyPolygon*> intersectors;
					grid.Find_PolygonIntersectors(r_poly, intersectors); // empty polygons get no intersectors
					for(size_t j = 0, m = intersectors.size(); j < m; ++ j) {
						_TyPolygon &r_poly2 = *intersectors[j];
						if(&r_poly2 == &r_poly)
							continue;
						if(r_poly2.n_Plane_Pos(r_poly.t_Normal()) == plane_Onplane) {
							bool b_fully_overlapped = true;
							for(size_t k = 0, o = r_poly.n_Vertex_Num(); k < o; ++ k) {
								Plane3f t_plane = r_poly.t_EdgePlane(k);
								if(r_poly2.n_Plane_Pos(t_plane) != plane_Front) { // empty polygons will be always onplane
									b_fully_overlapped = false;
									break;
								}
							}
							if(b_fully_overlapped)
								r_poly2.Delete();
						}
					}
					// look for onplane polygons which are fully overlapped, these can be deleted as well
				}

				CPolyMesh mesh_copy;
				if(!mesh_copy.Copy(r_mesh))
					return false;
				// need a copy of the mesh in order to be able to cut polygons

				for(size_t i = 0, n = mesh_copy.n_Polygon_Num(); i < n; ++ i) {
					_ASSERTE(n == mesh_copy.n_Polygon_Num());
					_TyPolygon &r_poly = mesh_copy.r_Polygon(i);
					if(r_poly.b_Empty())
						continue;
					// skip "deleted" polygons

					std::vector<_TyPolygon*> intersectors;
					grid.Find_PolygonIntersectors(r_poly, intersectors);
					// intersectors are in the other mesh now

					if(!intersectors.empty()) {
						std::vector<_TyPolygon*> polygon_fragments;
						polygon_fragments.push_back(&r_poly);
						// fragments are in mesh_copy

						for(size_t j = 0, m = intersectors.size(); j < m; ++ j) {
							_TyPolygon &r_isect = *intersectors[j];

							for(size_t k = 0, o = polygon_fragments.size(); k < o; ++ k) {
								_TyPolygon &r_frag = *polygon_fragments[k];

								if(r_isect.b_Overlap_SepAxes(r_frag, f_epsilon)) {
									_TyPolygon *p_new = mesh_copy.r_Polygon_Pool().p_GetSingle();
									r_frag.n_Split(r_isect.t_Normal(), *p_new, plane_Front, f_epsilon);
									if(p_new->b_Empty())
										mesh_copy.r_Polygon_Pool().Erase_Back(mesh_copy.r_Polygon_Pool().p_End_it() - 1); // no new fragment produced
									else if(r_frag.b_Empty()) {
										p_new->Swap(r_frag); // k does not decrease, new fragment won't be cut again
										mesh_copy.r_Polygon_Pool().Erase_Back(mesh_copy.r_Polygon_Pool().p_End_it() - 1); // no new fragment produced
									} else {
										polygon_fragments.push_back(p_new); // o does not increase, new fragment won't be cut again
										++ n; // !!
									}
								}
								// in case it overlaps, cut it and remember all the non-empty parts
							}
							// go through all of the original polygon fragments
						}
						// for each intersector
					}
				}

				mesh_copy.Swap(r_mesh);
				// the cut mesh becomes the result, the next pass builds a new grid on it
			}
		} catch(std::bad_alloc&) {
			return false;
		}

		for(size_t i = r_mesh.n_Polygon_Num(); i > 0;) {
			-- i; // here
			_TyPolygon &r_poly = r_mesh.r_Polygon(i);
			if(r_poly.b_Empty())
				r_mesh.DeletePolygons(i, i + 1);
		}
		// remove all the polygons that we degenerated

		return true;
	}

protected:
	/**
	 *	@brief cuts the polygons of two meshes by each other (the actual implementation)
	 *
	 *	This can use a copy of mesh A to speed-up processing of mesh B,
	 *	but the copy of a mesh is hard to verify, hence it is protected.
	 *
	 *	The work is done in two passes of the same algorithm with the meshes swapped:
	 *	the first pass cuts the polygons of A by the planes of the overlapping polygons of B
	 *	(if b_intersect_a_with_b is set), the second pass cuts the polygons of B by the polygons
	 *	of r_copy_of_a if b_have_original_mesh_a is set, or of the (possibly already cut)
	 *	r_mesh_a otherwise (if b_intersect_b_with_a is set).
	 *
	 *	@param[in,out] r_mesh_a is the first mesh
	 *	@param[in] r_copy_of_a is a copy of the first mesh (only used if b_have_original_mesh_a is set)
	 *	@param[in] b_have_original_mesh_a is set if r_copy_of_a is a distinct object with the same
	 *		number of vertices and polygons as r_mesh_a (this is asserted)
	 *	@param[in,out] r_mesh_b is the second mesh
	 *	@param[in] b_intersect_a_with_b is set if mesh A should be cut by mesh B
	 *	@param[in] b_intersect_b_with_a is set if mesh B should be cut by mesh A
	 *
	 *	@return Returns true on success, false on failure (the uniform grid
	 *		could not be built or std::bad_alloc was thrown).
	 */
	static bool Intersect_Meshes(CPolyMesh &r_mesh_a, const CPolyMesh &r_copy_of_a,
		bool b_have_original_mesh_a, CPolyMesh &r_mesh_b,
		bool b_intersect_a_with_b = true, bool b_intersect_b_with_a = true)
	{
		_ASSERTE(&r_copy_of_a != &r_mesh_b); // the copy of a must not be mesh b
		_ASSERTE(b_have_original_mesh_a == (&r_mesh_a != &r_copy_of_a &&
			r_mesh_a.n_Vertex_Num() == r_copy_of_a.n_Vertex_Num() &&
			r_mesh_a.n_Polygon_Num() == r_copy_of_a.n_Polygon_Num()));
		// the flag must agree with what can be verified about the copy

		try {
			Vector3f v_min_a, v_max_a;
			r_mesh_a.BoundingBox(v_min_a, v_max_a);
			Vector3f v_min_b, v_max_b;
			r_mesh_b.BoundingBox(v_min_b, v_max_b);
			// get meshes bounding boxes

			Vector3f v_min, v_max;
			for(int i = 0; i < 3; ++ i) {
				v_min[i] = max(v_min_a[i], v_min_b[i]);
				v_max[i] = min(v_max_a[i], v_max_b[i]);
			}
			// calculate the bounding box of the intersection (no need to waste grid
			// space for parts that do not intersect anyway)
			// NOTE(review): if the bounding boxes are disjoint, v_min exceeds v_max in some
			// axis - confirm that CUniformGrid handles such a box

			v_min -= f_epsilon;
			v_max += f_epsilon;
			// make sure that the side polygons are not cut off

			CPolyMesh *p_mesh_a = &r_mesh_a, *p_mesh_b = &r_mesh_b;
			for(int n_pass = 0; n_pass < 2; ++ n_pass) {
				_ASSERTE(!n_pass || p_mesh_b == &r_mesh_a);
				CPolyMesh &r_mesh_0 = *p_mesh_a; // the mesh being cut in this pass
				const CPolyMesh &r_mesh_1 = (n_pass &&
					b_have_original_mesh_a)? r_copy_of_a : *p_mesh_b; // the mesh supplying the cutting polygons

				if(b_intersect_a_with_b) { // the flags are swapped below, in the second pass this holds b_intersect_b_with_a
					CPolygonHash::CUniformGrid grid(r_mesh_1, v_min, v_max);
					if(!grid.b_Status())
						return false;
					// build a uniform grid with polygons

					for(size_t i = 0, n = r_mesh_0.n_Polygon_Num(); i < n; ++ i) {
						_ASSERTE(n == r_mesh_0.n_Polygon_Num());
						_TyPolygon &r_poly = r_mesh_0.r_Polygon(i);

						std::vector<_TyPolygon*> intersectors;
						grid.Find_PolygonIntersectors(r_poly, intersectors); // todo - sort polygons by abs distance from original polygon center (so that long polygon filets are avoided, also do that below)
						// intersectors are from the mesh 1

						if(!intersectors.empty()) {
							std::vector<_TyPolygon*> polygon_fragments;
							polygon_fragments.push_back(&r_poly);
							// fragments are in mesh 0

							for(size_t j = 0, m = intersectors.size(); j < m; ++ j) {
								_TyPolygon &r_isect = *intersectors[j];

								for(size_t k = 0, o = polygon_fragments.size(); k < o; ++ k) {
									_TyPolygon &r_frag = *polygon_fragments[k];

									if(r_isect.b_Overlap_SepAxes(r_frag, f_epsilon)) {
										_TyPolygon *p_new = r_mesh_0.r_Polygon_Pool().p_GetSingle();
										r_frag.n_Split(r_isect.t_Normal(), *p_new, plane_Front, f_epsilon);
										if(p_new->b_Empty())
											r_mesh_0.r_Polygon_Pool().Erase_Back(r_mesh_0.r_Polygon_Pool().p_End_it() - 1); // no new fragment produced
										else if(r_frag.b_Empty()) {
											p_new->Swap(r_frag); // k does not decrease, new fragment won't be cut again
											r_mesh_0.r_Polygon_Pool().Erase_Back(r_mesh_0.r_Polygon_Pool().p_End_it() - 1); // no new fragment produced
										} else {
											polygon_fragments.push_back(p_new); // o does not increase, new fragment won't be cut again
											++ n; // !! so the new fragment can be cut by other polygons as well
											// o will refresh automatically with the next intersector, no need to do it here
										}
									}
									// in case it overlaps, cut it and remember all the non-empty parts
								}
								// go through all of the original polygon fragments
							}
							// for each intersector
						}
					}
				}

				std::swap(b_intersect_a_with_b, b_intersect_b_with_a);
				std::swap(p_mesh_a, p_mesh_b);
				// process the other mesh in the following pass
			}
			// the algorithm is twice the same, only the meshes are swapped
		} catch(std::bad_alloc&) {
			return false;
		}

		return true;
	}

	/**
	 *	@brief negates the normal of a vertex
	 *	@param[in,out] r_vertex is the vertex to have its normal negated in place
	 */
	static inline void Flip_VertexNormals(_TyVertex &r_vertex)
	{
		r_vertex.v_normal = -r_vertex.v_normal;
	}

	/**
	 *	@brief negates the normal of the vertex a reference vertex points to
	 *
	 *	@param[in] r_vertex is the reference vertex; the vertex it refers
	 *		to (m_p_ref) has its normal negated in place
	 *
	 *	@note This must be named differently from Flip_VertexNormals(), otherwise
	 *		it would be ambiguous when used as a template argument.
	 */
	static inline void Flip_RefVertexNormals(CPolyMesh::_TyRefVertex &r_vertex)
	{
		_TyVertex &r_referenced = *r_vertex.m_p_ref;
		Flip_VertexNormals(r_referenced);
	}
};

// definition of the static member CPolygonHash::CUniformGrid::m_empty
// NOTE(review): this is a non-inline definition inside a header; if the header is included
// from more than one translation unit, the member gets defined multiple times - confirm
// that only a single source file includes it, or move the definition as the todo says
std::vector<CPolygonHash::_TyPolygon*> CPolygonHash::CUniformGrid::m_empty;
// todo - put to the .cpp

#endif // !__BOOLEAN_OPERATIONS_INCLUDED
