Performance improvements #57

GridEyes-2010 · 2021-08-17T07:48:54Z

I've made a few code improvements to your CPU version that increase efficiency by about 10 times
The test results in the same environment are as follows：
646464--->5ms
128128128--->7ms
256256256--->19ms
512512512--->121ms
102410241024--->728ms
204820482048---->5576ms

Forceflow · 2021-08-17T15:37:13Z

Cool, would be interesting to see these improvements in a PR?

GridEyes-2010 · 2021-08-18T02:19:11Z

void Voxelizer::cpu_voxelize_mesh_solid(
const voxinfo& info, Triangle_mesh& mesh,
unsigned int* voxel_table, bool morton_order)
{

	const Vec3f& move_min = info.pMin;
	const int uzero = 0;
	int nvert = (int)mesh.n_vertices();
	auto points = mesh.points();
	std::vector<Vec3f> npoints(nvert);
	for (int i = 0; i < nvert; ++i)
	{
		npoints[i] = points[i] - move_min;
	}

	int nface = (int)mesh.n_faces();

	const Vec3f delta_p(info.unit[0], info.unit[1], info.unit[2]);
	const Vec3i vzero(0, 0, 0);
	const Vec3i grid_max(info.gridsize[0] - 1, info.gridsize[1] - 1, info.gridsize[2] - 1);
	size_t yz = static_cast<size_t>(info.gridsize[1]) * static_cast<size_t>(info.gridsize[2]);
	Vec3f invuint(1 / info.unit[0], 1 / info.unit[1], 1 / info.unit[2]);
	const size_t s31 = 31;
	if (morton_order)
	{

#pragma omp parallel for
for (int i = 0; i < nface; ++i) {
auto fv = mesh.cfv_begin(FaceHandle(i));
auto& v0 = npoints[fv->idx()]; ++fv;
auto& v1 = npoints[fv->idx()]; ++fv;
auto& v2 = npoints[fv->idx()];
auto e0 = v1 - v0;
auto e1 = v2 - v1;
auto e2 = v0 - v2;
auto n = (e0.cross(e1)).normalize_cond();
if (std::fabs(n[0]) < float_error) {
continue;
}
n[0] = 1 / n[0];
Vec2f v0_yz = Vec2f(v0[1], v0[2]);
Vec2f v1_yz = Vec2f(v1[1], v1[2]);
Vec2f v2_yz = Vec2f(v2[1], v2[2]);
if (!checkCCW(v0_yz, v1_yz, v2_yz))
{
std::swap(v1_yz, v2_yz);
}
Vec2f bbox_max = OpenMesh::max(v0_yz, OpenMesh::max(v1_yz, v2_yz));
Vec2f bbox_min = OpenMesh::min(v0_yz, OpenMesh::min(v1_yz, v2_yz));
Vec2i bbox_max_grid = Vec2i(
(int)std::floor(bbox_max[0] * invuint[1] - 0.5f),
(int)std::floor(bbox_max[1] * invuint[2] - 0.5f));
Vec2i bbox_min_grid = Vec2i(
(int)std::ceil(bbox_min[0] * invuint[1] - 0.5f),
(int)std::ceil(bbox_min[1] * invuint[2] - 0.5f));

			for (int y = bbox_min_grid[0]; y <= bbox_max_grid[0]; ++y)
			{
				for (int z = bbox_min_grid[1]; z <= bbox_max_grid[1]; ++z)
				{
					Vec2f point = Vec2f((y + 0.5f) * info.unit[1], (z + 0.5f) * info.unit[2]);
					int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
					if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) ||
						(checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) ||
						(checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0))
					{
					    int xmax = int(get_x_coordinate(n, v0, point) * invuint[0] - 0.5f);
						for (int x = 0; x <= xmax; x++)
						{
							size_t location = mortonEncode_LUT((unsigned)x, (unsigned)y, (unsigned)z);
							size_t int_location = location >> 5;
							unsigned int bit_pos = size_t(31) - (location & s31);
							unsigned int mask = 1 << bit_pos;

#pragma omp atomic
voxel_table[int_location] ^= mask;
}
}
}
}
}

	}
	else
	{

#pragma omp parallel for
for (int i = 0; i < nface; ++i)
{
auto fv = mesh.cfv_begin(FaceHandle(i));
auto& v0 = npoints[fv->idx()]; ++fv;
auto& v1 = npoints[fv->idx()]; ++fv;
auto& v2 = npoints[fv->idx()];
auto e0 = v1 - v0;
auto e1 = v2 - v1;
auto e2 = v0 - v2;
auto n = (e0.cross(e1)).normalize_cond();
if (std::fabs(n[0]) < float_error) {
continue;
}
n[0] = 1 / n[0];
Vec2f v0_yz = Vec2f(v0[1], v0[2]);
Vec2f v1_yz = Vec2f(v1[1], v1[2]);
Vec2f v2_yz = Vec2f(v2[1], v2[2]);
if (!checkCCW(v0_yz, v1_yz, v2_yz))
{
std::swap(v1_yz, v2_yz);
}
Vec2f bbox_max = OpenMesh::max(v0_yz, OpenMesh::max(v1_yz, v2_yz));
Vec2f bbox_min = OpenMesh::min(v0_yz, OpenMesh::min(v1_yz, v2_yz));
Vec2i bbox_max_grid = Vec2i(std::floor(bbox_max[0] * invuint[1] - 0.5f),
std::floor(bbox_max[1] * invuint[2] - 0.5f));
Vec2i bbox_min_grid = Vec2i(std::ceil(bbox_min[0] * invuint[1] - 0.5f),
std::ceil(bbox_min[1] * invuint[2] - 0.5f));
size_t ypos = static_cast<size_t>(bbox_min_grid[0] * info.gridsize[1]);
for (int y = bbox_min_grid[0]; y <= bbox_max_grid[0]; ++y)
{
size_t zyz = bbox_min_grid[1] * yz;
for (int z = bbox_min_grid[1]; z <= bbox_max_grid[1]; ++z)
{
Vec2f point((y + 0.5f) * info.unit[1], (z + 0.5f) * info.unit[2]);
int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) ||
(checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) ||
(checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) ||
(checknum == 0))
{
size_t xmax = size_t(get_x_coordinate(n, v0, point) * invuint[0] - 0.5f);
for (int x = 0; x <= xmax; x++)
{
size_t location = x + ypos + zyz;
size_t int_location = location >> 5;
unsigned int bit_pos = size_t(31) - (location & s31);
unsigned int mask = 1 << bit_pos;
#pragma omp atomic
voxel_table[int_location] ^= mask;
}
}
zyz += yz;
}
ypos += info.gridsize[1];
}
}
}
}
Just code improvement, not algorithm improvement

Forceflow · 2021-08-19T12:07:14Z

Could you make a PR for this? It's hard comparing code with all the formatting gone.

Forceflow added the enhancement label Sep 2, 2021

Forceflow changed the title ~~Performance issues~~ Performance improvements Jul 7, 2022

Forceflow added the input/help wanted label Jul 19, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Performance improvements #57

Performance improvements #57

GridEyes-2010 commented Aug 17, 2021 •

edited

Loading

Forceflow commented Aug 17, 2021

GridEyes-2010 commented Aug 18, 2021

Forceflow commented Aug 19, 2021

Performance improvements #57

Performance improvements #57

Comments

GridEyes-2010 commented Aug 17, 2021 • edited Loading

Forceflow commented Aug 17, 2021

GridEyes-2010 commented Aug 18, 2021

Forceflow commented Aug 19, 2021

GridEyes-2010 commented Aug 17, 2021 •

edited

Loading