Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements #57

Open
GridEyes-2010 opened this issue Aug 17, 2021 · 3 comments
Open

Performance improvements #57

GridEyes-2010 opened this issue Aug 17, 2021 · 3 comments

Comments

@GridEyes-2010
Copy link

GridEyes-2010 commented Aug 17, 2021

I've made a few code improvements to your CPU version that increase efficiency by about 10 times
The test results in the same environment are as follows:
646464--->5ms
128128128--->7ms
256256256--->19ms
512512512--->121ms
102410241024--->728ms
204820482048---->5576ms

@Forceflow
Copy link
Owner

Cool, would be interesting to see these improvements in a PR?

@GridEyes-2010
Copy link
Author

void Voxelizer::cpu_voxelize_mesh_solid(
const voxinfo& info, Triangle_mesh& mesh,
unsigned int* voxel_table, bool morton_order)
{

	const Vec3f& move_min = info.pMin;
	const int uzero = 0;
	int nvert = (int)mesh.n_vertices();
	auto points = mesh.points();
	std::vector<Vec3f> npoints(nvert);
	for (int i = 0; i < nvert; ++i)
	{
		npoints[i] = points[i] - move_min;
	}

	int nface = (int)mesh.n_faces();

	const Vec3f delta_p(info.unit[0], info.unit[1], info.unit[2]);
	const Vec3i vzero(0, 0, 0);
	const Vec3i grid_max(info.gridsize[0] - 1, info.gridsize[1] - 1, info.gridsize[2] - 1);
	size_t yz = static_cast<size_t>(info.gridsize[1]) * static_cast<size_t>(info.gridsize[2]);
	Vec3f invuint(1 / info.unit[0], 1 / info.unit[1], 1 / info.unit[2]);
	const size_t s31 = 31;
	if (morton_order)
	{

#pragma omp parallel for
for (int i = 0; i < nface; ++i) {
auto fv = mesh.cfv_begin(FaceHandle(i));
auto& v0 = npoints[fv->idx()]; ++fv;
auto& v1 = npoints[fv->idx()]; ++fv;
auto& v2 = npoints[fv->idx()];
auto e0 = v1 - v0;
auto e1 = v2 - v1;
auto e2 = v0 - v2;
auto n = (e0.cross(e1)).normalize_cond();
if (std::fabs(n[0]) < float_error) {
continue;
}
n[0] = 1 / n[0];
Vec2f v0_yz = Vec2f(v0[1], v0[2]);
Vec2f v1_yz = Vec2f(v1[1], v1[2]);
Vec2f v2_yz = Vec2f(v2[1], v2[2]);
if (!checkCCW(v0_yz, v1_yz, v2_yz))
{
std::swap(v1_yz, v2_yz);
}
Vec2f bbox_max = OpenMesh::max(v0_yz, OpenMesh::max(v1_yz, v2_yz));
Vec2f bbox_min = OpenMesh::min(v0_yz, OpenMesh::min(v1_yz, v2_yz));
Vec2i bbox_max_grid = Vec2i(
(int)std::floor(bbox_max[0] * invuint[1] - 0.5f),
(int)std::floor(bbox_max[1] * invuint[2] - 0.5f));
Vec2i bbox_min_grid = Vec2i(
(int)std::ceil(bbox_min[0] * invuint[1] - 0.5f),
(int)std::ceil(bbox_min[1] * invuint[2] - 0.5f));

			for (int y = bbox_min_grid[0]; y <= bbox_max_grid[0]; ++y)
			{
				for (int z = bbox_min_grid[1]; z <= bbox_max_grid[1]; ++z)
				{
					Vec2f point = Vec2f((y + 0.5f) * info.unit[1], (z + 0.5f) * info.unit[2]);
					int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
					if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) ||
						(checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) ||
						(checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0))
					{
					    int xmax = int(get_x_coordinate(n, v0, point) * invuint[0] - 0.5f);
						for (int x = 0; x <= xmax; x++)
						{
							size_t location = mortonEncode_LUT((unsigned)x, (unsigned)y, (unsigned)z);
							size_t int_location = location >> 5;
							unsigned int bit_pos = size_t(31) - (location & s31);
							unsigned int mask = 1 << bit_pos;

#pragma omp atomic
voxel_table[int_location] ^= mask;
}
}
}
}
}

	}
	else
	{

#pragma omp parallel for
for (int i = 0; i < nface; ++i)
{
auto fv = mesh.cfv_begin(FaceHandle(i));
auto& v0 = npoints[fv->idx()]; ++fv;
auto& v1 = npoints[fv->idx()]; ++fv;
auto& v2 = npoints[fv->idx()];
auto e0 = v1 - v0;
auto e1 = v2 - v1;
auto e2 = v0 - v2;
auto n = (e0.cross(e1)).normalize_cond();
if (std::fabs(n[0]) < float_error) {
continue;
}
n[0] = 1 / n[0];
Vec2f v0_yz = Vec2f(v0[1], v0[2]);
Vec2f v1_yz = Vec2f(v1[1], v1[2]);
Vec2f v2_yz = Vec2f(v2[1], v2[2]);
if (!checkCCW(v0_yz, v1_yz, v2_yz))
{
std::swap(v1_yz, v2_yz);
}
Vec2f bbox_max = OpenMesh::max(v0_yz, OpenMesh::max(v1_yz, v2_yz));
Vec2f bbox_min = OpenMesh::min(v0_yz, OpenMesh::min(v1_yz, v2_yz));
Vec2i bbox_max_grid = Vec2i(std::floor(bbox_max[0] * invuint[1] - 0.5f),
std::floor(bbox_max[1] * invuint[2] - 0.5f));
Vec2i bbox_min_grid = Vec2i(std::ceil(bbox_min[0] * invuint[1] - 0.5f),
std::ceil(bbox_min[1] * invuint[2] - 0.5f));
size_t ypos = static_cast<size_t>(bbox_min_grid[0] * info.gridsize[1]);
for (int y = bbox_min_grid[0]; y <= bbox_max_grid[0]; ++y)
{
size_t zyz = bbox_min_grid[1] * yz;
for (int z = bbox_min_grid[1]; z <= bbox_max_grid[1]; ++z)
{
Vec2f point((y + 0.5f) * info.unit[1], (z + 0.5f) * info.unit[2]);
int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) ||
(checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) ||
(checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) ||
(checknum == 0))
{
size_t xmax = size_t(get_x_coordinate(n, v0, point) * invuint[0] - 0.5f);
for (int x = 0; x <= xmax; x++)
{
size_t location = x + ypos + zyz;
size_t int_location = location >> 5;
unsigned int bit_pos = size_t(31) - (location & s31);
unsigned int mask = 1 << bit_pos;
#pragma omp atomic
voxel_table[int_location] ^= mask;
}
}
zyz += yz;
}
ypos += info.gridsize[1];
}
}
}
}
Just code improvement, not algorithm improvement

@Forceflow
Copy link
Owner

Could you make a PR for this? It's hard comparing code with all the formatting gone.

@Forceflow Forceflow changed the title Performance issues Performance improvements Jul 7, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants