Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove OpenMP #57

Merged
merged 5 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/build-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: pip

- name: Build sdist
run: pipx run build --sdist

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ci:
autoupdate_schedule: quarterly
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
rev: v0.6.7
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down Expand Up @@ -36,6 +36,6 @@ repos:

# This validates our GitHub workflow files
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.28.4
rev: 0.29.2
hooks:
- id: check-github-workflows
20 changes: 3 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
cmake_minimum_required(VERSION 3.15...3.26)

project(nanobind_project LANGUAGES CXX)
if(NOT APPLE)
find_package(OpenMP REQUIRED)
endif()

# Try to import all Python components potentially needed by nanobind
find_package(Python 3.9
REQUIRED COMPONENTS Interpreter Development.Module
Expand All @@ -28,23 +23,14 @@ nanobind_add_module(
src/clustering.cpp
)

# Link OpenMP
if(OpenMP_CXX_FOUND)
target_link_libraries(_clustering PRIVATE OpenMP::OpenMP_CXX)
endif()

# Compiler-specific options
if(MSVC)
# Use MSVC optimization levels and OpenMP setup
target_compile_options(_clustering PRIVATE /O2 /std:c++17 /openmp:llvm)
# /openmp:llvm
# Use MSVC optimization levels
target_compile_options(_clustering PRIVATE /O2 /std:c++17)
else()
# Assuming GCC or Clang
if (APPLE)
target_compile_options(_clustering PRIVATE -O3)
else()
target_compile_options(_clustering PRIVATE -O3 -fopenmp)
endif()
target_compile_options(_clustering PRIVATE -O3)

endif()

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
dependencies = [
"numpy",
"pyvista>=0.37.0",
"pykdtree"
'pykdtree',
]
description = "Uniformly remeshes surface meshes"
keywords = ["vtk", "uniform", "meshing", "remeshing", "acvd"]
Expand All @@ -34,6 +34,7 @@ archs = ["auto64"] # 64-bit only
skip = "cp38-* cp313-* pp* *musllinux*" # build Python 3.9 - Python 3.12 (cp313 is skipped)
test-command = "pytest {project}/tests"
test-requires = "pytest"
test-skip = "*-macosx_arm64"

[tool.cibuildwheel.macos]
archs = ["native"]
Expand Down
27 changes: 1 addition & 26 deletions src/clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,9 @@

#include <nanobind/nanobind.h>
#include <nanobind/ndarray.h>
// #include <nanobind/stl/set.h> // needed to convert c++ sets to python sets
// #include <nanobind/stl/vector.h> // needed to convert c++ vectors to python lists

#include "array_support.h"

#if !defined(__APPLE__)
#include <omp.h>
#endif

#if defined(__linux__) || defined(__APPLE__)
typedef int64_t vtk_int;
#else
typedef int32_t vtk_int;
#endif

#ifdef _MSC_VER
#define restrict __restrict
#elif defined(__GNUC__) || defined(__clang__)
Expand Down Expand Up @@ -99,7 +87,6 @@ PointNormals(NDArray<const T, 2> points_arr, NDArray<const int64_t, 2> faces_arr
const T *v = points_arr.data();
const int64_t *f = faces_arr.data();

#pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -163,7 +150,6 @@ PointNormals(NDArray<const T, 2> points_arr, NDArray<const int64_t, 2> faces_arr
}

// Normalize point normals
#pragma omp parallel for
for (size_t i = 0; i < n_points; i++) {
T plen = sqrt(
pnorm[i * 3 + 0] * pnorm[i * 3 + 0] + pnorm[i * 3 + 1] * pnorm[i * 3 + 1] +
Expand Down Expand Up @@ -192,7 +178,6 @@ FaceCentroid(const NDArray<const T, 2> points, const NDArray<const int64_t, 2> f
auto fmean_arr = MakeNDArray<T, 2>({n_faces, 3});
T *fmean = fmean_arr.data();

#pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
const int64_t point0 = f[i * 3 + 0];
const int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -221,7 +206,6 @@ FaceNormals(const NDArray<const T, 2> points, const NDArray<const int64_t, 2> fa
const T *v = points.data();
const int64_t *f = faces.data();

#pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -299,7 +283,6 @@ nb::tuple RayTrace(
int *near_ind = near_ind_arr.data();

// Loop through each face and determine intersections
#pragma omp parallel for num_threads(num_threads)
for (size_t i = 0; i < npoints; i++) {
T prev_dist = std::numeric_limits<T>::infinity();
int near_idx = -1;
Expand Down Expand Up @@ -516,11 +499,8 @@ nb::tuple PointWeights(
const T *v = points_arr.data();
const int64_t *f = faces_arr.data();

#pragma omp parallel num_threads(n_threads)
{
T *local_pweight = AllocateArray<T>(n_points, true);
T *local_pweight = AllocateArray<T>(n_points, true);

#pragma omp for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand All @@ -544,27 +524,23 @@ nb::tuple PointWeights(
local_pweight[point2] += farea_l;
}

#pragma omp critical
for (size_t i = 0; i < n_points; i++) {
pweight[i] += local_pweight[i];
}

delete[] local_pweight;
}

// TODO: verify that reading pweight through this const alias actually helps; drop it otherwise
const T *pweight_const = pweight;

if (n_add_weights) {
#pragma omp parallel num_threads(n_threads)
for (size_t i = 0; i < n_points; i++) {
const T wgt = aweights[i] * pweight_const[i];
wvertex[i * 3 + 0] = wgt * v[i * 3 + 0];
wvertex[i * 3 + 1] = wgt * v[i * 3 + 1];
wvertex[i * 3 + 2] = wgt * v[i * 3 + 2];
}
} else {
#pragma omp parallel num_threads(n_threads)
for (size_t i = 0; i < n_points; i++) {
const T wgt = pweight[i];
wvertex[i * 3 + 0] = wgt * v[i * 3 + 0];
Expand Down Expand Up @@ -1315,7 +1291,6 @@ template <typename T> NDArray<T, 1> TriArea(NDArray<T, 2> points, NDArray<int64_
auto v = points.view();
auto f = faces.view();

#pragma omp parallel for firstprivate(v, f, tria_view)
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f(i, 0);
int64_t point1 = f(i, 1);
Expand Down
Loading