Commit: Merge branch 'master' into singleapi
nicolas-chaulet committed Jan 13, 2020
2 parents 80fd7ab + 2884bcc commit 3c9a8e4
Showing 8 changed files with 164 additions and 56 deletions.
4 changes: 4 additions & 0 deletions cpu/include/ball_query.h
@@ -9,3 +9,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
at::Tensor query_batch,
at::Tensor support_batch,
float radius, int max_num, int mode);

std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
at::Tensor support,
float radius, int max_num, int mode);
1 change: 1 addition & 0 deletions cpu/include/cloud.h
@@ -57,6 +57,7 @@ struct PointCloud
pts = temp;
}
void set_batch(std::vector<scalar_t> new_pts, int begin, int size){

std::vector<PointXYZ> temp(size);
for(int i=0; i < size; i++){
PointXYZ point;
11 changes: 11 additions & 0 deletions cpu/src/bindings.cpp
@@ -35,4 +35,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
"mode=1 means a matrix of edges of size Num_edge x 2"
"return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
"query"_a, "support"_a, "query_batch"_a, "support_batch"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
m.def("dense_ball_query", &dense_ball_query,
"compute the radius search of a batch of point cloud using nanoflann"
"-query : a pytorch tensor of size B x N1 x 3,. used to query the nearest neighbors"
"- support : a pytorch tensor of size B x N2 x 3. used to build the tree"
"- radius : float number, size of the ball for the radius search."
"- max_num : int number, indicate the maximum of neaghbors allowed(if -1 then all the possible neighbors will be computed). "
" - mode : int number that indicate which format for the neighborhood"
" mode=0 mean a matrix of neighbors(-1 for shadow neighbors)"
"mode=1 means a matrix of edges of size Num_edge x 2"
"return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
"query"_a, "support"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
}
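
For reference, a minimal usage sketch of the dense_ball_query binding added above. This assumes the compiled CPU extension is importable as torch_points.points_cpu and is called directly with positional arguments; the actual module name is not shown in this diff.

import torch
import torch_points.points_cpu as tpcpu  # assumed module name, not shown in this diff

query = torch.randn(2, 128, 3)    # B x N1 x 3, points to query
support = torch.randn(2, 256, 3)  # B x N2 x 3, points used to build the tree

# mode=0: dense neighbor matrix, at most max_num=16 neighbors per query point
idx, dist2 = tpcpu.dense_ball_query(query, support, 0.2, 16, 0)
print(idx.shape, dist2.shape)     # expected: B x N1 x M with M <= 16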
25 changes: 19 additions & 6 deletions cpu/src/neighbors.cpp
@@ -82,16 +82,20 @@ int nanoflann_neighbors(vector<scalar_t>& queries,

i0 = 0;

int token = 0;
for (auto& inds : list_matches){
token = inds[0].first;
for (int j = 0; j < max_count; j++){
if (j < inds.size()){
neighbors_indices[i0 * max_count + j] = inds[j].first;
dists[i0 * max_count + j] = (float) inds[j].second;


}

else {
neighbors_indices[i0 * max_count + j] = -1;
dists[i0 * max_count + j] = radius * radius;
neighbors_indices[i0 * max_count + j] = token;
dists[i0 * max_count + j] = -1;
}
}
i0++;
@@ -186,24 +190,30 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
search_params.sorted = true;
for (auto& p0 : query_pcd.pts){
// Check if we changed batch

if (i0 == sum_qb + q_batches[b]){
if (i0 == sum_qb + q_batches[b] && b < s_batches.size()){
sum_qb += q_batches[b];
sum_sb += s_batches[b];

b++;

// Change the points
current_cloud.pts.clear();
current_cloud.set_batch(supports, sum_sb, s_batches[b]);
// Build KDTree of the current element of the batch
delete index;

index = new my_kd_tree_t(3, current_cloud, tree_params);
index->buildIndex();
}
// Initial guess of neighbors size


all_inds_dists[i0].reserve(max_count);
// Find neighbors
//std::cerr << p0.x << p0.y << p0.z<<std::endl;
scalar_t query_pt[3] = { p0.x, p0.y, p0.z};


size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params);
// Update max count

@@ -217,8 +227,10 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
max_count = max_num;
}
// Reserve the memory

if(mode == 0){
neighbors_indices.resize(query_pcd.pts.size() * max_count);

dists.resize(query_pcd.pts.size() * max_count);
i0 = 0;
sum_sb = 0;
@@ -227,6 +239,7 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,

for (auto& inds_dists : all_inds_dists){// Check if we changed batch


if (i0 == sum_qb + q_batches[b]){
sum_qb += q_batches[b];
sum_sb += s_batches[b];
Expand All @@ -239,8 +252,8 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
dists[i0 * max_count + j] = (float) inds_dists[j].second;
}
else {
neighbors_indices[i0 * max_count + j] = supports.size();
dists[i0 * max_count + j] = radius * radius;
neighbors_indices[i0 * max_count + j] = supports.size()/3;
dists[i0 * max_count + j] = -1;
}

}
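
The hunks above change the padding convention for shadow neighbors in mode=0: unused slots are now filled with a placeholder index (the first match for the dense path, supports.size()/3 for the batch path) and a squared distance of -1, instead of a squared distance of radius * radius. A minimal, hypothetical sketch of how a caller could mask those padded entries out, assuming the returned tensors follow this convention:

import torch

def mask_shadow_neighbors(idx, dist2):
    # idx, dist2: N1 x M outputs of a mode=0 radius search;
    # padded (shadow) slots carry a squared distance of -1
    valid = dist2 >= 0
    counts = valid.sum(dim=1)   # number of real neighbors per query point
    return valid, counts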
46 changes: 36 additions & 10 deletions cpu/src/torch_nearest_neighbors.cpp
@@ -61,20 +61,22 @@ std::pair<at::Tensor, at::Tensor> ball_query(at::Tensor query,

void cumsum(const vector<long>& batch, vector<long>& res){

res.resize(batch[batch.size()-1]-batch[0]+1, 0);
res.resize(batch[batch.size()-1]-batch[0]+2, 0);
long ind = batch[0];
long incr = 1;
for(int i=1; i < batch.size(); i++){

if(batch[i] == ind)
incr++;
else{
res[ind-batch[0]] = incr;
incr =1;
ind = batch[i];
if(res.size() > 1){
for(int i=1; i < batch.size(); i++){
if(batch[i] == ind)
incr++;
else{
res[ind-batch[0]+1] = incr;
incr =1;
ind = batch[i];
}
}

}
res[ind-batch[0]] = incr;
res[ind-batch[0]+1] = incr;
}

std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
@@ -89,9 +91,11 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
std::vector<long> cumsum_query_batch_stl;
cumsum(query_batch_stl, cumsum_query_batch_stl);

std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
std::vector<long> cumsum_support_batch_stl;
cumsum(support_batch_stl, cumsum_support_batch_stl);

std::vector<long> neighbors_indices;

auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
@@ -107,6 +111,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
data_s + support.size(0)*support.size(1));


max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
supports_stl,
cumsum_query_batch_stl,
@@ -117,6 +122,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
max_num,
mode);
});

long* neighbors_indices_ptr = neighbors_indices.data();
auto neighbors_dists_ptr = neighbors_dists.data();

@@ -135,3 +141,23 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
}
return std::make_pair(out.clone(), out_dists.clone());
}


std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
at::Tensor support,
float radius, int max_num, int mode){

int b = query.size(0);
vector<at::Tensor> batch_idx;
vector<at::Tensor> batch_dist;
for (int i=0; i < b; i++){

auto out_pair = ball_query(query[i], support[i], radius, max_num, mode);
batch_idx.push_back(out_pair.first);
batch_dist.push_back(out_pair.second);
}
auto out_idx = torch::stack(batch_idx);
auto out_dist = torch::stack(batch_dist);
return std::make_pair(out_idx, out_dist);

}
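
Despite its name, the revised cumsum() helper above produces per-batch point counts with a leading zero slot rather than a running sum; the extra slot is what lets the batch loop in batch_nanoflann_neighbors build the first KD-tree on its very first iteration. A rough Python equivalent, assuming the batch vector is sorted as the run-based C++ code requires:

def batch_counts(batch):
    # batch: sorted list of per-point batch indices, e.g. [0, 0, 1, 1, 1]
    first, last = batch[0], batch[-1]
    res = [0] * (last - first + 2)   # one leading zero slot
    for b in batch:
        res[b - first + 1] += 1
    return res

# batch_counts([0, 0, 1, 1, 1]) -> [0, 2, 3]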
13 changes: 10 additions & 3 deletions setup.py
@@ -1,5 +1,10 @@
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, CppExtension
from torch.utils.cpp_extension import (
BuildExtension,
CUDAExtension,
CUDA_HOME,
CppExtension,
)
import glob

ext_src_root = "cuda"
@@ -33,12 +38,14 @@
)
)

requirements = ["torch^1.1.0"]

setup(
name="torch_points",
version="0.1.4",
version="0.1.5",
author="Nicolas Chaulet",
packages=find_packages(),
install_requires=[],
install_requires=requirements,
ext_modules=ext_modules,
cmdclass={"build_ext": BuildExtension},
)
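
The new install_requires entry pins torch with caret syntax ("torch^1.1.0", as written in the diff). A purely illustrative runtime check of the same lower bound, not part of the package:

import torch
from packaging import version  # third-party "packaging" module, assumed installed

assert version.parse(torch.__version__) >= version.parse("1.1.0"), \
    "torch_points expects at least torch 1.1.0"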
75 changes: 60 additions & 15 deletions test/test_ballquerry.py
@@ -10,43 +10,88 @@ def test_simple_gpu(self):
a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float).cuda()
b = torch.tensor([[[0, 0, 0]]]).to(torch.float).cuda()

npt.assert_array_equal(ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]]))
npt.assert_array_equal(
ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]])
)

def test_larger_gpu(self):
a = torch.randn(32, 4096, 3).to(torch.float).cuda()
idx = ball_query(1, 64, a, a).detach().cpu().numpy()
self.assertGreaterEqual(idx.min(),0)
self.assertGreaterEqual(idx.min(), 0)

# def test_simple_cpu(self):
# a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float)
# b = torch.tensor([[[0, 0, 0]]]).to(torch.float)
# npt.assert_array_equal(ball_query(1, 2, a, b).detach().numpy(), np.array([[[0, 0]]]))
def test_cpu_gpu_equality(self):
a = torch.randn(5, 1000, 3)
res_cpu = ball_query(0.1, 17, a, a).detach().numpy()
res_cuda = ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy()
for i in range(a.shape[0]):
for j in range(a.shape[1]):
# Because the order is not necessarily the same
assert set(res_cpu[i][j]) == set(res_cuda[i][j])

# def test_cpu_gpu_equality(self):
# a = torch.randn(5, 1000, 3)
# npt.assert_array_equal(ball_query(0.1, 17, a, a).detach().numpy(),
# ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy())

def test_partial_gpu(self):
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float).cuda()
class TestBallPartial(unittest.TestCase):
def test_simple_gpu(self):
x = (
torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]])
.to(torch.float)
.cuda()
)
y = torch.tensor([[0, 0, 0]]).to(torch.float).cuda()
batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
batch_y = torch.from_numpy(np.asarray([0])).long().cuda()

batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
batch_y = torch.from_numpy(np.asarray([0])).long().cuda()

idx, dist2 = ball_query(1., 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
idx, dist2 = ball_query(
1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
)

idx = idx.detach().cpu().numpy()
dist2 = dist2.detach().cpu().numpy()

idx_answer = np.asarray([[1, 4]])
dist2_answer = np.asarray([[ 0.0100, -1.0000]]).astype(np.float32)
dist2_answer = np.asarray([[0.0100, -1.0000]]).astype(np.float32)

npt.assert_array_almost_equal(idx, idx_answer)
npt.assert_array_almost_equal(dist2, dist2_answer)

def test_simple_cpu(self):
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(
torch.float
)
y = torch.tensor([[0, 0, 0]]).to(torch.float)

batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long()
batch_y = torch.from_numpy(np.asarray([0])).long()

idx, dist2 = ball_query(
1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
)

idx = idx.detach().cpu().numpy()
dist2 = dist2.detach().cpu().numpy()

idx_answer = np.asarray([[1, 1], [0, 1], [1, 1], [1, 1]])
dist2_answer = np.asarray([[-1, -1], [0.01, -1], [-1, -1], [-1, -1]]).astype(
np.float32
)

npt.assert_array_almost_equal(idx, idx_answer)
npt.assert_array_almost_equal(dist2, dist2_answer)

def test_random_cpu(self):
a = torch.randn(1000, 3).to(torch.float)
b = torch.randn(1500, 3).to(torch.float)
batch_a = torch.randint(1, (1000,)).sort(0)[0].long()
batch_b = torch.randint(1, (1500,)).sort(0)[0].long()
idx, dist = ball_query(
1.0, 12, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b
)
idx2, dist2 = ball_query(
1.0, 12, b, a, mode="PARTIAL_DENSE", batch_x=batch_b, batch_y=batch_a
)


if __name__ == "__main__":
unittest.main()