From 20952d9617e22d78e8807c07e50732c28a63248f Mon Sep 17 00:00:00 2001 From: Nick Angelou Date: Fri, 17 Feb 2023 10:58:41 -0600 Subject: [PATCH] Improved performance for the `Raw` container (#162) * change intersection algorithm in Raw computations * fix * update version * update changelog * update cpp bench * changelog * changelog again * update docs * comments * update comments * update docs * license formatting * fix test rounding * fix cpp rounding * changelog * golint --- CHANGES.md | 21 ++++++ README.md | 68 ++++++++++++++++- SECURITY.md | 66 +++++++++-------- package-lock.json | 4 +- package.json | 2 +- .../c/integration_test.cpp | 5 +- .../cpp/datastructure/raw.cpp | 52 ++++++++++--- .../cpp/datastructure/raw.h | 1 + .../cpp/datastructure/raw_test.cpp | 27 +++++-- .../cpp/psi_benchmark.cpp | 33 +++++++++ private_set_intersection/cpp/psi_client.cpp | 74 +++++++++++++++++-- private_set_intersection/cpp/psi_client.h | 58 ++++++++------- .../cpp/psi_client_test.cpp | 5 +- private_set_intersection/cpp/psi_server.cpp | 65 +++++++++++++--- private_set_intersection/cpp/psi_server.h | 33 +++++---- .../cpp/psi_server_test.cpp | 6 +- .../go/integration_test.go | 7 +- .../rust/tests/integration_test.rs | 4 +- tools/package.bzl | 2 +- 19 files changed, 412 insertions(+), 121 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 16d0cb9f..dfa09edb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,24 @@ +# Version 2.0.1 + +Feat: + +- The complexity of the underlying `Raw` intersection computation has improved + from `O(nmlog(m))` -> `O(nlog(n) + max(n, m))`; however, internal protobuf + deserialization remains as the dominant performance inhibitor for the + `client->GetIntersection*` methods. + +Fix: + +- The `go` integration tests were not using the datastructure param properly. + The fix did not result in any regression. 
+ +Chore: + +- Update `C++` benchmarks to include the new `Raw` enum variant +- Misc fixes to tests which were not rounding correctly and causing CI to fail + randomly +- Update the main README to include a description of the protocol + # Version 2.0.0 Breaking: diff --git a/README.md b/README.md index 5eb4e05a..665e51a0 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,72 @@ # PSI -Private Set Intersection protocol based on ECDH, Bloom Filters, and Golomb -Compressed Sets. +Private Set Intersection protocol based on ECDH and Golomb Compressed Sets or +Bloom Filters. + +## Protocol + +The Private Set Intersection (PSI) protocol involves two parties, a client and a +server, each holding a dataset. The goal of the protocol is for the client to +determine the intersection between their dataset and the server's dataset, +without revealing any information about their respective datasets to each other. + +The protocol proceeds as follows: + +1. Setup (server) + +The server encrypts all its elements `x` under a commutative encryption scheme, +computing `H(x)^s` where `s` is its secret key. The encrypted elements are then +inserted into a container and sent to the client in the form of a serialized +protobuf and resembles the following: + +``` +[ H(x_1)^(s), H(x_2)^(s), ... , H(x_n)^(s) ] +``` + +2. Request (client) + +The client encrypts all their elements `x` using the commutative encryption +scheme, computing `H(x)^c`, where `c` is its secret key. The client sends its +encrypted elements to the server along with a boolean flag, +`reveal_intersection`, indicating whether the client wants to learn the elements +in the intersection or only its size (cardinality). The payload is sent as a +serialized protobuf and resembles the following: + +``` +[ H(x_1)^(c), H(x_2)^(c), ... , H(x_n)^(c) ] +``` + +3. 
Response (server) + +For each encrypted element `H(x)^c` received from the client, the server +encrypts it again under the commutative encryption scheme with its secret key +`s`, computing `(H(x)^c)^s = H(x)^(cs)`. The result is sent back to the client +in a serialized protobuf and resembles the following: + +``` +[ H(x_1)^(cs), H(x_2)^(cs), ... , H(x_n)^(cs) ] +``` + +4. Compute intersection (client) + +The client decrypts each element received from the server's response using its +secret key `c`, computing `(H(x)^(cs))^(1/c) = H(x)^s`. It then checks whether +each decrypted element is present in the container received from the server, and +reports the number of matches as the intersection size. + +It's worth noting that the protocol has several variants, some of which +introduce a small false-positive rate, while others do not generate false +positives. This behavior is selective, and the false-positive rate can be tuned. + +The protocol has configurable **containers**. Golomb Compressed Sets (`Gcs`) is +the default container but it can be overridden to be `BloomFilter` or `Raw` +encrypted strings. `Gcs` and `BloomFilter` will have false positives whereas +`Raw` will not. + +## Security + +See [SECURITY.md](SECURITY.md). ## Requirements diff --git a/SECURITY.md b/SECURITY.md index 7a775fb0..acf70607 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -5,41 +5,47 @@ Several caveats should be carefully considered before using PSI. ### Information assumed public 1. Server set size -2. Client set size - (Note that Each of these can be turned into upper bounds by adding dummy elements.) +2. Client set size (Note that Each of these can be turned into upper bounds by + adding dummy elements.) ### Security Limitations for the PSI protocol -There are two configurations for instantiating a new client/server pair by passing in a boolean switch into their respective constructors. 
+There are two configurations for instantiating a new client/server pair by +passing in a boolean switch into their respective constructors. -1. One that reveals only the **size** (cardinality) of the intersection to the client. +1. One that reveals only the **size** (cardinality) of the intersection to the + client. 2. One that reveals the actual **intersecion** to the client. -In the case of #1, coordinated clients could get the actual intersection. However, server set items not -in any of the client sets will never be uncovered. -Situations where it’s feasible for clients to send one request per element in the domain - -there is a possbility that coordinated clients could uncover server set. - -Presence of new client set members or absence of former client set members can be -detected by server/eavesdroppers if client secret is reused. - -In the absence of any rate limiting and assuming the client and server have enough -computing power and bandwidth, small domains may be brute-forceable. However, a query -needs to be performed for each brute-force attempt. -An example for this situation would be suppose you were trying to limit sending antibody -tests to people based on whether they’d been in an infected location, so that people would -have to share their location history to prove they’d been somewhere infected, and you were -using PSI so people wouldn’t have to share their location history without good reason. If -your health authority only covers 10 possible geohashes, people could sidestep the PSI step -entirely and submit location histories which unlock tests by brute force. +In the case of #1, coordinated clients could get the actual intersection. +However, server set items not in any of the client sets will never be uncovered. +Situations where it’s feasible for clients to send one request per element in +the domain - there is a possibility that coordinated clients could uncover server +set. 
+ +Presence of new client set members or absence of former client set members can +be detected by server/eavesdroppers if client secret is reused. + +In the absence of any rate limiting and assuming the client and server have +enough computing power and bandwidth, small domains may be brute-forceable. +However, a query needs to be performed for each brute-force attempt. An example +for this situation would be suppose you were trying to limit sending antibody +tests to people based on whether they’d been in an infected location, so that +people would have to share their location history to prove they’d been somewhere +infected, and you were using PSI so people wouldn’t have to share their location +history without good reason. If your health authority only covers 10 possible +geohashes, people could sidestep the PSI step entirely and submit location +histories which unlock tests by brute force. A potential limitation with the PSI approach is the communication complexity, -which scales linearly with the size of the larger set. This is of particular concern -when performing PSI between a constrained device (cellphone) holding a small set, and a -large service provider (e.g. WhatsApp), such as in the Private Contact Discovery application. -Assuming a bloom filter is used, the Client set size affects the algorithmic complexity in -linear time O(n), with a constant number of lookups. The bloom filter has linear size -in the server's set, hence the algorithmic complexity of our protocol is O(n). However, -a bloom filter requires a large number of lookups on each query, if the false positive rate -is low. An alternative is the Golomb Compressed Set, which requires O(n log n) time due to sorting -operations, but in practice takes around 25-30% less space than a bloom filter. +which scales linearly with the size of the larger set. 
This is of particular +concern when performing PSI between a constrained device (cellphone) holding a +small set, and a large service provider (e.g. WhatsApp), such as in the Private +Contact Discovery application. Assuming a bloom filter is used, the Client set +size affects the algorithmic complexity in linear time O(n), with a constant +number of lookups. The bloom filter has linear size in the server's set, hence +the algorithmic complexity of our protocol is O(n). However, a bloom filter +requires a large number of lookups on each query, if the false positive rate is +low. An alternative is the Golomb Compressed Set, which requires O(n log n) time +due to sorting operations, but in practice takes around 25-30% less space than a +bloom filter. diff --git a/package-lock.json b/package-lock.json index 15376e6d..ac501839 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@openmined/psi.js", - "version": "2.0.0", + "version": "2.0.1", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@openmined/psi.js", - "version": "2.0.0", + "version": "2.0.1", "license": "Apache-2.0", "dependencies": { "@grpc/grpc-js": "^1.8.8", diff --git a/package.json b/package.json index 7b7127c5..f2bcec74 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@openmined/psi.js", - "version": "2.0.0", + "version": "2.0.1", "description": "Private Set Intersection for JavaScript", "repository": { "type": "git", diff --git a/private_set_intersection/c/integration_test.cpp b/private_set_intersection/c/integration_test.cpp index cadfee11..8b655293 100644 --- a/private_set_intersection/c/integration_test.cpp +++ b/private_set_intersection/c/integration_test.cpp @@ -14,6 +14,8 @@ // limitations under the License. 
// +#include + #include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "gtest/gtest.h" @@ -223,7 +225,8 @@ TEST_P(Correctness, intersection) { // Test if size is approximately as expected (up to 10%). EXPECT_GE(intersection_size, num_client_inputs / 2); - EXPECT_LT(intersection_size, (num_client_inputs / 2) * 1.1); + EXPECT_LT((double)intersection_size, + ceil((double(num_client_inputs) / 2.0) * 1.1)); } free(server_setup); free(client_request); diff --git a/private_set_intersection/cpp/datastructure/raw.cpp b/private_set_intersection/cpp/datastructure/raw.cpp index 2f172c45..eb6f2d91 100644 --- a/private_set_intersection/cpp/datastructure/raw.cpp +++ b/private_set_intersection/cpp/datastructure/raw.cpp @@ -25,16 +25,34 @@ namespace private_set_intersection { +// Computes the intersection of two collections. The first collection must be a +// `pair`. The `T` must be the same in the second collection. +// +// Requires both collections to be sorted. +// +// Complexity: +// - O(max(n, m)) +template +void custom_set_intersection(InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2, OutputIt d_first) { + while (first1 != last1 && first2 != last2) { + if ((*first1).first < *first2) + ++first1; + else { + // *first1 and *first2 are equivalent. + if (!(*first2 < (*first1).first)) { + *d_first++ = (*first1++).second; + } + ++first2; + } + } +} + Raw::Raw(std::vector elements) : encrypted_(std::move(elements)) {} StatusOr> Raw::Create(int64_t num_client_inputs, std::vector elements) { - auto num_server_inputs = static_cast(elements.size()); - - // If server inputs < client inputs, add random encrypted values - // ... 
- - // Then we perform a sort to make intersections easier to find + // We sort to make intersections easier to find later std::sort(elements.begin(), elements.end()); return absl::WrapUnique(new Raw(elements)); @@ -55,14 +73,24 @@ StatusOr> Raw::CreateFromProtobuf( std::vector Raw::Intersect( absl::Span elements) const { - std::vector res; - - for (size_t i = 0; i < elements.size(); i++) { - if (std::binary_search(encrypted_.begin(), encrypted_.end(), elements[i])) { - res.push_back(i); - } + // This implementation creates a copy of `elements`, but the tradeoff is that + // we can compute the intersection in O(nlog(n) + max(n, m)) where `n` and `m` + // correspond to the number of client and server elements respectively. + std::vector> vp(elements.size()); + + // Collect a pair with the index to track the original index after sorting. + for (size_t i = 0; i < elements.size(); ++i) { + vp[i] = make_pair(elements[i], (int64_t)i); } + // Next, we sort the collection. O(nlog(n)) + std::sort(vp.begin(), vp.end()); + + std::vector res; + // Compute intersection. 
O(max(m, n)) + custom_set_intersection(vp.begin(), vp.end(), encrypted_.begin(), + encrypted_.end(), std::back_inserter(res)); + return res; } diff --git a/private_set_intersection/cpp/datastructure/raw.h b/private_set_intersection/cpp/datastructure/raw.h index 0df362ad..f5ea0585 100644 --- a/private_set_intersection/cpp/datastructure/raw.h +++ b/private_set_intersection/cpp/datastructure/raw.h @@ -41,6 +41,7 @@ class Raw { static StatusOr> CreateFromProtobuf( const psi_proto::ServerSetup& encoded_filter); + // Calculates the intersection std::vector Intersect(absl::Span elements) const; // Returns the size of the encrypted elements diff --git a/private_set_intersection/cpp/datastructure/raw_test.cpp b/private_set_intersection/cpp/datastructure/raw_test.cpp index 4334292f..6ddfc263 100644 --- a/private_set_intersection/cpp/datastructure/raw_test.cpp +++ b/private_set_intersection/cpp/datastructure/raw_test.cpp @@ -36,12 +36,23 @@ class RawTest : public ::testing::Test { std::unique_ptr container_; }; -TEST_F(RawTest, TestAdd) { +TEST_F(RawTest, TestIntersection) { std::vector server = {"a", "b", "c", "d", "e"}; - std::vector client = {"z", "b", "c", "d"}; + std::vector client = {"b", "c", "d", "z"}; SetUp(static_cast(client.size()), server); - std::vector results = container_->Intersect(client); + std::vector results = container_->Intersect(absl::MakeSpan(client)); + std::vector expected{0, 1, 2}; + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results, expected); +} + +TEST_F(RawTest, TestIntersectionLargerClient) { + std::vector server = {"b", "c", "d", "z"}; + std::vector client = {"a", "b", "c", "d", "e"}; + + SetUp(static_cast(client.size()), server); + std::vector results = container_->Intersect(absl::MakeSpan(client)); std::vector expected{1, 2, 3}; EXPECT_EQ(results.size(), 3); EXPECT_EQ(results, expected); @@ -49,7 +60,7 @@ TEST_F(RawTest, TestAdd) { TEST_F(RawTest, TestToProtobuf) { std::vector server = {"b", "a", "c", "d", "e"}; - std::vector client = {"z", 
"b", "c", "d"}; + std::vector client = {"b", "c", "d", "z"}; SetUp(static_cast(client.size()), server); @@ -63,17 +74,17 @@ TEST_F(RawTest, TestToProtobuf) { EXPECT_EQ(encoded_filter.raw().encrypted_elements()[0], "a"); } -TEST_F(RawTest, TestCreateFromProtobuf) { +TEST_F(RawTest, TestIntersectionFromProtobuf) { std::vector server = {"a", "b", "c", "d", "e"}; - std::vector client = {"z", "b", "c", "d"}; + std::vector client = {"b", "c", "d", "z"}; SetUp(static_cast(client.size()), server); // Create the protobuf from the Raw container and check if it matches. PSI_ASSERT_OK_AND_ASSIGN(auto container2, Raw::CreateFromProtobuf(container_->ToProtobuf())); - std::vector results = container2->Intersect(client); - std::vector expected{1, 2, 3}; + std::vector results = container2->Intersect(absl::MakeSpan(client)); + std::vector expected{0, 1, 2}; EXPECT_EQ(results.size(), 3); EXPECT_EQ(results, expected); } diff --git a/private_set_intersection/cpp/psi_benchmark.cpp b/private_set_intersection/cpp/psi_benchmark.cpp index df9259aa..074bddf4 100644 --- a/private_set_intersection/cpp/psi_benchmark.cpp +++ b/private_set_intersection/cpp/psi_benchmark.cpp @@ -31,6 +31,23 @@ void BM_ServerSetup(benchmark::State& state, double fpr, } // Range is for the number of inputs, and the captured argument is the false // positive rate for 10k client queries. 
+BENCHMARK_CAPTURE(BM_ServerSetup, 0.001 size raw, 0.001, false, + DataStructure::Raw) + ->RangeMultiplier(10) + ->Range(1, 100000); +BENCHMARK_CAPTURE(BM_ServerSetup, 0.000001 size raw, 0.000001, false, + DataStructure::Raw) + ->RangeMultiplier(10) + ->Range(1, 100000); +BENCHMARK_CAPTURE(BM_ServerSetup, 0.001 intersection raw, 0.001, true, + DataStructure::Raw) + ->RangeMultiplier(10) + ->Range(1, 100000); +BENCHMARK_CAPTURE(BM_ServerSetup, 0.000001 intersection raw, 0.000001, true, + DataStructure::Raw) + ->RangeMultiplier(10) + ->Range(1, 100000); + BENCHMARK_CAPTURE(BM_ServerSetup, 0.001 size gcs, 0.001, false, DataStructure::Gcs) ->RangeMultiplier(10) @@ -159,6 +176,14 @@ void BM_ClientProcessResponse(benchmark::State& state, bool reveal_intersection, static_cast(elements_processed), benchmark::Counter::kIsRate); } // Range is for the number of inputs. +BENCHMARK_CAPTURE(BM_ClientProcessResponse, size raw, false, DataStructure::Raw, + 1.0) + ->RangeMultiplier(10) + ->Range(1, 10000); +BENCHMARK_CAPTURE(BM_ClientProcessResponse, intersection raw, true, + DataStructure::Raw, 1.0) + ->RangeMultiplier(10) + ->Range(1, 10000); BENCHMARK_CAPTURE(BM_ClientProcessResponse, size gcs, false, DataStructure::Gcs, 1.0) ->RangeMultiplier(10) @@ -175,6 +200,14 @@ BENCHMARK_CAPTURE(BM_ClientProcessResponse, intersection bloom, true, DataStructure::BloomFilter, 1.0) ->RangeMultiplier(10) ->Range(1, 10000); +BENCHMARK_CAPTURE(BM_ClientProcessResponse, size raw asymmetric, false, + DataStructure::Raw, 0.001) + ->RangeMultiplier(10) + ->Range(10000, 100000); +BENCHMARK_CAPTURE(BM_ClientProcessResponse, intersection raw asymmetric, true, + DataStructure::Raw, 0.001) + ->RangeMultiplier(10) + ->Range(10000, 100000); BENCHMARK_CAPTURE(BM_ClientProcessResponse, size gcs asymmetric, false, DataStructure::Gcs, 0.001) ->RangeMultiplier(10) diff --git a/private_set_intersection/cpp/psi_client.cpp b/private_set_intersection/cpp/psi_client.cpp index 08387f4a..0968f442 100644 --- 
a/private_set_intersection/cpp/psi_client.cpp +++ b/private_set_intersection/cpp/psi_client.cpp @@ -29,12 +29,29 @@ namespace private_set_intersection { +/** + * @brief Construct a new Psi Client:: Psi Client object + * + * @param ec_cipher A unique pointer to a commutative , which is used for + * encryption and decryption in the Private Set Intersection (PSI) protocol. + * @param reveal_intersection A boolean value indicating whether the + * intersection of the two sets should be revealed after the PSI protocol is + * completed. + */ PsiClient::PsiClient( std::unique_ptr<::private_join_and_compute::ECCommutativeCipher> ec_cipher, bool reveal_intersection) : ec_cipher_(std::move(ec_cipher)), reveal_intersection(reveal_intersection) {} +/** + * @brief Creates a new instance of the PsiClient class with a new key pair for + * encryption and decryption using ECCommutativeCipher. + * + * @param reveal_intersection A boolean indicating whether the client wants to + * learn the intersection values or only its size (cardinality). + * @return StatusOr> + */ StatusOr> PsiClient::CreateWithNewKey( bool reveal_intersection) { // Create an EC cipher with curve P-256. This gives 128 bits of security. @@ -43,10 +60,22 @@ StatusOr> PsiClient::CreateWithNewKey( ::private_join_and_compute::ECCommutativeCipher::CreateWithNewKey( NID_X9_62_prime256v1, ::private_join_and_compute::ECCommutativeCipher::HashType::SHA256)); + + // Create a new instance of the PsiClient class using the ECCommutativeCipher + // object and the reveal_intersection boolean. return absl::WrapUnique( new PsiClient(std::move(ec_cipher), reveal_intersection)); } +/** + * @brief Creates a new PsiClient instance using an EC cipher created from the + * provided key. + * + * @param key_bytes The bytes representing the key for the EC cipher. + * @param reveal_intersection A boolean flag indicating whether the intersection + * should be revealed. 
+ * @return StatusOr> + */ StatusOr> PsiClient::CreateFromKey( const std::string& key_bytes, bool reveal_intersection) { // Create an EC cipher with curve P-256. This gives 128 bits of security. @@ -59,6 +88,13 @@ StatusOr> PsiClient::CreateFromKey( new PsiClient(std::move(ec_cipher), reveal_intersection)); } +/** + * @brief Creates a request protobuf with encrypted inputs and a reveal flag. + * + * @param inputs The inputs to encrypt and add to the request protobuf. + * + * @return StatusOr + */ StatusOr PsiClient::CreateRequest( absl::Span inputs) const { // Encrypt inputs one by one. @@ -82,6 +118,14 @@ StatusOr PsiClient::CreateRequest( return request; } +/** + * @brief Compute the intersection + * + * @param server_setup The original server's setup + * @param server_response The previous server's response + * + * @return StatusOr> + */ StatusOr> PsiClient::GetIntersection( const psi_proto::ServerSetup& server_setup, const psi_proto::Response& server_response) const { @@ -96,6 +140,14 @@ StatusOr> PsiClient::GetIntersection( return intersection; } +/** + * @brief Compute the intersection (cardinality) + * + * @param server_setup The original server's setup + * @param server_response The previous server's response + * + * @return StatusOr + */ StatusOr PsiClient::GetIntersectionSize( const psi_proto::ServerSetup& server_setup, const psi_proto::Response& server_response) const { @@ -104,6 +156,14 @@ StatusOr PsiClient::GetIntersectionSize( return static_cast(intersection.size()); } +/** + * @brief Process the server's response to obtain the intersection + * + * @param server_setup The original server's setup + * @param server_response The previous server's response + * + * @return StatusOr> + */ StatusOr> PsiClient::ProcessResponse( const psi_proto::ServerSetup& server_setup, const psi_proto::Response& server_response) const { @@ -132,21 +192,18 @@ StatusOr> PsiClient::ProcessResponse( case psi_proto::ServerSetup::DataStructureCase::kRaw: { // Decode Bloom Filter 
from the server setup. ASSIGN_OR_RETURN(auto container, Raw::CreateFromProtobuf(server_setup)); - return container->Intersect( - absl::MakeConstSpan(&decrypted[0], decrypted.size())); + return container->Intersect(absl::MakeConstSpan(decrypted)); } case psi_proto::ServerSetup::DataStructureCase::kGcs: { // Decode GCS from the server setup. ASSIGN_OR_RETURN(auto container, GCS::CreateFromProtobuf(server_setup)); - return container->Intersect( - absl::MakeConstSpan(&decrypted[0], decrypted.size())); + return container->Intersect(absl::MakeConstSpan(decrypted)); } case psi_proto::ServerSetup::DataStructureCase::kBloomFilter: { // Decode Bloom Filter from the server setup. ASSIGN_OR_RETURN(auto container, BloomFilter::CreateFromProtobuf(server_setup)); - return container->Intersect( - absl::MakeConstSpan(&decrypted[0], decrypted.size())); + return container->Intersect(absl::MakeConstSpan(decrypted)); } default: { return absl::InvalidArgumentError("Impossible"); @@ -154,6 +211,11 @@ StatusOr> PsiClient::ProcessResponse( } } +/** + * @brief Get the client's private key + * + * @return The private key bytes, left-padded with '\0' to a length of 32 + */ std::string PsiClient::GetPrivateKeyBytes() const { std::string key = ec_cipher_->GetPrivateKeyBytes(); key.insert(key.begin(), 32 - key.length(), '\0'); diff --git a/private_set_intersection/cpp/psi_client.h b/private_set_intersection/cpp/psi_client.h index 8689a5a4..845dcbbd 100644 --- a/private_set_intersection/cpp/psi_client.h +++ b/private_set_intersection/cpp/psi_client.h @@ -26,24 +26,25 @@ namespace private_set_intersection { using absl::StatusOr; -// Client side of a Private Set Intersection protocol. In -// PSI, two parties (client and server) each hold a dataset, and at -// the end of the protocol the client learns the size of the intersection of -// both datasets, while no party learns anything beyond that (cardinality mode). +// Client side of a Private Set Intersection protocol. 
In PSI, two parties +// (client and server) each hold a dataset, and at the end of the protocol the +// client learns the size of the intersection of both datasets, while no party +// learns anything beyond that (cardinality mode). // -// This variant of PSI introduces a small false-positive rate (i.e., -// the reported cardinality will be slightly larger than the actual cardinality. -// The false positive rate can be tuned by the server. +// The container selected in this PSI library can introduce a small +// false-positive rate (i.e., the reported cardinality will be slightly larger +// than the actual cardinality). This false-positive rate can be tuned by the +// server. // // The protocol works as follows. // -// // 1. Setup phase // // The server encrypts all its elements x under a commutative encryption scheme, -// computing H(x)^s where s is its secret key. The encrypted elements are then -// inserted in a Bloom filter, which is sent to the client in the form of a -// serialized protobuf. The protobuf has the following form: +// computing `H(x)^s` where `s` is its secret key. The encrypted elements are +// then inserted in a Bloom filter, which is sent to the client in the form of a +// serialized protobuf. The example `BloomFilter` container protobuf has the +// following form: // // { // "num_hash_functions": , @@ -70,30 +71,30 @@ using absl::StatusOr; // // 3. Server response // -// For each encrypted element H(x)^c received from the client, the server -// encrypts it again under the commutative encryption scheme with its secret -// key s, computing (H(x)^c)^s = H(x)^(cs). The result is sent back to the +// For each encrypted element `H(x)^c` received from the client, the server +// encrypts it again under the commutative encryption scheme with its secret key +// `s`, computing `(H(x)^c)^s = H(x)^(cs)`. 
The result is sent back to the // client as a serialized protobuf holding the following form: // // { // "encrypted_elements": [ H(x_1)^(cs), H(x_2)^(cs), ... ] // } // -// If reveal_intersection is false, the array is sorted to hide the order of +// If `reveal_intersection` is false, the array is sorted to hide the order of // entries from the client. // // 4. Client computes intersection // // The client decrypts each element received from the server's response using -// its secret key c, computing (H(x)^(cs))^(1/c) = H(x)^s. It then checks if +// its secret key `c`, computing `(H(x)^(cs))^(1/c) = H(x)^s`. It then checks if // each element is present in the Bloom filter, and reports the number of // matches as the intersection size. class PsiClient { public: PsiClient() = delete; - // Creates and returns a new client instance with a fresh private key. - // If `reveal_intersection` is true, the client learns the elements in the + // Creates and returns a new client instance with a fresh private key. If + // `reveal_intersection` is true, the client learns the elements in the // intersection of the two datasets. Otherwise, only the intersection size is // learned. // @@ -101,22 +102,23 @@ class PsiClient { static StatusOr> CreateWithNewKey( bool reveal_intersection); - // Creates and returns a new client instance with the provided private key. - // If `reveal_intersection` is true, the client learns the elements in the + // Creates and returns a new client instance with the provided private key. If + // `reveal_intersection` is true, the client learns the elements in the // intersection of the two datasets. Otherwise, only the intersection size is // learned. // - // WARNING: This function should be used with caution, since reusing the - // client key for multiple requests can reveal information about the input - // sets. If in doubt, use `CreateWithNewKey`. 
+ // WARNING: This function is provided for use in deterministic testing and + // should be used with caution, since reusing the client key for multiple + // requests can reveal information about the input sets. If in doubt, use + // `CreateWithNewKey`. // // Returns INTERNAL if any OpenSSL crypto operations fail. static StatusOr> CreateFromKey( const std::string& key_bytes, bool reveal_intersection); - // Creates a request protobuf to be serialized and sent to the server. - // For each input element x, computes H(x)^c, where c is the secret - // key of ec_cipher_. + // Creates a request protobuf to be serialized and sent to the server. For + // each input element x, computes H(x)^c, where c is the secret key of + // ec_cipher_. // // Returns INTERNAL if encryption fails. StatusOr CreateRequest( @@ -148,8 +150,8 @@ class PsiClient { const psi_proto::ServerSetup& server_setup, const psi_proto::Response& server_response) const; - // Returns this instance's private key. This key should only be used to - // create other client instances. DO NOT SEND THIS KEY TO ANY OTHER PARTY! + // Returns this instance's private key. This key should only be used to create + // other client instances. DO NOT SEND THIS KEY TO ANY OTHER PARTY! std::string GetPrivateKeyBytes() const; private: diff --git a/private_set_intersection/cpp/psi_client_test.cpp b/private_set_intersection/cpp/psi_client_test.cpp index 7bb22170..60ed4877 100644 --- a/private_set_intersection/cpp/psi_client_test.cpp +++ b/private_set_intersection/cpp/psi_client_test.cpp @@ -16,6 +16,8 @@ #include "private_set_intersection/cpp/psi_client.h" +#include + #include "absl/container/flat_hash_set.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" @@ -222,7 +224,8 @@ TEST_F(PsiClientTest, TestCorrectnessIntersectionSize) { // Test if size is approximately as expected (up to 10%). 
EXPECT_GE(intersection_size, num_client_elements / 2); - EXPECT_LT(intersection_size, (num_client_elements / 2) * 1.1); + EXPECT_LT((double)intersection_size, + ceil(((double)num_client_elements / 2.0) * 1.1)); } TEST_F(PsiClientTest, FailIfRevealIntersectionDoesntMatch) { diff --git a/private_set_intersection/cpp/psi_server.cpp b/private_set_intersection/cpp/psi_server.cpp index 799157ae..faa5a896 100644 --- a/private_set_intersection/cpp/psi_server.cpp +++ b/private_set_intersection/cpp/psi_server.cpp @@ -29,12 +29,29 @@ namespace private_set_intersection { +/** + * @brief Construct a new Psi Server:: Psi Server object + * + * @param ec_cipher A unique pointer to a commutative , which is used for + * encryption and decryption in the Private Set Intersection (PSI) protocol. + * @param reveal_intersection A boolean value indicating whether the + * intersection of the two sets should be revealed after the PSI protocol is + * completed. + */ PsiServer::PsiServer( std::unique_ptr<::private_join_and_compute::ECCommutativeCipher> ec_cipher, bool reveal_intersection) : ec_cipher_(std::move(ec_cipher)), reveal_intersection(reveal_intersection) {} +/** + * @brief Creates a new instance of the PsiServer class with a new key pair for + * encryption and decryption using ECCommutativeCipher. + * + * @param reveal_intersection A boolean indicating whether the client wants to + * learn the intersection values or only its size (cardinality). + * @return StatusOr> + */ StatusOr> PsiServer::CreateWithNewKey( bool reveal_intersection) { // Create an EC cipher with curve P-256. This gives 128 bits of security. @@ -47,6 +64,15 @@ StatusOr> PsiServer::CreateWithNewKey( new PsiServer(std::move(ec_cipher), reveal_intersection)); } +/** + * @brief Creates a new PsiServer instance using an EC cipher created from the + * provided key. + * + * @param key_bytes The bytes representing the key for the EC cipher. 
+ * @param reveal_intersection A boolean flag indicating whether the intersection + * should be revealed. + * @return StatusOr> + */ StatusOr> PsiServer::CreateFromKey( const std::string& key_bytes, bool reveal_intersection) { // Create an EC cipher with curve P-256. This gives 128 bits of security. @@ -59,6 +85,18 @@ StatusOr> PsiServer::CreateFromKey( new PsiServer(std::move(ec_cipher), reveal_intersection)); } +/** + * @brief Create a server setup message containing the chosen data structure, + * based on the provided parameters. + * + * @param fpr A double representing the false positive rate of the chosen data + * structure (This is ignored for the `Raw` datastructure) + * @param num_client_inputs The number of client inputs to the PSI protocol + * @param inputs The server inputs to the PSI protocol + * @param ds A datastructure enum indicating the type of data structure to use + * for the PSI protocol + * @return StatusOr + */ StatusOr PsiServer::CreateSetupMessage( double fpr, int64_t num_client_inputs, absl::Span inputs, DataStructure ds) const { @@ -77,21 +115,18 @@ StatusOr PsiServer::CreateSetupMessage( switch (ds) { case DataStructure::Gcs: { // Create a GCS and insert elements into it. - ASSIGN_OR_RETURN( - auto container, - GCS::Create(corrected_fpr, num_client_inputs, - absl::MakeConstSpan(&encrypted[0], encrypted.size()))); + ASSIGN_OR_RETURN(auto container, + GCS::Create(corrected_fpr, num_client_inputs, + absl::MakeConstSpan(encrypted))); // Return the GCS as a Protobuf return container->ToProtobuf(); } case DataStructure::BloomFilter: { // Create a Bloom Filter and insert elements into it. 
- ASSIGN_OR_RETURN( - auto container, - BloomFilter::Create( - corrected_fpr, num_client_inputs, - absl::MakeConstSpan(&encrypted[0], encrypted.size()))); + ASSIGN_OR_RETURN(auto container, + BloomFilter::Create(corrected_fpr, num_client_inputs, + absl::MakeConstSpan(encrypted))); // Return the Bloom Filter as a Protobuf return container->ToProtobuf(); @@ -109,6 +144,13 @@ StatusOr PsiServer::CreateSetupMessage( } } +/** + * @brief Processes a client's request by re-encrypting the request's elements + * and creating a response + * + * @param client_request The request containing the elements to re-encrypt + * @return StatusOr + */ StatusOr PsiServer::ProcessRequest( const psi_proto::Request& client_request) const { if (!client_request.IsInitialized()) { @@ -147,6 +189,11 @@ StatusOr PsiServer::ProcessRequest( return response; } +/** + * @brief Get the server's private key + * + * @return The private key as a binary string, left-padded with zero bytes to 32 bytes + */ std::string PsiServer::GetPrivateKeyBytes() const { std::string key = ec_cipher_->GetPrivateKeyBytes(); key.insert(key.begin(), 32 - key.length(), '\0'); diff --git a/private_set_intersection/cpp/psi_server.h b/private_set_intersection/cpp/psi_server.h index bc06ae6e..f19e4101 100644 --- a/private_set_intersection/cpp/psi_server.h +++ b/private_set_intersection/cpp/psi_server.h @@ -27,32 +27,36 @@ namespace private_set_intersection { using absl::StatusOr; -// The server side of a Private Set Intersection protocol. -// See the documentation in PsiClient for a full description of the -// protocol. +// The server side of a Private Set Intersection protocol. See the documentation +// in PsiClient for a full description of the protocol. class PsiServer { public: PsiServer() = delete; - // Creates and returns a new server instance with a fresh private key. - // If `reveal_intersection` indicates whether the client should learn the + // Creates and returns a new server instance with a fresh private key.
+ // `reveal_intersection` indicates whether the client should learn the // intersection or only its size. // // Returns INTERNAL if any OpenSSL crypto operations fail. static StatusOr> CreateWithNewKey( bool reveal_intersection); - // Creates and returns a new server instance with the provided private key. - // If `reveal_intersection` indicates whether the client should learn the + // Creates and returns a new server instance with the provided private key. + // `reveal_intersection` indicates whether the client should learn the // intersection or only its size. // + // WARNING: This function is provided for use in deterministic testing and + // should be used with caution, since reusing the server key for multiple + // requests can reveal information about the input sets. If in doubt, use + // `CreateWithNewKey`. + // // Returns INTERNAL if any OpenSSL crypto operations fail. static StatusOr> CreateFromKey( const std::string& key_bytes, bool reveal_intersection); // Creates a setup message from the server's dataset to be sent to the client. - // The setup message is a set containing H(x)^s for each element x in - // `inputs`, where s is the server's secret key. The setup is sent to the + // The setup message is a set containing `H(x)^s` for each element `x` in + // `inputs`, where `s` is the server's secret key. The setup is sent to the // client as a serialized protobuf with the following form: // // { @@ -82,10 +86,11 @@ class PsiServer { DataStructure ds = DataStructure::Gcs) const; // Processes a client query and returns the corresponding server response to - // be sent to the client. For each encrytped element H(x)^c in the decoded - // `client_request`, computes (H(x)^c)^s = H(X)^(cs) and returns these as an + // be sent to the client. For each encrypted element `H(x)^c` in the decoded + // `client_request`, computes `(H(x)^c)^s = H(x)^(cs)` and returns these as an // array inside a protobuf.
- // If reveal_intersection == false, the resulting array is sorted, which + // + // If `reveal_intersection` == false, the resulting array is sorted, which // prevents the client from matching the individual response elements to the // ones in the request, ensuring that they can only learn the intersection // size but not individual elements in the intersection. @@ -95,8 +100,8 @@ class PsiServer { StatusOr ProcessRequest( const psi_proto::Request& client_request) const; - // Returns this instance's private key. This key should only be used to - // create other server instances. DO NOT SEND THIS KEY TO ANY OTHER PARTY! + // Returns this instance's private key. This key should only be used to create + // other server instances. DO NOT SEND THIS KEY TO ANY OTHER PARTY! std::string GetPrivateKeyBytes() const; private: diff --git a/private_set_intersection/cpp/psi_server_test.cpp b/private_set_intersection/cpp/psi_server_test.cpp index 6b40fd37..dbd81295 100644 --- a/private_set_intersection/cpp/psi_server_test.cpp +++ b/private_set_intersection/cpp/psi_server_test.cpp @@ -13,9 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. // - #include "private_set_intersection/cpp/psi_server.h" +#include + #include "absl/container/flat_hash_set.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" @@ -141,7 +142,8 @@ TEST_F(PsiServerTest, TestCorrectnessIntersectionSize) { // Test if size is approximately as expected (up to 10%). 
EXPECT_GE(intersection_size, num_client_elements / 2); - EXPECT_LT(intersection_size, (num_client_elements / 2) * 1.1); + EXPECT_LT((double)intersection_size, + ceil(((double)num_client_elements / 2.0) * 1.1)); } TEST_F(PsiServerTest, TestArrayIsSortedWhenNotRevealingIntersection) { diff --git a/private_set_intersection/go/integration_test.go b/private_set_intersection/go/integration_test.go index beb9056d..d3c8493d 100644 --- a/private_set_intersection/go/integration_test.go +++ b/private_set_intersection/go/integration_test.go @@ -8,6 +8,7 @@ import ( psi_server "github.com/openmined/psi/server" psi_version "github.com/openmined/psi/version" "google.golang.org/protobuf/proto" + "math" "regexp" "testing" ) @@ -124,7 +125,7 @@ func TestIntegrationIntersection(t *testing.T) { } // Create the setup - serverSetup, err := server.CreateSetupMessage(fpr, int64(len(clientInputs)), serverInputs, 0) + serverSetup, err := server.CreateSetupMessage(fpr, int64(len(clientInputs)), serverInputs, tc.ds) if err != nil { t.Errorf("Failed to create serverSetup: %v", err) } @@ -192,8 +193,8 @@ func TestIntegrationIntersection(t *testing.T) { t.Errorf("Invalid intersection. expected lower bound %v. got %v", (numClientInputs / 2), intersectionCnt) } - if float64(intersectionCnt) > float64(numClientInputs/2)*float64(1.1) { - t.Errorf("Invalid intersection. expected upper bound %v. got %v", float64(numClientInputs/2)*float64(1.1), intersectionCnt) + if float64(intersectionCnt) > math.Ceil(float64(numClientInputs/2)*float64(1.1)) { + t.Errorf("Invalid intersection. expected upper bound %v. 
got %v", math.Ceil(float64(numClientInputs/2)*float64(1.1)), intersectionCnt) } } diff --git a/private_set_intersection/rust/tests/integration_test.rs b/private_set_intersection/rust/tests/integration_test.rs index bf852f5f..6f8e1120 100644 --- a/private_set_intersection/rust/tests/integration_test.rs +++ b/private_set_intersection/rust/tests/integration_test.rs @@ -71,7 +71,9 @@ fn integration_test() { let intersection_size = client.get_intersection_size(&setup, &response).unwrap(); assert!(intersection_size >= (NUM_CLIENT_ELEMENTS / 2)); - assert!((intersection_size as f64) < ((NUM_CLIENT_ELEMENTS as f64) / 2.0 * 1.1)); + assert!( + (intersection_size as f64) < ((NUM_CLIENT_ELEMENTS as f64) / 2.0 * 1.1).ceil() + ); } } } diff --git a/tools/package.bzl b/tools/package.bzl index 99874ce5..663d1cff 100644 --- a/tools/package.bzl +++ b/tools/package.bzl @@ -1,2 +1,2 @@ """ Version of the current release """ -VERSION_LABEL = "2.0.0" +VERSION_LABEL = "2.0.1"