Skip to content

Commit

Permalink
Adding cosine support in build_disk_index and ensuring that the dummy…
Browse files Browse the repository at this point in the history
… map file is written in the correct location
  • Loading branch information
gopal-msr committed Jul 5, 2024
1 parent b2b0942 commit 74ce806
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
12 changes: 10 additions & 2 deletions apps/build_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,21 @@ int main(int argc, char **argv)

bool use_filters = (label_file != "") ? true : false;
diskann::Metric metric;
if (dist_fn == std::string("l2"))
if (dist_fn == std::string("l2"))
{
metric = diskann::Metric::L2;
}
else if (dist_fn == std::string("mips"))
{
metric = diskann::Metric::INNER_PRODUCT;
}
else if (dist_fn == std::string("cosine"))
{
metric = diskann::Metric::COSINE;
}
else
{
std::cout << "Error. Only l2 and mips distance functions are supported" << std::endl;
std::cout << "Error. Only l2, cosine, and mips distance functions are supported" << std::endl;
return -1;
}

Expand Down
4 changes: 3 additions & 1 deletion src/disk_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,9 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
augmented_labels_file = index_prefix_path + "_augmented_labels.txt";
if (filter_threshold != 0)
{
dummy_remap_file = index_prefix_path + "_dummy_remap.txt";
// Changing this filename from "_dummy_remap.txt" to "_disk.index_dummy_map.txt" to conform
// to the convention that all index files share the "_disk.index" prefix.
dummy_remap_file = index_prefix_path + "_disk.index_dummy_map.txt";
breakup_dense_points<T>(data_file_to_use, labels_file_to_use, filter_threshold, augmented_data_file,
augmented_labels_file,
dummy_remap_file); // RKNOTE: This has large memory footprint,
Expand Down
20 changes: 20 additions & 0 deletions src/pq_flash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
#ifndef EXEC_ENV_OLS
infile.close();
#endif
diskann::cout << "Labels file: " << labels_file << " loaded with " << num_pts_in_label_file << " points"
<< std::endl;

#ifdef EXEC_ENV_OLS
FileContent &content_labels_map = files.getContent(labels_map_file);
Expand All @@ -889,6 +891,8 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
map_reader.close();
#endif

diskann::cout << "Labels map file: " << labels_map_file << " loaded." << std::endl;

#ifdef EXEC_ENV_OLS
if (files.fileExists(labels_to_medoids))
{
Expand All @@ -902,7 +906,16 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
assert(medoid_stream.is_open());
#endif
load_label_medoid_map(labels_to_medoids, medoid_stream);
diskann::cout << "Loaded labels_to_medoids map from: " << labels_to_medoids << std::endl;
}
else
{
std::stringstream ss;
ss << "Filter support is enabled but " << labels_to_medoids << " file cannot be opened." << std::endl;
diskann::cerr << ss.str();
throw diskann::ANNException(ss.str(), -1);
}

std::string univ_label_file = std ::string(_disk_index_file) + "_universal_label.txt";

#ifdef EXEC_ENV_OLS
Expand Down Expand Up @@ -944,6 +957,13 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
#endif
diskann::cout << "Loaded dummy map" << std::endl;
}
else
{
std::stringstream ss;
ss << "Note: Filter support is enabled but " << dummy_map_file << " file cannot be opened" << std::endl;
diskann::cerr << ss.str();
}

}
else
{
Expand Down

0 comments on commit 74ce806

Please sign in to comment.