Skip to content

Commit

Permalink
Merge pull request vespa-engine#9890 from vespa-engine/toregge/cap-in…
Browse files Browse the repository at this point in the history
…terleaved-features-in-memory-index

Cap interleaved features in memory index.
  • Loading branch information
geirst authored Jun 26, 2019
2 parents f36d37d + 14e53ea commit c0624b3
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 28 deletions.
11 changes: 11 additions & 0 deletions searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,17 @@ TEST_F(FieldIndexInterleavedFeaturesTest, no_features_are_unpacked)
expect_features_unpacked("{1000000:}", 0, 0);
}

// Checks that the interleaved features stored in a posting list entry are
// saturated at the largest 16-bit value when the inserted features are larger.
TEST_F(FieldIndexInterleavedFeaturesTest, interleaved_features_are_capped)
{
    // Both arguments to getFeatures() exceed std::numeric_limits<uint16_t>::max() (65535).
    WrapInserter(idx).word("b").add(11, getFeatures(66001, 66000)).flush();
    auto itr = this->idx.find("b");
    // The posting list for "b" must start at the document that was just added.
    EXPECT_EQ(11, itr.getKey());
    auto &entry = itr.getData();
    // Neither value fits in 16 bits, so both are expected to be clamped.
    EXPECT_EQ(std::numeric_limits<uint16_t>::max(), entry.get_num_occs());
    EXPECT_EQ(std::numeric_limits<uint16_t>::max(), entry.get_field_length());
}

Schema
make_multi_field_schema()
{
Expand Down
7 changes: 5 additions & 2 deletions searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ namespace {
vespalib::string PosOccIdCooked = "PosOcc.3.Cooked";
vespalib::string interleaved_features("interleaved_features");

// Saturating conversion from 32 to 16 bits: values above the largest uint16_t
// are clamped to that maximum, all other values are returned unchanged.
uint16_t cap_u16(uint32_t val)
{
    const uint32_t limit = std::numeric_limits<uint16_t>::max();
    return static_cast<uint16_t>((val < limit) ? val : limit);
}

}

using vespalib::getLastErrorString;
Expand Down Expand Up @@ -346,8 +348,9 @@ FieldReaderStripInfo::read()
if (_hasElements && _field_length_scanner) {
field_length = _field_length_scanner->get_field_length(features.doc_id());
}
features.set_field_length(field_length);
features.set_num_occs(num_occs);
// cap interleaved features to 16 bits each, to match memory index
features.set_field_length(cap_u16(field_length));
features.set_num_occs(cap_u16(num_occs));
}
}

Expand Down
34 changes: 10 additions & 24 deletions searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,6 @@ using vespalib::GenerationHandler;

namespace search::memoryindex {

namespace {

// Fills in the interleaved features (field length and number of occurrences)
// of 'features' by copying them from the first element of its regular features.
// The element list must not be empty.
// TODO: Update when proper cheap features are present in memory index.
void set_interleaved_features(DocIdAndFeatures &features)
{
    assert(!features.elements().empty());
    const auto &first_element = features.elements().front();
    features.set_field_length(first_element.getElementLen());
    features.set_num_occs(first_element.getNumOccs());
}

}

using datastore::EntryRef;

template <bool interleaved_features>
Expand Down Expand Up @@ -194,25 +180,25 @@ FieldIndex<interleaved_features>::dump(search::index::IndexBuilder & indexBuilde
auto pitr = tree->begin(_postingListStore.getAllocator());
assert(pitr.valid());
for (; pitr.valid(); ++pitr) {
uint32_t docId = pitr.getKey();
EntryRef featureRef(pitr.getData().get_features());
_featureStore.setupForReadFeatures(featureRef, decoder);
features.set_doc_id(pitr.getKey());
const PostingListEntryType &entry(pitr.getData());
features.set_num_occs(entry.get_num_occs());
features.set_field_length(entry.get_field_length());
_featureStore.setupForReadFeatures(entry.get_features(), decoder);
decoder.readFeatures(features);
features.set_doc_id(docId);
set_interleaved_features(features);
indexBuilder.add_document(features);
}
} else {
const PostingListKeyDataType *kd =
_postingListStore.getKeyDataEntry(plist, clusterSize);
const PostingListKeyDataType *kde = kd + clusterSize;
for (; kd != kde; ++kd) {
uint32_t docId = kd->_key;
EntryRef featureRef(kd->getData().get_features());
_featureStore.setupForReadFeatures(featureRef, decoder);
features.set_doc_id(kd->_key);
const PostingListEntryType &entry(kd->getData());
features.set_num_occs(entry.get_num_occs());
features.set_field_length(entry.get_field_length());
_featureStore.setupForReadFeatures(entry.get_features(), decoder);
decoder.readFeatures(features);
features.set_doc_id(docId);
set_interleaved_features(features);
indexBuilder.add_document(features);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace {

const vespalib::string emptyWord = "";

// Clamp a 32-bit value into the uint16_t range: anything larger than the
// 16-bit maximum becomes that maximum, smaller values pass through as-is.
uint16_t cap_u16(uint32_t val)
{
    const uint32_t u16_max = std::numeric_limits<uint16_t>::max();
    if (val > u16_max) {
        return static_cast<uint16_t>(u16_max);
    }
    return static_cast<uint16_t>(val);
}

}

template <bool interleaved_features>
Expand Down Expand Up @@ -119,8 +121,8 @@ OrderedFieldIndexInserter<interleaved_features>::add(uint32_t docId,
(_prevDocId == docId && !_prevAdd));
datastore::EntryRef featureRef = _fieldIndex.addFeatures(features);
_adds.push_back(PostingListKeyDataType(docId, PostingListEntryType(featureRef,
features.num_occs(),
features.field_length())));
cap_u16(features.num_occs()),
cap_u16(features.field_length()))));
_listener.insert(_dItr.getKey()._wordRef, docId);
_prevDocId = docId;
_prevAdd = true;
Expand Down

0 comments on commit c0624b3

Please sign in to comment.