Skip to content

Commit

Permalink
fix max token size at search
Browse files Browse the repository at this point in the history
  • Loading branch information
jackdoe committed Apr 18, 2020
1 parent fa4ee0b commit a66023a
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 2 deletions.
9 changes: 9 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ $ go install github.com/jackdoe/zr/cmd/zr
$ go install github.com/jackdoe/zr/cmd/zr-stackexchange
$ go install github.com/jackdoe/zr/cmd/zr-stdin
$ go install github.com/jackdoe/zr/cmd/zr-reindex
$ go install github.com/jackdoe/zr/cmd/zr-fetch

# Download the public index
This is an index that I build and publish; it includes man pages and RFCs.

$ zr-fetch

$ zr -k public/man printf
$ zr -k public/rfc tcp

# Build the index

Expand Down
7 changes: 6 additions & 1 deletion cmd/zr-debug/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ func main() {

sharded := data.NewStore(*root, *kind)
defer sharded.Close()
if *id != "" {
fmt.Printf("EXPCTED SHARD: %d\n\n", sharded.ShardID(*id))
}

for _, store := range sharded.Shards {
for shard, store := range sharded.Shards {
var doc data.Document
if *rid != 0 {
if err := store.DB.Model(data.Document{}).Where("row_id=?", *rid).First(&doc).Error; err != nil {
Expand All @@ -50,11 +53,13 @@ func main() {

doc.Body = util.Decompress(doc.Body)

fmt.Printf(" SHARD: %d\n", shard)
fmt.Printf(" TITLE: %s\n", doc.Title)
fmt.Printf(" TAGS: %s\n", doc.Tags)
fmt.Printf(" DOC ID: %d\n", doc.RowID)
fmt.Printf(" OBJECT ID: %s\n", doc.ObjectID)
fmt.Printf(" INDEXED: %d\n", doc.Indexed)
fmt.Printf(" WEIGHT: %d\n", store.ReadWeight(int32(doc.RowID)))
fmt.Printf("%s\n\n", strings.Repeat("*", 80))
os.Stdout.Write(doc.Body)

Expand Down
3 changes: 3 additions & 0 deletions pkg/data/shard.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ func (s *Shard) MakeQuery(field string, query string) iq.Query {
and := []iq.Query{}
for _, w := range ws {
if len(w) > 0 {
if len(w) > MAX_TOKEN_SIZE {
w = w[:MAX_TOKEN_SIZE]
}
term := fmt.Sprintf("%s_%d", w, i)
q := s.Dir.NewTermQuery(field, term)
and = append(and, q)
Expand Down
6 changes: 5 additions & 1 deletion pkg/data/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ func (s *Store) BulkUpsert(batch []*Document) {
}

// ShardFor returns the entry of s.Shards that objectID maps to, using the
// index computed by ShardID.
func (s *Store) ShardFor(objectID string) *Shard {
	idx := int(s.ShardID(objectID))
	return s.Shards[idx]
}

// ShardID computes the shard index for objectID: the ID is hashed with
// metro.Hash64Str (second argument 0, presumably the seed), the 64-bit hash
// is truncated to 32 bits, and the result is reduced modulo N_SHARDS.
func (s *Store) ShardID(objectID string) uint32 {
	h := metro.Hash64Str(objectID, 0)
	// The modulo of a uint32 is already a uint32, so no further conversion
	// is needed; the stale `return s.Shards[...]` line (wrong return type,
	// and it made this return unreachable) is dropped.
	return uint32(h) % N_SHARDS
}

func (s *Store) Close() {
Expand Down

0 comments on commit a66023a

Please sign in to comment.