From edbb704c480ea01a9d71aa4f1a2ffbec6db7edc3 Mon Sep 17 00:00:00 2001 From: Tomas Machalek Date: Tue, 17 Dec 2024 12:05:58 +0100 Subject: [PATCH] Misc fixes, mainly #134, also got rid of sonic json enc/dec --- cmd/service/fcs.go | 5 ++ cnf/conf.go | 4 +- corpus/conc/conc.go | 151 ---------------------------------------- go.mod | 37 +++++----- go.sum | 80 ++++++++++----------- handler/v12/searchrt.go | 48 +++++-------- handler/v20/searchrt.go | 69 ++++++++---------- mango/mango.cc | 36 ++++++++-- mango/mango.go | 29 ++++++-- mango/mango.h | 14 ++-- rdb/client.go | 63 +++++++++-------- rdb/results.go | 59 ---------------- result/error.go | 34 --------- result/general.go | 5 +- result/result.go | 45 ++---------- result/rscrr.go | 8 +-- result/rscrr_test.go | 106 ++++++++++++++-------------- worker/worker.go | 64 +++++++---------- 18 files changed, 298 insertions(+), 559 deletions(-) delete mode 100644 corpus/conc/conc.go delete mode 100644 rdb/results.go delete mode 100644 result/error.go diff --git a/cmd/service/fcs.go b/cmd/service/fcs.go index 2ede59e..119e570 100644 --- a/cmd/service/fcs.go +++ b/cmd/service/fcs.go @@ -24,6 +24,7 @@ package main import ( "context" + "encoding/gob" "flag" "fmt" "net/http" @@ -36,6 +37,7 @@ import ( "github.com/czcorpus/cnc-gokit/logging" "github.com/czcorpus/cnc-gokit/uniresp" + "github.com/czcorpus/mquery-common/concordance" "github.com/gin-gonic/gin" "github.com/rs/zerolog/log" @@ -65,6 +67,9 @@ func getEnv(name string) string { } func init() { + gob.Register(&concordance.Token{}) + gob.Register(&concordance.Struct{}) + gob.Register(&concordance.CloseStruct{}) } func runApiServer( diff --git a/cnf/conf.go b/cnf/conf.go index bbf27c5..302f057 100644 --- a/cnf/conf.go +++ b/cnf/conf.go @@ -19,12 +19,12 @@ package cnf import ( + "encoding/json" "errors" "os" "path/filepath" "time" - "github.com/bytedance/sonic" "github.com/czcorpus/mquery-sru/corpus" "github.com/czcorpus/mquery-sru/rdb" @@ -172,7 +172,7 @@ func LoadConfig(path string) *Conf { } var conf Conf conf.srcPath = path - err = sonic.Unmarshal(rawData, &conf) + err = json.Unmarshal(rawData, &conf) if err != nil { log.Fatal().Err(err).Msg("Cannot load config") } diff --git a/corpus/conc/conc.go b/corpus/conc/conc.go deleted file mode 100644 index 9091760..0000000 --- a/corpus/conc/conc.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2023 Tomas Machalek -// Copyright 2023 Institute of the Czech National Corpus, -// Faculty of Arts, Charles University -// This file is part of MQUERY. -// -// MQUERY is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// MQUERY is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with MQUERY. If not, see . - -package conc - -import ( - "html" - "regexp" - "strings" - "unicode/utf8" - - "github.com/czcorpus/mquery-sru/mango" - - "github.com/rs/zerolog/log" -) - -const ( - invalidParent = 1000000 -) - -var ( - splitPatt = regexp.MustCompile(`\s+`) -) - -type TokenSlice []*Token - -type Token struct { - Word string `json:"word"` - Strong bool `json:"strong"` - Attrs map[string]string `json:"attrs"` -} - -type ConcordanceLine struct { - Text TokenSlice `json:"text"` - Ref string `json:"ref"` -} - -type ConcExamples struct { - Lines []ConcordanceLine `json:"lines"` -} - -type LineParser struct { - attrs []string -} - -func (lp *LineParser) parseTokenQuadruple(s []string) *Token { - mAttrs := make(map[string]string) - rawAttrs := strings.Split(s[2], "/")[1:] - var token Token - if len(rawAttrs) != len(lp.attrs)-1 { - log.Warn(). - Str("value", s[2]). - Int("expectedNumAttrs", len(lp.attrs)-1). - Msg("cannot parse token quadruple") - token.Word = s[0] - for _, attr := range lp.attrs[1:] { - mAttrs[attr] = "N/A" - } - - } else { - for i, attr := range lp.attrs[1:] { - mAttrs[attr] = rawAttrs[i] - } - token.Word = s[0] - token.Strong = len(s[1]) > 2 - token.Attrs = mAttrs - } - return &token -} - -func (lp *LineParser) normalizeTokens(tokens []string) []string { - ans := make([]string, 0, len(tokens)) - var parTok strings.Builder - for _, tok := range tokens { - tokLen := utf8.RuneCountInString(tok) - if tok == "" { - continue - - } else if tokLen == 1 { - ans = append(ans, tok) - - } else if tok[0] == '{' { - if tok[tokLen-1] != '}' { - parTok.WriteString(tok) - - } else { - ans = append(ans, tok) - } - - } else if tok[tokLen-1] == '}' { - parTok.WriteString(tok) - ans = append(ans, parTok.String()) - parTok.Reset() - - } else { - ans = append(ans, tok) - } - } - return ans -} - -// parseRawLine -func (lp *LineParser) parseRawLine(line string) ConcordanceLine { - rtokens := splitPatt.Split(html.EscapeString(line), -1) - items := lp.normalizeTokens(rtokens[1:]) - if len(items)%4 != 0 { - log.Error(). - Str("origLine", line). - Msg("unparseable Manatee KWIC line") - return ConcordanceLine{ - Text: []*Token{{Word: "---- ERROR (unparseable) ----"}}, - Ref: rtokens[0], - } - } - tokens := make(TokenSlice, 0, len(items)/4) - for i := 0; i < len(items); i += 4 { - tokens = append(tokens, lp.parseTokenQuadruple(items[i:i+4])) - } - return ConcordanceLine{Text: tokens, Ref: rtokens[0]} -} - -// Parse converts Manatee-encoded concordance lines into MQuery format. -// It also escapes strings to make them usable in XML documents. -func (lp *LineParser) Parse(data mango.GoConcExamples) []ConcordanceLine { - pLines := make([]ConcordanceLine, len(data.Lines)) - for i, line := range data.Lines { - pLines[i] = lp.parseRawLine(line) - } - return pLines -} - -func NewLineParser(attrs []string) *LineParser { - return &LineParser{ - attrs: attrs, - } -} diff --git a/go.mod b/go.mod index 41a5280..4b762b7 100644 --- a/go.mod +++ b/go.mod @@ -5,54 +5,55 @@ go 1.21 toolchain go1.23.0 require ( - github.com/bytedance/sonic v1.12.5 github.com/czcorpus/cnc-gokit v0.11.0 github.com/czcorpus/manabuild v0.1.0 - github.com/gin-gonic/gin v1.9.1 + github.com/czcorpus/mquery-common v0.4.2 + github.com/gin-gonic/gin v1.10.0 github.com/google/uuid v1.3.0 github.com/mna/pigeon v1.2.1 github.com/redis/go-redis/v9 v9.0.5 github.com/rs/zerolog v1.31.0 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.9.0 ) require ( github.com/BurntSushi/toml v1.4.0 // indirect github.com/briandowns/spinner v1.23.0 // indirect - github.com/bytedance/sonic/loader v0.2.0 // indirect + github.com/bytedance/sonic v1.11.6 // indirect + github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/fatih/color v1.15.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.14.1 // indirect + github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect - github.com/leodido/go-urn v1.2.4 // indirect + github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/leodido/go-urn v1.4.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/natefinch/lumberjack v2.0.0+incompatible // indirect - github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.11 // indirect - golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.16.0 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + golang.org/x/arch v0.8.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/mod v0.13.0 // indirect - golang.org/x/net v0.19.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.15.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/tools v0.14.0 // indirect - google.golang.org/protobuf v1.30.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 6dbd638..37c7f9d 100644 --- a/go.sum +++ b/go.sum @@ -6,11 +6,10 @@ github.com/bsm/ginkgo/v2 v2.7.0 h1:ItPMPH90RbmZJt5GtkcNvIRuGEdwlBItdNVoyzaNQao= github.com/bsm/ginkgo/v2 v2.7.0/go.mod h1:AiKlXPm7ItEHNc/2+OkrNG4E0ITzojb9/xWzvQ9XZ9w= github.com/bsm/gomega v1.26.0 h1:LhQm+AFcgV2M0WyKroMASzAzCAJVpAxQXv4SaI9a69Y= github.com/bsm/gomega v1.26.0/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= -github.com/bytedance/sonic v1.12.5 h1:hoZxY8uW+mT+OpkcUWw4k0fDINtOcVavEsGfzwzFU/w= -github.com/bytedance/sonic v1.12.5/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= +github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= +github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= +github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM= -github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= @@ -22,6 +21,8 @@ github.com/czcorpus/cnc-gokit v0.11.0 h1:0DSWVAMu6TyBLxeBfTRB/yezoFKQPy1zW8yqUJm github.com/czcorpus/cnc-gokit v0.11.0/go.mod h1:BZSRrYUFIHXVIiuqnSoZbfXfL2X/gHWG3w35aIVW36U= github.com/czcorpus/manabuild v0.1.0 h1:60sgRj4oM+XqbCKtn/HL+6URXzsfQQCy9TyBWY2iaZE= github.com/czcorpus/manabuild v0.1.0/go.mod h1:dj2iAsZObs4yJhF6KkQs5oH2AAyZlrmaNwMGV44hLbk= +github.com/czcorpus/mquery-common v0.4.2 h1:hHeR9ih4XR46erRr7XrcaUCA0BTFdJA8wqj5lc/CEro= +github.com/czcorpus/mquery-common v0.4.2/go.mod h1:xAWGWB1e4P43bj0obOWq+Ie5NvutM+ZeSTUB7rEb6po= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -29,25 +30,23 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= -github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= -github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= +github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= +github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= -github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= +github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.14.1 h1:9c50NUPC30zyuKprjL3vNZ0m5oG+jU0zvx4AqHGnv4k= -github.com/go-playground/validator/v10 v10.14.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= +github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= +github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -56,16 +55,17 @@ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= -github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= +github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= -github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= -github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mna/pigeon v1.2.1 h1:m5FxEbGdQxLaiHF+QurbWUAjmRqd5cstjIPN89svYgg= github.com/mna/pigeon v1.2.1/go.mod h1:BUZAoRldTdU7Ac3WYkXy8hzIHfCgj1doJxGjlB+AbLI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -75,8 +75,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/natefinch/lumberjack v2.0.0+incompatible h1:4QJd3OLAMgj7ph+yZTuX13Ld4UpgHp07nNdFX7mqFfM= github.com/natefinch/lumberjack v2.0.0+incompatible/go.mod h1:Wi9p2TTF5DG5oU+6YfsmYQpsTIOm0B1VNzQg9Mw6nPk= -github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= -github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -88,43 +88,42 @@ github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWR github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= -github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= -golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY= -golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= +golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= -golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= -golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= -google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= @@ -135,3 +134,4 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/handler/v12/searchrt.go b/handler/v12/searchrt.go index 5e480b5..32fef35 100644 --- a/handler/v12/searchrt.go +++ b/handler/v12/searchrt.go @@ -25,12 +25,11 @@ import ( "strconv" "strings" - "github.com/bytedance/sonic" "github.com/czcorpus/cnc-gokit/collections" "github.com/czcorpus/cnc-gokit/logging" + "github.com/czcorpus/mquery-common/concordance" "github.com/czcorpus/mquery-sru/backlink" "github.com/czcorpus/mquery-sru/corpus" - "github.com/czcorpus/mquery-sru/corpus/conc" "github.com/czcorpus/mquery-sru/general" "github.com/czcorpus/mquery-sru/handler/v12/schema" "github.com/czcorpus/mquery-sru/mango" @@ -197,7 +196,7 @@ func (a *FCSSubHandlerV12) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ ranges := query.CalculatePartialRanges(corpora, startRecord-1, maximumRecords) // make searches - waits := make([]<-chan *rdb.WorkerResult, len(ranges)) + waits := make([]<-chan result.ConcResult, len(ranges)) for i, rng := range ranges { ast, fcsErr := a.translateQuery(rng.Rsc, fcsQuery) @@ -221,24 +220,17 @@ func (a *FCSSubHandlerV12) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ general.DCGeneralSystemError, 0, err.Error()) return ans, general.ConformandGeneralServerError } - args, err := sonic.Marshal(rdb.ConcExampleArgs{ - CorpusPath: a.corporaConf.GetRegistryPath(rng.Rsc), - Query: query, - Attrs: retrieveAttrs, - StartLine: rng.From, - MaxItems: maximumRecords, - MaxContext: a.corporaConf.MaximumContext, - ViewContextStruct: rscConf.ViewContextStruct, - }) - if err != nil { - ans.Diagnostics = schema.NewXMLDiagnostics() - ans.Diagnostics.AddDfltMsgDiagnostic( - general.DCGeneralSystemError, 0, err.Error()) - return ans, http.StatusInternalServerError - } wait, err := a.radapter.PublishQuery(rdb.Query{ Func: "concExample", - Args: args, + Args: rdb.ConcQueryArgs{ + CorpusPath: a.corporaConf.GetRegistryPath(rng.Rsc), + Query: query, + Attrs: retrieveAttrs, + StartLine: rng.From, + MaxItems: maximumRecords, + MaxContext: a.corporaConf.MaximumContext, + ViewContextStruct: rscConf.ViewContextStruct, + }, }) if err != nil { ans.Diagnostics = schema.NewXMLDiagnostics() @@ -253,16 +245,9 @@ func (a *FCSSubHandlerV12) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ usedQueries := make(map[string]string) // maps resource ID to Manatee CQL query var totalConcSize int for i, wait := range waits { - rawResult := <-wait - result, err := rdb.DeserializeConcExampleResult(rawResult) - if err != nil { - ans.Diagnostics = schema.NewXMLDiagnostics() - ans.Diagnostics.AddDfltMsgDiagnostic( - general.DCGeneralSystemError, 0, err.Error()) - return ans, http.StatusInternalServerError - } - if err := result.Err(); err != nil { - if err.Error() == mango.ErrRowsRangeOutOfConc.Error() { + result := <-wait + if result.Error != nil { + if result.Error == mango.ErrRowsRangeOutOfConc { fromResource.RscSetErrorAt(i, err) } else { @@ -325,8 +310,9 @@ func (a *FCSSubHandlerV12) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ XMLNSHits: "http://clarin.eu/fcs/dataview/hits", Data: strings.Join( collections.SliceMap( - item.Text, - func(token *conc.Token, i int) string { + item.Text.Tokens(), + func(token *concordance.Token, i int) string { + fmt.Println("TOK: ", token) if token.Strong { return "" + token.Word + "" } diff --git a/handler/v20/searchrt.go b/handler/v20/searchrt.go index e8a428a..fe65c6f 100644 --- a/handler/v20/searchrt.go +++ b/handler/v20/searchrt.go @@ -25,12 +25,11 @@ import ( "strconv" "strings" - "github.com/bytedance/sonic" "github.com/czcorpus/cnc-gokit/collections" "github.com/czcorpus/cnc-gokit/logging" + "github.com/czcorpus/mquery-common/concordance" "github.com/czcorpus/mquery-sru/backlink" "github.com/czcorpus/mquery-sru/corpus" - "github.com/czcorpus/mquery-sru/corpus/conc" "github.com/czcorpus/mquery-sru/general" "github.com/czcorpus/mquery-sru/handler/v20/schema" "github.com/czcorpus/mquery-sru/mango" @@ -100,7 +99,11 @@ func (a *FCSSubHandlerV20) translateQuery( return ast, fcsErr } -func (a *FCSSubHandlerV20) getAttrByLayers(commonPosAttrs []corpus.PosAttr, layer corpus.LayerType, token conc.Token) string { +func (a *FCSSubHandlerV20) getAttrByLayers( + commonPosAttrs []corpus.PosAttr, + layer corpus.LayerType, + token concordance.Token, +) string { for _, posAttr := range commonPosAttrs { if posAttr.Layer == layer { if v, ok := token.Attrs[posAttr.Name]; ok { @@ -236,7 +239,7 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ ranges := query.CalculatePartialRanges(corpora, startRecord-1, maximumRecords) // make searches - waits := make([]<-chan *rdb.WorkerResult, len(ranges)) + waits := make([]<-chan result.ConcResult, len(ranges)) for i, rng := range ranges { ast, fcsErr := a.translateQuery(rng.Rsc, fcsQuery, queryType) @@ -260,24 +263,17 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ general.DCGeneralSystemError, 0, err.Error()) return ans, general.ConformandGeneralServerError } - args, err := sonic.Marshal(rdb.ConcExampleArgs{ - CorpusPath: a.corporaConf.GetRegistryPath(rng.Rsc), - Query: query, - Attrs: retrieveAttrs, - StartLine: rng.From, - MaxItems: maximumRecords, - MaxContext: a.corporaConf.MaximumContext, - ViewContextStruct: rscConf.ViewContextStruct, - }) - if err != nil { - ans.Diagnostics = schema.NewXMLDiagnostics() - ans.Diagnostics.AddDfltMsgDiagnostic( - general.DCGeneralSystemError, 0, err.Error()) - return ans, http.StatusInternalServerError - } wait, err := a.radapter.PublishQuery(rdb.Query{ Func: "concExample", - Args: args, + Args: rdb.ConcQueryArgs{ + CorpusPath: a.corporaConf.GetRegistryPath(rng.Rsc), + Query: query, + Attrs: retrieveAttrs, + StartLine: rng.From, + MaxItems: maximumRecords, + MaxContext: a.corporaConf.MaximumContext, + ViewContextStruct: rscConf.ViewContextStruct, + }, }) if err != nil { ans.Diagnostics = schema.NewXMLDiagnostics() @@ -292,25 +288,16 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ usedQueries := make(map[string]string) // maps resource ID to Manatee CQL query var totalConcSize int for i, wait := range waits { - rawResult := <-wait - result, err := rdb.DeserializeConcExampleResult(rawResult) - if err != nil { + result := <-wait + if result.Error == mango.ErrRowsRangeOutOfConc { + fromResource.RscSetErrorAt(i, err) + + } else if result.Error != nil { ans.Diagnostics = schema.NewXMLDiagnostics() ans.Diagnostics.AddDfltMsgDiagnostic( - general.DCGeneralSystemError, 0, err.Error()) + general.DCQueryCannotProcess, 0, result.Error.Error()) return ans, http.StatusInternalServerError } - if err := result.Err(); err != nil { - if err.Error() == mango.ErrRowsRangeOutOfConc.Error() { - fromResource.RscSetErrorAt(i, err) - - } else { - ans.Diagnostics = schema.NewXMLDiagnostics() - ans.Diagnostics.AddDfltMsgDiagnostic( - general.DCQueryCannotProcess, 0, err.Error()) - return ans, http.StatusInternalServerError - } - } fromResource.SetRscLines(ranges[i].Rsc, result) usedQueries[ranges[i].Rsc] = result.Query totalConcSize += result.ConcSize @@ -376,8 +363,8 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ XMLNSHits: "http://clarin.eu/fcs/dataview/hits", Data: strings.Join( collections.SliceMap( - item.Text, - func(token *conc.Token, i int) string { + item.Text.Tokens(), + func(token *concordance.Token, i int) string { if token.Strong { return "" + token.Word + "" } @@ -397,8 +384,8 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ Unit: "item", XMLNSAdv: "http://clarin.eu/fcs/dataview/advanced", Segments: collections.SliceMap( - item.Text, - func(token *conc.Token, i int) schema.XMLSRAdvSegment { + item.Text.Tokens(), + func(token *concordance.Token, i int) schema.XMLSRAdvSegment { segment := schema.XMLSRAdvSegment{ ID: fmt.Sprintf("s%d", i), Start: segmentPos, @@ -414,8 +401,8 @@ func (a *FCSSubHandlerV20) searchRetrieve(ctx *gin.Context, fcsResponse *FCSRequ return schema.XMLSRAdvLayer{ ID: layer.GetResultID(), Values: collections.SliceMap( - item.Text, - func(token *conc.Token, i int) schema.XMLSRAdvValue { + item.Text.Tokens(), + func(token *concordance.Token, i int) schema.XMLSRAdvValue { return schema.XMLSRAdvValue{ Ref: fmt.Sprintf("s%d", i), Highlight: general.ReturnIf(token.Strong, fmt.Sprintf("s%d", i), ""), diff --git a/mango/mango.cc b/mango/mango.cc index 89e6f69..c38f37f 100644 --- a/mango/mango.cc +++ b/mango/mango.cc @@ -22,12 +22,31 @@ #include "concord/concget.hh" #include "query/cqpeval.hh" #include "mango.h" +#include using namespace std; + +/** + * @brief Based on provided query, return at most `limit` sentences matching the query. + * + * @param corpusPath + * @param query + * @param attrs Positional attributes (comma-separated) to be attached to returned tokens + * @param limit + * @return KWICRowsRetval + */ KWICRowsRetval conc_examples( - const char* corpusPath, const char* query, const char* attrs, PosInt fromLine, PosInt limit, - PosInt maxContext, const char* viewContextStruct) { + const char* corpusPath, + const char* query, + const char* attrs, + const char* structs, + const char* refs, + const char* refsSplitter, + PosInt fromLine, + PosInt limit, + PosInt maxContext, + const char* viewContextStruct) { string cPath(corpusPath); try { @@ -59,15 +78,18 @@ KWICRowsRetval conc_examples( } conc->shuffle(); PosInt concSize = conc->size(); + std::string cppContextStruct(viewContextStruct); + std::string halfLeft = "-" + std::to_string(int(std::floor(maxContext / 2.0))); + std::string halfRight = std::to_string(int(std::ceil(maxContext / 2.0))); KWICLines* kl = new KWICLines( corp, conc->RS(true, fromLine, fromLine+limit), - ("-1:"+std::string(viewContextStruct)).c_str(), - ("1:"+std::string(viewContextStruct)).c_str(), + cppContextStruct.empty() ? halfLeft.c_str() : ("-1:"+cppContextStruct).c_str(), + cppContextStruct.empty() ? halfRight.c_str() : ("1:"+cppContextStruct).c_str(), attrs, attrs, - "", - "#", + structs, + refs, maxContext, false ); @@ -82,7 +104,7 @@ KWICRowsRetval conc_examples( auto rgt = kl->get_right(); std::ostringstream buffer; - buffer << kl->get_refs() << " "; + buffer << kl->get_refs() << refsSplitter; for (size_t i = 0; i < lft.size(); ++i) { if (i > 0) { diff --git a/mango/mango.go b/mango/mango.go index 2483006..c768bd2 100644 --- a/mango/mango.go +++ b/mango/mango.go @@ -28,6 +28,9 @@ import ( "fmt" "strings" "unsafe" + + "github.com/czcorpus/cnc-gokit/collections" + "github.com/czcorpus/mquery-common/concordance" ) const ( @@ -45,22 +48,34 @@ type GoConcSize struct { CorpusSize int64 } -type GoConcExamples struct { +type GoConcordance struct { Lines []string ConcSize int } -func GetConcExamples( +func GetConcordance( corpusPath, query string, attrs []string, + structs []string, + refs []string, fromLine, maxItems, maxContext int, viewContextStruct string, -) (GoConcExamples, error) { +) (GoConcordance, error) { + if !collections.SliceContains(refs, "#") { + refs = append([]string{"#"}, refs...) + } ans := C.conc_examples( - C.CString(corpusPath), C.CString(query), C.CString(strings.Join(attrs, ",")), - C.longlong(fromLine), C.longlong(maxItems), C.longlong(maxContext), + C.CString(corpusPath), + C.CString(query), + C.CString(strings.Join(attrs, ",")), + C.CString(strings.Join(structs, ",")), + C.CString(strings.Join(refs, ",")), + C.CString(concordance.RefsEndMark), + C.longlong(fromLine), + C.longlong(maxItems), + C.longlong(maxContext), C.CString(viewContextStruct)) - var ret GoConcExamples + var ret GoConcordance ret.Lines = make([]string, 0, maxItems) ret.ConcSize = int(ans.concSize) if ans.err != nil { @@ -80,7 +95,7 @@ func GetConcExamples( // we must test str len as our c++ wrapper may return it // e.g. in case our offset is higher than actual num of lines if len(str) > 0 { - ret.Lines = append(ret.Lines, C.GoString(tmp[i])) + ret.Lines = append(ret.Lines, str) } } return ret, nil diff --git a/mango/mango.h b/mango/mango.h index 7f297d6..5e50ba1 100644 --- a/mango/mango.h +++ b/mango/mango.h @@ -55,10 +55,16 @@ typedef struct KWICRowsRetval { * @return KWICRowsRetval */ KWICRowsRetval conc_examples( - const char* corpusPath, const char*query, const char* attrs, PosInt fromLine, PosInt limit, - PosInt maxContext, const char* viewContextStruct); - - + const char* corpusPath, + const char*query, + const char* attrs, + const char* structs, + const char* refs, + const char* refsSplitter, + PosInt fromLine, + PosInt limit, + PosInt maxContext, + const char* viewContextStruct); /** * @brief This function frees all the allocated memory * for a concordance example. It is intended to be called diff --git a/rdb/client.go b/rdb/client.go index 9cc43d2..b08df61 100644 --- a/rdb/client.go +++ b/rdb/client.go @@ -19,13 +19,14 @@ package rdb import ( + "bytes" "context" + "encoding/gob" "encoding/json" "errors" "fmt" "time" - "github.com/bytedance/sonic" "github.com/czcorpus/mquery-sru/result" "github.com/google/uuid" @@ -48,13 +49,12 @@ var ( ) type Query struct { - ResultType result.ResultType `json:"resultType"` - Channel string `json:"channel"` - Func string `json:"func"` - Args json.RawMessage `json:"args"` + Channel string `json:"channel"` + Func string `json:"func"` + Args ConcQueryArgs `json:"args"` } -type ConcExampleArgs struct { +type ConcQueryArgs struct { CorpusPath string `json:"corpusPath"` Query string `json:"query"` Attrs []string `json:"attrs"` @@ -65,7 +65,7 @@ type ConcExampleArgs struct { } func (q Query) ToJSON() (string, error) { - ans, err := sonic.Marshal(q) + ans, err := json.Marshal(q) if err != nil { return "", err } @@ -74,7 +74,10 @@ func (q Query) ToJSON() (string, error) { func DecodeQuery(q string) (Query, error) { var ans Query - err := sonic.Unmarshal([]byte(q), &ans) + var buff bytes.Buffer + buff.WriteString(q) + dec := gob.NewDecoder(&buff) + err := dec.Decode(&ans) return ans, err } @@ -137,7 +140,7 @@ func (a *Adapter) SomeoneListens(query Query) (bool, error) { // process fails during the calculation, a respective error // is packed into the WorkerResult value. The error returned // by this method means that the publishing itself failed. -func (a *Adapter) PublishQuery(query Query) (<-chan *WorkerResult, error) { +func (a *Adapter) PublishQuery(query Query) (<-chan result.ConcResult, error) { query.Channel = fmt.Sprintf("%s:%s", a.channelResultPrefix, uuid.New().String()) log.Debug(). Str("channel", query.Channel). @@ -145,16 +148,17 @@ func (a *Adapter) PublishQuery(query Query) (<-chan *WorkerResult, error) { Any("args", query.Args). Msg("publishing query") - msg, err := query.ToJSON() + var msg bytes.Buffer + enc := gob.NewEncoder(&msg) + err := enc.Encode(query) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to publish query: %w", err) } sub := a.redis.Subscribe(a.ctx, query.Channel) - - if err := a.redis.LPush(a.ctx, DefaultQueueKey, msg).Err(); err != nil { + if err := a.redis.LPush(a.ctx, DefaultQueueKey, msg.String()).Err(); err != nil { return nil, err } - ansChan := make(chan *WorkerResult) + ansChan := make(chan result.ConcResult) // now we wait for response and send result via `ans` go func() { @@ -163,7 +167,7 @@ func (a *Adapter) PublishQuery(query Query) (<-chan *WorkerResult, error) { close(ansChan) }() - ans := new(WorkerResult) + var ans result.ConcResult tmr := time.NewTimer(a.queryAnswerTimeout) for { @@ -175,26 +179,22 @@ func (a *Adapter) PublishQuery(query Query) (<-chan *WorkerResult, error) { Msg("received result") cmd := a.redis.Get(a.ctx, item.Payload) if cmd.Err() != nil { - ans.AttachValue( - &result.ErrorResult{ - ResultType: query.ResultType, - Error: cmd.Err().Error(), - }, - ) + ans.Error = cmd.Err() } else { - err := sonic.Unmarshal([]byte(cmd.Val()), &ans) + var buf bytes.Buffer + buf.WriteString(cmd.Val()) + dec := gob.NewDecoder(&buf) + err := dec.Decode(&ans) if err != nil { - ans.AttachValue(&result.ErrorResult{Error: err.Error()}) + ans.Error = err } } ansChan <- ans tmr.Stop() return case <-tmr.C: - ans.AttachValue(&result.ErrorResult{ - Error: fmt.Sprintf("worker result timeouted (%v)", DefaultQueryAnswerTimeout), - }) + ans.Error = fmt.Errorf("worker result timeouted (%d)", DefaultQueryAnswerTimeout) ansChan <- ans return } @@ -226,16 +226,19 @@ func (a *Adapter) DequeueQuery() (Query, error) { // PublishResult sends notification via Redis PUBSUB mechanism // and also stores the result so a notified listener can retrieve // it. -func (a *Adapter) PublishResult(channelName string, value *WorkerResult) error { +func (a *Adapter) PublishResult(channelName string, value *result.ConcResult) error { log.Debug(). Str("channel", channelName). - Str("resultType", value.ResultType.String()). + Str("resultType", "concordance"). Msg("publishing result") - data, err := sonic.Marshal(value) + + var msg bytes.Buffer + enc := gob.NewEncoder(&msg) + err := enc.Encode(value) if err != nil { return fmt.Errorf("failed to serialize result: %w", err) } - a.redis.Set(a.ctx, channelName, string(data), DefaultResultExpiration) + a.redis.Set(a.ctx, channelName, msg.String(), DefaultResultExpiration) return a.redis.Publish(a.ctx, channelName, channelName).Err() } diff --git a/rdb/results.go b/rdb/results.go deleted file mode 100644 index 6de3476..0000000 --- a/rdb/results.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2023 Tomas Machalek -// Copyright 2023 Institute of the Czech National Corpus, -// Faculty of Arts, Charles University -// This file is part of MQUERY. -// -// MQUERY is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// MQUERY is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with MQUERY. If not, see . - -package rdb - -import ( - "encoding/json" - "fmt" - - "github.com/bytedance/sonic" - "github.com/czcorpus/mquery-sru/result" -) - -type WorkerResult struct { - ID string `json:"id"` - ResultType result.ResultType `json:"resultType"` - Value json.RawMessage `json:"value"` -} - -func (wr *WorkerResult) AttachValue(value result.SerializableResult) error { - rawValue, err := sonic.Marshal(value) - if err != nil { - return err - } - wr.Value = rawValue - return nil -} - -func CreateWorkerResult(value result.SerializableResult) (*WorkerResult, error) { - rawValue, err := sonic.Marshal(value) - if err != nil { - return nil, err - } - return &WorkerResult{Value: rawValue, ResultType: value.Type()}, nil -} - -func DeserializeConcExampleResult(w *WorkerResult) (result.ConcExample, error) { - var ans result.ConcExample - err := sonic.Unmarshal(w.Value, &ans) - if err != nil { - return ans, fmt.Errorf("failed to deserialize ConcExample: %w", err) - } - return ans, nil -} diff --git a/result/error.go b/result/error.go deleted file mode 100644 index bc47a56..0000000 --- a/result/error.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2023 Tomas Machalek -// Copyright 2023 Institute of the Czech National Corpus, -// Faculty of Arts, Charles University -// This file is part of MQUERY. -// -// MQUERY is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// MQUERY is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with MQUERY. If not, see . - -package result - -import "errors" - -type ErrorResult struct { - ResultType ResultType `json:"resultType"` - Error string `json:"error"` -} - -func (res *ErrorResult) Err() error { - return errors.New(res.Error) -} - -func (res *ErrorResult) Type() ResultType { - return res.ResultType -} diff --git a/result/general.go b/result/general.go index f31f3c7..bd2bd78 100644 --- a/result/general.go +++ b/result/general.go @@ -19,9 +19,8 @@ package result import ( + "encoding/json" "time" - - "github.com/bytedance/sonic" ) const ( @@ -37,7 +36,7 @@ type JobLog struct { } func (jl *JobLog) ToJSON() (string, error) { - ans, err := sonic.Marshal(jl) + ans, err := json.Marshal(jl) if err != nil { return "", err } diff --git a/result/result.go b/result/result.go index 6dcc1ce..beeab8b 100644 --- a/result/result.go +++ b/result/result.go @@ -19,9 +19,7 @@ package result import ( - "errors" - - "github.com/czcorpus/mquery-sru/corpus/conc" + "github.com/czcorpus/mquery-common/concordance" ) const ( @@ -33,42 +31,13 @@ const ( ResultTypeError = "Error" ) -type ResultType string - -func (rt ResultType) IsValid() bool { - return rt == ResultTypeFx || rt == ResultTypeFy || rt == ResultTypeFxy -} - -func (rt ResultType) String() string { - return string(rt) -} - -type SerializableResult interface { - Type() ResultType - Err() error -} - -// ---- - -type ConcExample struct { - Lines []conc.ConcordanceLine `json:"lines"` - ConcSize int `json:"concSize"` - ResultType ResultType `json:"resultType"` - Query string `json:"query"` - Error string `json:"error"` -} - -func (res *ConcExample) Err() error { - if res.Error != "" { - return errors.New(res.Error) - } - return nil -} - -func (res *ConcExample) Type() ResultType { - return res.ResultType +type ConcResult struct { + Lines []concordance.Line `json:"lines"` + ConcSize int `json:"concSize"` + Query string `json:"query"` + Error error `json:"error"` } -func (res *ConcExample) NumLines() int { +func (res *ConcResult) NumLines() int { return len(res.Lines) } diff --git a/result/rscrr.go b/result/rscrr.go index 8841524..ec65ede 100644 --- a/result/rscrr.go +++ b/result/rscrr.go @@ -21,7 +21,7 @@ package result import ( "fmt" - "github.com/czcorpus/mquery-sru/corpus/conc" + "github.com/czcorpus/mquery-common/concordance" "github.com/czcorpus/mquery-sru/mango" ) @@ -29,7 +29,7 @@ type item struct { Name string CurrLine int Err error - Lines ConcExample + Lines ConcResult Started bool } @@ -56,7 +56,7 @@ func (r *RoundRobinLineSel) DescribeCurr() string { // CurrLine returns the current line from a current resource // during an iteration. It is intended to be called within a loop // controlled by method `Next()` -func (r *RoundRobinLineSel) CurrLine() *conc.ConcordanceLine { +func (r *RoundRobinLineSel) CurrLine() *concordance.Line { if r.nextOutputLineIdx >= r.maxLines+1 { // lineCounter is always ahead by 1 (that's why `+1`) return nil } @@ -85,7 +85,7 @@ func (r *RoundRobinLineSel) iterationStarted() bool { // SetRscLines sets concordance data for a resource (corpus). // The method can be called only if the `Next()` method has not // been called yet. Otherwise the call panics. -func (r *RoundRobinLineSel) SetRscLines(rsc string, c ConcExample) { +func (r *RoundRobinLineSel) SetRscLines(rsc string, c ConcResult) { if r.iterationStarted() { panic("cannot add resource lines to an already iterating RoundRobinLineSel") } diff --git a/result/rscrr_test.go b/result/rscrr_test.go index 6faf66b..b133d30 100644 --- a/result/rscrr_test.go +++ b/result/rscrr_test.go @@ -21,106 +21,108 @@ package result import ( "testing" - "github.com/czcorpus/mquery-sru/corpus/conc" - + "github.com/czcorpus/mquery-common/concordance" "github.com/stretchr/testify/assert" ) func createSingleResourceEmptyResult() *RoundRobinLineSel { r := NewRoundRobinLineSel(3, "corp1") - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{}}) + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{}}) return r } func createSingleResource() *RoundRobinLineSel { r := NewRoundRobinLineSel(4, "corp1") - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "foo1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo3"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo4"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo3"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo4"}}}, }}) return r } func createTwoResourcesOneEmpty() *RoundRobinLineSel { r := NewRoundRobinLineSel(4, "corp1", "corp2") - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "foo1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo3"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo4"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo3"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo4"}}}, }}) - r.SetRscLines("corp2", ConcExample{Lines: []conc.ConcordanceLine{}}) + r.SetRscLines("corp2", ConcResult{Lines: []concordance.Line{}}) return r } func createTwoResourcesSecondSmaller() *RoundRobinLineSel { r := NewRoundRobinLineSel(8, "corp1", "corp2") // 8 = "we expect 8 (but we get less)" - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "foo1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo3"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo4"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo3"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo4"}}}, }}) - r.SetRscLines("corp2", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "bar1"}}}, + r.SetRscLines("corp2", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar1"}}}, }}) return r } func createTwoResourcesFirstSmaller() *RoundRobinLineSel { r := NewRoundRobinLineSel(8, "corp1", "corp2") // 8 = "we expect 8 (but we get less)" - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "foo1"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo1"}}}, }}) - r.SetRscLines("corp2", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "bar1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar3"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar4"}}}, + r.SetRscLines("corp2", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar3"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar4"}}}, }}) return r } func createResource() *RoundRobinLineSel { r := NewRoundRobinLineSel(9, "corp1", "corp2", "corp3") - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "foo1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "foo3"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "foo3"}}}, }}) - r.SetRscLines("corp2", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "bar1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar3"}}}, + r.SetRscLines("corp2", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar3"}}}, }}) - r.SetRscLines("corp3", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "baz1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "baz2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "baz3"}}}, + r.SetRscLines("corp3", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz3"}}}, }}) return r } func createResourceWithSomeEmpty() *RoundRobinLineSel { r := NewRoundRobinLineSel(9, "corp1", "corp2", "corp3") - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{}}) - r.SetRscLines("corp2", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "bar1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "bar3"}}}, + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{}}) + r.SetRscLines("corp2", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "bar3"}}}, }}) - r.SetRscLines("corp3", ConcExample{Lines: []conc.ConcordanceLine{ - {Text: conc.TokenSlice{&conc.Token{Word: "baz1"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "baz2"}}}, - {Text: conc.TokenSlice{&conc.Token{Word: "baz3"}}}, + r.SetRscLines("corp3", ConcResult{Lines: []concordance.Line{ + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz1"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz2"}}}, + {Text: concordance.TokenSlice{&concordance.Token{Word: "baz3"}}}, }}) return r } -func firstWord(line *conc.ConcordanceLine) string { - return line.Text[0].Word +func firstWord(line *concordance.Line) string { + if v, ok := line.Text[0].(*concordance.Token); ok { + return v.Word + } + return "" } func TestTypicalSetup(t *testing.T) { @@ -190,7 +192,7 @@ func TestSetRscLinesPanicsIfIterationStarted(t *testing.T) { r := createResource() r.Next() assert.Panics(t, func() { - r.SetRscLines("corp1", ConcExample{Lines: []conc.ConcordanceLine{}}) + r.SetRscLines("corp1", ConcResult{Lines: []concordance.Line{}}) }) } diff --git a/worker/worker.go b/worker/worker.go index 1305586..0a1aff0 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -24,8 +24,7 @@ import ( "os" "time" - "github.com/bytedance/sonic" - "github.com/czcorpus/mquery-sru/corpus/conc" + "github.com/czcorpus/mquery-common/concordance" "github.com/czcorpus/mquery-sru/mango" "github.com/czcorpus/mquery-sru/rdb" "github.com/czcorpus/mquery-sru/result" @@ -53,17 +52,12 @@ type Worker struct { currJobLog *result.JobLog } -func (w *Worker) publishResult(res result.SerializableResult, channel string) error { - ans, err := rdb.CreateWorkerResult(res) - if err != nil { - return err - } - +func (w *Worker) publishResult(res *result.ConcResult, channel string) error { w.currJobLog.End = time.Now() - w.currJobLog.Err = res.Err() + w.currJobLog.Err = res.Error w.jobLogger.Log(*w.currJobLog) w.currJobLog = nil - return w.radapter.PublishResult(channel, ans) + return w.radapter.PublishResult(channel, res) } func (w *Worker) tryNextQuery() error { @@ -99,23 +93,9 @@ func (w *Worker) tryNextQuery() error { Func: query.Func, Begin: time.Now(), } - - switch query.Func { - case "concExample": - var args rdb.ConcExampleArgs - if err := sonic.Unmarshal(query.Args, &args); err != nil { - return err - } - ans := w.concExample(args) - ans.ResultType = query.ResultType - if err := w.publishResult(ans, query.Channel); err != nil { - return err - } - default: - ans := &result.ErrorResult{Error: fmt.Sprintf("unknown query function: %s", query.Func)} - if err = w.publishResult(ans, query.Channel); err != nil { - return err - } + ans := w.ConcResult(query.Args) + if err := w.publishResult(ans, query.Channel); err != nil { + return fmt.Errorf("failed to publish result: %w", err) } return nil } @@ -136,29 +116,37 @@ func (w *Worker) Listen() { } } -func (w *Worker) concExample(args rdb.ConcExampleArgs) (ans *result.ConcExample) { - ans = new(result.ConcExample) +func (w *Worker) ConcResult(args rdb.ConcQueryArgs) (ans *result.ConcResult) { + ans = new(result.ConcResult) defer func() { if r := recover(); r != nil { - ans = &result.ConcExample{ - Error: fmt.Sprintf("%v", r), - Lines: make([]conc.ConcordanceLine, 0), + ans = &result.ConcResult{ + Error: fmt.Errorf("%v", r), + Lines: make([]concordance.Line, 0), } } }() - concEx, err := mango.GetConcExamples( - args.CorpusPath, args.Query, args.Attrs, args.StartLine, args.MaxItems, - args.MaxContext, args.ViewContextStruct) + concEx, err := mango.GetConcordance( + args.CorpusPath, + args.Query, + args.Attrs, + []string{}, + []string{}, + args.StartLine, + args.MaxItems, + args.MaxContext, + args.ViewContextStruct, + ) if err != nil { - ans.Error = err.Error() + ans.Error = err return } log.Debug(). Str("query", args.Query). Int("concSize", concEx.ConcSize). Msg("obtained concordance result") - parser := conc.NewLineParser(args.Attrs) - ans.Lines = parser.Parse(concEx) + parser := concordance.NewLineParser(args.Attrs) + ans.Lines = parser.Parse(concEx.Lines) ans.ConcSize = concEx.ConcSize ans.Query = args.Query return