[FIX] Set max fuzziness to 2 for bleve (#3444)
closes #3443 regression fromab5f0b7558
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3444 Reviewed-by: Otto <otto@codeberg.org> Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com> Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com> (cherry picked from commita641ebf221
)
This commit is contained in:
parent
9cd8cd4874
commit
966975a3e0
4 changed files with 26 additions and 2 deletions
|
@ -41,6 +41,8 @@ const (
|
|||
maxBatchSize = 16
|
||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
||||
fuzzyDenominator = 4
|
||||
// see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
|
||||
maxFuzziness = 2
|
||||
)
|
||||
|
||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||
|
@ -246,7 +248,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||
keywordQuery = phraseQuery
|
||||
if opts.IsKeywordFuzzy {
|
||||
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
|
||||
phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator)
|
||||
}
|
||||
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
|
|
|
@ -49,6 +49,12 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
|||
IDs: []int64{},
|
||||
Langs: 0,
|
||||
},
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "Description for",
|
||||
IDs: []int64{repoID},
|
||||
Langs: 1,
|
||||
},
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "repo1",
|
||||
|
|
|
@ -39,6 +39,8 @@ const (
|
|||
maxBatchSize = 16
|
||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
||||
fuzzyDenominator = 4
|
||||
// see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
|
||||
maxFuzziness = 2
|
||||
)
|
||||
|
||||
// IndexerData an update to the issue indexer
|
||||
|
@ -162,7 +164,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||
if options.Keyword != "" {
|
||||
fuzziness := 0
|
||||
if options.IsFuzzyKeyword {
|
||||
fuzziness = len(options.Keyword) / fuzzyDenominator
|
||||
fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator)
|
||||
}
|
||||
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
|
|
|
@ -130,6 +130,20 @@ var cases = []*testIndexerCase{
|
|||
ExpectedIDs: []int64{1002, 1001, 1000},
|
||||
ExpectedTotal: 3,
|
||||
},
|
||||
{
|
||||
Name: "Keyword Fuzzy",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1000, Title: "hi hello world"},
|
||||
{ID: 1001, Content: "hi hello world"},
|
||||
{ID: 1002, Comments: []string{"hi", "hello world"}},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello wrold",
|
||||
IsFuzzyKeyword: true,
|
||||
},
|
||||
ExpectedIDs: []int64{1002, 1001, 1000},
|
||||
ExpectedTotal: 3,
|
||||
},
|
||||
{
|
||||
Name: "RepoIDs",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
|
|
Loading…
Reference in a new issue