forkjo/vendor/github.com/blevesearch/bleve/document/field_text.go

140 lines
3.8 KiB
Go
Raw Normal View History

2017-01-25 03:43:02 +01:00
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
2017-01-25 03:43:02 +01:00
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/size"
2017-01-25 03:43:02 +01:00
)
var reflectStaticSizeTextField int
func init() {
var f TextField
reflectStaticSizeTextField = int(reflect.TypeOf(f).Size())
}
2018-05-19 14:49:46 +02:00
const DefaultTextIndexingOptions = IndexField | DocValues
2017-01-25 03:43:02 +01:00
type TextField struct {
name string
arrayPositions []uint64
options IndexingOptions
analyzer *analysis.Analyzer
value []byte
numPlainTextBytes uint64
}
func (t *TextField) Size() int {
return reflectStaticSizeTextField + size.SizeOfPtr +
len(t.name) +
len(t.arrayPositions)*size.SizeOfUint64 +
len(t.value)
}
2017-01-25 03:43:02 +01:00
func (t *TextField) Name() string {
return t.name
}
func (t *TextField) ArrayPositions() []uint64 {
return t.arrayPositions
}
func (t *TextField) Options() IndexingOptions {
return t.options
}
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
var tokens analysis.TokenStream
if t.analyzer != nil {
bytesToAnalyze := t.Value()
if t.options.IsStored() {
// need to copy
bytesCopied := make([]byte, len(bytesToAnalyze))
copy(bytesCopied, bytesToAnalyze)
bytesToAnalyze = bytesCopied
}
tokens = t.analyzer.Analyze(bytesToAnalyze)
} else {
tokens = analysis.TokenStream{
&analysis.Token{
Start: 0,
End: len(t.value),
Term: t.value,
Position: 1,
Type: analysis.AlphaNumeric,
},
}
}
fieldLength := len(tokens) // number of tokens in this doc field
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors())
return fieldLength, tokenFreqs
}
func (t *TextField) Analyzer() *analysis.Analyzer {
return t.analyzer
}
2017-01-25 03:43:02 +01:00
func (t *TextField) Value() []byte {
return t.value
}
func (t *TextField) GoString() string {
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: DefaultTextIndexingOptions,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}