本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

298 lines
7.9KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package search
  15. import (
  16. "fmt"
  17. "reflect"
  18. "github.com/blevesearch/bleve/index"
  19. "github.com/blevesearch/bleve/size"
  20. )
  21. var reflectStaticSizeDocumentMatch int
  22. var reflectStaticSizeSearchContext int
  23. var reflectStaticSizeLocation int
  24. func init() {
  25. var dm DocumentMatch
  26. reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
  27. var sc SearchContext
  28. reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
  29. var l Location
  30. reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
  31. }
  // ArrayPositions is a list of uint64 positions.
  type ArrayPositions []uint64

  // Equals reports whether ap and other have the same length and hold the
  // same values in the same order. A nil slice and an empty slice compare
  // as equal.
  func (ap ArrayPositions) Equals(other ArrayPositions) bool {
  	if len(other) != len(ap) {
  		return false
  	}
  	for idx, pos := range other {
  		if pos != ap[idx] {
  			return false
  		}
  	}
  	return true
  }
  44. type Location struct {
  45. // Pos is the position of the term within the field, starting at 1
  46. Pos uint64 `json:"pos"`
  47. // Start and End are the byte offsets of the term in the field
  48. Start uint64 `json:"start"`
  49. End uint64 `json:"end"`
  50. // ArrayPositions contains the positions of the term within any elements.
  51. ArrayPositions ArrayPositions `json:"array_positions"`
  52. }
  53. func (l *Location) Size() int {
  54. return reflectStaticSizeLocation + size.SizeOfPtr +
  55. len(l.ArrayPositions)*size.SizeOfUint64
  56. }
  57. type Locations []*Location
  58. type TermLocationMap map[string]Locations
  59. func (t TermLocationMap) AddLocation(term string, location *Location) {
  60. t[term] = append(t[term], location)
  61. }
  62. type FieldTermLocationMap map[string]TermLocationMap
  63. type FieldTermLocation struct {
  64. Field string
  65. Term string
  66. Location Location
  67. }
  68. type FieldFragmentMap map[string][]string
  69. type DocumentMatch struct {
  70. Index string `json:"index,omitempty"`
  71. ID string `json:"id"`
  72. IndexInternalID index.IndexInternalID `json:"-"`
  73. Score float64 `json:"score"`
  74. Expl *Explanation `json:"explanation,omitempty"`
  75. Locations FieldTermLocationMap `json:"locations,omitempty"`
  76. Fragments FieldFragmentMap `json:"fragments,omitempty"`
  77. Sort []string `json:"sort,omitempty"`
  78. // Fields contains the values for document fields listed in
  79. // SearchRequest.Fields. Text fields are returned as strings, numeric
  80. // fields as float64s and date fields as time.RFC3339 formatted strings.
  81. Fields map[string]interface{} `json:"fields,omitempty"`
  82. // used to maintain natural index order
  83. HitNumber uint64 `json:"-"`
  84. // used to temporarily hold field term location information during
  85. // search processing in an efficient, recycle-friendly manner, to
  86. // be later incorporated into the Locations map when search
  87. // results are completed
  88. FieldTermLocations []FieldTermLocation `json:"-"`
  89. }
  90. func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
  91. if dm.Fields == nil {
  92. dm.Fields = make(map[string]interface{})
  93. }
  94. existingVal, ok := dm.Fields[name]
  95. if !ok {
  96. dm.Fields[name] = value
  97. return
  98. }
  99. valSlice, ok := existingVal.([]interface{})
  100. if ok {
  101. // already a slice, append to it
  102. valSlice = append(valSlice, value)
  103. } else {
  104. // create a slice
  105. valSlice = []interface{}{existingVal, value}
  106. }
  107. dm.Fields[name] = valSlice
  108. }
  109. // Reset allows an already allocated DocumentMatch to be reused
  110. func (dm *DocumentMatch) Reset() *DocumentMatch {
  111. // remember the []byte used for the IndexInternalID
  112. indexInternalID := dm.IndexInternalID
  113. // remember the []interface{} used for sort
  114. sort := dm.Sort
  115. // remember the FieldTermLocations backing array
  116. ftls := dm.FieldTermLocations
  117. for i := range ftls { // recycle the ArrayPositions of each location
  118. ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
  119. }
  120. // idiom to copy over from empty DocumentMatch (0 allocations)
  121. *dm = DocumentMatch{}
  122. // reuse the []byte already allocated (and reset len to 0)
  123. dm.IndexInternalID = indexInternalID[:0]
  124. // reuse the []interface{} already allocated (and reset len to 0)
  125. dm.Sort = sort[:0]
  126. // reuse the FieldTermLocations already allocated (and reset len to 0)
  127. dm.FieldTermLocations = ftls[:0]
  128. return dm
  129. }
  130. func (dm *DocumentMatch) Size() int {
  131. sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
  132. len(dm.Index) +
  133. len(dm.ID) +
  134. len(dm.IndexInternalID)
  135. if dm.Expl != nil {
  136. sizeInBytes += dm.Expl.Size()
  137. }
  138. for k, v := range dm.Locations {
  139. sizeInBytes += size.SizeOfString + len(k)
  140. for k1, v1 := range v {
  141. sizeInBytes += size.SizeOfString + len(k1) +
  142. size.SizeOfSlice
  143. for _, entry := range v1 {
  144. sizeInBytes += entry.Size()
  145. }
  146. }
  147. }
  148. for k, v := range dm.Fragments {
  149. sizeInBytes += size.SizeOfString + len(k) +
  150. size.SizeOfSlice
  151. for _, entry := range v {
  152. sizeInBytes += size.SizeOfString + len(entry)
  153. }
  154. }
  155. for _, entry := range dm.Sort {
  156. sizeInBytes += size.SizeOfString + len(entry)
  157. }
  158. for k, _ := range dm.Fields {
  159. sizeInBytes += size.SizeOfString + len(k) +
  160. size.SizeOfPtr
  161. }
  162. return sizeInBytes
  163. }
  164. // Complete performs final preparation & transformation of the
  165. // DocumentMatch at the end of search processing, also allowing the
  166. // caller to provide an optional preallocated locations slice
  167. func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
  168. // transform the FieldTermLocations slice into the Locations map
  169. nlocs := len(dm.FieldTermLocations)
  170. if nlocs > 0 {
  171. if cap(prealloc) < nlocs {
  172. prealloc = make([]Location, nlocs)
  173. }
  174. prealloc = prealloc[:nlocs]
  175. var lastField string
  176. var tlm TermLocationMap
  177. for i, ftl := range dm.FieldTermLocations {
  178. if lastField != ftl.Field {
  179. lastField = ftl.Field
  180. if dm.Locations == nil {
  181. dm.Locations = make(FieldTermLocationMap)
  182. }
  183. tlm = dm.Locations[ftl.Field]
  184. if tlm == nil {
  185. tlm = make(TermLocationMap)
  186. dm.Locations[ftl.Field] = tlm
  187. }
  188. }
  189. loc := &prealloc[i]
  190. *loc = ftl.Location
  191. if len(loc.ArrayPositions) > 0 { // copy
  192. loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
  193. }
  194. tlm[ftl.Term] = append(tlm[ftl.Term], loc)
  195. dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
  196. Location: Location{
  197. ArrayPositions: ftl.Location.ArrayPositions[:0],
  198. },
  199. }
  200. }
  201. }
  202. dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
  203. return prealloc
  204. }
  205. func (dm *DocumentMatch) String() string {
  206. return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
  207. }
  208. type DocumentMatchCollection []*DocumentMatch
  209. func (c DocumentMatchCollection) Len() int { return len(c) }
  210. func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
  211. func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
  212. type Searcher interface {
  213. Next(ctx *SearchContext) (*DocumentMatch, error)
  214. Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
  215. Close() error
  216. Weight() float64
  217. SetQueryNorm(float64)
  218. Count() uint64
  219. Min() int
  220. Size() int
  221. DocumentMatchPoolSize() int
  222. }
  // SearcherOptions groups the optional settings passed to searchers.
  //
  // NOTE(review): how each option is interpreted is not visible in this
  // file; the field names are the only documentation available here.
  type SearcherOptions struct {
  	Explain            bool
  	IncludeTermVectors bool
  	Score              string
  }
  228. // SearchContext represents the context around a single search
  229. type SearchContext struct {
  230. DocumentMatchPool *DocumentMatchPool
  231. Collector Collector
  232. }
  233. func (sc *SearchContext) Size() int {
  234. sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
  235. reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
  236. if sc.DocumentMatchPool != nil {
  237. for _, entry := range sc.DocumentMatchPool.avail {
  238. if entry != nil {
  239. sizeInBytes += entry.Size()
  240. }
  241. }
  242. }
  243. return sizeInBytes
  244. }
上海开阖软件有限公司 沪ICP备12045867号-1