本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

111 lines
3.6KB

  1. // Copyright (c) 2015 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package upsidedown
  15. import (
  16. "github.com/blevesearch/bleve/analysis"
  17. "github.com/blevesearch/bleve/document"
  18. "github.com/blevesearch/bleve/index"
  19. )
  20. func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult {
  21. rv := &index.AnalysisResult{
  22. DocID: d.ID,
  23. Rows: make([]index.IndexRow, 0, 100),
  24. }
  25. docIDBytes := []byte(d.ID)
  26. // track our back index entries
  27. backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
  28. // information we collate as we merge fields with same name
  29. fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
  30. fieldLengths := make(map[uint16]int)
  31. fieldIncludeTermVectors := make(map[uint16]bool)
  32. fieldNames := make(map[uint16]string)
  33. analyzeField := func(field document.Field, storable bool) {
  34. fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
  35. if newFieldRow != nil {
  36. rv.Rows = append(rv.Rows, newFieldRow)
  37. }
  38. fieldNames[fieldIndex] = field.Name()
  39. if field.Options().IsIndexed() {
  40. fieldLength, tokenFreqs := field.Analyze()
  41. existingFreqs := fieldTermFreqs[fieldIndex]
  42. if existingFreqs == nil {
  43. fieldTermFreqs[fieldIndex] = tokenFreqs
  44. } else {
  45. existingFreqs.MergeAll(field.Name(), tokenFreqs)
  46. fieldTermFreqs[fieldIndex] = existingFreqs
  47. }
  48. fieldLengths[fieldIndex] += fieldLength
  49. fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
  50. }
  51. if storable && field.Options().IsStored() {
  52. rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
  53. }
  54. }
  55. // walk all the fields, record stored fields now
  56. // place information about indexed fields into map
  57. // this collates information across fields with
  58. // same names (arrays)
  59. for _, field := range d.Fields {
  60. analyzeField(field, true)
  61. }
  62. if len(d.CompositeFields) > 0 {
  63. for fieldIndex, tokenFreqs := range fieldTermFreqs {
  64. // see if any of the composite fields need this
  65. for _, compositeField := range d.CompositeFields {
  66. compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
  67. }
  68. }
  69. for _, compositeField := range d.CompositeFields {
  70. analyzeField(compositeField, false)
  71. }
  72. }
  73. rowsCapNeeded := len(rv.Rows) + 1
  74. for _, tokenFreqs := range fieldTermFreqs {
  75. rowsCapNeeded += len(tokenFreqs)
  76. }
  77. rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
  78. backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
  79. // walk through the collated information and process
  80. // once for each indexed field (unique name)
  81. for fieldIndex, tokenFreqs := range fieldTermFreqs {
  82. fieldLength := fieldLengths[fieldIndex]
  83. includeTermVectors := fieldIncludeTermVectors[fieldIndex]
  84. // encode this field
  85. rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
  86. }
  87. // build the back index row
  88. backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
  89. rv.Rows = append(rv.Rows, backIndexRow)
  90. return rv
  91. }
上海开阖软件有限公司 沪ICP备12045867号-1