本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

344 lines
9.8KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mapping
import (
	"encoding/json"
	"fmt"
	"sort"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/geo"
)
  23. // control the default behavior for dynamic fields (those not explicitly mapped)
  24. var (
  25. IndexDynamic = true
  26. StoreDynamic = true
  27. DocValuesDynamic = true // TODO revisit default?
  28. )
  29. // A FieldMapping describes how a specific item
  30. // should be put into the index.
  31. type FieldMapping struct {
  32. Name string `json:"name,omitempty"`
  33. Type string `json:"type,omitempty"`
  34. // Analyzer specifies the name of the analyzer to use for this field. If
  35. // Analyzer is empty, traverse the DocumentMapping tree toward the root and
  36. // pick the first non-empty DefaultAnalyzer found. If there is none, use
  37. // the IndexMapping.DefaultAnalyzer.
  38. Analyzer string `json:"analyzer,omitempty"`
  39. // Store indicates whether to store field values in the index. Stored
  40. // values can be retrieved from search results using SearchRequest.Fields.
  41. Store bool `json:"store,omitempty"`
  42. Index bool `json:"index,omitempty"`
  43. // IncludeTermVectors, if true, makes terms occurrences to be recorded for
  44. // this field. It includes the term position within the terms sequence and
  45. // the term offsets in the source document field. Term vectors are required
  46. // to perform phrase queries or terms highlighting in source documents.
  47. IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
  48. IncludeInAll bool `json:"include_in_all,omitempty"`
  49. DateFormat string `json:"date_format,omitempty"`
  50. // DocValues, if true makes the index uninverting possible for this field
  51. // It is useful for faceting and sorting queries.
  52. DocValues bool `json:"docvalues,omitempty"`
  53. }
  54. // NewTextFieldMapping returns a default field mapping for text
  55. func NewTextFieldMapping() *FieldMapping {
  56. return &FieldMapping{
  57. Type: "text",
  58. Store: true,
  59. Index: true,
  60. IncludeTermVectors: true,
  61. IncludeInAll: true,
  62. DocValues: true,
  63. }
  64. }
  65. func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
  66. rv := NewTextFieldMapping()
  67. rv.Store = im.StoreDynamic
  68. rv.Index = im.IndexDynamic
  69. rv.DocValues = im.DocValuesDynamic
  70. return rv
  71. }
  72. // NewNumericFieldMapping returns a default field mapping for numbers
  73. func NewNumericFieldMapping() *FieldMapping {
  74. return &FieldMapping{
  75. Type: "number",
  76. Store: true,
  77. Index: true,
  78. IncludeInAll: true,
  79. DocValues: true,
  80. }
  81. }
  82. func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
  83. rv := NewNumericFieldMapping()
  84. rv.Store = im.StoreDynamic
  85. rv.Index = im.IndexDynamic
  86. rv.DocValues = im.DocValuesDynamic
  87. return rv
  88. }
  89. // NewDateTimeFieldMapping returns a default field mapping for dates
  90. func NewDateTimeFieldMapping() *FieldMapping {
  91. return &FieldMapping{
  92. Type: "datetime",
  93. Store: true,
  94. Index: true,
  95. IncludeInAll: true,
  96. DocValues: true,
  97. }
  98. }
  99. func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
  100. rv := NewDateTimeFieldMapping()
  101. rv.Store = im.StoreDynamic
  102. rv.Index = im.IndexDynamic
  103. rv.DocValues = im.DocValuesDynamic
  104. return rv
  105. }
  106. // NewBooleanFieldMapping returns a default field mapping for booleans
  107. func NewBooleanFieldMapping() *FieldMapping {
  108. return &FieldMapping{
  109. Type: "boolean",
  110. Store: true,
  111. Index: true,
  112. IncludeInAll: true,
  113. DocValues: true,
  114. }
  115. }
  116. func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
  117. rv := NewBooleanFieldMapping()
  118. rv.Store = im.StoreDynamic
  119. rv.Index = im.IndexDynamic
  120. rv.DocValues = im.DocValuesDynamic
  121. return rv
  122. }
  123. // NewGeoPointFieldMapping returns a default field mapping for geo points
  124. func NewGeoPointFieldMapping() *FieldMapping {
  125. return &FieldMapping{
  126. Type: "geopoint",
  127. Store: true,
  128. Index: true,
  129. IncludeInAll: true,
  130. DocValues: true,
  131. }
  132. }
  133. // Options returns the indexing options for this field.
  134. func (fm *FieldMapping) Options() document.IndexingOptions {
  135. var rv document.IndexingOptions
  136. if fm.Store {
  137. rv |= document.StoreField
  138. }
  139. if fm.Index {
  140. rv |= document.IndexField
  141. }
  142. if fm.IncludeTermVectors {
  143. rv |= document.IncludeTermVectors
  144. }
  145. if fm.DocValues {
  146. rv |= document.DocValues
  147. }
  148. return rv
  149. }
  150. func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
  151. fieldName := getFieldName(pathString, path, fm)
  152. options := fm.Options()
  153. if fm.Type == "text" {
  154. analyzer := fm.analyzerForField(path, context)
  155. field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
  156. context.doc.AddField(field)
  157. if !fm.IncludeInAll {
  158. context.excludedFromAll = append(context.excludedFromAll, fieldName)
  159. }
  160. } else if fm.Type == "datetime" {
  161. dateTimeFormat := context.im.DefaultDateTimeParser
  162. if fm.DateFormat != "" {
  163. dateTimeFormat = fm.DateFormat
  164. }
  165. dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
  166. if dateTimeParser != nil {
  167. parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
  168. if err == nil {
  169. fm.processTime(parsedDateTime, pathString, path, indexes, context)
  170. }
  171. }
  172. }
  173. }
  174. func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
  175. fieldName := getFieldName(pathString, path, fm)
  176. if fm.Type == "number" {
  177. options := fm.Options()
  178. field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options)
  179. context.doc.AddField(field)
  180. if !fm.IncludeInAll {
  181. context.excludedFromAll = append(context.excludedFromAll, fieldName)
  182. }
  183. }
  184. }
  185. func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString string, path []string, indexes []uint64, context *walkContext) {
  186. fieldName := getFieldName(pathString, path, fm)
  187. if fm.Type == "datetime" {
  188. options := fm.Options()
  189. field, err := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, propertyValueTime, options)
  190. if err == nil {
  191. context.doc.AddField(field)
  192. } else {
  193. logger.Printf("could not build date %v", err)
  194. }
  195. if !fm.IncludeInAll {
  196. context.excludedFromAll = append(context.excludedFromAll, fieldName)
  197. }
  198. }
  199. }
  200. func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) {
  201. fieldName := getFieldName(pathString, path, fm)
  202. if fm.Type == "boolean" {
  203. options := fm.Options()
  204. field := document.NewBooleanFieldWithIndexingOptions(fieldName, indexes, propertyValueBool, options)
  205. context.doc.AddField(field)
  206. if !fm.IncludeInAll {
  207. context.excludedFromAll = append(context.excludedFromAll, fieldName)
  208. }
  209. }
  210. }
  211. func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) {
  212. lon, lat, found := geo.ExtractGeoPoint(propertyMightBeGeoPoint)
  213. if found {
  214. fieldName := getFieldName(pathString, path, fm)
  215. options := fm.Options()
  216. field := document.NewGeoPointFieldWithIndexingOptions(fieldName, indexes, lon, lat, options)
  217. context.doc.AddField(field)
  218. if !fm.IncludeInAll {
  219. context.excludedFromAll = append(context.excludedFromAll, fieldName)
  220. }
  221. }
  222. }
  223. func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer {
  224. analyzerName := fm.Analyzer
  225. if analyzerName == "" {
  226. analyzerName = context.dm.defaultAnalyzerName(path)
  227. if analyzerName == "" {
  228. analyzerName = context.im.DefaultAnalyzer
  229. }
  230. }
  231. return context.im.AnalyzerNamed(analyzerName)
  232. }
  233. func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string {
  234. fieldName := pathString
  235. if fieldMapping.Name != "" {
  236. parentName := ""
  237. if len(path) > 1 {
  238. parentName = encodePath(path[:len(path)-1]) + pathSeparator
  239. }
  240. fieldName = parentName + fieldMapping.Name
  241. }
  242. return fieldName
  243. }
  244. // UnmarshalJSON offers custom unmarshaling with optional strict validation
  245. func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
  246. var tmp map[string]json.RawMessage
  247. err := json.Unmarshal(data, &tmp)
  248. if err != nil {
  249. return err
  250. }
  251. var invalidKeys []string
  252. for k, v := range tmp {
  253. switch k {
  254. case "name":
  255. err := json.Unmarshal(v, &fm.Name)
  256. if err != nil {
  257. return err
  258. }
  259. case "type":
  260. err := json.Unmarshal(v, &fm.Type)
  261. if err != nil {
  262. return err
  263. }
  264. case "analyzer":
  265. err := json.Unmarshal(v, &fm.Analyzer)
  266. if err != nil {
  267. return err
  268. }
  269. case "store":
  270. err := json.Unmarshal(v, &fm.Store)
  271. if err != nil {
  272. return err
  273. }
  274. case "index":
  275. err := json.Unmarshal(v, &fm.Index)
  276. if err != nil {
  277. return err
  278. }
  279. case "include_term_vectors":
  280. err := json.Unmarshal(v, &fm.IncludeTermVectors)
  281. if err != nil {
  282. return err
  283. }
  284. case "include_in_all":
  285. err := json.Unmarshal(v, &fm.IncludeInAll)
  286. if err != nil {
  287. return err
  288. }
  289. case "date_format":
  290. err := json.Unmarshal(v, &fm.DateFormat)
  291. if err != nil {
  292. return err
  293. }
  294. case "docvalues":
  295. err := json.Unmarshal(v, &fm.DocValues)
  296. if err != nil {
  297. return err
  298. }
  299. default:
  300. invalidKeys = append(invalidKeys, k)
  301. }
  302. }
  303. if MappingJSONStrict && len(invalidKeys) > 0 {
  304. return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
  305. }
  306. return nil
  307. }
上海开阖软件有限公司 沪ICP备12045867号-1