本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

549 lines
16KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mapping
  15. import (
  16. "encoding"
  17. "encoding/json"
  18. "fmt"
  19. "reflect"
  20. "time"
  21. "github.com/blevesearch/bleve/registry"
  22. )
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways.  These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used.  To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
	// Enabled set to false makes processProperty skip any value whose
	// path resolves exactly to this mapping
	Enabled bool `json:"enabled"`
	// Dynamic allows values without an explicit mapping to be indexed
	// with automatically created field mappings
	Dynamic bool `json:"dynamic"`
	// Properties holds the mappings of named sub-sections, keyed by
	// property name
	Properties map[string]*DocumentMapping `json:"properties,omitempty"`
	// Fields lists the index entries produced for a value at this level
	Fields []*FieldMapping `json:"fields,omitempty"`
	// DefaultAnalyzer is the name of an analyzer; Validate checks that
	// it can be resolved through the registry cache
	DefaultAnalyzer string `json:"default_analyzer,omitempty"`
	// StructTagKey overrides "json" when looking for field names in struct tags
	StructTagKey string `json:"struct_tag_key,omitempty"`
}
  45. func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
  46. var err error
  47. if dm.DefaultAnalyzer != "" {
  48. _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
  49. if err != nil {
  50. return err
  51. }
  52. }
  53. for _, property := range dm.Properties {
  54. err = property.Validate(cache)
  55. if err != nil {
  56. return err
  57. }
  58. }
  59. for _, field := range dm.Fields {
  60. if field.Analyzer != "" {
  61. _, err = cache.AnalyzerNamed(field.Analyzer)
  62. if err != nil {
  63. return err
  64. }
  65. }
  66. if field.DateFormat != "" {
  67. _, err = cache.DateTimeParserNamed(field.DateFormat)
  68. if err != nil {
  69. return err
  70. }
  71. }
  72. switch field.Type {
  73. case "text", "datetime", "number", "boolean", "geopoint":
  74. default:
  75. return fmt.Errorf("unknown field type: '%s'", field.Type)
  76. }
  77. }
  78. return nil
  79. }
  80. // analyzerNameForPath attempts to first find the field
  81. // described by this path, then returns the analyzer
  82. // configured for that field
  83. func (dm *DocumentMapping) analyzerNameForPath(path string) string {
  84. field := dm.fieldDescribedByPath(path)
  85. if field != nil {
  86. return field.Analyzer
  87. }
  88. return ""
  89. }
  90. func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
  91. pathElements := decodePath(path)
  92. if len(pathElements) > 1 {
  93. // easy case, there is more than 1 path element remaining
  94. // the next path element must match a property name
  95. // at this level
  96. for propName, subDocMapping := range dm.Properties {
  97. if propName == pathElements[0] {
  98. return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
  99. }
  100. }
  101. } else {
  102. // just 1 path elememnt
  103. // first look for property name with empty field
  104. for propName, subDocMapping := range dm.Properties {
  105. if propName == pathElements[0] {
  106. // found property name match, now look at its fields
  107. for _, field := range subDocMapping.Fields {
  108. if field.Name == "" || field.Name == pathElements[0] {
  109. // match
  110. return field
  111. }
  112. }
  113. }
  114. }
  115. // next, walk the properties again, looking for field overriding the name
  116. for propName, subDocMapping := range dm.Properties {
  117. if propName != pathElements[0] {
  118. // property name isn't a match, but field name could override it
  119. for _, field := range subDocMapping.Fields {
  120. if field.Name == pathElements[0] {
  121. return field
  122. }
  123. }
  124. }
  125. }
  126. }
  127. return nil
  128. }
  129. // documentMappingForPath only returns EXACT matches for a sub document
  130. // or for an explicitly mapped field, if you want to find the
  131. // closest document mapping to a field not explicitly mapped
  132. // use closestDocMapping
  133. func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
  134. pathElements := decodePath(path)
  135. current := dm
  136. OUTER:
  137. for i, pathElement := range pathElements {
  138. for name, subDocMapping := range current.Properties {
  139. if name == pathElement {
  140. current = subDocMapping
  141. continue OUTER
  142. }
  143. }
  144. // no subDocMapping matches this pathElement
  145. // only if this is the last element check for field name
  146. if i == len(pathElements)-1 {
  147. for _, field := range current.Fields {
  148. if field.Name == pathElement {
  149. break
  150. }
  151. }
  152. }
  153. return nil
  154. }
  155. return current
  156. }
  157. // closestDocMapping findest the most specific document mapping that matches
  158. // part of the provided path
  159. func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
  160. pathElements := decodePath(path)
  161. current := dm
  162. OUTER:
  163. for _, pathElement := range pathElements {
  164. for name, subDocMapping := range current.Properties {
  165. if name == pathElement {
  166. current = subDocMapping
  167. continue OUTER
  168. }
  169. }
  170. break
  171. }
  172. return current
  173. }
  174. // NewDocumentMapping returns a new document mapping
  175. // with all the default values.
  176. func NewDocumentMapping() *DocumentMapping {
  177. return &DocumentMapping{
  178. Enabled: true,
  179. Dynamic: true,
  180. }
  181. }
  182. // NewDocumentStaticMapping returns a new document
  183. // mapping that will not automatically index parts
  184. // of a document without an explicit mapping.
  185. func NewDocumentStaticMapping() *DocumentMapping {
  186. return &DocumentMapping{
  187. Enabled: true,
  188. }
  189. }
  190. // NewDocumentDisabledMapping returns a new document
  191. // mapping that will not perform any indexing.
  192. func NewDocumentDisabledMapping() *DocumentMapping {
  193. return &DocumentMapping{}
  194. }
  195. // AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
  196. // for the specified named subsection.
  197. func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
  198. if dm.Properties == nil {
  199. dm.Properties = make(map[string]*DocumentMapping)
  200. }
  201. dm.Properties[property] = sdm
  202. }
  203. // AddFieldMappingsAt adds one or more FieldMappings
  204. // at the named sub-document. If the named sub-document
  205. // doesn't yet exist it is created for you.
  206. // This is a convenience function to make most common
  207. // mappings more concise.
  208. // Otherwise, you would:
  209. // subMapping := NewDocumentMapping()
  210. // subMapping.AddFieldMapping(fieldMapping)
  211. // parentMapping.AddSubDocumentMapping(property, subMapping)
  212. func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
  213. if dm.Properties == nil {
  214. dm.Properties = make(map[string]*DocumentMapping)
  215. }
  216. sdm, ok := dm.Properties[property]
  217. if !ok {
  218. sdm = NewDocumentMapping()
  219. }
  220. for _, fm := range fms {
  221. sdm.AddFieldMapping(fm)
  222. }
  223. dm.Properties[property] = sdm
  224. }
  225. // AddFieldMapping adds the provided FieldMapping for this section
  226. // of the document.
  227. func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
  228. if dm.Fields == nil {
  229. dm.Fields = make([]*FieldMapping, 0)
  230. }
  231. dm.Fields = append(dm.Fields, fm)
  232. }
  233. // UnmarshalJSON offers custom unmarshaling with optional strict validation
  234. func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
  235. var tmp map[string]json.RawMessage
  236. err := json.Unmarshal(data, &tmp)
  237. if err != nil {
  238. return err
  239. }
  240. // set defaults for fields which might have been omitted
  241. dm.Enabled = true
  242. dm.Dynamic = true
  243. var invalidKeys []string
  244. for k, v := range tmp {
  245. switch k {
  246. case "enabled":
  247. err := json.Unmarshal(v, &dm.Enabled)
  248. if err != nil {
  249. return err
  250. }
  251. case "dynamic":
  252. err := json.Unmarshal(v, &dm.Dynamic)
  253. if err != nil {
  254. return err
  255. }
  256. case "default_analyzer":
  257. err := json.Unmarshal(v, &dm.DefaultAnalyzer)
  258. if err != nil {
  259. return err
  260. }
  261. case "properties":
  262. err := json.Unmarshal(v, &dm.Properties)
  263. if err != nil {
  264. return err
  265. }
  266. case "fields":
  267. err := json.Unmarshal(v, &dm.Fields)
  268. if err != nil {
  269. return err
  270. }
  271. case "struct_tag_key":
  272. err := json.Unmarshal(v, &dm.StructTagKey)
  273. if err != nil {
  274. return err
  275. }
  276. default:
  277. invalidKeys = append(invalidKeys, k)
  278. }
  279. }
  280. if MappingJSONStrict && len(invalidKeys) > 0 {
  281. return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
  282. }
  283. return nil
  284. }
  285. func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
  286. rv := ""
  287. current := dm
  288. for _, pathElement := range path {
  289. var ok bool
  290. current, ok = current.Properties[pathElement]
  291. if !ok {
  292. break
  293. }
  294. if current.DefaultAnalyzer != "" {
  295. rv = current.DefaultAnalyzer
  296. }
  297. }
  298. return rv
  299. }
  300. func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
  301. // allow default "json" tag to be overridden
  302. structTagKey := dm.StructTagKey
  303. if structTagKey == "" {
  304. structTagKey = "json"
  305. }
  306. val := reflect.ValueOf(data)
  307. if !val.IsValid() {
  308. return
  309. }
  310. typ := val.Type()
  311. switch typ.Kind() {
  312. case reflect.Map:
  313. // FIXME can add support for other map keys in the future
  314. if typ.Key().Kind() == reflect.String {
  315. for _, key := range val.MapKeys() {
  316. fieldName := key.String()
  317. fieldVal := val.MapIndex(key).Interface()
  318. dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
  319. }
  320. }
  321. case reflect.Struct:
  322. for i := 0; i < val.NumField(); i++ {
  323. field := typ.Field(i)
  324. fieldName := field.Name
  325. // anonymous fields of type struct can elide the type name
  326. if field.Anonymous && field.Type.Kind() == reflect.Struct {
  327. fieldName = ""
  328. }
  329. // if the field has a name under the specified tag, prefer that
  330. tag := field.Tag.Get(structTagKey)
  331. tagFieldName := parseTagName(tag)
  332. if tagFieldName == "-" {
  333. continue
  334. }
  335. // allow tag to set field name to empty, only if anonymous
  336. if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
  337. fieldName = tagFieldName
  338. }
  339. if val.Field(i).CanInterface() {
  340. fieldVal := val.Field(i).Interface()
  341. newpath := path
  342. if fieldName != "" {
  343. newpath = append(path, fieldName)
  344. }
  345. dm.processProperty(fieldVal, newpath, indexes, context)
  346. }
  347. }
  348. case reflect.Slice, reflect.Array:
  349. for i := 0; i < val.Len(); i++ {
  350. if val.Index(i).CanInterface() {
  351. fieldVal := val.Index(i).Interface()
  352. dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
  353. }
  354. }
  355. case reflect.Ptr:
  356. ptrElem := val.Elem()
  357. if ptrElem.IsValid() && ptrElem.CanInterface() {
  358. dm.processProperty(ptrElem.Interface(), path, indexes, context)
  359. }
  360. case reflect.String:
  361. dm.processProperty(val.String(), path, indexes, context)
  362. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  363. dm.processProperty(float64(val.Int()), path, indexes, context)
  364. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  365. dm.processProperty(float64(val.Uint()), path, indexes, context)
  366. case reflect.Float32, reflect.Float64:
  367. dm.processProperty(float64(val.Float()), path, indexes, context)
  368. case reflect.Bool:
  369. dm.processProperty(val.Bool(), path, indexes, context)
  370. }
  371. }
// processProperty handles a single value found at path while walking a
// document.
//
// It resolves two mappings for the path: the exact one
// (documentMappingForPath, may be nil) and the closest enclosing one
// (closestDocMapping).  When an exact mapping exists and is disabled the
// value is skipped.  Otherwise the value's reflect.Kind decides:
//   - strings, floats, bools and time.Time values are passed to every
//     field mapping of the exact mapping; without an exact mapping they
//     are passed to a dynamically created field mapping, provided the
//     closest mapping is Dynamic;
//   - signed and unsigned integers are converted to float64 and processed
//     again;
//   - other structs, maps, slices, non-nil pointers and all remaining
//     kinds are handed to walkDocument, after offering the value to
//     "geopoint" fields (structs, maps, slices) or indexing its
//     MarshalText output into "text" fields (encoding.TextMarshaler).
//
// path and pathString (its encoded form) as well as indexes are forwarded
// unchanged to the field mapping methods.
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
	pathString := encodePath(path)
	// look to see if there is a mapping for this field
	subDocMapping := dm.documentMappingForPath(pathString)
	closestDocMapping := dm.closestDocMapping(pathString)

	// check to see if we even need to do further processing
	if subDocMapping != nil && !subDocMapping.Enabled {
		return
	}

	propertyValue := reflect.ValueOf(property)
	if !propertyValue.IsValid() {
		// cannot do anything with the zero value
		return
	}
	propertyType := propertyValue.Type()
	switch propertyType.Kind() {
	case reflect.String:
		propertyValueString := propertyValue.String()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				// geopoint fields receive the original value, every
				// other field type receives the string
				if fieldMapping.Type == "geopoint" {
					fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
				} else {
					fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
				}
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior

			// first see if it can be parsed by the default date parser
			// NOTE(review): when no default date parser is found (nil)
			// the string is not indexed at all - confirm this is intended
			dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
			if dateTimeParser != nil {
				parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
				if err != nil {
					// index as text
					fieldMapping := newTextFieldMappingDynamic(context.im)
					fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
				} else {
					// index as datetime
					fieldMapping := newDateTimeFieldMappingDynamic(context.im)
					fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
				}
			}
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		// integers take the float64 route below
		dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
		return
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		// unsigned integers take the float64 route below
		dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
		return
	case reflect.Float64, reflect.Float32:
		propertyValFloat := propertyValue.Float()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newNumericFieldMappingDynamic(context.im)
			fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
		}
	case reflect.Bool:
		propertyValBool := propertyValue.Bool()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newBooleanFieldMappingDynamic(context.im)
			fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
		}
	case reflect.Struct:
		switch property := property.(type) {
		case time.Time:
			// don't descend into the time struct
			if subDocMapping != nil {
				// index by explicit mapping
				for _, fieldMapping := range subDocMapping.Fields {
					fieldMapping.processTime(property, pathString, path, indexes, context)
				}
			} else if closestDocMapping.Dynamic {
				// automatic indexing behavior
				fieldMapping := newDateTimeFieldMappingDynamic(context.im)
				fieldMapping.processTime(property, pathString, path, indexes, context)
			}
		case encoding.TextMarshaler:
			// a marshaling error is not reported, it only suppresses
			// the text indexing
			txt, err := property.MarshalText()
			if err == nil && subDocMapping != nil {
				// index by explicit mapping
				for _, fieldMapping := range subDocMapping.Fields {
					if fieldMapping.Type == "text" {
						fieldMapping.processString(string(txt), pathString, path, indexes, context)
					}
				}
			}
			// unlike the pointer case further down, the struct is
			// walked whether or not its text form was indexed
			dm.walkDocument(property, path, indexes, context)
		default:
			// offer the whole struct to geopoint fields, then descend
			if subDocMapping != nil {
				for _, fieldMapping := range subDocMapping.Fields {
					if fieldMapping.Type == "geopoint" {
						fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
					}
				}
			}
			dm.walkDocument(property, path, indexes, context)
		}
	case reflect.Map, reflect.Slice:
		// offer the whole map/slice to geopoint fields, then descend
		if subDocMapping != nil {
			for _, fieldMapping := range subDocMapping.Fields {
				if fieldMapping.Type == "geopoint" {
					fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
				}
			}
		}
		dm.walkDocument(property, path, indexes, context)
	case reflect.Ptr:
		// nil pointers are ignored
		if !propertyValue.IsNil() {
			switch property := property.(type) {
			case encoding.TextMarshaler:
				txt, err := property.MarshalText()
				if err == nil && subDocMapping != nil {
					// index by explicit mapping
					for _, fieldMapping := range subDocMapping.Fields {
						if fieldMapping.Type == "text" {
							fieldMapping.processString(string(txt), pathString, path, indexes, context)
						}
					}
				} else {
					// marshaling failed or there is no explicit
					// mapping: fall back to walking the value
					dm.walkDocument(property, path, indexes, context)
				}
			default:
				dm.walkDocument(property, path, indexes, context)
			}
		}
	default:
		// everything else is left to walkDocument
		dm.walkDocument(property, path, indexes, context)
	}
}
上海开阖软件有限公司 沪ICP备12045867号-1