本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

90 lines
2.6KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package registry
  15. import (
  16. "fmt"
  17. "github.com/blevesearch/bleve/analysis"
  18. )
  19. func RegisterTokenizer(name string, constructor TokenizerConstructor) {
  20. _, exists := tokenizers[name]
  21. if exists {
  22. panic(fmt.Errorf("attempted to register duplicate tokenizer named '%s'", name))
  23. }
  24. tokenizers[name] = constructor
  25. }
// TokenizerConstructor is the signature of a function that builds an
// analysis.Tokenizer from a configuration map and a *Cache, returning an
// error when the tokenizer cannot be built.
type TokenizerConstructor func(config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error)
// TokenizerRegistry maps a name to a TokenizerConstructor.
type TokenizerRegistry map[string]TokenizerConstructor
// TokenizerCache embeds a *ConcurrentCache and exposes tokenizer-typed
// accessors (TokenizerNamed and DefineTokenizer) on top of it.
type TokenizerCache struct {
	*ConcurrentCache
}
  31. func NewTokenizerCache() *TokenizerCache {
  32. return &TokenizerCache{
  33. NewConcurrentCache(),
  34. }
  35. }
  36. func TokenizerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
  37. cons, registered := tokenizers[name]
  38. if !registered {
  39. return nil, fmt.Errorf("no tokenizer with name or type '%s' registered", name)
  40. }
  41. tokenizer, err := cons(config, cache)
  42. if err != nil {
  43. return nil, fmt.Errorf("error building tokenizer: %v", err)
  44. }
  45. return tokenizer, nil
  46. }
  47. func (c *TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) {
  48. item, err := c.ItemNamed(name, cache, TokenizerBuild)
  49. if err != nil {
  50. return nil, err
  51. }
  52. return item.(analysis.Tokenizer), nil
  53. }
  54. func (c *TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) {
  55. item, err := c.DefineItem(name, typ, config, cache, TokenizerBuild)
  56. if err != nil {
  57. if err == ErrAlreadyDefined {
  58. return nil, fmt.Errorf("tokenizer named '%s' already defined", name)
  59. }
  60. return nil, err
  61. }
  62. return item.(analysis.Tokenizer), nil
  63. }
  64. func TokenizerTypesAndInstances() ([]string, []string) {
  65. emptyConfig := map[string]interface{}{}
  66. emptyCache := NewCache()
  67. var types []string
  68. var instances []string
  69. for name, cons := range tokenizers {
  70. _, err := cons(emptyConfig, emptyCache)
  71. if err == nil {
  72. instances = append(instances, name)
  73. } else {
  74. types = append(types, name)
  75. }
  76. }
  77. return types, instances
  78. }
上海开阖软件有限公司 沪ICP备12045867号-1