本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

71 lines
1.9KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package stop implements a TokenFilter removing tokens found in
  15. // a TokenMap.
  16. //
  17. // Its constructor takes the following arguments:
  18. //
  19. // "stop_token_map" (string): the name of the token map identifying tokens to
  20. // remove.
  21. package stop
  22. import (
  23. "fmt"
  24. "github.com/blevesearch/bleve/analysis"
  25. "github.com/blevesearch/bleve/registry"
  26. )
  27. const Name = "stop_tokens"
  28. type StopTokensFilter struct {
  29. stopTokens analysis.TokenMap
  30. }
  31. func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
  32. return &StopTokensFilter{
  33. stopTokens: stopTokens,
  34. }
  35. }
  36. func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
  37. j := 0
  38. for _, token := range input {
  39. _, isStopToken := f.stopTokens[string(token.Term)]
  40. if !isStopToken {
  41. input[j] = token
  42. j++
  43. }
  44. }
  45. return input[:j]
  46. }
  47. func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
  48. stopTokenMapName, ok := config["stop_token_map"].(string)
  49. if !ok {
  50. return nil, fmt.Errorf("must specify stop_token_map")
  51. }
  52. stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName)
  53. if err != nil {
  54. return nil, fmt.Errorf("error building stop words filter: %v", err)
  55. }
  56. return NewStopTokensFilter(stopTokenMap), nil
  57. }
  58. func init() {
  59. registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
  60. }
上海开阖软件有限公司 沪ICP备12045867号-1