|
- // Copyright 2018 The Gitea Authors. All rights reserved.
- // Use of this source code is governed by a MIT-style
- // license that can be found in the LICENSE file.
-
- package issues
-
- import (
- "fmt"
- "os"
- "strconv"
-
- "github.com/blevesearch/bleve"
- "github.com/blevesearch/bleve/analysis/analyzer/custom"
- "github.com/blevesearch/bleve/analysis/token/lowercase"
- "github.com/blevesearch/bleve/analysis/token/unicodenorm"
- "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
- "github.com/blevesearch/bleve/index/upsidedown"
- "github.com/blevesearch/bleve/mapping"
- "github.com/blevesearch/bleve/search/query"
- "github.com/ethantkoenig/rupture"
- )
-
- const (
- issueIndexerAnalyzer = "issueIndexer"
- issueIndexerDocType = "issueIndexerDocType"
- issueIndexerLatestVersion = 1
- )
-
- // indexerID a bleve-compatible unique identifier for an integer id
- func indexerID(id int64) string {
- return strconv.FormatInt(id, 36)
- }
-
- // idOfIndexerID the integer id associated with an indexer id
- func idOfIndexerID(indexerID string) (int64, error) {
- id, err := strconv.ParseInt(indexerID, 36, 64)
- if err != nil {
- return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, err)
- }
- return id, nil
- }
-
- // numericEqualityQuery a numeric equality query for the given value and field
- func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
- f := float64(value)
- tru := true
- q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
- q.SetField(field)
- return q
- }
-
- func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
- q := bleve.NewMatchPhraseQuery(matchPhrase)
- q.FieldVal = field
- q.Analyzer = analyzer
- return q
- }
-
- const unicodeNormalizeName = "unicodeNormalize"
-
- func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
- return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{
- "type": unicodenorm.Name,
- "form": unicodenorm.NFC,
- })
- }
-
- const maxBatchSize = 16
-
- // openIndexer open the index at the specified path, checking for metadata
- // updates and bleve version updates. If index needs to be created (or
- // re-created), returns (nil, nil)
- func openIndexer(path string, latestVersion int) (bleve.Index, error) {
- _, err := os.Stat(path)
- if err != nil && os.IsNotExist(err) {
- return nil, nil
- } else if err != nil {
- return nil, err
- }
-
- metadata, err := rupture.ReadIndexMetadata(path)
- if err != nil {
- return nil, err
- }
- if metadata.Version < latestVersion {
- // the indexer is using a previous version, so we should delete it and
- // re-populate
- return nil, os.RemoveAll(path)
- }
-
- index, err := bleve.Open(path)
- if err != nil && err == upsidedown.IncompatibleVersion {
- // the indexer was built with a previous version of bleve, so we should
- // delete it and re-populate
- return nil, os.RemoveAll(path)
- } else if err != nil {
- return nil, err
- }
-
- return index, nil
- }
-
- // BleveIndexerData an update to the issue indexer
- type BleveIndexerData IndexerData
-
- // Type returns the document type, for bleve's mapping.Classifier interface.
- func (i *BleveIndexerData) Type() string {
- return issueIndexerDocType
- }
-
- // createIssueIndexer create an issue indexer if one does not already exist
- func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
- mapping := bleve.NewIndexMapping()
- docMapping := bleve.NewDocumentMapping()
-
- numericFieldMapping := bleve.NewNumericFieldMapping()
- numericFieldMapping.IncludeInAll = false
- docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
-
- textFieldMapping := bleve.NewTextFieldMapping()
- textFieldMapping.Store = false
- textFieldMapping.IncludeInAll = false
- docMapping.AddFieldMappingsAt("Title", textFieldMapping)
- docMapping.AddFieldMappingsAt("Content", textFieldMapping)
- docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
-
- if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
- return nil, err
- } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{
- "type": custom.Name,
- "char_filters": []string{},
- "tokenizer": unicode.Name,
- "token_filters": []string{unicodeNormalizeName, lowercase.Name},
- }); err != nil {
- return nil, err
- }
-
- mapping.DefaultAnalyzer = issueIndexerAnalyzer
- mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
- mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
-
- index, err := bleve.New(path, mapping)
- if err != nil {
- return nil, err
- }
-
- if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{
- Version: latestVersion,
- }); err != nil {
- return nil, err
- }
- return index, nil
- }
-
- var (
- _ Indexer = &BleveIndexer{}
- )
-
- // BleveIndexer implements Indexer interface
- type BleveIndexer struct {
- indexDir string
- indexer bleve.Index
- }
-
- // NewBleveIndexer creates a new bleve local indexer
- func NewBleveIndexer(indexDir string) *BleveIndexer {
- return &BleveIndexer{
- indexDir: indexDir,
- }
- }
-
- // Init will initial the indexer
- func (b *BleveIndexer) Init() (bool, error) {
- var err error
- b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion)
- if err != nil {
- return false, err
- }
- if b.indexer != nil {
- return true, nil
- }
-
- b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion)
- return false, err
- }
-
- // Index will save the index data
- func (b *BleveIndexer) Index(issues []*IndexerData) error {
- batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
- for _, issue := range issues {
- if err := batch.Index(indexerID(issue.ID), struct {
- RepoID int64
- Title string
- Content string
- Comments []string
- }{
- RepoID: issue.RepoID,
- Title: issue.Title,
- Content: issue.Content,
- Comments: issue.Comments,
- }); err != nil {
- return err
- }
- }
- return batch.Flush()
- }
-
- // Delete deletes indexes by ids
- func (b *BleveIndexer) Delete(ids ...int64) error {
- batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
- for _, id := range ids {
- if err := batch.Delete(indexerID(id)); err != nil {
- return err
- }
- }
- return batch.Flush()
- }
-
- // Search searches for issues by given conditions.
- // Returns the matching issue IDs
- func (b *BleveIndexer) Search(keyword string, repoID int64, limit, start int) (*SearchResult, error) {
- indexerQuery := bleve.NewConjunctionQuery(
- numericEqualityQuery(repoID, "RepoID"),
- bleve.NewDisjunctionQuery(
- newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
- newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
- newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
- ))
- search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
-
- result, err := b.indexer.Search(search)
- if err != nil {
- return nil, err
- }
-
- var ret = SearchResult{
- Hits: make([]Match, 0, len(result.Hits)),
- }
- for _, hit := range result.Hits {
- id, err := idOfIndexerID(hit.ID)
- if err != nil {
- return nil, err
- }
- ret.Hits = append(ret.Hits, Match{
- ID: id,
- RepoID: repoID,
- })
- }
- return &ret, nil
- }
|