初始提交: Gitea 项目代码
This commit is contained in:
@@ -0,0 +1,392 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/analyze"
|
||||
"gitea.dev/modules/charset"
|
||||
"gitea.dev/modules/git"
|
||||
"gitea.dev/modules/git/gitcmd"
|
||||
"gitea.dev/modules/gitrepo"
|
||||
"gitea.dev/modules/indexer"
|
||||
path_filter "gitea.dev/modules/indexer/code/bleve/token/path"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
indexer_internal "gitea.dev/modules/indexer/internal"
|
||||
inner_bleve "gitea.dev/modules/indexer/internal/bleve"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/timeutil"
|
||||
"gitea.dev/modules/typesniffer"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
unicodeNormalizeName = "unicodeNormalize"
|
||||
maxBatchSize = 16
|
||||
)
|
||||
|
||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
|
||||
"type": unicodenorm.Name,
|
||||
"form": unicodenorm.NFC,
|
||||
})
|
||||
}
|
||||
|
||||
// RepoIndexerData data stored in the repo indexer
|
||||
type RepoIndexerData struct {
|
||||
RepoID int64
|
||||
CommitID string
|
||||
Content string
|
||||
Filename string
|
||||
Language string
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
|
||||
// Type returns the document type, for bleve's mapping.Classifier interface.
|
||||
func (d *RepoIndexerData) Type() string {
|
||||
return repoIndexerDocType
|
||||
}
|
||||
|
||||
const (
|
||||
repoIndexerAnalyzer = "repoIndexerAnalyzer"
|
||||
filenameIndexerAnalyzer = "filenameIndexerAnalyzer"
|
||||
filenameIndexerTokenizer = "filenameIndexerTokenizer"
|
||||
repoIndexerDocType = "repoIndexerDocType"
|
||||
repoIndexerLatestVersion = 9
|
||||
)
|
||||
|
||||
// generateBleveIndexMapping generates a bleve index mapping for the repo indexer
|
||||
func generateBleveIndexMapping() (mapping.IndexMapping, error) {
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
numericFieldMapping := bleve.NewNumericFieldMapping()
|
||||
numericFieldMapping.IncludeInAll = false
|
||||
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
|
||||
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
textFieldMapping.IncludeInAll = false
|
||||
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
|
||||
|
||||
fileNamedMapping := bleve.NewTextFieldMapping()
|
||||
fileNamedMapping.IncludeInAll = false
|
||||
fileNamedMapping.Analyzer = filenameIndexerAnalyzer
|
||||
docMapping.AddFieldMappingsAt("Filename", fileNamedMapping)
|
||||
|
||||
termFieldMapping := bleve.NewTextFieldMapping()
|
||||
termFieldMapping.IncludeInAll = false
|
||||
termFieldMapping.Analyzer = analyzer_keyword.Name
|
||||
docMapping.AddFieldMappingsAt("Language", termFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("CommitID", termFieldMapping)
|
||||
|
||||
timeFieldMapping := bleve.NewDateTimeFieldMapping()
|
||||
timeFieldMapping.IncludeInAll = false
|
||||
docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
|
||||
return nil, err
|
||||
} else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{
|
||||
"type": analyzer_custom.Name,
|
||||
"char_filters": []string{},
|
||||
"tokenizer": letter.Name,
|
||||
"token_filters": []string{unicodeNormalizeName, lowercase.Name},
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := mapping.AddCustomAnalyzer(filenameIndexerAnalyzer, map[string]any{
|
||||
"type": analyzer_custom.Name,
|
||||
"char_filters": []string{},
|
||||
"tokenizer": unicode.Name,
|
||||
"token_filters": []string{unicodeNormalizeName, path_filter.Name, lowercase.Name},
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mapping.DefaultAnalyzer = repoIndexerAnalyzer
|
||||
mapping.AddDocumentMapping(repoIndexerDocType, docMapping)
|
||||
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
|
||||
|
||||
return mapping, nil
|
||||
}
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer represents a bleve indexer implementation
|
||||
type Indexer struct {
|
||||
inner *inner_bleve.Indexer
|
||||
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new bleve local indexer
|
||||
func NewIndexer(indexDir string) *Indexer {
|
||||
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
|
||||
return &Indexer{
|
||||
Indexer: inner,
|
||||
inner: inner,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Indexer) addUpdate(ctx context.Context, catFileBatch git.CatFileBatch, commitSha string,
|
||||
update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch,
|
||||
) error {
|
||||
// Ignore vendored files in code search
|
||||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
|
||||
return nil
|
||||
}
|
||||
|
||||
size := update.Size
|
||||
|
||||
var err error
|
||||
if !update.Sized {
|
||||
var stdout string
|
||||
stdout, _, err = gitrepo.RunCmdString(ctx, repo, gitcmd.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
||||
return fmt.Errorf("misformatted git cat-file output: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if size > setting.Indexer.MaxIndexerFileSize {
|
||||
return b.addDelete(update.Filename, repo, batch)
|
||||
}
|
||||
|
||||
info, batchReader, err := catFileBatch.QueryContent(update.BlobSha)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fileContents, err := io.ReadAll(io.LimitReader(batchReader, info.Size))
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !typesniffer.DetectContentType(fileContents).IsText() {
|
||||
// FIXME: UTF-16 files will probably fail here
|
||||
// Even if the file is not recognized as a "text file", we could still put its name into the indexers to make the filename become searchable, while leave the content to empty.
|
||||
fileContents = nil
|
||||
}
|
||||
|
||||
if _, err = batchReader.Discard(1); err != nil {
|
||||
return err
|
||||
}
|
||||
id := internal.FilenameIndexerID(repo.ID, update.Filename)
|
||||
return batch.Index(id, &RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
CommitID: commitSha,
|
||||
Filename: update.Filename,
|
||||
Content: string(charset.ToUTF8DropErrors(fileContents)),
|
||||
Language: analyze.GetCodeLanguage(update.Filename, fileContents),
|
||||
UpdatedAt: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error {
|
||||
id := internal.FilenameIndexerID(repo.ID, filename)
|
||||
return batch.Delete(id)
|
||||
}
|
||||
|
||||
// Index indexes the data
|
||||
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
|
||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||
if len(changes.Updates) > 0 {
|
||||
catfileBatch, err := gitrepo.NewBatch(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer catfileBatch.Close()
|
||||
|
||||
for _, update := range changes.Updates {
|
||||
if err := b.addUpdate(ctx, catfileBatch, sha, update, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, filename := range changes.RemovedFilenames {
|
||||
if err := b.addDelete(filename, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return batch.Flush()
|
||||
}
|
||||
|
||||
// Delete deletes indexes by ids
|
||||
func (b *Indexer) Delete(_ context.Context, repoID int64) error {
|
||||
query := inner_bleve.NumericEqualityQuery(repoID, "RepoID")
|
||||
searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
|
||||
result, err := b.inner.Indexer.Search(searchRequest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||
for _, hit := range result.Hits {
|
||||
if err = batch.Delete(hit.ID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return batch.Flush()
|
||||
}
|
||||
|
||||
// Search searches for files in the specified repo.
|
||||
// Returns the matching file-paths
|
||||
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
var (
|
||||
indexerQuery query.Query
|
||||
keywordQuery query.Query
|
||||
contentQuery query.Query
|
||||
)
|
||||
|
||||
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
|
||||
pathQuery.FieldVal = "Filename"
|
||||
pathQuery.SetBoost(10)
|
||||
|
||||
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||
if searchMode == indexer.SearchModeExact {
|
||||
// 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery
|
||||
q := bleve.NewMatchPhraseQuery(opts.Keyword)
|
||||
q.Analyzer = repoIndexerAnalyzer
|
||||
q.FieldVal = "Content"
|
||||
contentQuery = q
|
||||
} else /* words */ {
|
||||
q := bleve.NewMatchQuery(opts.Keyword)
|
||||
q.FieldVal = "Content"
|
||||
q.Analyzer = repoIndexerAnalyzer
|
||||
if searchMode == indexer.SearchModeFuzzy {
|
||||
// this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
|
||||
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
||||
} else {
|
||||
q.Operator = query.MatchQueryOperatorAnd
|
||||
}
|
||||
contentQuery = q
|
||||
}
|
||||
|
||||
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
|
||||
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
|
||||
for _, repoID := range opts.RepoIDs {
|
||||
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
|
||||
}
|
||||
|
||||
indexerQuery = bleve.NewConjunctionQuery(
|
||||
bleve.NewDisjunctionQuery(repoQueries...),
|
||||
keywordQuery,
|
||||
)
|
||||
} else {
|
||||
indexerQuery = keywordQuery
|
||||
}
|
||||
|
||||
// Save for reuse without language filter
|
||||
facetQuery := indexerQuery
|
||||
if len(opts.Language) > 0 {
|
||||
languageQuery := bleve.NewMatchQuery(opts.Language)
|
||||
languageQuery.FieldVal = "Language"
|
||||
languageQuery.Analyzer = analyzer_keyword.Name
|
||||
|
||||
indexerQuery = bleve.NewConjunctionQuery(
|
||||
indexerQuery,
|
||||
languageQuery,
|
||||
)
|
||||
}
|
||||
|
||||
from, pageSize := opts.GetSkipTake()
|
||||
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
|
||||
searchRequest.Fields = []string{"Content", "Filename", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
||||
searchRequest.IncludeLocations = true
|
||||
|
||||
if len(opts.Language) == 0 {
|
||||
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
|
||||
}
|
||||
|
||||
searchRequest.SortBy([]string{"-_score", "UpdatedAt"})
|
||||
|
||||
result, err := b.inner.Indexer.SearchInContext(ctx, searchRequest)
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
total := int64(result.Total)
|
||||
|
||||
searchResults := make([]*internal.SearchResult, len(result.Hits))
|
||||
for i, hit := range result.Hits {
|
||||
startIndex, endIndex := -1, -1
|
||||
for _, locations := range hit.Locations["Content"] {
|
||||
location := locations[0]
|
||||
locationStart := int(location.Start)
|
||||
locationEnd := int(location.End)
|
||||
if startIndex < 0 || locationStart < startIndex {
|
||||
startIndex = locationStart
|
||||
}
|
||||
if endIndex < 0 || locationEnd > endIndex {
|
||||
endIndex = locationEnd
|
||||
}
|
||||
}
|
||||
if len(hit.Locations["Filename"]) > 0 {
|
||||
startIndex, endIndex = internal.FilenameMatchIndexPos(hit.Fields["Content"].(string))
|
||||
}
|
||||
|
||||
language := hit.Fields["Language"].(string)
|
||||
var updatedUnix timeutil.TimeStamp
|
||||
if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil {
|
||||
updatedUnix = timeutil.TimeStamp(t.Unix())
|
||||
}
|
||||
searchResults[i] = &internal.SearchResult{
|
||||
RepoID: int64(hit.Fields["RepoID"].(float64)),
|
||||
StartIndex: startIndex,
|
||||
EndIndex: endIndex,
|
||||
Filename: internal.FilenameOfIndexerID(hit.ID),
|
||||
Content: hit.Fields["Content"].(string),
|
||||
CommitID: hit.Fields["CommitID"].(string),
|
||||
UpdatedUnix: updatedUnix,
|
||||
Language: language,
|
||||
Color: enry.GetColor(language),
|
||||
}
|
||||
}
|
||||
|
||||
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
|
||||
if len(opts.Language) > 0 {
|
||||
// Use separate query to go get all language counts
|
||||
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
|
||||
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
||||
facetRequest.IncludeLocations = true
|
||||
facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
|
||||
|
||||
if result, err = b.inner.Indexer.Search(facetRequest); err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
}
|
||||
languagesFacet := result.Facets["languages"]
|
||||
for _, term := range languagesFacet.Terms.Terms() {
|
||||
if len(term.Term) == 0 {
|
||||
continue
|
||||
}
|
||||
searchResultLanguages = append(searchResultLanguages, &internal.SearchResultLanguages{
|
||||
Language: term.Term,
|
||||
Color: enry.GetColor(term.Term),
|
||||
Count: term.Count,
|
||||
})
|
||||
}
|
||||
return total, searchResults, searchResultLanguages, nil
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package path
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const (
|
||||
Name = "gitea/path"
|
||||
)
|
||||
|
||||
type TokenFilter struct{}
|
||||
|
||||
func NewTokenFilter() *TokenFilter {
|
||||
return &TokenFilter{}
|
||||
}
|
||||
|
||||
func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewTokenFilter(), nil
|
||||
}
|
||||
|
||||
func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
if len(input) == 1 {
|
||||
// if there is only one token, we don't need to generate the reversed chain
|
||||
return generatePathTokens(input, false)
|
||||
}
|
||||
|
||||
normal := generatePathTokens(input, false)
|
||||
reversed := generatePathTokens(input, true)
|
||||
|
||||
return append(normal, reversed...)
|
||||
}
|
||||
|
||||
// Generates path tokens from the input tokens.
|
||||
// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component
|
||||
// in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
|
||||
//
|
||||
// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
|
||||
// to efficiently search for filenames without supplying the fullpath.
|
||||
func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
|
||||
terms := make([]string, 0, len(input))
|
||||
longestTerm := 0
|
||||
|
||||
if reversed {
|
||||
slices.Reverse(input)
|
||||
}
|
||||
|
||||
for i := range input {
|
||||
var sb strings.Builder
|
||||
sb.Write(input[0].Term)
|
||||
|
||||
for j := 1; j < i; j++ {
|
||||
sb.WriteString("/")
|
||||
sb.Write(input[j].Term)
|
||||
}
|
||||
|
||||
term := sb.String()
|
||||
|
||||
if longestTerm < len(term) {
|
||||
longestTerm = len(term)
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
}
|
||||
|
||||
output := make(analysis.TokenStream, 0, len(terms))
|
||||
|
||||
for _, term := range terms {
|
||||
var start, end int
|
||||
|
||||
if reversed {
|
||||
start = 0
|
||||
end = len(term)
|
||||
} else {
|
||||
start = longestTerm - len(term)
|
||||
end = longestTerm
|
||||
}
|
||||
|
||||
token := analysis.Token{
|
||||
Position: 1,
|
||||
Start: start,
|
||||
End: end,
|
||||
Type: analysis.AlphaNumeric,
|
||||
Term: []byte(term),
|
||||
}
|
||||
|
||||
output = append(output, &token)
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
func init() {
|
||||
// FIXME: move it to the bleve's init function, but do not call it in global init
|
||||
err := registry.RegisterTokenFilter(Name, TokenFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package path
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type Scenario struct {
|
||||
Input string
|
||||
Tokens []string
|
||||
}
|
||||
|
||||
func TestTokenFilter(t *testing.T) {
|
||||
scenarios := []struct {
|
||||
Input string
|
||||
Terms []string
|
||||
}{
|
||||
{
|
||||
Input: "Dockerfile",
|
||||
Terms: []string{"Dockerfile"},
|
||||
},
|
||||
{
|
||||
Input: "Dockerfile.rootless",
|
||||
Terms: []string{"Dockerfile.rootless"},
|
||||
},
|
||||
{
|
||||
Input: "a/b/c/Dockerfile.rootless",
|
||||
Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
|
||||
},
|
||||
{
|
||||
Input: "",
|
||||
Terms: []string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
|
||||
terms := extractTerms(scenario.Input)
|
||||
|
||||
assert.Len(t, terms, len(scenario.Terms))
|
||||
|
||||
for _, term := range terms {
|
||||
assert.Contains(t, scenario.Terms, term)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func extractTerms(input string) []string {
|
||||
tokens := tokenize(input)
|
||||
filteredTokens := filter(tokens)
|
||||
terms := make([]string, 0, len(filteredTokens))
|
||||
|
||||
for _, token := range filteredTokens {
|
||||
terms = append(terms, string(token.Term))
|
||||
}
|
||||
|
||||
return terms
|
||||
}
|
||||
|
||||
func filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
filter := NewTokenFilter()
|
||||
return filter.Filter(input)
|
||||
}
|
||||
|
||||
func tokenize(input string) analysis.TokenStream {
|
||||
tokenizer := unicode.NewUnicodeTokenizer()
|
||||
return tokenizer.Tokenize([]byte(input))
|
||||
}
|
||||
@@ -0,0 +1,405 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/analyze"
|
||||
"gitea.dev/modules/charset"
|
||||
"gitea.dev/modules/git"
|
||||
"gitea.dev/modules/git/gitcmd"
|
||||
"gitea.dev/modules/gitrepo"
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
es "gitea.dev/modules/indexer/internal/elasticsearch"
|
||||
"gitea.dev/modules/json"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/timeutil"
|
||||
"gitea.dev/modules/typesniffer"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
)
|
||||
|
||||
const esRepoIndexerLatestVersion = 3
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer implements Indexer interface
|
||||
type Indexer struct {
|
||||
*es.Indexer
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new elasticsearch indexer
|
||||
func NewIndexer(url, indexerName string) *Indexer {
|
||||
return &Indexer{Indexer: es.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)}
|
||||
}
|
||||
|
||||
const (
|
||||
defaultMapping = `{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"content_analyzer": {
|
||||
"tokenizer": "content_tokenizer",
|
||||
"filter" : ["lowercase"]
|
||||
},
|
||||
"filename_path_analyzer": {
|
||||
"tokenizer": "path_tokenizer"
|
||||
},
|
||||
"reversed_filename_path_analyzer": {
|
||||
"tokenizer": "reversed_path_tokenizer"
|
||||
}
|
||||
},
|
||||
"tokenizer": {
|
||||
"content_tokenizer": {
|
||||
"type": "simple_pattern_split",
|
||||
"pattern": "[^a-zA-Z0-9]"
|
||||
},
|
||||
"path_tokenizer": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/"
|
||||
},
|
||||
"reversed_path_tokenizer": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/",
|
||||
"reverse": true
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"repo_id": {
|
||||
"type": "long",
|
||||
"index": true
|
||||
},
|
||||
"filename": {
|
||||
"type": "text",
|
||||
"term_vector": "with_positions_offsets",
|
||||
"index": true,
|
||||
"fields": {
|
||||
"path": {
|
||||
"type": "text",
|
||||
"analyzer": "reversed_filename_path_analyzer"
|
||||
},
|
||||
"path_reversed": {
|
||||
"type": "text",
|
||||
"analyzer": "filename_path_analyzer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"content": {
|
||||
"type": "text",
|
||||
"term_vector": "with_positions_offsets",
|
||||
"index": true,
|
||||
"analyzer": "content_analyzer"
|
||||
},
|
||||
"commit_id": {
|
||||
"type": "keyword",
|
||||
"index": true
|
||||
},
|
||||
"language": {
|
||||
"type": "keyword",
|
||||
"index": true
|
||||
},
|
||||
"updated_at": {
|
||||
"type": "long",
|
||||
"index": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
)
|
||||
|
||||
func (b *Indexer) addUpdate(ctx context.Context, catFileBatch git.CatFileBatch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]es.BulkOp, error) {
|
||||
// Ignore vendored files in code search
|
||||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
size := update.Size
|
||||
var err error
|
||||
if !update.Sized {
|
||||
var stdout string
|
||||
stdout, _, err = gitrepo.RunCmdString(ctx, repo, gitcmd.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
||||
return nil, fmt.Errorf("misformatted git cat-file output: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
id := internal.FilenameIndexerID(repo.ID, update.Filename)
|
||||
if size > setting.Indexer.MaxIndexerFileSize {
|
||||
return []es.BulkOp{es.DeleteOp(id)}, nil
|
||||
}
|
||||
|
||||
info, batchReader, err := catFileBatch.QueryContent(update.BlobSha)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fileContents, err := io.ReadAll(io.LimitReader(batchReader, info.Size))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if !typesniffer.DetectContentType(fileContents).IsText() {
|
||||
// FIXME: UTF-16 files will probably fail here
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if _, err = batchReader.Discard(1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return []es.BulkOp{es.IndexOp(id, map[string]any{
|
||||
"repo_id": repo.ID,
|
||||
"filename": update.Filename,
|
||||
"content": string(charset.ToUTF8DropErrors(fileContents)),
|
||||
"commit_id": sha,
|
||||
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
|
||||
"updated_at": timeutil.TimeStampNow(),
|
||||
})}, nil
|
||||
}
|
||||
|
||||
func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) es.BulkOp {
|
||||
return es.DeleteOp(internal.FilenameIndexerID(repo.ID, filename))
|
||||
}
|
||||
|
||||
// Index will save the index data
|
||||
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
|
||||
ops := make([]es.BulkOp, 0)
|
||||
if len(changes.Updates) > 0 {
|
||||
batch, err := gitrepo.NewBatch(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer batch.Close()
|
||||
|
||||
for _, update := range changes.Updates {
|
||||
updateOps, err := b.addUpdate(ctx, batch, sha, update, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(updateOps) > 0 {
|
||||
ops = append(ops, updateOps...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, filename := range changes.RemovedFilenames {
|
||||
ops = append(ops, b.addDelete(filename, repo))
|
||||
}
|
||||
|
||||
if len(ops) > 0 {
|
||||
esBatchSize := 50
|
||||
|
||||
for i := 0; i < len(ops); i += esBatchSize {
|
||||
if err := b.Bulk(ctx, ops[i:min(i+esBatchSize, len(ops))]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete entries by repoId
|
||||
func (b *Indexer) Delete(ctx context.Context, repoID int64) error {
|
||||
if err := b.doDelete(ctx, repoID); err != nil {
|
||||
// Maybe there is a conflict during the delete operation, so we should retry after a refresh
|
||||
log.Warn("Deletion of entries of repo %v within index %v was erroneous: %v. Trying to refresh index before trying again", repoID, b.VersionedIndexName(), err)
|
||||
if err := b.Refresh(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := b.doDelete(ctx, repoID); err != nil {
|
||||
log.Error("Could not delete entries of repo %v within index %v", repoID, b.VersionedIndexName())
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete entries by repoId
|
||||
func (b *Indexer) doDelete(ctx context.Context, repoID int64) error {
|
||||
return b.DeleteByQuery(ctx, es.TermsQuery("repo_id", repoID))
|
||||
}
|
||||
|
||||
// contentMatchIndexPos find words positions for start and the following end on content. It will
|
||||
// return the beginning position of the first start and the ending position of the
|
||||
// first end following the start string.
|
||||
// If not found any of the positions, it will return -1, -1.
|
||||
func contentMatchIndexPos(content, start, end string) (int, int) {
|
||||
startIdx := strings.Index(content, start)
|
||||
if startIdx < 0 {
|
||||
return -1, -1
|
||||
}
|
||||
endIdx := strings.Index(content[startIdx+len(start):], end)
|
||||
if endIdx < 0 {
|
||||
return -1, -1
|
||||
}
|
||||
return startIdx, (startIdx + len(start) + endIdx + len(end)) - 9 // remove the length <em></em> since we give Content the original data
|
||||
}
|
||||
|
||||
func convertResult(searchResult *es.SearchResponse, kw string, pageSize int) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
hits := make([]*internal.SearchResult, 0, pageSize)
|
||||
for _, hit := range searchResult.Hits {
|
||||
repoID, fileName := internal.ParseIndexerID(hit.ID)
|
||||
res := make(map[string]any)
|
||||
if err := json.Unmarshal(hit.Source, &res); err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
// FIXME: There is no way to get the position the keyword on the content currently on the same request.
|
||||
// So we get it from content, this may made the query slower. See
|
||||
// https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
|
||||
var startIndex, endIndex int
|
||||
if c, ok := hit.Highlight["filename"]; ok && len(c) > 0 {
|
||||
startIndex, endIndex = internal.FilenameMatchIndexPos(res["content"].(string))
|
||||
} else if c, ok := hit.Highlight["content"]; ok && len(c) > 0 {
|
||||
// FIXME: Since the highlighting content will include <em> and </em> for the keywords,
|
||||
// now we should find the positions. But how to avoid html content which contains the
|
||||
// <em> and </em> tags? If elastic search has handled that?
|
||||
startIndex, endIndex = contentMatchIndexPos(c[0], "<em>", "</em>")
|
||||
if startIndex == -1 {
|
||||
panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
|
||||
}
|
||||
} else {
|
||||
panic(fmt.Sprintf("2===%#v", hit.Highlight))
|
||||
}
|
||||
|
||||
language := res["language"].(string)
|
||||
|
||||
hits = append(hits, &internal.SearchResult{
|
||||
RepoID: repoID,
|
||||
Filename: fileName,
|
||||
CommitID: res["commit_id"].(string),
|
||||
Content: res["content"].(string),
|
||||
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
|
||||
Language: language,
|
||||
StartIndex: startIndex,
|
||||
EndIndex: endIndex,
|
||||
Color: enry.GetColor(language),
|
||||
})
|
||||
}
|
||||
|
||||
return searchResult.Total, hits, extractAggs(searchResult), nil
|
||||
}
|
||||
|
||||
func extractAggs(searchResult *es.SearchResponse) []*internal.SearchResultLanguages {
|
||||
buckets, found := searchResult.Aggregations["language"]
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
|
||||
for _, bucket := range buckets {
|
||||
// language is mapped as keyword so the key is always a string; if the
|
||||
// mapping ever changes, skip rather than emit an empty-language bucket.
|
||||
key, ok := bucket.Key.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
searchResultLanguages = append(searchResultLanguages, &internal.SearchResultLanguages{
|
||||
Language: key,
|
||||
Color: enry.GetColor(key),
|
||||
Count: int(bucket.DocCount),
|
||||
})
|
||||
}
|
||||
return searchResultLanguages
|
||||
}
|
||||
|
||||
// Search searches for codes and language stats by given conditions.
|
||||
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||
contentQuery := es.Query(es.NewMultiMatchQuery(opts.Keyword, "content").Type(es.MultiMatchTypeBestFields).Operator("and"))
|
||||
if searchMode == indexer.SearchModeExact {
|
||||
contentQuery = es.MatchPhraseQuery("content", opts.Keyword)
|
||||
}
|
||||
kwQuery := es.NewBoolQuery().Should(
|
||||
contentQuery,
|
||||
es.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(es.MultiMatchTypePhrasePrefix),
|
||||
)
|
||||
query := es.NewBoolQuery().Must(kwQuery)
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
query.Must(es.TermsQuery("repo_id", es.ToAnySlice(opts.RepoIDs)...))
|
||||
}
|
||||
|
||||
start, pageSize := opts.GetSkipTake()
|
||||
kw := "<em>" + opts.Keyword + "</em>"
|
||||
languageAggs := map[string]any{
|
||||
"language": map[string]any{
|
||||
"terms": map[string]any{
|
||||
"field": "language",
|
||||
"size": 10,
|
||||
"order": map[string]any{"_count": "desc"},
|
||||
},
|
||||
},
|
||||
}
|
||||
// number_of_fragments=0 returns the full highlighted content (no fragmentation).
|
||||
highlight := map[string]any{
|
||||
"fields": map[string]any{
|
||||
"content": map[string]any{},
|
||||
"filename": map[string]any{},
|
||||
},
|
||||
"number_of_fragments": 0,
|
||||
"type": "fvh",
|
||||
}
|
||||
sort := []es.SortField{
|
||||
{Field: "_score", Desc: true},
|
||||
{Field: "updated_at", Desc: false},
|
||||
}
|
||||
|
||||
if len(opts.Language) == 0 {
|
||||
resp, err := b.Indexer.Search(ctx, es.SearchRequest{
|
||||
Query: query,
|
||||
Sort: sort,
|
||||
From: start,
|
||||
Size: pageSize,
|
||||
TrackTotal: true,
|
||||
Aggregations: languageAggs,
|
||||
Highlight: highlight,
|
||||
})
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
return convertResult(resp, kw, pageSize)
|
||||
}
|
||||
|
||||
countResp, err := b.Indexer.Search(ctx, es.SearchRequest{
|
||||
Query: query,
|
||||
Size: 0, // stats only
|
||||
TrackTotal: true,
|
||||
Aggregations: languageAggs,
|
||||
})
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
query.Must(es.MatchQuery("language", opts.Language))
|
||||
resp, err := b.Indexer.Search(ctx, es.SearchRequest{
|
||||
Query: query,
|
||||
Sort: sort,
|
||||
From: start,
|
||||
Size: pageSize,
|
||||
TrackTotal: true,
|
||||
Highlight: highlight,
|
||||
})
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
total, hits, _, err := convertResult(resp, kw, pageSize)
|
||||
return total, hits, extractAggs(countResp), err
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIndexPos(t *testing.T) {
|
||||
startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end")
|
||||
assert.Equal(t, 11, startIdx)
|
||||
assert.Equal(t, 15, endIdx)
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package code
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/git"
|
||||
"gitea.dev/modules/git/gitcmd"
|
||||
"gitea.dev/modules/gitrepo"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/setting"
|
||||
)
|
||||
|
||||
func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) {
|
||||
stdout, _, err := gitrepo.RunCmdString(ctx, repo, gitcmd.NewCommand("show-ref", "-s").AddDynamicArguments(git.BranchPrefix+repo.DefaultBranch))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(stdout), nil
|
||||
}
|
||||
|
||||
// getRepoChanges returns changes to repo since last indexer update
|
||||
func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
|
||||
status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
needGenesis := len(status.CommitSha) == 0
|
||||
if !needGenesis {
|
||||
hasAncestorCmd := gitcmd.NewCommand("merge-base").AddDynamicArguments(status.CommitSha, revision)
|
||||
stdout, _, _ := gitrepo.RunCmdString(ctx, repo, hasAncestorCmd) // FIXME: error is not handled
|
||||
needGenesis = len(stdout) == 0
|
||||
}
|
||||
|
||||
if needGenesis {
|
||||
return genesisChanges(ctx, repo, revision)
|
||||
}
|
||||
return nonGenesisChanges(ctx, repo, revision)
|
||||
}
|
||||
|
||||
func isIndexable(entry *git.TreeEntry) bool {
|
||||
if !entry.IsRegular() && !entry.IsExecutable() {
|
||||
return false
|
||||
}
|
||||
name := strings.ToLower(entry.Name())
|
||||
for _, g := range setting.Indexer.ExcludePatterns {
|
||||
if g.Match(name) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, g := range setting.Indexer.IncludePatterns {
|
||||
if g.Match(name) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return len(setting.Indexer.IncludePatterns) == 0
|
||||
}
|
||||
|
||||
// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
|
||||
func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) {
|
||||
entries, err := git.ParseTreeEntries(stdout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
idxCount := 0
|
||||
updates := make([]internal.FileUpdate, len(entries))
|
||||
for _, entry := range entries {
|
||||
if isIndexable(entry) {
|
||||
updates[idxCount] = internal.FileUpdate{
|
||||
Filename: entry.Name(),
|
||||
BlobSha: entry.ID.String(),
|
||||
Size: entry.Size(),
|
||||
Sized: true,
|
||||
}
|
||||
idxCount++
|
||||
}
|
||||
}
|
||||
return updates[:idxCount], nil
|
||||
}
|
||||
|
||||
// genesisChanges get changes to add repo to the indexer for the first time
|
||||
func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
|
||||
var changes internal.RepoChanges
|
||||
stdout, _, runErr := gitrepo.RunCmdBytes(ctx, repo, gitcmd.NewCommand("ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision))
|
||||
if runErr != nil {
|
||||
return nil, runErr
|
||||
}
|
||||
|
||||
var err error
|
||||
changes.Updates, err = parseGitLsTreeOutput(stdout)
|
||||
return &changes, err
|
||||
}
|
||||
|
||||
// nonGenesisChanges get changes since the previous indexer update
|
||||
func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
|
||||
diffCmd := gitcmd.NewCommand("diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
|
||||
stdout, _, runErr := gitrepo.RunCmdString(ctx, repo, diffCmd)
|
||||
if runErr != nil {
|
||||
// previous commit sha may have been removed by a force push, so
|
||||
// try rebuilding from scratch
|
||||
log.Warn("git diff: %v", runErr)
|
||||
if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return genesisChanges(ctx, repo, revision)
|
||||
}
|
||||
|
||||
var changes internal.RepoChanges
|
||||
var err error
|
||||
updatedFilenames := make([]string, 0, 10)
|
||||
|
||||
updateChanges := func() error {
|
||||
cmd := gitcmd.NewCommand("ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
|
||||
AddDashesAndList(updatedFilenames...)
|
||||
lsTreeStdout, _, err := gitrepo.RunCmdBytes(ctx, repo, cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
updates, err1 := parseGitLsTreeOutput(lsTreeStdout)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
changes.Updates = append(changes.Updates, updates...)
|
||||
return nil
|
||||
}
|
||||
lines := strings.SplitSeq(stdout, "\n")
|
||||
for line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(line, "\t")
|
||||
if len(fields) < 2 {
|
||||
log.Warn("Unparseable output for diff --name-status: `%s`)", line)
|
||||
continue
|
||||
}
|
||||
filename := fields[1]
|
||||
if len(filename) == 0 {
|
||||
continue
|
||||
} else if filename[0] == '"' {
|
||||
filename, err = strconv.Unquote(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
switch status := fields[0][0]; status {
|
||||
case 'M', 'A':
|
||||
updatedFilenames = append(updatedFilenames, filename)
|
||||
case 'D':
|
||||
changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
|
||||
case 'R', 'C':
|
||||
if len(fields) < 3 {
|
||||
log.Warn("Unparseable output for diff --name-status: `%s`)", line)
|
||||
continue
|
||||
}
|
||||
dest := fields[2]
|
||||
if len(dest) == 0 {
|
||||
log.Warn("Unparseable output for diff --name-status: `%s`)", line)
|
||||
continue
|
||||
}
|
||||
if dest[0] == '"' {
|
||||
dest, err = strconv.Unquote(dest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if status == 'R' {
|
||||
changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
|
||||
}
|
||||
updatedFilenames = append(updatedFilenames, dest)
|
||||
default:
|
||||
log.Warn("Unrecognized status: %c (line=%s)", status, line)
|
||||
}
|
||||
|
||||
// According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information
|
||||
// the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30
|
||||
if len(updatedFilenames) >= 30 {
|
||||
if err := updateChanges(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
updatedFilenames = updatedFilenames[0:0]
|
||||
}
|
||||
}
|
||||
|
||||
if len(updatedFilenames) > 0 {
|
||||
if err := updateChanges(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &changes, err
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright 2025 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package gitgrep
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.dev/modules/git"
|
||||
"gitea.dev/modules/indexer"
|
||||
code_indexer "gitea.dev/modules/indexer/code"
|
||||
"gitea.dev/modules/setting"
|
||||
)
|
||||
|
||||
func indexSettingToGitGrepPathspecList() (list []string) {
|
||||
for _, expr := range setting.Indexer.IncludePatterns {
|
||||
list = append(list, ":(glob)"+expr.PatternString())
|
||||
}
|
||||
for _, expr := range setting.Indexer.ExcludePatterns {
|
||||
list = append(list, ":(glob,exclude)"+expr.PatternString())
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int64, err error) {
|
||||
grepMode := git.GrepModeWords
|
||||
switch searchMode {
|
||||
case indexer.SearchModeExact:
|
||||
grepMode = git.GrepModeExact
|
||||
case indexer.SearchModeRegexp:
|
||||
grepMode = git.GrepModeRegexp
|
||||
}
|
||||
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
|
||||
ContextLineNumber: 1,
|
||||
GrepMode: grepMode,
|
||||
RefName: ref.String(),
|
||||
PathspecList: indexSettingToGitGrepPathspecList(),
|
||||
})
|
||||
if err != nil {
|
||||
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
|
||||
return nil, 0, fmt.Errorf("git.GrepSearch: %w", err)
|
||||
}
|
||||
commitID, err := gitRepo.GetRefCommitID(ref.String())
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err)
|
||||
}
|
||||
|
||||
total = int64(len(res))
|
||||
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
|
||||
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
|
||||
res = res[pageStart:pageEnd]
|
||||
for _, r := range res {
|
||||
searchResults = append(searchResults, &code_indexer.Result{
|
||||
RepoID: repoID,
|
||||
Filename: r.Filename,
|
||||
CommitID: commitID,
|
||||
// UpdatedUnix: not supported yet
|
||||
// Language: not supported yet
|
||||
// Color: not supported yet
|
||||
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
|
||||
})
|
||||
}
|
||||
return searchResults, total, nil
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package gitgrep
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIndexSettingToGitGrepPathspecList(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("a"))()
|
||||
defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("b"))()
|
||||
assert.Equal(t, []string{":(glob)a", ":(glob,exclude)b"}, indexSettingToGitGrepPathspecList())
|
||||
}
|
||||
@@ -0,0 +1,314 @@
|
||||
// Copyright 2016 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package code
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"runtime/pprof"
|
||||
"slices"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/code/bleve"
|
||||
"gitea.dev/modules/indexer/code/elasticsearch"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/process"
|
||||
"gitea.dev/modules/queue"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
var (
|
||||
indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData]
|
||||
// globalIndexer is the global indexer, it cannot be nil.
|
||||
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
|
||||
// So it's always safe use it as *globalIndexer.Load() and call its methods.
|
||||
globalIndexer atomic.Pointer[internal.Indexer]
|
||||
)
|
||||
|
||||
func init() {
|
||||
dummyIndexer := internal.NewDummyIndexer()
|
||||
globalIndexer.Store(&dummyIndexer)
|
||||
}
|
||||
|
||||
func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
|
||||
repo, err := repo_model.GetRepositoryByID(ctx, repoID)
|
||||
if repo_model.IsErrRepoNotExist(err) {
|
||||
return indexer.Delete(ctx, repoID)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
repoTypes := setting.Indexer.RepoIndexerRepoTypes
|
||||
|
||||
if len(repoTypes) == 0 {
|
||||
repoTypes = []string{"sources"}
|
||||
}
|
||||
|
||||
// skip forks from being indexed if unit is not present
|
||||
if !slices.Contains(repoTypes, "forks") && repo.IsFork {
|
||||
return nil
|
||||
}
|
||||
|
||||
// skip mirrors from being indexed if unit is not present
|
||||
if !slices.Contains(repoTypes, "mirrors") && repo.IsMirror {
|
||||
return nil
|
||||
}
|
||||
|
||||
// skip templates from being indexed if unit is not present
|
||||
if !slices.Contains(repoTypes, "templates") && repo.IsTemplate {
|
||||
return nil
|
||||
}
|
||||
|
||||
// skip regular repos from being indexed if unit is not present
|
||||
if !slices.Contains(repoTypes, "sources") && !repo.IsFork && !repo.IsMirror && !repo.IsTemplate {
|
||||
return nil
|
||||
}
|
||||
|
||||
sha, err := getDefaultBranchSha(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
changes, err := getRepoChanges(ctx, repo, sha)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if changes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := indexer.Index(ctx, repo, sha, changes); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return repo_model.UpdateIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode, sha)
|
||||
}
|
||||
|
||||
// Init initialize the repo indexer
|
||||
func Init() {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
(*globalIndexer.Load()).Close()
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel, finished := process.GetManager().AddTypedContext(context.Background(), "Service: CodeIndexer", process.SystemProcessType, false)
|
||||
|
||||
graceful.GetManager().RunAtTerminate(func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
cancel()
|
||||
log.Debug("Closing repository indexer")
|
||||
(*globalIndexer.Load()).Close()
|
||||
log.Info("PID: %d Repository Indexer closed", os.Getpid())
|
||||
finished()
|
||||
})
|
||||
|
||||
waitChannel := make(chan time.Duration, 1)
|
||||
|
||||
// Create the Queue
|
||||
switch setting.Indexer.RepoType {
|
||||
case "bleve", "elasticsearch":
|
||||
handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) {
|
||||
indexer := *globalIndexer.Load()
|
||||
for _, indexerData := range items {
|
||||
log.Trace("IndexerData Process Repo: %d", indexerData.RepoID)
|
||||
if err := index(ctx, indexer, indexerData.RepoID); err != nil {
|
||||
if !setting.IsInTesting {
|
||||
log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil // do not re-queue the failed items, otherwise some broken repo will block the queue
|
||||
}
|
||||
|
||||
indexerQueue = queue.CreateUniqueQueue(ctx, "code_indexer", handler)
|
||||
if indexerQueue == nil {
|
||||
log.Fatal("Unable to create codes indexer queue")
|
||||
}
|
||||
default:
|
||||
log.Fatal("Unknown codes indexer type; %s", setting.Indexer.RepoType)
|
||||
}
|
||||
|
||||
go func() {
|
||||
pprof.SetGoroutineLabels(ctx)
|
||||
start := time.Now()
|
||||
var (
|
||||
rIndexer internal.Indexer
|
||||
existed bool
|
||||
err error
|
||||
)
|
||||
switch setting.Indexer.RepoType {
|
||||
case "bleve":
|
||||
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
|
||||
log.Error("The indexer files are likely corrupted and may need to be deleted")
|
||||
log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
|
||||
}
|
||||
}()
|
||||
|
||||
rIndexer = bleve.NewIndexer(setting.Indexer.RepoPath)
|
||||
existed, err = rIndexer.Init(ctx)
|
||||
if err != nil {
|
||||
cancel()
|
||||
(*globalIndexer.Load()).Close()
|
||||
close(waitChannel)
|
||||
log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
|
||||
}
|
||||
case "elasticsearch":
|
||||
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), util.SanitizeCredentialURLs(setting.Indexer.RepoConnStr))
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
|
||||
log.Error("The indexer files are likely corrupted and may need to be deleted")
|
||||
log.Error("You can completely remove the \"%s\" index to make Gitea recreate the indexes", util.SanitizeCredentialURLs(setting.Indexer.RepoConnStr))
|
||||
}
|
||||
}()
|
||||
|
||||
rIndexer = elasticsearch.NewIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName)
|
||||
existed, err = rIndexer.Init(ctx)
|
||||
if err != nil {
|
||||
cancel()
|
||||
(*globalIndexer.Load()).Close()
|
||||
close(waitChannel)
|
||||
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), util.SanitizeCredentialURLs(setting.Indexer.RepoConnStr), err)
|
||||
}
|
||||
|
||||
default:
|
||||
log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)
|
||||
}
|
||||
|
||||
globalIndexer.Store(&rIndexer)
|
||||
|
||||
// Start processing the queue
|
||||
go graceful.GetManager().RunWithCancel(indexerQueue)
|
||||
|
||||
if !existed { // populate the index because it's created for the first time
|
||||
go graceful.GetManager().RunWithShutdownContext(populateRepoIndexer)
|
||||
}
|
||||
select {
|
||||
case waitChannel <- time.Since(start):
|
||||
case <-graceful.GetManager().IsShutdown():
|
||||
}
|
||||
|
||||
close(waitChannel)
|
||||
}()
|
||||
|
||||
if setting.Indexer.StartupTimeout > 0 {
|
||||
go func() {
|
||||
pprof.SetGoroutineLabels(ctx)
|
||||
timeout := setting.Indexer.StartupTimeout
|
||||
if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
|
||||
timeout += setting.GracefulHammerTime
|
||||
}
|
||||
select {
|
||||
case <-graceful.GetManager().IsShutdown():
|
||||
log.Warn("Shutdown before Repository Indexer completed initialization")
|
||||
cancel()
|
||||
(*globalIndexer.Load()).Close()
|
||||
case duration, ok := <-waitChannel:
|
||||
if !ok {
|
||||
log.Warn("Repository Indexer Initialization failed")
|
||||
cancel()
|
||||
(*globalIndexer.Load()).Close()
|
||||
return
|
||||
}
|
||||
log.Info("Repository Indexer Initialization took %v", duration)
|
||||
case <-time.After(timeout):
|
||||
cancel()
|
||||
(*globalIndexer.Load()).Close()
|
||||
log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateRepoIndexer update a repository's entries in the indexer
|
||||
func UpdateRepoIndexer(repo *repo_model.Repository) {
|
||||
indexData := &internal.IndexerData{RepoID: repo.ID}
|
||||
if err := indexerQueue.Push(indexData); err != nil {
|
||||
log.Error("Update repo index data %v failed: %v", indexData, err)
|
||||
}
|
||||
}
|
||||
|
||||
// IsAvailable checks if issue indexer is available
|
||||
func IsAvailable(ctx context.Context) bool {
|
||||
return (*globalIndexer.Load()).Ping(ctx) == nil
|
||||
}
|
||||
|
||||
// populateRepoIndexer populate the repo indexer with pre-existing data. This
|
||||
// should only be run when the indexer is created for the first time.
|
||||
func populateRepoIndexer(ctx context.Context) {
|
||||
log.Info("Populating the repo indexer with existing repositories")
|
||||
|
||||
exist, err := db.IsTableNotEmpty("repository")
|
||||
if err != nil {
|
||||
log.Fatal("System error: %v", err)
|
||||
} else if !exist {
|
||||
return
|
||||
}
|
||||
|
||||
// if there is any existing repo indexer metadata in the DB, delete it
|
||||
// since we are starting afresh. Also, xorm requires deletes to have a
|
||||
// condition, and we want to delete everything, thus 1=1.
|
||||
if err := db.DeleteAllRecords("repo_indexer_status"); err != nil {
|
||||
log.Fatal("System error: %v", err)
|
||||
}
|
||||
|
||||
var maxRepoID int64
|
||||
if maxRepoID, err = db.GetMaxID("repository"); err != nil {
|
||||
log.Fatal("System error: %v", err)
|
||||
}
|
||||
|
||||
// start with the maximum existing repo ID and work backwards, so that we
|
||||
// don't include repos that are created after gitea starts; such repos will
|
||||
// already be added to the indexer, and we don't need to add them again.
|
||||
for maxRepoID > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info("Repository Indexer population shutdown before completion")
|
||||
return
|
||||
default:
|
||||
}
|
||||
ids, err := repo_model.GetUnindexedRepos(ctx, repo_model.RepoIndexerTypeCode, maxRepoID, 0, 50)
|
||||
if err != nil {
|
||||
log.Error("populateRepoIndexer: %v", err)
|
||||
return
|
||||
} else if len(ids) == 0 {
|
||||
break
|
||||
}
|
||||
for _, id := range ids {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info("Repository Indexer population shutdown before completion")
|
||||
return
|
||||
default:
|
||||
}
|
||||
if err := indexerQueue.Push(&internal.IndexerData{RepoID: id}); err != nil {
|
||||
log.Error("indexerQueue.Push: %v", err)
|
||||
return
|
||||
}
|
||||
maxRepoID = id - 1
|
||||
}
|
||||
}
|
||||
log.Info("Done (re)populating the repo indexer with existing repositories")
|
||||
}
|
||||
|
||||
func SupportedSearchModes() []indexer.SearchMode {
|
||||
gi := globalIndexer.Load()
|
||||
if gi == nil {
|
||||
return nil
|
||||
}
|
||||
return (*gi).SupportedSearchModes()
|
||||
}
|
||||
@@ -0,0 +1,352 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package code
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"slices"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
"gitea.dev/models/unittest"
|
||||
indexer_module "gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/code/bleve"
|
||||
"gitea.dev/modules/indexer/code/elasticsearch"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/test"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
_ "gitea.dev/models"
|
||||
_ "gitea.dev/models/actions"
|
||||
_ "gitea.dev/models/activities"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type codeSearchResult struct {
|
||||
Filename string
|
||||
Content string
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
unittest.MainTest(m)
|
||||
}
|
||||
|
||||
func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer))
|
||||
// Wait for the index to catch up: ES/OpenSearch make writes visible
|
||||
// only after a refresh (default interval: 1s). Bleve is synchronous
|
||||
// and passes on the first iteration.
|
||||
require.Eventually(t, func() bool {
|
||||
total, _, _, err := indexer.Search(t.Context(), &internal.SearchOptions{
|
||||
Keyword: "Description",
|
||||
Paginator: &db.ListOptions{Page: 1, PageSize: 1},
|
||||
})
|
||||
return err == nil && total > 0
|
||||
}, 10*time.Second, 100*time.Millisecond, "index did not become searchable")
|
||||
|
||||
keywords := []struct {
|
||||
RepoIDs []int64
|
||||
Keyword string
|
||||
Langs int
|
||||
SearchMode indexer_module.SearchModeType
|
||||
Results []codeSearchResult
|
||||
}{
|
||||
// Search for an exact match on the contents of a file
|
||||
// This scenario yields a single result (the file README.md on the repo '1')
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "Description",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "README.md",
|
||||
Content: "# repo1\n\nDescription for repo1",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for an exact match on the contents of a file within the repo '2'.
|
||||
// This scenario yields no results
|
||||
{
|
||||
RepoIDs: []int64{2},
|
||||
Keyword: "Description",
|
||||
Langs: 0,
|
||||
},
|
||||
// Search for an exact match on the contents of a file
|
||||
// This scenario yields a single result (the file README.md on the repo '1')
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "repo1",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "README.md",
|
||||
Content: "# repo1\n\nDescription for repo1",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for an exact match on the contents of a file within the repo '2'.
|
||||
// This scenario yields no results
|
||||
{
|
||||
RepoIDs: []int64{2},
|
||||
Keyword: "repo1",
|
||||
Langs: 0,
|
||||
},
|
||||
// Search for a non-existing term.
|
||||
// This scenario yields no results
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "non-exist",
|
||||
Langs: 0,
|
||||
},
|
||||
// Search for an exact match on the contents of a file within the repo '62'.
|
||||
// This scenario yields a single result (the file avocado.md on the repo '62')
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "pineaple",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "avocado.md",
|
||||
Content: "# repo1\n\npineaple pie of cucumber juice",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for an exact match on the filename within the repo '62'.
|
||||
// This scenario yields a single result (the file avocado.md on the repo '62')
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "avocado.md",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "avocado.md",
|
||||
Content: "# repo1\n\npineaple pie of cucumber juice",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for an partial match on the filename within the repo '62'.
|
||||
// This scenario yields a single result (the file avocado.md on the repo '62')
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "avo",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "avocado.md",
|
||||
Content: "# repo1\n\npineaple pie of cucumber juice",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on both the contents and the filenames within the repo '62'.
|
||||
// This scenario yields two results: the first result is based on the file (cucumber.md) while the second is based on the contents
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "cucumber",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "cucumber.md",
|
||||
Content: "Salad is good for your health",
|
||||
},
|
||||
{
|
||||
Filename: "avocado.md",
|
||||
Content: "# repo1\n\npineaple pie of cucumber juice",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on the filenames within the repo '62'.
|
||||
// This scenario yields two results (both are based on filename, the first one is an exact match)
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "ham",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "ham.md",
|
||||
Content: "This is also not cheese",
|
||||
},
|
||||
{
|
||||
Filename: "potato/ham.md",
|
||||
Content: "This is not cheese",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on the contents of files within the repo '62'.
|
||||
// This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one)
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "This is not cheese",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "potato/ham.md",
|
||||
Content: "This is not cheese",
|
||||
},
|
||||
{
|
||||
Filename: "ham.md",
|
||||
Content: "This is also not cheese",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on the contents of files regardless of case.
|
||||
{
|
||||
RepoIDs: nil,
|
||||
Keyword: "dESCRIPTION",
|
||||
Langs: 1,
|
||||
SearchMode: indexer_module.SearchModeFuzzy,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "README.md",
|
||||
Content: "# repo1\n\nDescription for repo1",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for an exact match on the filename within the repo '62' (case-insensitive).
|
||||
// This scenario yields a single result (the file avocado.md on the repo '62')
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "AVOCADO.MD",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "avocado.md",
|
||||
Content: "# repo1\n\npineaple pie of cucumber juice",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on the contents of files when the criteria are an expression.
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "console.log",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "example-file.js",
|
||||
Content: "console.log(\"Hello, World!\")",
|
||||
},
|
||||
},
|
||||
},
|
||||
// Search for matches on the contents of files when the criteria are parts of an expression.
|
||||
{
|
||||
RepoIDs: []int64{62},
|
||||
Keyword: "log",
|
||||
Langs: 1,
|
||||
Results: []codeSearchResult{
|
||||
{
|
||||
Filename: "example-file.js",
|
||||
Content: "console.log(\"Hello, World!\")",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, kw := range keywords {
|
||||
t.Run(kw.Keyword, func(t *testing.T) {
|
||||
total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
|
||||
RepoIDs: kw.RepoIDs,
|
||||
Keyword: kw.Keyword,
|
||||
SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords),
|
||||
Paginator: &db.ListOptions{
|
||||
Page: 1,
|
||||
PageSize: 10,
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, langs, kw.Langs)
|
||||
|
||||
hits := make([]codeSearchResult, 0, len(res))
|
||||
|
||||
if total > 0 {
|
||||
assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results")
|
||||
}
|
||||
|
||||
for _, hit := range res {
|
||||
hits = append(hits, codeSearchResult{
|
||||
Filename: hit.Filename,
|
||||
Content: hit.Content,
|
||||
})
|
||||
}
|
||||
|
||||
lastIndex := -1
|
||||
|
||||
for _, expected := range kw.Results {
|
||||
index := slices.Index(hits, expected)
|
||||
if index == -1 {
|
||||
assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits)
|
||||
} else if lastIndex > index {
|
||||
assert.Failf(t, "Result is out of order", "The order of %v within %v is wrong", expected, hits)
|
||||
} else {
|
||||
lastIndex = index
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
assert.NoError(t, tearDownRepositoryIndexes(t.Context(), indexer))
|
||||
})
|
||||
}
|
||||
|
||||
func TestBleveIndexAndSearch(t *testing.T) {
|
||||
unittest.PrepareTestEnv(t)
|
||||
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
|
||||
dir := t.TempDir()
|
||||
|
||||
idx := bleve.NewIndexer(dir)
|
||||
defer idx.Close()
|
||||
|
||||
_, err := idx.Init(t.Context())
|
||||
require.NoError(t, err)
|
||||
|
||||
testIndexer("bleve", t, idx)
|
||||
}
|
||||
|
||||
func TestESIndexAndSearch(t *testing.T) {
|
||||
unittest.PrepareTestEnv(t)
|
||||
|
||||
u := os.Getenv("TEST_INDEXER_CODE_ES_URL")
|
||||
if u == "" {
|
||||
t.SkipNow()
|
||||
return
|
||||
}
|
||||
|
||||
indexer := elasticsearch.NewIndexer(u, "gitea_codes")
|
||||
if _, err := indexer.Init(t.Context()); err != nil {
|
||||
if indexer != nil {
|
||||
indexer.Close()
|
||||
}
|
||||
require.NoError(t, err, "Unable to init ES indexer")
|
||||
}
|
||||
|
||||
defer indexer.Close()
|
||||
|
||||
testIndexer("elastic_search", t, indexer)
|
||||
}
|
||||
|
||||
func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
|
||||
for _, repoID := range repositoriesToSearch() {
|
||||
if err := index(ctx, indexer, repoID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tearDownRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
|
||||
for _, repoID := range repositoriesToSearch() {
|
||||
if err := indexer.Delete(ctx, repoID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func repositoriesToSearch() []int64 {
|
||||
return []int64{1, 62}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
)
|
||||
|
||||
// Indexer defines an interface to index and search code contents
|
||||
type Indexer interface {
|
||||
internal.Indexer
|
||||
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
|
||||
Delete(ctx context.Context, repoID int64) error
|
||||
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
|
||||
SupportedSearchModes() []indexer.SearchMode
|
||||
}
|
||||
|
||||
type SearchOptions struct {
|
||||
RepoIDs []int64
|
||||
Keyword string
|
||||
Language string
|
||||
|
||||
SearchMode indexer.SearchModeType
|
||||
|
||||
db.Paginator
|
||||
}
|
||||
|
||||
// NewDummyIndexer returns a dummy indexer
|
||||
func NewDummyIndexer() Indexer {
|
||||
return &dummyIndexer{
|
||||
Indexer: internal.NewDummyIndexer(),
|
||||
}
|
||||
}
|
||||
|
||||
type dummyIndexer struct {
|
||||
internal.Indexer
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
|
||||
return errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
|
||||
return errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
|
||||
return 0, nil, nil, errors.New("indexer is not ready")
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import "gitea.dev/modules/timeutil"
|
||||
|
||||
type FileUpdate struct {
|
||||
Filename string
|
||||
BlobSha string
|
||||
Size int64
|
||||
Sized bool
|
||||
}
|
||||
|
||||
// RepoChanges changes (file additions/updates/removals) to a repo
|
||||
type RepoChanges struct {
|
||||
Updates []FileUpdate
|
||||
RemovedFilenames []string
|
||||
}
|
||||
|
||||
// IndexerData represents data stored in the code indexer
|
||||
type IndexerData struct {
|
||||
RepoID int64
|
||||
}
|
||||
|
||||
// SearchResult result of performing a search in a repo
|
||||
type SearchResult struct {
|
||||
RepoID int64
|
||||
StartIndex int
|
||||
EndIndex int
|
||||
Filename string
|
||||
Content string
|
||||
CommitID string
|
||||
UpdatedUnix timeutil.TimeStamp
|
||||
Language string
|
||||
Color string
|
||||
}
|
||||
|
||||
// SearchResultLanguages result of top languages count in search results
|
||||
type SearchResultLanguages struct {
|
||||
Language string
|
||||
Color string
|
||||
Count int
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
const filenameMatchNumberOfLines = 7 // Copied from GitHub search
|
||||
|
||||
func FilenameIndexerID(repoID int64, filename string) string {
|
||||
return internal.Base36(repoID) + "_" + filename
|
||||
}
|
||||
|
||||
func ParseIndexerID(indexerID string) (int64, string) {
|
||||
before, after, ok := strings.Cut(indexerID, "_")
|
||||
if !ok {
|
||||
log.Error("Unexpected ID in repo indexer: %s", indexerID)
|
||||
}
|
||||
repoID, _ := internal.ParseBase36(before)
|
||||
return repoID, after
|
||||
}
|
||||
|
||||
func FilenameOfIndexerID(indexerID string) string {
|
||||
_, after, ok := strings.Cut(indexerID, "_")
|
||||
if !ok {
|
||||
log.Error("Unexpected ID in repo indexer: %s", indexerID)
|
||||
}
|
||||
return after
|
||||
}
|
||||
|
||||
// FilenameMatchIndexPos returns the boundaries of its first seven lines.
|
||||
func FilenameMatchIndexPos(content string) (int, int) {
|
||||
count := 1
|
||||
for i, c := range content {
|
||||
if c == '\n' {
|
||||
count++
|
||||
if count == filenameMatchNumberOfLines {
|
||||
return 0, i
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, len(content)
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package code
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"html/template"
|
||||
"strings"
|
||||
|
||||
"gitea.dev/modules/highlight"
|
||||
"gitea.dev/modules/indexer/code/internal"
|
||||
"gitea.dev/modules/timeutil"
|
||||
)
|
||||
|
||||
// Result a search result to display
|
||||
type Result struct {
|
||||
RepoID int64
|
||||
Filename string
|
||||
CommitID string
|
||||
UpdatedUnix timeutil.TimeStamp
|
||||
Language string
|
||||
Color string
|
||||
Lines []*ResultLine
|
||||
}
|
||||
|
||||
type ResultLine struct {
|
||||
Num int
|
||||
FormattedContent template.HTML
|
||||
}
|
||||
|
||||
type SearchResultLanguages = internal.SearchResultLanguages
|
||||
|
||||
type SearchOptions = internal.SearchOptions
|
||||
|
||||
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
|
||||
startIndex := selectionStartIndex
|
||||
numLinesBefore := 0
|
||||
for ; startIndex > 0; startIndex-- {
|
||||
if content[startIndex-1] == '\n' {
|
||||
if numLinesBefore == 1 {
|
||||
break
|
||||
}
|
||||
numLinesBefore++
|
||||
}
|
||||
}
|
||||
|
||||
endIndex := selectionEndIndex
|
||||
numLinesAfter := 0
|
||||
for ; endIndex < len(content); endIndex++ {
|
||||
if content[endIndex] == '\n' {
|
||||
if numLinesAfter == 1 {
|
||||
break
|
||||
}
|
||||
numLinesAfter++
|
||||
}
|
||||
}
|
||||
|
||||
return startIndex, endIndex
|
||||
}
|
||||
|
||||
func writeStrings(buf *bytes.Buffer, strs ...string) error {
|
||||
for _, s := range strs {
|
||||
_, err := buf.WriteString(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func HighlightSearchResultCode(filename, language string, lineNums []int, code string) []*ResultLine {
|
||||
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
|
||||
lexer := highlight.DetectChromaLexerByFileName(filename, language)
|
||||
hl := highlight.RenderCodeByLexer(lexer, code)
|
||||
highlightedLines := highlight.UnsafeSplitHighlightedLines(hl)
|
||||
|
||||
// The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n`
|
||||
lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums)))
|
||||
for i := range lines {
|
||||
lines[i] = &ResultLine{
|
||||
Num: lineNums[i],
|
||||
FormattedContent: template.HTML(highlightedLines[i]),
|
||||
}
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
|
||||
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
|
||||
|
||||
var formattedLinesBuffer bytes.Buffer
|
||||
|
||||
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
|
||||
lineNums := make([]int, 0, len(contentLines))
|
||||
index := startIndex
|
||||
for i, line := range contentLines {
|
||||
var err error
|
||||
if index < result.EndIndex &&
|
||||
result.StartIndex < index+len(line) &&
|
||||
result.StartIndex < result.EndIndex {
|
||||
openActiveIndex := max(result.StartIndex-index, 0)
|
||||
closeActiveIndex := min(result.EndIndex-index, len(line))
|
||||
err = writeStrings(&formattedLinesBuffer,
|
||||
line[:openActiveIndex],
|
||||
line[openActiveIndex:closeActiveIndex],
|
||||
line[closeActiveIndex:],
|
||||
)
|
||||
} else {
|
||||
err = writeStrings(&formattedLinesBuffer, line)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lineNums = append(lineNums, startLineNum+i)
|
||||
index += len(line)
|
||||
}
|
||||
|
||||
return &Result{
|
||||
RepoID: result.RepoID,
|
||||
Filename: result.Filename,
|
||||
CommitID: result.CommitID,
|
||||
UpdatedUnix: result.UpdatedUnix,
|
||||
Language: result.Language,
|
||||
Color: result.Color,
|
||||
Lines: HighlightSearchResultCode(result.Filename, result.Language, lineNums, formattedLinesBuffer.String()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// PerformSearch perform a search on a repository
|
||||
func PerformSearch(ctx context.Context, opts *SearchOptions) (int64, []*Result, []*SearchResultLanguages, error) {
|
||||
if opts == nil || len(opts.Keyword) == 0 {
|
||||
return 0, nil, nil, nil
|
||||
}
|
||||
|
||||
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
displayResults := make([]*Result, len(results))
|
||||
|
||||
for i, result := range results {
|
||||
startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex)
|
||||
displayResults[i], err = searchResult(result, startIndex, endIndex)
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
}
|
||||
return total, displayResults, resultLanguages, nil
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright 2025 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package indexer
|
||||
|
||||
type SearchModeType string
|
||||
|
||||
const (
|
||||
SearchModeExact SearchModeType = "exact"
|
||||
SearchModeWords SearchModeType = "words"
|
||||
SearchModeFuzzy SearchModeType = "fuzzy"
|
||||
SearchModeRegexp SearchModeType = "regexp"
|
||||
)
|
||||
|
||||
type SearchMode struct {
|
||||
ModeValue SearchModeType
|
||||
TooltipTrKey string
|
||||
TitleTrKey string
|
||||
}
|
||||
|
||||
func SearchModesExactWords() []SearchMode {
|
||||
return []SearchMode{
|
||||
{
|
||||
ModeValue: SearchModeExact,
|
||||
TooltipTrKey: "search.exact_tooltip",
|
||||
TitleTrKey: "search.exact",
|
||||
},
|
||||
{
|
||||
ModeValue: SearchModeWords,
|
||||
TooltipTrKey: "search.words_tooltip",
|
||||
TitleTrKey: "search.words",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func SearchModesExactWordsFuzzy() []SearchMode {
|
||||
return append(SearchModesExactWords(), []SearchMode{
|
||||
{
|
||||
ModeValue: SearchModeFuzzy,
|
||||
TooltipTrKey: "search.fuzzy_tooltip",
|
||||
TitleTrKey: "search.fuzzy",
|
||||
},
|
||||
}...)
|
||||
}
|
||||
|
||||
func GitGrepSupportedSearchModes() []SearchMode {
|
||||
return append(SearchModesExactWords(), []SearchMode{
|
||||
{
|
||||
ModeValue: SearchModeRegexp,
|
||||
TooltipTrKey: "search.regexp_tooltip",
|
||||
TitleTrKey: "search.regexp",
|
||||
},
|
||||
}...)
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
func Base36(i int64) string {
|
||||
return strconv.FormatInt(i, 36)
|
||||
}
|
||||
|
||||
func ParseBase36(s string) (int64, error) {
|
||||
i, err := strconv.ParseInt(s, 36, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid base36 integer %q: %w", s, err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
)
|
||||
|
||||
// FlushingBatch is a batch of operations that automatically flushes to the
|
||||
// underlying index once it reaches a certain size.
|
||||
type FlushingBatch struct {
|
||||
maxBatchSize int
|
||||
batch *bleve.Batch
|
||||
index bleve.Index
|
||||
}
|
||||
|
||||
// NewFlushingBatch creates a new flushing batch for the specified index. Once
|
||||
// the number of operations in the batch reaches the specified limit, the batch
|
||||
// automatically flushes its operations to the index.
|
||||
func NewFlushingBatch(index bleve.Index, maxBatchSize int) *FlushingBatch {
|
||||
return &FlushingBatch{
|
||||
maxBatchSize: maxBatchSize,
|
||||
batch: index.NewBatch(),
|
||||
index: index,
|
||||
}
|
||||
}
|
||||
|
||||
// Index add a new index to batch
|
||||
func (b *FlushingBatch) Index(id string, data any) error {
|
||||
if err := b.batch.Index(id, data); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.flushIfFull()
|
||||
}
|
||||
|
||||
// Delete add a delete index to batch
|
||||
func (b *FlushingBatch) Delete(id string) error {
|
||||
b.batch.Delete(id)
|
||||
return b.flushIfFull()
|
||||
}
|
||||
|
||||
func (b *FlushingBatch) flushIfFull() error {
|
||||
if b.batch.Size() < b.maxBatchSize {
|
||||
return nil
|
||||
}
|
||||
return b.Flush()
|
||||
}
|
||||
|
||||
// Flush submit the batch and create a new one
|
||||
func (b *FlushingBatch) Flush() error {
|
||||
err := b.index.Batch(b.batch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.batch = b.index.NewBatch()
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
"gitea.dev/modules/log"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/ethantkoenig/rupture"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer represents a basic bleve indexer implementation
|
||||
type Indexer struct {
|
||||
Indexer bleve.Index
|
||||
|
||||
indexDir string
|
||||
version int
|
||||
mappingGetter MappingGetter
|
||||
}
|
||||
|
||||
type MappingGetter func() (mapping.IndexMapping, error)
|
||||
|
||||
func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.IndexMapping, error)) *Indexer {
|
||||
return &Indexer{
|
||||
indexDir: indexDir,
|
||||
version: version,
|
||||
mappingGetter: mappingGetter,
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes the indexer
|
||||
func (i *Indexer) Init(_ context.Context) (bool, error) {
|
||||
if i == nil {
|
||||
return false, errors.New("cannot init nil indexer")
|
||||
}
|
||||
|
||||
if i.Indexer != nil {
|
||||
return false, errors.New("indexer is already initialized")
|
||||
}
|
||||
|
||||
indexer, version, err := openIndexer(i.indexDir, i.version)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if indexer != nil {
|
||||
i.Indexer = indexer
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if version != 0 {
|
||||
log.Warn("Found older bleve index with version %d, Gitea will remove it and rebuild", version)
|
||||
}
|
||||
|
||||
indexMapping, err := i.mappingGetter()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
indexer, err = bleve.New(i.indexDir, indexMapping)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if err = rupture.WriteIndexMetadata(i.indexDir, &rupture.IndexMetadata{
|
||||
Version: i.version,
|
||||
}); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
i.Indexer = indexer
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Ping checks if the indexer is available
|
||||
func (i *Indexer) Ping(_ context.Context) error {
|
||||
if i == nil {
|
||||
return errors.New("cannot ping nil indexer")
|
||||
}
|
||||
if i.Indexer == nil {
|
||||
return errors.New("indexer is not initialized")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil || i.Indexer == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err := i.Indexer.Close(); err != nil {
|
||||
log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err)
|
||||
}
|
||||
i.Indexer = nil
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"gitea.dev/modules/optional"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
)
|
||||
|
||||
// NumericEqualityQuery generates a numeric equality query for the given value and field
|
||||
func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
|
||||
f := float64(value)
|
||||
tru := true
|
||||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
|
||||
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
|
||||
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
|
||||
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
||||
q.FieldVal = field
|
||||
q.Analyzer = analyzer
|
||||
q.Fuzziness = fuzziness
|
||||
return q
|
||||
}
|
||||
|
||||
// MatchAndQuery generates a match query for the given phrase, field and analyzer
|
||||
func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery {
|
||||
q := bleve.NewMatchQuery(matchPhrase)
|
||||
q.FieldVal = field
|
||||
q.Analyzer = analyzer
|
||||
q.Fuzziness = fuzziness
|
||||
q.Operator = query.MatchQueryOperatorAnd
|
||||
return q
|
||||
}
|
||||
|
||||
// BoolFieldQuery generates a bool field query for the given value and field
|
||||
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
|
||||
q := bleve.NewBoolFieldQuery(value)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
|
||||
func NumericRangeInclusiveQuery(minOption, maxOption optional.Option[int64], field string) *query.NumericRangeQuery {
|
||||
var minF, maxF *float64
|
||||
var minI, maxI *bool
|
||||
if minOption.Has() {
|
||||
minF = new(float64)
|
||||
*minF = float64(minOption.Value())
|
||||
minI = new(bool)
|
||||
*minI = true
|
||||
}
|
||||
if maxOption.Has() {
|
||||
maxF = new(float64)
|
||||
*maxF = float64(maxOption.Value())
|
||||
maxI = new(bool)
|
||||
*maxI = true
|
||||
}
|
||||
q := bleve.NewNumericRangeInclusiveQuery(minF, maxF, minI, maxI)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"unicode"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
unicode_tokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
"github.com/ethantkoenig/rupture"
|
||||
)
|
||||
|
||||
const (
|
||||
maxFuzziness = 2
|
||||
)
|
||||
|
||||
// openIndexer open the index at the specified path, checking for metadata
|
||||
// updates and bleve version updates. If index needs to be created (or
|
||||
// re-created), returns (nil, nil)
|
||||
func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
|
||||
_, err := os.Stat(path)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
return nil, 0, nil
|
||||
} else if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
metadata, err := rupture.ReadIndexMetadata(path)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if metadata.Version < latestVersion {
|
||||
// the indexer is using a previous version, so we should delete it and
|
||||
// re-populate
|
||||
return nil, metadata.Version, util.RemoveAll(path)
|
||||
}
|
||||
|
||||
index, err := bleve.Open(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, upsidedown.IncompatibleVersion) {
|
||||
log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding")
|
||||
return nil, 0, util.RemoveAll(path)
|
||||
}
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
return index, 0, nil
|
||||
}
|
||||
|
||||
// GuessFuzzinessByKeyword guesses fuzziness based on the levenshtein distance and determines how many chars
|
||||
// may be different on two string, and they still be considered equivalent.
|
||||
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
|
||||
func GuessFuzzinessByKeyword(s string) int {
|
||||
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
|
||||
tokens := tokenizer.Tokenize([]byte(s))
|
||||
|
||||
if len(tokens) > 0 {
|
||||
fuzziness := maxFuzziness
|
||||
|
||||
for _, token := range tokens {
|
||||
fuzziness = min(fuzziness, guessFuzzinessByKeyword(string(token.Term)))
|
||||
}
|
||||
|
||||
return fuzziness
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func guessFuzzinessByKeyword(s string) int {
|
||||
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
|
||||
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
|
||||
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
|
||||
// Likewise, queries whose terms contains characters that are *not* letters should not use fuzziness
|
||||
|
||||
for _, r := range s {
|
||||
if r >= 128 || !unicode.IsLetter(r) {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
return min(min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness), len(s)/4)
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
|
||||
|
||||
scenarios := []struct {
|
||||
Input string
|
||||
Fuzziness int // See util.go for the definition of fuzziness in this particular context
|
||||
}{
|
||||
{
|
||||
Input: "",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "Avocado",
|
||||
Fuzziness: 1,
|
||||
},
|
||||
{
|
||||
Input: "Geschwindigkeit",
|
||||
Fuzziness: 2,
|
||||
},
|
||||
{
|
||||
Input: "non-exist",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "갃갃갃",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "repo1",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "avocado.md",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(fmt.Sprintf("Fuziniess:%s=%d", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
|
||||
assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer represents a basic db indexer implementation
|
||||
type Indexer struct{}
|
||||
|
||||
// Init initializes the indexer
|
||||
func (i *Indexer) Init(_ context.Context) (bool, error) {
|
||||
// Return true to indicate that the index was opened/existed.
|
||||
// So that the indexer will not try to populate the index, the data is already there.
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Ping checks if the indexer is available
|
||||
func (i *Indexer) Ping(_ context.Context) error {
|
||||
// No need to ping database to check if it is available.
|
||||
// If the database goes down, Gitea will go down, so nobody will care if the indexer is available.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the indexer
|
||||
func (i *Indexer) Close() {
|
||||
// nothing to do
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
"gitea.dev/modules/json"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer is a narrow wrapper around an Elasticsearch/OpenSearch cluster.
|
||||
// It targets the REST subset shared by Elasticsearch 7/8/9 and OpenSearch 3.
|
||||
type Indexer struct {
|
||||
client *http.Client
|
||||
base string // base URL with trailing slash, no userinfo
|
||||
user string
|
||||
pass string
|
||||
|
||||
indexName string
|
||||
version int
|
||||
mapping string
|
||||
}
|
||||
|
||||
// NewIndexer builds an Indexer. The connection is opened by Init.
|
||||
func NewIndexer(rawURL, indexName string, version int, mapping string) *Indexer {
|
||||
return &Indexer{
|
||||
base: rawURL,
|
||||
indexName: indexName,
|
||||
version: version,
|
||||
mapping: mapping,
|
||||
}
|
||||
}
|
||||
|
||||
// Init connects and creates the versioned index if missing, returning true if it already existed.
|
||||
func (i *Indexer) Init(ctx context.Context) (bool, error) {
|
||||
parsed, err := url.Parse(i.base)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("parse elasticsearch url: %w", err)
|
||||
}
|
||||
if parsed.User != nil {
|
||||
i.user = parsed.User.Username()
|
||||
i.pass, _ = parsed.User.Password()
|
||||
parsed.User = nil
|
||||
}
|
||||
base := parsed.String()
|
||||
if !strings.HasSuffix(base, "/") {
|
||||
base += "/"
|
||||
}
|
||||
i.base = base
|
||||
// No client-level Timeout: bulk/_delete_by_query can legitimately run for
|
||||
// minutes on large repos. Per-request deadlines come from the caller's ctx;
|
||||
// transport-level timeouts cover stalled connects/handshakes/headers so a
|
||||
// half-open server cannot wedge the indexer indefinitely.
|
||||
i.client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ResponseHeaderTimeout: 30 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
MaxIdleConns: 100,
|
||||
},
|
||||
}
|
||||
|
||||
exists, err := i.indexExists(ctx, i.VersionedIndexName())
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if exists {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if err := i.createIndex(ctx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Ping returns an error when the cluster is unusable (status != green/yellow).
|
||||
func (i *Indexer) Ping(ctx context.Context) error {
|
||||
var body struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
if err := i.doJSON(ctx, http.MethodGet, "_cluster/health", nil, &body); err != nil {
|
||||
return err
|
||||
}
|
||||
// Healthy = green; usable = yellow. Red is unusable.
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
|
||||
if body.Status != "green" && body.Status != "yellow" {
|
||||
return fmt.Errorf("status of elasticsearch cluster is %s", body.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close releases idle HTTP connections held by the client.
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil || i.client == nil {
|
||||
return
|
||||
}
|
||||
i.client.CloseIdleConnections()
|
||||
i.client = nil
|
||||
}
|
||||
|
||||
// Bulk submits index/delete ops. Returns the first item-level failure, if any.
|
||||
func (i *Indexer) Bulk(ctx context.Context, ops []BulkOp) error {
|
||||
if len(ops) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
index := i.VersionedIndexName()
|
||||
var buf bytes.Buffer
|
||||
buf.Grow(len(ops) * 256)
|
||||
for _, op := range ops {
|
||||
meta := map[string]any{op.action: map[string]any{"_index": index, "_id": op.id}}
|
||||
if err := writeJSONLine(&buf, meta); err != nil {
|
||||
return err
|
||||
}
|
||||
if op.action == bulkActionIndex {
|
||||
if err := writeJSONLine(&buf, op.doc); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res, err := i.do(ctx, http.MethodPost, urlPath(index, "_bulk"), "application/x-ndjson", bytes.NewReader(buf.Bytes()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
|
||||
var body struct {
|
||||
Errors bool `json:"errors"`
|
||||
Items []map[string]struct {
|
||||
Status int `json:"status"`
|
||||
Error json.Value `json:"error"`
|
||||
} `json:"items"`
|
||||
}
|
||||
if err := json.NewDecoder(res.Body).Decode(&body); err != nil {
|
||||
return err
|
||||
}
|
||||
if !body.Errors {
|
||||
return nil
|
||||
}
|
||||
return firstBulkError(body.Items)
|
||||
}
|
||||
|
||||
// firstBulkError returns the first item-level failure in a bulk response.
|
||||
// Each items entry is a single-key map ({"index": {...}} or {"delete": {...}}).
|
||||
// Delete-of-missing (404) is idempotent and not reported.
|
||||
func firstBulkError(items []map[string]struct {
|
||||
Status int `json:"status"`
|
||||
Error json.Value `json:"error"`
|
||||
},
|
||||
) error {
|
||||
for _, item := range items {
|
||||
for action, result := range item {
|
||||
if action == bulkActionDelete && result.Status == http.StatusNotFound {
|
||||
continue
|
||||
}
|
||||
if result.Status >= 300 {
|
||||
return fmt.Errorf("bulk %s failed (status %d): %s", action, result.Status, string(result.Error))
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Index writes a single document.
|
||||
func (i *Indexer) Index(ctx context.Context, id string, doc any) error {
|
||||
body, err := json.Marshal(doc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.doJSON(ctx, http.MethodPut, urlPath(i.VersionedIndexName(), "_doc", id), bytes.NewReader(body), nil)
|
||||
}
|
||||
|
||||
// Delete removes a single document by id. Missing ids are not an error.
|
||||
func (i *Indexer) Delete(ctx context.Context, id string) error {
|
||||
res, err := i.do(ctx, http.MethodDelete, urlPath(i.VersionedIndexName(), "_doc", id), "", nil, http.StatusNotFound)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
drainAndClose(res)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByQuery removes every document matching the query.
|
||||
func (i *Indexer) DeleteByQuery(ctx context.Context, query Query) error {
|
||||
body, err := json.Marshal(map[string]any{"query": query.querySource()})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.doJSON(ctx, http.MethodPost, urlPath(i.VersionedIndexName(), "_delete_by_query"), bytes.NewReader(body), nil)
|
||||
}
|
||||
|
||||
// Refresh forces a refresh so recent writes are searchable.
|
||||
func (i *Indexer) Refresh(ctx context.Context) error {
|
||||
return i.doJSON(ctx, http.MethodPost, urlPath(i.VersionedIndexName(), "_refresh"), nil, nil)
|
||||
}
|
||||
|
||||
// Search runs a search request and decodes the reply.
|
||||
func (i *Indexer) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
|
||||
body := map[string]any{}
|
||||
if req.Query != nil {
|
||||
body["query"] = req.Query.querySource()
|
||||
}
|
||||
if len(req.Sort) > 0 {
|
||||
sorts := make([]map[string]any, len(req.Sort))
|
||||
for idx, s := range req.Sort {
|
||||
sorts[idx] = s.source()
|
||||
}
|
||||
body["sort"] = sorts
|
||||
}
|
||||
if req.From > 0 {
|
||||
body["from"] = req.From
|
||||
}
|
||||
body["size"] = req.Size
|
||||
if len(req.Aggregations) > 0 {
|
||||
body["aggs"] = req.Aggregations
|
||||
}
|
||||
if len(req.Highlight) > 0 {
|
||||
body["highlight"] = req.Highlight
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Default track_total_hits is 10000 (capped count); send it explicitly so
|
||||
// callers can choose between exact totals (true) and skipping counting (false).
|
||||
path := urlPath(i.VersionedIndexName(), "_search") + "?track_total_hits=" + strconv.FormatBool(req.TrackTotal)
|
||||
res, err := i.do(ctx, http.MethodPost, path, "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
return decodeSearchResponse(res.Body)
|
||||
}
|
||||
|
||||
func (i *Indexer) indexExists(ctx context.Context, name string) (bool, error) {
|
||||
res, err := i.do(ctx, http.MethodHead, urlPath(name), "", nil, http.StatusNotFound)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
drainAndClose(res)
|
||||
return res.StatusCode == http.StatusOK, nil
|
||||
}
|
||||
|
||||
func (i *Indexer) createIndex(ctx context.Context) error {
|
||||
var body struct {
|
||||
Acknowledged bool `json:"acknowledged"`
|
||||
}
|
||||
if err := i.doJSON(ctx, http.MethodPut, urlPath(i.VersionedIndexName()), bytes.NewBufferString(i.mapping), &body); err != nil {
|
||||
return fmt.Errorf("create index %s: %w", i.VersionedIndexName(), err)
|
||||
}
|
||||
if !body.Acknowledged {
|
||||
return fmt.Errorf("create index %s not acknowledged", i.VersionedIndexName())
|
||||
}
|
||||
|
||||
i.checkOldIndexes(ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
// do sends a request and returns the response. Status >= 300 is turned into
|
||||
// an error unless the status appears in okStatus. The caller closes Body.
|
||||
func (i *Indexer) do(ctx context.Context, method, path, contentType string, body io.Reader, okStatus ...int) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, method, i.base+path, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if contentType != "" {
|
||||
req.Header.Set("Content-Type", contentType)
|
||||
}
|
||||
if i.user != "" || i.pass != "" {
|
||||
req.SetBasicAuth(i.user, i.pass)
|
||||
}
|
||||
res, err := i.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if res.StatusCode >= 300 && !slices.Contains(okStatus, res.StatusCode) {
|
||||
msg := readErrBody(res)
|
||||
res.Body.Close()
|
||||
return nil, fmt.Errorf("%s %s: %s", method, path, msg)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// doJSON sends a request with a JSON body and, when out is non-nil, decodes
|
||||
// the JSON response into it.
|
||||
func (i *Indexer) doJSON(ctx context.Context, method, path string, body io.Reader, out any) error {
|
||||
contentType := ""
|
||||
if body != nil {
|
||||
contentType = "application/json"
|
||||
}
|
||||
res, err := i.do(ctx, method, path, contentType, body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
if out == nil {
|
||||
return nil
|
||||
}
|
||||
return json.NewDecoder(res.Body).Decode(out)
|
||||
}
|
||||
|
||||
// drainAndClose discards any unread response body before closing so the
|
||||
// underlying TCP connection can be reused for keep-alive.
|
||||
func drainAndClose(res *http.Response) {
|
||||
_, _ = io.Copy(io.Discard, res.Body)
|
||||
res.Body.Close()
|
||||
}
|
||||
|
||||
func writeJSONLine(buf *bytes.Buffer, v any) error {
|
||||
enc, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
buf.Write(enc)
|
||||
buf.WriteByte('\n')
|
||||
return nil
|
||||
}
|
||||
|
||||
// readErrBody reads up to 4 KiB of an error response and drains the rest so
|
||||
// the underlying connection can be reused (keep-alive needs Body fully read).
|
||||
func readErrBody(res *http.Response) string {
|
||||
const limit = 4 << 10
|
||||
b, _ := io.ReadAll(io.LimitReader(res.Body, limit))
|
||||
_, _ = io.Copy(io.Discard, res.Body)
|
||||
return fmt.Sprintf("status %d: %s", res.StatusCode, bytes.TrimSpace(b))
|
||||
}
|
||||
|
||||
func decodeSearchResponse(r io.Reader) (*SearchResponse, error) {
|
||||
var raw struct {
|
||||
Hits struct {
|
||||
Total struct {
|
||||
Value int64 `json:"value"`
|
||||
} `json:"total"`
|
||||
Hits []struct {
|
||||
ID string `json:"_id"`
|
||||
Score float64 `json:"_score"`
|
||||
Source json.Value `json:"_source"`
|
||||
Highlight map[string][]string `json:"highlight"`
|
||||
} `json:"hits"`
|
||||
} `json:"hits"`
|
||||
Aggregations map[string]struct {
|
||||
Buckets []struct {
|
||||
Key any `json:"key"`
|
||||
DocCount int64 `json:"doc_count"`
|
||||
} `json:"buckets"`
|
||||
} `json:"aggregations"`
|
||||
}
|
||||
if err := json.NewDecoder(r).Decode(&raw); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := &SearchResponse{
|
||||
Total: raw.Hits.Total.Value,
|
||||
Hits: make([]SearchHit, 0, len(raw.Hits.Hits)),
|
||||
}
|
||||
for _, h := range raw.Hits.Hits {
|
||||
resp.Hits = append(resp.Hits, SearchHit{
|
||||
ID: h.ID,
|
||||
Score: h.Score,
|
||||
Source: h.Source,
|
||||
Highlight: h.Highlight,
|
||||
})
|
||||
}
|
||||
if len(raw.Aggregations) > 0 {
|
||||
resp.Aggregations = make(map[string][]AggBucket, len(raw.Aggregations))
|
||||
for name, agg := range raw.Aggregations {
|
||||
buckets := make([]AggBucket, len(agg.Buckets))
|
||||
for idx, b := range agg.Buckets {
|
||||
buckets[idx] = AggBucket{Key: b.Key, DocCount: b.DocCount}
|
||||
}
|
||||
resp.Aggregations[name] = buckets
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// urlPath joins path segments with `/` and percent-escapes each.
|
||||
func urlPath(segments ...string) string {
|
||||
var b bytes.Buffer
|
||||
for idx, s := range segments {
|
||||
if idx > 0 {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteString(url.PathEscape(s))
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func newRealIndexer(t *testing.T) *Indexer {
|
||||
t.Helper()
|
||||
esURL := test.ExternalServiceHTTP(t, "TEST_ELASTICSEARCH_URL", "http://elasticsearch:9200")
|
||||
indexName := "gitea_test_" + strings.ReplaceAll(strings.ToLower(t.Name()), "/", "_")
|
||||
ix := NewIndexer(esURL, indexName, 1, `{"mappings":{"properties":{"x":{"type":"keyword"}}}}`)
|
||||
_, err := ix.Init(t.Context())
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(ix.Close)
|
||||
return ix
|
||||
}
|
||||
|
||||
func TestPing(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Ping(t.Context()))
|
||||
}
|
||||
|
||||
func TestDeleteSwallows404(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Delete(t.Context(), "missing-id"))
|
||||
}
|
||||
|
||||
func TestBulkAcceptsDelete404(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Bulk(t.Context(), []BulkOp{DeleteOp("missing-id")}))
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
// MultiMatch types used by the call sites. See
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
|
||||
const (
|
||||
MultiMatchTypeBestFields = "best_fields"
|
||||
MultiMatchTypePhrasePrefix = "phrase_prefix"
|
||||
)
|
||||
|
||||
// ToAnySlice converts []T to []any for variadic query args like TermsQuery.
|
||||
func ToAnySlice[T any](s []T) []any {
|
||||
out := make([]any, len(s))
|
||||
for idx, v := range s {
|
||||
out[idx] = v
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Query is an Elasticsearch query DSL node. It marshals to the JSON
|
||||
// object expected by the ES query API.
|
||||
type Query interface {
|
||||
querySource() map[string]any
|
||||
}
|
||||
|
||||
type rawQuery map[string]any
|
||||
|
||||
func (q rawQuery) querySource() map[string]any { return q }
|
||||
|
||||
// TermQuery matches documents whose `field` exactly equals `value`.
|
||||
func TermQuery(field string, value any) Query {
|
||||
return rawQuery{"term": map[string]any{field: value}}
|
||||
}
|
||||
|
||||
// TermsQuery matches documents whose `field` equals any of `values`.
|
||||
func TermsQuery(field string, values ...any) Query {
|
||||
return rawQuery{"terms": map[string]any{field: values}}
|
||||
}
|
||||
|
||||
// MatchQuery is a full-text match on a single field.
|
||||
func MatchQuery(field string, value any) Query {
|
||||
return rawQuery{"match": map[string]any{field: value}}
|
||||
}
|
||||
|
||||
// MatchPhraseQuery matches the exact phrase on `field`.
|
||||
func MatchPhraseQuery(field, value string) Query {
|
||||
return rawQuery{"match_phrase": map[string]any{field: value}}
|
||||
}
|
||||
|
||||
// MultiMatchQuery is the fluent builder for a multi_match query.
|
||||
type MultiMatchQuery struct {
|
||||
query any
|
||||
fields []string
|
||||
typ string
|
||||
operator string
|
||||
}
|
||||
|
||||
// NewMultiMatchQuery creates a multi_match query over the given fields.
|
||||
func NewMultiMatchQuery(query any, fields ...string) *MultiMatchQuery {
|
||||
return &MultiMatchQuery{query: query, fields: fields}
|
||||
}
|
||||
|
||||
func (m *MultiMatchQuery) Type(t string) *MultiMatchQuery { m.typ = t; return m }
|
||||
func (m *MultiMatchQuery) Operator(op string) *MultiMatchQuery { m.operator = op; return m }
|
||||
|
||||
func (m *MultiMatchQuery) querySource() map[string]any {
|
||||
body := map[string]any{"query": m.query}
|
||||
if len(m.fields) > 0 {
|
||||
body["fields"] = m.fields
|
||||
}
|
||||
if m.typ != "" {
|
||||
body["type"] = m.typ
|
||||
}
|
||||
if m.operator != "" {
|
||||
body["operator"] = m.operator
|
||||
}
|
||||
return map[string]any{"multi_match": body}
|
||||
}
|
||||
|
||||
// RangeQuery is the fluent builder for a range query.
|
||||
type RangeQuery struct {
|
||||
field string
|
||||
body map[string]any
|
||||
}
|
||||
|
||||
func NewRangeQuery(field string) *RangeQuery {
|
||||
return &RangeQuery{field: field, body: map[string]any{}}
|
||||
}
|
||||
|
||||
func (r *RangeQuery) Gte(v any) *RangeQuery { r.body["gte"] = v; return r }
|
||||
func (r *RangeQuery) Lte(v any) *RangeQuery { r.body["lte"] = v; return r }
|
||||
|
||||
func (r *RangeQuery) querySource() map[string]any {
|
||||
return map[string]any{"range": map[string]any{r.field: r.body}}
|
||||
}
|
||||
|
||||
// BoolQuery is the fluent builder for a bool query.
|
||||
type BoolQuery struct {
|
||||
must []Query
|
||||
should []Query
|
||||
mustNot []Query
|
||||
}
|
||||
|
||||
func NewBoolQuery() *BoolQuery { return &BoolQuery{} }
|
||||
|
||||
func (b *BoolQuery) Must(q ...Query) *BoolQuery { b.must = append(b.must, q...); return b }
|
||||
func (b *BoolQuery) Should(q ...Query) *BoolQuery { b.should = append(b.should, q...); return b }
|
||||
func (b *BoolQuery) MustNot(q ...Query) *BoolQuery { b.mustNot = append(b.mustNot, q...); return b }
|
||||
|
||||
func (b *BoolQuery) querySource() map[string]any {
|
||||
body := map[string]any{}
|
||||
if len(b.must) > 0 {
|
||||
body["must"] = querySlice(b.must)
|
||||
}
|
||||
if len(b.should) > 0 {
|
||||
body["should"] = querySlice(b.should)
|
||||
}
|
||||
if len(b.mustNot) > 0 {
|
||||
body["must_not"] = querySlice(b.mustNot)
|
||||
}
|
||||
return map[string]any{"bool": body}
|
||||
}
|
||||
|
||||
func querySlice(queries []Query) []map[string]any {
|
||||
out := make([]map[string]any, len(queries))
|
||||
for idx, q := range queries {
|
||||
out[idx] = q.querySource()
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import "gitea.dev/modules/json"
|
||||
|
||||
const (
|
||||
bulkActionIndex = "index"
|
||||
bulkActionDelete = "delete"
|
||||
)
|
||||
|
||||
// BulkOp is a single write inside a Bulk call. Construct with IndexOp or DeleteOp.
|
||||
type BulkOp struct {
|
||||
action string
|
||||
id string
|
||||
doc any
|
||||
}
|
||||
|
||||
// IndexOp builds a bulk index operation.
|
||||
func IndexOp(id string, doc any) BulkOp {
|
||||
return BulkOp{action: bulkActionIndex, id: id, doc: doc}
|
||||
}
|
||||
|
||||
// DeleteOp builds a bulk delete operation.
|
||||
func DeleteOp(id string) BulkOp {
|
||||
return BulkOp{action: bulkActionDelete, id: id}
|
||||
}
|
||||
|
||||
// SortField is one entry of the search sort array.
|
||||
type SortField struct {
|
||||
Field string
|
||||
Desc bool
|
||||
}
|
||||
|
||||
func (s SortField) source() map[string]any {
|
||||
order := "asc"
|
||||
if s.Desc {
|
||||
order = "desc"
|
||||
}
|
||||
return map[string]any{s.Field: map[string]any{"order": order}}
|
||||
}
|
||||
|
||||
// SearchRequest captures everything Gitea sends to the _search endpoint.
|
||||
// Aggregations and Highlight are raw ES JSON bodies — callers write them as
|
||||
// map[string]any since each has exactly one call site with a fixed shape.
|
||||
type SearchRequest struct {
|
||||
Query Query
|
||||
Sort []SortField
|
||||
From int
|
||||
Size int
|
||||
TrackTotal bool
|
||||
Aggregations map[string]any
|
||||
Highlight map[string]any
|
||||
}
|
||||
|
||||
// SearchHit is a single result row.
|
||||
type SearchHit struct {
|
||||
ID string
|
||||
Score float64
|
||||
Source json.Value
|
||||
Highlight map[string][]string
|
||||
}
|
||||
|
||||
// AggBucket is a terms-aggregation bucket.
|
||||
type AggBucket struct {
|
||||
Key any
|
||||
DocCount int64
|
||||
}
|
||||
|
||||
// SearchResponse is Gitea's decoded view of the search reply.
|
||||
type SearchResponse struct {
|
||||
Total int64
|
||||
Hits []SearchHit
|
||||
Aggregations map[string][]AggBucket
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
// VersionedIndexName returns the full index name with version suffix.
|
||||
func (i *Indexer) VersionedIndexName() string {
|
||||
return versionedIndexName(i.indexName, i.version)
|
||||
}
|
||||
|
||||
func versionedIndexName(indexName string, version int) string {
|
||||
if version == 0 {
|
||||
// Old index name without version
|
||||
return indexName
|
||||
}
|
||||
return fmt.Sprintf("%s.v%d", indexName, version)
|
||||
}
|
||||
|
||||
func (i *Indexer) checkOldIndexes(ctx context.Context) {
|
||||
for v := range i.version {
|
||||
indexName := versionedIndexName(i.indexName, v)
|
||||
exists, err := i.indexExists(ctx, indexName)
|
||||
if err == nil && exists {
|
||||
log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Indexer defines an basic indexer interface
|
||||
type Indexer interface {
|
||||
// Init initializes the indexer
|
||||
// returns true if the index was opened/existed (with data populated), false if it was created/not-existed (with no data)
|
||||
Init(ctx context.Context) (bool, error)
|
||||
// Ping checks if the indexer is available
|
||||
Ping(ctx context.Context) error
|
||||
// Close closes the indexer
|
||||
Close()
|
||||
}
|
||||
|
||||
// NewDummyIndexer returns a dummy indexer
|
||||
func NewDummyIndexer() Indexer {
|
||||
return &dummyIndexer{}
|
||||
}
|
||||
|
||||
type dummyIndexer struct{}
|
||||
|
||||
func (d *dummyIndexer) Init(ctx context.Context) (bool, error) {
|
||||
return false, errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Ping(ctx context.Context) error {
|
||||
return errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Close() {}
|
||||
@@ -0,0 +1,119 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Filter represents a filter for meilisearch queries.
|
||||
// It's just a simple wrapper around a string.
|
||||
// DO NOT assume that it is a complete implementation.
|
||||
type Filter interface {
|
||||
Statement() string
|
||||
}
|
||||
|
||||
type FilterAnd struct {
|
||||
filters []Filter
|
||||
}
|
||||
|
||||
func (f *FilterAnd) Statement() string {
|
||||
var statements []string
|
||||
for _, filter := range f.filters {
|
||||
if s := filter.Statement(); s != "" {
|
||||
statements = append(statements, fmt.Sprintf("(%s)", s))
|
||||
}
|
||||
}
|
||||
return strings.Join(statements, " AND ")
|
||||
}
|
||||
|
||||
func (f *FilterAnd) And(filter Filter) *FilterAnd {
|
||||
f.filters = append(f.filters, filter)
|
||||
return f
|
||||
}
|
||||
|
||||
type FilterOr struct {
|
||||
filters []Filter
|
||||
}
|
||||
|
||||
func (f *FilterOr) Statement() string {
|
||||
var statements []string
|
||||
for _, filter := range f.filters {
|
||||
if s := filter.Statement(); s != "" {
|
||||
statements = append(statements, fmt.Sprintf("(%s)", s))
|
||||
}
|
||||
}
|
||||
return strings.Join(statements, " OR ")
|
||||
}
|
||||
|
||||
func (f *FilterOr) Or(filter Filter) *FilterOr {
|
||||
f.filters = append(f.filters, filter)
|
||||
return f
|
||||
}
|
||||
|
||||
type FilterIn string
|
||||
|
||||
// NewFilterIn creates a new FilterIn.
|
||||
// It supports int64 only, to avoid extra works to handle strings with special characters.
|
||||
func NewFilterIn[T int64](field string, values ...T) FilterIn {
|
||||
if len(values) == 0 {
|
||||
return ""
|
||||
}
|
||||
vs := make([]string, len(values))
|
||||
for i, v := range values {
|
||||
vs[i] = fmt.Sprintf("%v", v)
|
||||
}
|
||||
return FilterIn(fmt.Sprintf("%s IN [%v]", field, strings.Join(vs, ", ")))
|
||||
}
|
||||
|
||||
func (f FilterIn) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
|
||||
type FilterEq string
|
||||
|
||||
// NewFilterEq creates a new FilterEq.
|
||||
// It supports int64 and bool only, to avoid extra works to handle strings with special characters.
|
||||
func NewFilterEq[T bool | int64](field string, value T) FilterEq {
|
||||
return FilterEq(fmt.Sprintf("%s = %v", field, value))
|
||||
}
|
||||
|
||||
func (f FilterEq) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
|
||||
type FilterNot string
|
||||
|
||||
func NewFilterNot(filter Filter) FilterNot {
|
||||
return FilterNot(fmt.Sprintf("NOT (%s)", filter.Statement()))
|
||||
}
|
||||
|
||||
func (f FilterNot) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
|
||||
type FilterGte string
|
||||
|
||||
// NewFilterGte creates a new FilterGte.
|
||||
// It supports int64 only, to avoid extra works to handle strings with special characters.
|
||||
func NewFilterGte[T int64](field string, value T) FilterGte {
|
||||
return FilterGte(fmt.Sprintf("%s >= %v", field, value))
|
||||
}
|
||||
|
||||
func (f FilterGte) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
|
||||
type FilterLte string
|
||||
|
||||
// NewFilterLte creates a new FilterLte.
|
||||
// It supports int64 only, to avoid extra works to handle strings with special characters.
|
||||
func NewFilterLte[T int64](field string, value T) FilterLte {
|
||||
return FilterLte(fmt.Sprintf("%s <= %v", field, value))
|
||||
}
|
||||
|
||||
func (f FilterLte) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
)
|
||||
|
||||
// Indexer represents a basic meilisearch indexer implementation
|
||||
type Indexer struct {
|
||||
Client meilisearch.ServiceManager
|
||||
|
||||
url, apiKey string
|
||||
indexName string
|
||||
version int
|
||||
settings *meilisearch.Settings
|
||||
}
|
||||
|
||||
func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearch.Settings) *Indexer {
|
||||
return &Indexer{
|
||||
url: url,
|
||||
apiKey: apiKey,
|
||||
indexName: indexName,
|
||||
version: version,
|
||||
settings: settings,
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes the indexer
|
||||
func (i *Indexer) Init(_ context.Context) (bool, error) {
|
||||
if i == nil {
|
||||
return false, errors.New("cannot init nil indexer")
|
||||
}
|
||||
|
||||
if i.Client != nil {
|
||||
return false, errors.New("indexer is already initialized")
|
||||
}
|
||||
|
||||
i.Client = meilisearch.New(i.url, meilisearch.WithAPIKey(i.apiKey))
|
||||
_, err := i.Client.GetIndex(i.VersionedIndexName())
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
_, err = i.Client.CreateIndex(&meilisearch.IndexConfig{
|
||||
Uid: i.VersionedIndexName(),
|
||||
PrimaryKey: "id",
|
||||
})
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
i.checkOldIndexes()
|
||||
|
||||
_, err = i.Client.Index(i.VersionedIndexName()).UpdateSettings(i.settings)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Ping checks if the indexer is available
|
||||
func (i *Indexer) Ping(ctx context.Context) error {
|
||||
if i == nil {
|
||||
return errors.New("cannot ping nil indexer")
|
||||
}
|
||||
if i.Client == nil {
|
||||
return errors.New("indexer is not initialized")
|
||||
}
|
||||
resp, err := i.Client.Health()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.Status != "available" {
|
||||
// See https://docs.meilisearch.com/reference/api/health.html#status
|
||||
return fmt.Errorf("status of meilisearch is not available: %s", resp.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the indexer
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil {
|
||||
return
|
||||
}
|
||||
i.Client = nil
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
// VersionedIndexName returns the full index name with version
|
||||
func (i *Indexer) VersionedIndexName() string {
|
||||
return versionedIndexName(i.indexName, i.version)
|
||||
}
|
||||
|
||||
func versionedIndexName(indexName string, version int) string {
|
||||
if version == 0 {
|
||||
// Old index name without version
|
||||
return indexName
|
||||
}
|
||||
|
||||
// The format of the index name is <index_name>_v<version>, not <index_name>.v<version> like elasticsearch.
|
||||
// Because meilisearch does not support "." in index name, it should contain only alphanumeric characters, hyphens (-) and underscores (_).
|
||||
// See https://www.meilisearch.com/docs/learn/core_concepts/indexes#index-uid
|
||||
|
||||
return fmt.Sprintf("%s_v%d", indexName, version)
|
||||
}
|
||||
|
||||
func (i *Indexer) checkOldIndexes() {
|
||||
for v := 0; v < i.version; v++ {
|
||||
indexName := versionedIndexName(i.indexName, v)
|
||||
_, err := i.Client.GetIndex(indexName)
|
||||
if err == nil {
|
||||
log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
)
|
||||
|
||||
// ParsePaginator parses a db.Paginator into a skip and limit
|
||||
func ParsePaginator(paginator *db.ListOptions, maxNums ...int) (int, int) {
|
||||
// Use a very large number to indicate no limit
|
||||
unlimited := math.MaxInt32
|
||||
if len(maxNums) > 0 {
|
||||
// Some indexer engines have a limit on the page size, respect that
|
||||
unlimited = maxNums[0]
|
||||
}
|
||||
|
||||
if paginator == nil || paginator.IsListAll() {
|
||||
// It shouldn't happen. In actual usage scenarios, there should not be requests to search all.
|
||||
// But if it does happen, respect it and return "unlimited".
|
||||
// And it's also useful for testing.
|
||||
return 0, unlimited
|
||||
}
|
||||
|
||||
if paginator.PageSize == 0 {
|
||||
// Do not return any results when searching, it's used to get the total count only.
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
return paginator.GetSkipTake()
|
||||
}
|
||||
@@ -0,0 +1,326 @@
|
||||
// Copyright 2018 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
|
||||
"gitea.dev/modules/indexer"
|
||||
indexer_internal "gitea.dev/modules/indexer/internal"
|
||||
inner_bleve "gitea.dev/modules/indexer/internal/bleve"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
)
|
||||
|
||||
const (
|
||||
issueIndexerAnalyzer = "issueIndexer"
|
||||
issueIndexerDocType = "issueIndexerDocType"
|
||||
issueIndexerLatestVersion = 6
|
||||
)
|
||||
|
||||
const unicodeNormalizeName = "unicodeNormalize"
|
||||
|
||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
|
||||
"type": unicodenorm.Name,
|
||||
"form": unicodenorm.NFC,
|
||||
})
|
||||
}
|
||||
|
||||
const maxBatchSize = 16
|
||||
|
||||
// IndexerData an update to the issue indexer
|
||||
type IndexerData internal.IndexerData
|
||||
|
||||
// Type returns the document type, for bleve's mapping.Classifier interface.
|
||||
func (i *IndexerData) Type() string {
|
||||
return issueIndexerDocType
|
||||
}
|
||||
|
||||
// generateIssueIndexMapping generates the bleve index mapping for issues
|
||||
func generateIssueIndexMapping() (mapping.IndexMapping, error) {
|
||||
mapping := bleve.NewIndexMapping()
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
|
||||
numericFieldMapping := bleve.NewNumericFieldMapping()
|
||||
numericFieldMapping.Store = false
|
||||
numericFieldMapping.IncludeInAll = false
|
||||
docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
|
||||
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
textFieldMapping.Store = false
|
||||
textFieldMapping.IncludeInAll = false
|
||||
|
||||
boolFieldMapping := bleve.NewBooleanFieldMapping()
|
||||
boolFieldMapping.Store = false
|
||||
boolFieldMapping.IncludeInAll = false
|
||||
|
||||
numberFieldMapping := bleve.NewNumericFieldMapping()
|
||||
numberFieldMapping.Store = false
|
||||
numberFieldMapping.IncludeInAll = false
|
||||
|
||||
docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
|
||||
|
||||
docMapping.AddFieldMappingsAt("title", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("content", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
|
||||
|
||||
docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("is_archived", boolFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("project_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("no_project", boolFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
|
||||
|
||||
docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
|
||||
|
||||
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
|
||||
return nil, err
|
||||
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
|
||||
"type": custom.Name,
|
||||
"char_filters": []string{},
|
||||
"tokenizer": unicode.Name,
|
||||
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mapping.DefaultAnalyzer = issueIndexerAnalyzer
|
||||
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
|
||||
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
|
||||
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
|
||||
|
||||
return mapping, nil
|
||||
}
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer implements Indexer interface
|
||||
type Indexer struct {
|
||||
inner *inner_bleve.Indexer
|
||||
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWordsFuzzy()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new bleve local indexer
|
||||
func NewIndexer(indexDir string) *Indexer {
|
||||
inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
|
||||
return &Indexer{
|
||||
Indexer: inner,
|
||||
inner: inner,
|
||||
}
|
||||
}
|
||||
|
||||
// Index will save the index data
|
||||
func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
|
||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||
for _, issue := range issues {
|
||||
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return batch.Flush()
|
||||
}
|
||||
|
||||
// Delete deletes indexes by ids
|
||||
func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
|
||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||
for _, id := range ids {
|
||||
if err := batch.Delete(indexer_internal.Base36(id)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return batch.Flush()
|
||||
}
|
||||
|
||||
// Search searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
var queries []query.Query
|
||||
|
||||
if options.Keyword != "" {
|
||||
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||
if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy {
|
||||
fuzziness := 0
|
||||
if searchMode == indexer.SearchModeFuzzy {
|
||||
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
||||
}
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
|
||||
}...))
|
||||
} else /* exact */ {
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0),
|
||||
}...))
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.RepoIDs) > 0 || options.AllPublic {
|
||||
var repoQueries []query.Query
|
||||
for _, repoID := range options.RepoIDs {
|
||||
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
|
||||
}
|
||||
if options.AllPublic {
|
||||
repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
|
||||
}
|
||||
queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
|
||||
}
|
||||
|
||||
if options.IsPull.Has() {
|
||||
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull"))
|
||||
}
|
||||
if options.IsClosed.Has() {
|
||||
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed"))
|
||||
}
|
||||
if options.IsArchived.Has() {
|
||||
queries = append(queries, inner_bleve.BoolFieldQuery(options.IsArchived.Value(), "is_archived"))
|
||||
}
|
||||
|
||||
if options.NoLabelOnly {
|
||||
queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
|
||||
} else {
|
||||
if len(options.IncludedLabelIDs) > 0 {
|
||||
var includeQueries []query.Query
|
||||
for _, labelID := range options.IncludedLabelIDs {
|
||||
includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
|
||||
}
|
||||
queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
|
||||
} else if len(options.IncludedAnyLabelIDs) > 0 {
|
||||
var includeQueries []query.Query
|
||||
for _, labelID := range options.IncludedAnyLabelIDs {
|
||||
includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
|
||||
}
|
||||
queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
|
||||
}
|
||||
if len(options.ExcludedLabelIDs) > 0 {
|
||||
var excludeQueries []query.Query
|
||||
for _, labelID := range options.ExcludedLabelIDs {
|
||||
q := bleve.NewBooleanQuery()
|
||||
q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
|
||||
excludeQueries = append(excludeQueries, q)
|
||||
}
|
||||
queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.MilestoneIDs) > 0 {
|
||||
var milestoneQueries []query.Query
|
||||
for _, milestoneID := range options.MilestoneIDs {
|
||||
milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
|
||||
}
|
||||
queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
|
||||
}
|
||||
|
||||
if options.NoProjectOnly {
|
||||
queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_project"))
|
||||
} else if len(options.ProjectIDs) > 0 {
|
||||
var projectQueries []query.Query
|
||||
for _, projectID := range options.ProjectIDs {
|
||||
projectQueries = append(projectQueries, inner_bleve.NumericEqualityQuery(projectID, "project_ids"))
|
||||
}
|
||||
// FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: this logic is not right, it should use "AND" but not "OR"
|
||||
queries = append(queries, bleve.NewDisjunctionQuery(projectQueries...))
|
||||
}
|
||||
|
||||
if options.PosterID != "" {
|
||||
// "(none)" becomes 0, it means no poster
|
||||
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(posterIDInt64, "poster_id"))
|
||||
}
|
||||
|
||||
if options.AssigneeID != "" {
|
||||
if options.AssigneeID == "(any)" {
|
||||
queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(optional.Some[int64](1), optional.None[int64](), "assignee_id"))
|
||||
} else {
|
||||
// "(none)" becomes 0, it means no assignee
|
||||
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(assigneeIDInt64, "assignee_id"))
|
||||
}
|
||||
}
|
||||
|
||||
if options.MentionID.Has() {
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids"))
|
||||
}
|
||||
|
||||
if options.ReviewedID.Has() {
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids"))
|
||||
}
|
||||
if options.ReviewRequestedID.Has() {
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids"))
|
||||
}
|
||||
|
||||
if options.SubscriberID.Has() {
|
||||
queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids"))
|
||||
}
|
||||
|
||||
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
|
||||
queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(
|
||||
options.UpdatedAfterUnix,
|
||||
options.UpdatedBeforeUnix,
|
||||
"updated_unix"))
|
||||
}
|
||||
|
||||
var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
|
||||
if len(queries) == 0 {
|
||||
indexerQuery = bleve.NewMatchAllQuery()
|
||||
}
|
||||
|
||||
skip, limit := indexer_internal.ParsePaginator(options.Paginator)
|
||||
search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
|
||||
|
||||
if options.SortBy == "" {
|
||||
options.SortBy = internal.SortByCreatedAsc
|
||||
}
|
||||
|
||||
search.SortBy([]string{string(options.SortBy), "-_id"})
|
||||
|
||||
result, err := b.inner.Indexer.SearchInContext(ctx, search)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret := &internal.SearchResult{
|
||||
Total: int64(result.Total),
|
||||
Hits: make([]internal.Match, 0, len(result.Hits)),
|
||||
}
|
||||
for _, hit := range result.Hits {
|
||||
id, err := indexer_internal.ParseBase36(hit.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret.Hits = append(ret.Hits, internal.Match{
|
||||
ID: id,
|
||||
})
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
// Copyright 2018 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/indexer/issues/internal/tests"
|
||||
)
|
||||
|
||||
func TestBleveIndexer(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
indexer := NewIndexer(dir)
|
||||
defer indexer.Close()
|
||||
|
||||
tests.TestIndexer(t, indexer)
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
issue_model "gitea.dev/models/issues"
|
||||
"gitea.dev/modules/indexer"
|
||||
indexer_internal "gitea.dev/modules/indexer/internal"
|
||||
inner_db "gitea.dev/modules/indexer/internal/db"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"xorm.io/builder"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = (*Indexer)(nil)
|
||||
|
||||
// Indexer implements Indexer interface to use database's like search
|
||||
type Indexer struct {
|
||||
indexer_internal.Indexer
|
||||
}
|
||||
|
||||
func (i *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
var GetIndexer = sync.OnceValue(func() *Indexer {
|
||||
return &Indexer{Indexer: &inner_db.Indexer{}}
|
||||
})
|
||||
|
||||
// Index dummy function
|
||||
func (i *Indexer) Index(_ context.Context, _ ...*internal.IndexerData) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete dummy function
|
||||
func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
|
||||
if mode == indexer.SearchModeExact {
|
||||
return db.BuildCaseInsensitiveLike(colName, keyword)
|
||||
}
|
||||
|
||||
// match words
|
||||
cond := builder.NewCond()
|
||||
fields := strings.Fields(keyword)
|
||||
if len(fields) == 0 {
|
||||
return builder.Expr("1=1")
|
||||
}
|
||||
for _, field := range fields {
|
||||
if field == "" {
|
||||
continue
|
||||
}
|
||||
cond = cond.And(db.BuildCaseInsensitiveLike(colName, field))
|
||||
}
|
||||
return cond
|
||||
}
|
||||
|
||||
// Search searches for issues
|
||||
func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
// FIXME: I tried to avoid importing models here, but it seems to be impossible.
|
||||
// We can provide a function to register the search function, so models/issues can register it.
|
||||
// So models/issues will import modules/indexer/issues, it's OK because it's by design.
|
||||
// But modules/indexer/issues has already imported models/issues to do UpdateRepoIndexer and UpdateIssueIndexer.
|
||||
// And to avoid circular import, we have to move the functions to another package.
|
||||
// I believe it should be services/indexer, sounds great!
|
||||
// But the two functions are used in modules/notification/indexer, that means we will import services/indexer in modules/notification/indexer.
|
||||
// So that's the root problem:
|
||||
// The notification is defined in modules, but it's using lots of things should be in services.
|
||||
|
||||
cond := builder.NewCond()
|
||||
|
||||
if options.Keyword != "" {
|
||||
repoCond := builder.In("repo_id", options.RepoIDs)
|
||||
if len(options.RepoIDs) == 1 {
|
||||
repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
|
||||
}
|
||||
subQuery := builder.Select("id").From("issue").Where(repoCond)
|
||||
searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue)
|
||||
cond = builder.Or(
|
||||
buildMatchQuery(searchMode, "issue.name", options.Keyword),
|
||||
buildMatchQuery(searchMode, "issue.content", options.Keyword),
|
||||
builder.In("issue.id", builder.Select("issue_id").
|
||||
From("comment").
|
||||
Where(builder.And(
|
||||
builder.Eq{"type": issue_model.CommentTypeComment},
|
||||
builder.In("issue_id", subQuery),
|
||||
buildMatchQuery(searchMode, "content", options.Keyword),
|
||||
)),
|
||||
),
|
||||
)
|
||||
|
||||
if options.IsKeywordNumeric() {
|
||||
cond = cond.Or(
|
||||
builder.Eq{"`index`": options.Keyword},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
opt, err := ToDBOptions(ctx, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If pagesize == 0, return total count only. It's a special case for search count.
|
||||
if options.Paginator != nil && options.Paginator.PageSize == 0 {
|
||||
total, err := issue_model.CountIssues(ctx, opt, cond)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &internal.SearchResult{
|
||||
Total: total,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return i.FindWithIssueOptions(ctx, opt, cond)
|
||||
}
|
||||
|
||||
func (i *Indexer) FindWithIssueOptions(ctx context.Context, opt *issue_model.IssuesOptions, otherConds ...builder.Cond) (*internal.SearchResult, error) {
|
||||
ids, total, err := issue_model.IssueIDs(ctx, opt, otherConds...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := make([]internal.Match, 0, len(ids))
|
||||
for _, id := range ids {
|
||||
hits = append(hits, internal.Match{
|
||||
ID: id,
|
||||
})
|
||||
}
|
||||
return &internal.SearchResult{
|
||||
Total: total,
|
||||
Hits: hits,
|
||||
}, nil
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
issue_model "gitea.dev/models/issues"
|
||||
"gitea.dev/modules/container"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_model.IssuesOptions, error) {
|
||||
var sortType string
|
||||
switch options.SortBy {
|
||||
case internal.SortByCreatedAsc:
|
||||
sortType = "oldest"
|
||||
case internal.SortByUpdatedAsc:
|
||||
sortType = "leastupdate"
|
||||
case internal.SortByCommentsAsc:
|
||||
sortType = "leastcomment"
|
||||
case internal.SortByDeadlineDesc:
|
||||
sortType = "farduedate"
|
||||
case internal.SortByCreatedDesc:
|
||||
sortType = "newest"
|
||||
case internal.SortByUpdatedDesc:
|
||||
sortType = "recentupdate"
|
||||
case internal.SortByCommentsDesc:
|
||||
sortType = "mostcomment"
|
||||
case internal.SortByDeadlineAsc:
|
||||
sortType = "nearduedate"
|
||||
default:
|
||||
if strings.HasPrefix(string(options.SortBy), issue_model.ScopeSortPrefix) {
|
||||
sortType = string(options.SortBy)
|
||||
} else {
|
||||
sortType = "newest"
|
||||
}
|
||||
}
|
||||
|
||||
// See the comment of issues_model.SearchOptions for the reason why we need to convert
|
||||
convertID := func(id optional.Option[int64]) int64 {
|
||||
if !id.Has() {
|
||||
return 0
|
||||
}
|
||||
value := id.Value()
|
||||
if value == 0 {
|
||||
return db.NoConditionID
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
opts := &issue_model.IssuesOptions{
|
||||
Paginator: options.Paginator,
|
||||
RepoIDs: options.RepoIDs,
|
||||
AllPublic: options.AllPublic,
|
||||
RepoCond: nil,
|
||||
AssigneeID: options.AssigneeID,
|
||||
PosterID: options.PosterID,
|
||||
MentionedID: convertID(options.MentionID),
|
||||
ReviewRequestedID: convertID(options.ReviewRequestedID),
|
||||
ReviewedID: convertID(options.ReviewedID),
|
||||
SubscriberID: convertID(options.SubscriberID),
|
||||
ProjectIDs: util.Iif(options.NoProjectOnly, []int64{db.NoConditionID}, options.ProjectIDs),
|
||||
IsClosed: options.IsClosed,
|
||||
IsPull: options.IsPull,
|
||||
IncludedLabelNames: nil,
|
||||
ExcludedLabelNames: nil,
|
||||
IncludeMilestones: nil,
|
||||
SortType: sortType,
|
||||
UpdatedAfterUnix: options.UpdatedAfterUnix.Value(),
|
||||
UpdatedBeforeUnix: options.UpdatedBeforeUnix.Value(),
|
||||
PriorityRepoID: 0,
|
||||
IsArchived: options.IsArchived,
|
||||
Owner: nil,
|
||||
Team: nil,
|
||||
Doer: nil,
|
||||
}
|
||||
|
||||
if len(options.MilestoneIDs) == 1 && options.MilestoneIDs[0] == 0 {
|
||||
opts.MilestoneIDs = []int64{db.NoConditionID}
|
||||
} else {
|
||||
opts.MilestoneIDs = options.MilestoneIDs
|
||||
}
|
||||
|
||||
if options.NoLabelOnly {
|
||||
opts.LabelIDs = []int64{0} // Be careful, it's zero, not db.NoConditionID
|
||||
} else {
|
||||
opts.LabelIDs = make([]int64, 0, len(options.IncludedLabelIDs)+len(options.ExcludedLabelIDs))
|
||||
opts.LabelIDs = append(opts.LabelIDs, options.IncludedLabelIDs...)
|
||||
for _, id := range options.ExcludedLabelIDs {
|
||||
opts.LabelIDs = append(opts.LabelIDs, -id)
|
||||
}
|
||||
|
||||
if len(options.IncludedLabelIDs) == 0 && len(options.IncludedAnyLabelIDs) > 0 {
|
||||
labels, err := issue_model.GetLabelsByIDs(ctx, options.IncludedAnyLabelIDs, "name")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GetLabelsByIDs: %v", err)
|
||||
}
|
||||
set := container.Set[string]{}
|
||||
for _, label := range labels {
|
||||
if !set.Contains(label.Name) {
|
||||
set.Add(label.Name)
|
||||
opts.IncludedLabelNames = append(opts.IncludedLabelNames, label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return opts, nil
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package issues
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
issues_model "gitea.dev/models/issues"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/setting"
|
||||
)
|
||||
|
||||
func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOptions {
|
||||
if opts.IssueIDs != nil {
|
||||
setting.PanicInDevOrTesting("Indexer SearchOptions doesn't support IssueIDs")
|
||||
}
|
||||
searchOpt := &SearchOptions{
|
||||
Keyword: keyword,
|
||||
RepoIDs: opts.RepoIDs,
|
||||
AllPublic: opts.AllPublic,
|
||||
IsPull: opts.IsPull,
|
||||
IsClosed: opts.IsClosed,
|
||||
IsArchived: opts.IsArchived,
|
||||
}
|
||||
|
||||
if len(opts.LabelIDs) == 1 && opts.LabelIDs[0] == 0 {
|
||||
searchOpt.NoLabelOnly = true
|
||||
} else {
|
||||
for _, labelID := range opts.LabelIDs {
|
||||
if labelID > 0 {
|
||||
searchOpt.IncludedLabelIDs = append(searchOpt.IncludedLabelIDs, labelID)
|
||||
} else {
|
||||
searchOpt.ExcludedLabelIDs = append(searchOpt.ExcludedLabelIDs, -labelID)
|
||||
}
|
||||
}
|
||||
// opts.IncludedLabelNames and opts.ExcludedLabelNames are not supported here.
|
||||
// It's not a TO DO, it's just unnecessary.
|
||||
}
|
||||
|
||||
if len(opts.MilestoneIDs) == 1 && opts.MilestoneIDs[0] == db.NoConditionID {
|
||||
searchOpt.MilestoneIDs = []int64{0}
|
||||
} else {
|
||||
searchOpt.MilestoneIDs = opts.MilestoneIDs
|
||||
}
|
||||
|
||||
if len(opts.ProjectIDs) == 1 && opts.ProjectIDs[0] == db.NoConditionID {
|
||||
searchOpt.NoProjectOnly = true
|
||||
} else {
|
||||
searchOpt.ProjectIDs = opts.ProjectIDs
|
||||
}
|
||||
|
||||
searchOpt.AssigneeID = opts.AssigneeID
|
||||
|
||||
// See the comment of issues_model.SearchOptions for the reason why we need to convert
|
||||
convertID := func(id int64) optional.Option[int64] {
|
||||
if id > 0 {
|
||||
return optional.Some(id)
|
||||
}
|
||||
if id == db.NoConditionID {
|
||||
return optional.None[int64]()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
searchOpt.PosterID = opts.PosterID
|
||||
searchOpt.MentionID = convertID(opts.MentionedID)
|
||||
searchOpt.ReviewedID = convertID(opts.ReviewedID)
|
||||
searchOpt.ReviewRequestedID = convertID(opts.ReviewRequestedID)
|
||||
searchOpt.SubscriberID = convertID(opts.SubscriberID)
|
||||
|
||||
if opts.UpdatedAfterUnix > 0 {
|
||||
searchOpt.UpdatedAfterUnix = optional.Some(opts.UpdatedAfterUnix)
|
||||
}
|
||||
if opts.UpdatedBeforeUnix > 0 {
|
||||
searchOpt.UpdatedBeforeUnix = optional.Some(opts.UpdatedBeforeUnix)
|
||||
}
|
||||
|
||||
searchOpt.Paginator = opts.Paginator
|
||||
|
||||
switch opts.SortType {
|
||||
case "", "latest":
|
||||
searchOpt.SortBy = SortByCreatedDesc
|
||||
case "oldest":
|
||||
searchOpt.SortBy = SortByCreatedAsc
|
||||
case "recentupdate":
|
||||
searchOpt.SortBy = SortByUpdatedDesc
|
||||
case "leastupdate":
|
||||
searchOpt.SortBy = SortByUpdatedAsc
|
||||
case "mostcomment":
|
||||
searchOpt.SortBy = SortByCommentsDesc
|
||||
case "leastcomment":
|
||||
searchOpt.SortBy = SortByCommentsAsc
|
||||
case "nearduedate":
|
||||
searchOpt.SortBy = SortByDeadlineAsc
|
||||
case "farduedate":
|
||||
searchOpt.SortBy = SortByDeadlineDesc
|
||||
case "priority", "priorityrepo", "project-column-sorting":
|
||||
// Unsupported sort type for search
|
||||
fallthrough
|
||||
default:
|
||||
if strings.HasPrefix(opts.SortType, issues_model.ScopeSortPrefix) {
|
||||
searchOpt.SortBy = internal.SortBy(opts.SortType)
|
||||
} else {
|
||||
searchOpt.SortBy = SortByUpdatedDesc
|
||||
}
|
||||
}
|
||||
|
||||
return searchOpt
|
||||
}
|
||||
@@ -0,0 +1,255 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/indexer"
|
||||
indexer_internal "gitea.dev/modules/indexer/internal"
|
||||
es "gitea.dev/modules/indexer/internal/elasticsearch"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
const issueIndexerLatestVersion = 3
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer implements Indexer interface
|
||||
type Indexer struct {
|
||||
*es.Indexer
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new elasticsearch indexer
|
||||
func NewIndexer(url, indexerName string) *Indexer {
|
||||
return &Indexer{Indexer: es.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)}
|
||||
}
|
||||
|
||||
const (
|
||||
defaultMapping = `
|
||||
{
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"id": { "type": "integer", "index": true },
|
||||
"repo_id": { "type": "integer", "index": true },
|
||||
"is_public": { "type": "boolean", "index": true },
|
||||
|
||||
"title": { "type": "text", "index": true },
|
||||
"content": { "type": "text", "index": true },
|
||||
"comments": { "type" : "text", "index": true },
|
||||
|
||||
"is_pull": { "type": "boolean", "index": true },
|
||||
"is_closed": { "type": "boolean", "index": true },
|
||||
"is_archived": { "type": "boolean", "index": true },
|
||||
"label_ids": { "type": "integer", "index": true },
|
||||
"no_label": { "type": "boolean", "index": true },
|
||||
"milestone_id": { "type": "integer", "index": true },
|
||||
"project_ids": { "type": "integer", "index": true },
|
||||
"no_project": { "type": "boolean", "index": true },
|
||||
"poster_id": { "type": "integer", "index": true },
|
||||
"assignee_id": { "type": "integer", "index": true },
|
||||
"mention_ids": { "type": "integer", "index": true },
|
||||
"reviewed_ids": { "type": "integer", "index": true },
|
||||
"review_requested_ids": { "type": "integer", "index": true },
|
||||
"subscriber_ids": { "type": "integer", "index": true },
|
||||
"updated_unix": { "type": "integer", "index": true },
|
||||
|
||||
"created_unix": { "type": "integer", "index": true },
|
||||
"deadline_unix": { "type": "integer", "index": true },
|
||||
"comment_count": { "type": "integer", "index": true }
|
||||
}
|
||||
}
|
||||
}
|
||||
`
|
||||
)
|
||||
|
||||
// Index will save the index data
|
||||
func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
|
||||
if len(issues) == 0 {
|
||||
return nil
|
||||
} else if len(issues) == 1 {
|
||||
issue := issues[0]
|
||||
return b.Indexer.Index(ctx, strconv.FormatInt(issue.ID, 10), issue)
|
||||
}
|
||||
|
||||
ops := make([]es.BulkOp, 0, len(issues))
|
||||
for _, issue := range issues {
|
||||
ops = append(ops, es.IndexOp(strconv.FormatInt(issue.ID, 10), issue))
|
||||
}
|
||||
return b.Bulk(graceful.GetManager().HammerContext(), ops)
|
||||
}
|
||||
|
||||
// Delete deletes indexes by ids
|
||||
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
|
||||
if len(ids) == 0 {
|
||||
return nil
|
||||
} else if len(ids) == 1 {
|
||||
return b.Indexer.Delete(ctx, strconv.FormatInt(ids[0], 10))
|
||||
}
|
||||
|
||||
ops := make([]es.BulkOp, 0, len(ids))
|
||||
for _, id := range ids {
|
||||
ops = append(ops, es.DeleteOp(strconv.FormatInt(id, 10)))
|
||||
}
|
||||
return b.Bulk(graceful.GetManager().HammerContext(), ops)
|
||||
}
|
||||
|
||||
// Search searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
query := es.NewBoolQuery()
|
||||
|
||||
if options.Keyword != "" {
|
||||
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||
mm := es.NewMultiMatchQuery(options.Keyword, "title", "content", "comments")
|
||||
if searchMode == indexer.SearchModeExact {
|
||||
mm = mm.Type(es.MultiMatchTypePhrasePrefix)
|
||||
} else {
|
||||
mm = mm.Type(es.MultiMatchTypeBestFields).Operator("and")
|
||||
}
|
||||
query.Must(mm)
|
||||
}
|
||||
|
||||
if len(options.RepoIDs) > 0 {
|
||||
q := es.NewBoolQuery()
|
||||
q.Should(es.TermsQuery("repo_id", es.ToAnySlice(options.RepoIDs)...))
|
||||
if options.AllPublic {
|
||||
q.Should(es.TermQuery("is_public", true))
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
|
||||
if options.IsPull.Has() {
|
||||
query.Must(es.TermQuery("is_pull", options.IsPull.Value()))
|
||||
}
|
||||
if options.IsClosed.Has() {
|
||||
query.Must(es.TermQuery("is_closed", options.IsClosed.Value()))
|
||||
}
|
||||
if options.IsArchived.Has() {
|
||||
query.Must(es.TermQuery("is_archived", options.IsArchived.Value()))
|
||||
}
|
||||
|
||||
if options.NoLabelOnly {
|
||||
query.Must(es.TermQuery("no_label", true))
|
||||
} else {
|
||||
if len(options.IncludedLabelIDs) > 0 {
|
||||
q := es.NewBoolQuery()
|
||||
for _, labelID := range options.IncludedLabelIDs {
|
||||
q.Must(es.TermQuery("label_ids", labelID))
|
||||
}
|
||||
query.Must(q)
|
||||
} else if len(options.IncludedAnyLabelIDs) > 0 {
|
||||
query.Must(es.TermsQuery("label_ids", es.ToAnySlice(options.IncludedAnyLabelIDs)...))
|
||||
}
|
||||
if len(options.ExcludedLabelIDs) > 0 {
|
||||
q := es.NewBoolQuery()
|
||||
for _, labelID := range options.ExcludedLabelIDs {
|
||||
q.MustNot(es.TermQuery("label_ids", labelID))
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.MilestoneIDs) > 0 {
|
||||
query.Must(es.TermsQuery("milestone_id", es.ToAnySlice(options.MilestoneIDs)...))
|
||||
}
|
||||
|
||||
if options.NoProjectOnly {
|
||||
query.Must(es.TermQuery("no_project", true))
|
||||
} else if len(options.ProjectIDs) > 0 {
|
||||
// FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: this logic is not right, it should use "AND" but not "OR"
|
||||
query.Must(es.TermsQuery("project_ids", es.ToAnySlice(options.ProjectIDs)...))
|
||||
}
|
||||
|
||||
if options.PosterID != "" {
|
||||
// "(none)" becomes 0, it means no poster
|
||||
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
|
||||
query.Must(es.TermQuery("poster_id", posterIDInt64))
|
||||
}
|
||||
|
||||
if options.AssigneeID != "" {
|
||||
if options.AssigneeID == "(any)" {
|
||||
query.Must(es.NewRangeQuery("assignee_id").Gte(1))
|
||||
} else {
|
||||
// "(none)" becomes 0, it means no assignee
|
||||
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
|
||||
query.Must(es.TermQuery("assignee_id", assigneeIDInt64))
|
||||
}
|
||||
}
|
||||
|
||||
if options.MentionID.Has() {
|
||||
query.Must(es.TermQuery("mention_ids", options.MentionID.Value()))
|
||||
}
|
||||
|
||||
if options.ReviewedID.Has() {
|
||||
query.Must(es.TermQuery("reviewed_ids", options.ReviewedID.Value()))
|
||||
}
|
||||
if options.ReviewRequestedID.Has() {
|
||||
query.Must(es.TermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
|
||||
}
|
||||
|
||||
if options.SubscriberID.Has() {
|
||||
query.Must(es.TermQuery("subscriber_ids", options.SubscriberID.Value()))
|
||||
}
|
||||
|
||||
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
|
||||
q := es.NewRangeQuery("updated_unix")
|
||||
if options.UpdatedAfterUnix.Has() {
|
||||
q.Gte(options.UpdatedAfterUnix.Value())
|
||||
}
|
||||
if options.UpdatedBeforeUnix.Has() {
|
||||
q.Lte(options.UpdatedBeforeUnix.Value())
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
|
||||
if options.SortBy == "" {
|
||||
options.SortBy = internal.SortByCreatedAsc
|
||||
}
|
||||
sortBy := []es.SortField{
|
||||
parseSortBy(options.SortBy),
|
||||
{Field: "id", Desc: true},
|
||||
}
|
||||
|
||||
// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
|
||||
// TODO: make it configurable since it's configurable in elasticsearch
|
||||
const maxPageSize = 10000
|
||||
|
||||
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
|
||||
resp, err := b.Indexer.Search(ctx, es.SearchRequest{
|
||||
Query: query,
|
||||
Sort: sortBy,
|
||||
From: skip,
|
||||
Size: limit,
|
||||
TrackTotal: true,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := make([]internal.Match, 0, len(resp.Hits))
|
||||
for _, hit := range resp.Hits {
|
||||
id, _ := strconv.ParseInt(hit.ID, 10, 64)
|
||||
hits = append(hits, internal.Match{ID: id})
|
||||
}
|
||||
|
||||
return &internal.SearchResult{
|
||||
Total: resp.Total,
|
||||
Hits: hits,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseSortBy(sortBy internal.SortBy) es.SortField {
|
||||
field, desc := strings.CutPrefix(string(sortBy), "-")
|
||||
return es.SortField{Field: field, Desc: desc}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dev/modules/indexer/issues/internal/tests"
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestElasticsearchIndexer(t *testing.T) {
|
||||
// The elasticsearch instance started by pull-db-tests.yml > test-unit > services > elasticsearch
|
||||
rawURL := test.ExternalServiceHTTP(t, "TEST_ELASTICSEARCH_URL", "http://elastic:changeme@elasticsearch:9200")
|
||||
|
||||
// Go's net/http does not auto-attach URL userinfo as Basic Auth, so extract
|
||||
// it and set the header explicitly; otherwise auth-enforced clusters answer
|
||||
// 401 and the probe never reports ready.
|
||||
parsed, err := url.Parse(rawURL)
|
||||
require.NoError(t, err)
|
||||
user := parsed.User
|
||||
parsed.User = nil
|
||||
probeURL := parsed.String()
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
req, err := http.NewRequest(http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if user != nil {
|
||||
pass, _ := user.Password()
|
||||
req.SetBasicAuth(user.Username(), pass)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return resp.StatusCode == http.StatusOK
|
||||
}, time.Minute, time.Second, "Expected elasticsearch to be up")
|
||||
|
||||
indexer := NewIndexer(rawURL, fmt.Sprintf("test_elasticsearch_indexer_%d", time.Now().Unix()))
|
||||
defer indexer.Close()
|
||||
|
||||
tests.TestIndexer(t, indexer)
|
||||
}
|
||||
@@ -0,0 +1,327 @@
|
||||
// Copyright 2018 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package issues
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime/pprof"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
db_model "gitea.dev/models/db"
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/issues/bleve"
|
||||
"gitea.dev/modules/indexer/issues/db"
|
||||
"gitea.dev/modules/indexer/issues/elasticsearch"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/indexer/issues/meilisearch"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/process"
|
||||
"gitea.dev/modules/queue"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
// IndexerMetadata is used to send data to the queue, so it contains only the ids.
|
||||
// It may look weird, because it has to be compatible with the old queue data format.
|
||||
// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
|
||||
// If the IsDelete flag is false, the ID specify the issue to index, so Indexer will query the database to get the issue data.
|
||||
// It should be noted that if the id is not existing in the database, it's index will be deleted too even if IsDelete is false.
|
||||
// Valid values:
|
||||
// - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
|
||||
// - IsDelete = false, ID = 1, and IDs will be ignored
|
||||
type IndexerMetadata struct {
|
||||
ID int64 `json:"id"`
|
||||
|
||||
IsDelete bool `json:"is_delete"`
|
||||
IDs []int64 `json:"ids"`
|
||||
}
|
||||
|
||||
var (
|
||||
// issueIndexerQueue queue of issue ids to be updated
|
||||
issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]
|
||||
// globalIndexer is the global indexer, it cannot be nil.
|
||||
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
|
||||
// So it's always safe use it as *globalIndexer.Load() and call its methods.
|
||||
globalIndexer atomic.Pointer[internal.Indexer]
|
||||
dummyIndexer *internal.Indexer
|
||||
)
|
||||
|
||||
func init() {
|
||||
i := internal.NewDummyIndexer()
|
||||
dummyIndexer = &i
|
||||
globalIndexer.Store(dummyIndexer)
|
||||
}
|
||||
|
||||
// InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until
|
||||
// all issue index done.
|
||||
func InitIssueIndexer(syncReindex bool) {
|
||||
ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)
|
||||
|
||||
indexerInitWaitChannel := make(chan time.Duration, 1)
|
||||
|
||||
// Create the Queue
|
||||
issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))
|
||||
|
||||
graceful.GetManager().RunAtTerminate(finished)
|
||||
|
||||
// Create the Indexer
|
||||
go func() {
|
||||
pprof.SetGoroutineLabels(ctx)
|
||||
start := time.Now()
|
||||
log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
|
||||
var (
|
||||
issueIndexer internal.Indexer
|
||||
existed bool
|
||||
err error
|
||||
)
|
||||
switch setting.Indexer.IssueType {
|
||||
case "bleve":
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
|
||||
log.Error("The indexer files are likely corrupted and may need to be deleted")
|
||||
log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath)
|
||||
globalIndexer.Store(dummyIndexer)
|
||||
log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
|
||||
}
|
||||
}()
|
||||
issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath)
|
||||
existed, err = issueIndexer.Init(ctx)
|
||||
if err != nil {
|
||||
log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
|
||||
}
|
||||
case "elasticsearch":
|
||||
issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
|
||||
existed, err = issueIndexer.Init(ctx)
|
||||
if err != nil {
|
||||
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", util.SanitizeCredentialURLs(setting.Indexer.IssueConnStr), err)
|
||||
}
|
||||
case "db":
|
||||
issueIndexer = db.GetIndexer()
|
||||
case "meilisearch":
|
||||
issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
|
||||
existed, err = issueIndexer.Init(ctx)
|
||||
if err != nil {
|
||||
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", util.SanitizeCredentialURLs(setting.Indexer.IssueConnStr), err)
|
||||
}
|
||||
default:
|
||||
log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
|
||||
}
|
||||
globalIndexer.Store(&issueIndexer)
|
||||
|
||||
graceful.GetManager().RunAtTerminate(func() {
|
||||
log.Debug("Closing issue indexer")
|
||||
(*globalIndexer.Load()).Close()
|
||||
log.Info("PID: %d Issue Indexer closed", os.Getpid())
|
||||
})
|
||||
|
||||
// Start processing the queue
|
||||
go graceful.GetManager().RunWithCancel(issueIndexerQueue)
|
||||
|
||||
// Populate the index
|
||||
if !existed {
|
||||
if syncReindex {
|
||||
graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
|
||||
} else {
|
||||
go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
|
||||
}
|
||||
}
|
||||
|
||||
indexerInitWaitChannel <- time.Since(start)
|
||||
close(indexerInitWaitChannel)
|
||||
}()
|
||||
|
||||
if syncReindex {
|
||||
select {
|
||||
case <-indexerInitWaitChannel:
|
||||
case <-graceful.GetManager().IsShutdown():
|
||||
}
|
||||
} else if setting.Indexer.StartupTimeout > 0 {
|
||||
go func() {
|
||||
pprof.SetGoroutineLabels(ctx)
|
||||
timeout := setting.Indexer.StartupTimeout
|
||||
if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
|
||||
timeout += setting.GracefulHammerTime
|
||||
}
|
||||
select {
|
||||
case duration := <-indexerInitWaitChannel:
|
||||
log.Info("Issue Indexer Initialization took %v", duration)
|
||||
case <-graceful.GetManager().IsShutdown():
|
||||
log.Warn("Shutdown occurred before issue index initialisation was complete")
|
||||
case <-time.After(timeout):
|
||||
issueIndexerQueue.ShutdownWait(5 * time.Second)
|
||||
log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
|
||||
return func(items ...*IndexerMetadata) []*IndexerMetadata {
|
||||
var unhandled []*IndexerMetadata
|
||||
|
||||
indexer := *globalIndexer.Load()
|
||||
for _, item := range items {
|
||||
log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
|
||||
if item.IsDelete {
|
||||
if err := indexer.Delete(ctx, item.IDs...); err != nil {
|
||||
log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err)
|
||||
unhandled = append(unhandled, item)
|
||||
}
|
||||
continue
|
||||
}
|
||||
data, existed, err := getIssueIndexerData(ctx, item.ID)
|
||||
if err != nil {
|
||||
log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
|
||||
unhandled = append(unhandled, item)
|
||||
continue
|
||||
}
|
||||
if !existed {
|
||||
if err := indexer.Delete(ctx, item.ID); err != nil {
|
||||
log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
|
||||
unhandled = append(unhandled, item)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err := indexer.Index(ctx, data); err != nil {
|
||||
log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
|
||||
unhandled = append(unhandled, item)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return unhandled
|
||||
}
|
||||
}
|
||||
|
||||
// populateIssueIndexer populate the issue indexer with issue data
|
||||
func populateIssueIndexer(ctx context.Context) {
|
||||
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
|
||||
defer finished()
|
||||
ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task
|
||||
if err := PopulateIssueIndexer(ctx); err != nil {
|
||||
log.Error("Issue indexer population failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func PopulateIssueIndexer(ctx context.Context) error {
|
||||
for page := 1; ; page++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("shutdown before completion: %w", ctx.Err())
|
||||
default:
|
||||
}
|
||||
repos, _, err := repo_model.SearchRepositoryByName(ctx, repo_model.SearchRepoOptions{
|
||||
ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
|
||||
OrderBy: db_model.SearchOrderByID,
|
||||
Private: true,
|
||||
Collaborate: optional.Some(false),
|
||||
})
|
||||
if err != nil {
|
||||
log.Error("SearchRepositoryByName: %v", err)
|
||||
continue
|
||||
}
|
||||
if len(repos) == 0 {
|
||||
log.Debug("Issue Indexer population complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, repo := range repos {
|
||||
if err := updateRepoIndexer(ctx, repo.ID); err != nil {
|
||||
return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateRepoIndexer add/update all issues of the repositories
|
||||
func UpdateRepoIndexer(ctx context.Context, repoID int64) {
|
||||
if err := updateRepoIndexer(ctx, repoID); err != nil {
|
||||
log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateIssueIndexer add/update an issue to the issue indexer
|
||||
func UpdateIssueIndexer(ctx context.Context, issueID int64) {
|
||||
if err := updateIssueIndexer(ctx, issueID); err != nil {
|
||||
log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteRepoIssueIndexer deletes repo's all issues indexes
|
||||
func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
|
||||
if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
|
||||
log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// IsAvailable checks if issue indexer is available
|
||||
func IsAvailable(ctx context.Context) bool {
|
||||
return (*globalIndexer.Load()).Ping(ctx) == nil
|
||||
}
|
||||
|
||||
// SearchOptions indicates the options for searching issues
|
||||
type SearchOptions = internal.SearchOptions
|
||||
|
||||
const (
|
||||
SortByCreatedDesc = internal.SortByCreatedDesc
|
||||
SortByUpdatedDesc = internal.SortByUpdatedDesc
|
||||
SortByCommentsDesc = internal.SortByCommentsDesc
|
||||
SortByDeadlineDesc = internal.SortByDeadlineDesc
|
||||
SortByCreatedAsc = internal.SortByCreatedAsc
|
||||
SortByUpdatedAsc = internal.SortByUpdatedAsc
|
||||
SortByCommentsAsc = internal.SortByCommentsAsc
|
||||
SortByDeadlineAsc = internal.SortByDeadlineAsc
|
||||
)
|
||||
|
||||
// SearchIssues search issues by options.
|
||||
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
|
||||
ix := *globalIndexer.Load()
|
||||
|
||||
if opts.Keyword == "" || opts.IsKeywordNumeric() {
|
||||
// This is a conservative shortcut.
|
||||
// If the keyword is empty or an integer, db has better (at least not worse) performance to filter issues.
|
||||
// When the keyword is empty, it tends to listing rather than searching issues.
|
||||
// So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
|
||||
// Even worse, the external indexer like elastic search may not be available for a while,
|
||||
// and the user may not be able to list issues completely until it is available again.
|
||||
ix = db.GetIndexer()
|
||||
}
|
||||
|
||||
result, err := ix.Search(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
return SearchResultToIDSlice(result), result.Total, nil
|
||||
}
|
||||
|
||||
func SearchResultToIDSlice(result *internal.SearchResult) []int64 {
|
||||
ret := make([]int64, 0, len(result.Hits))
|
||||
for _, hit := range result.Hits {
|
||||
ret = append(ret, hit.ID)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count.
|
||||
func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
|
||||
opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} })
|
||||
|
||||
_, total, err := SearchIssues(ctx, opts)
|
||||
return total, err
|
||||
}
|
||||
|
||||
func SupportedSearchModes() []indexer.SearchMode {
|
||||
gi := globalIndexer.Load()
|
||||
if gi == nil {
|
||||
return nil
|
||||
}
|
||||
return (*gi).SupportedSearchModes()
|
||||
}
|
||||
@@ -0,0 +1,501 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package issues
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
"gitea.dev/models/issues"
|
||||
"gitea.dev/models/unittest"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/setting"
|
||||
|
||||
_ "gitea.dev/models"
|
||||
_ "gitea.dev/models/actions"
|
||||
_ "gitea.dev/models/activities"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
unittest.MainTest(m)
|
||||
}
|
||||
|
||||
func TestDBSearchIssues(t *testing.T) {
|
||||
require.NoError(t, unittest.PrepareTestDatabase())
|
||||
|
||||
setting.Indexer.IssueType = "db"
|
||||
InitIssueIndexer(true)
|
||||
|
||||
t.Run("search issues with keyword", searchIssueWithKeyword)
|
||||
t.Run("search issues by index", searchIssueByIndex)
|
||||
t.Run("search issues in repo", searchIssueInRepo)
|
||||
t.Run("search issues by ID", searchIssueByID)
|
||||
t.Run("search issues is pr", searchIssueIsPull)
|
||||
t.Run("search issues is closed", searchIssueIsClosed)
|
||||
t.Run("search issues is archived", searchIssueIsArchived)
|
||||
t.Run("search issues by milestone", searchIssueByMilestoneID)
|
||||
t.Run("search issues by label", searchIssueByLabelID)
|
||||
t.Run("search issues by time", searchIssueByTime)
|
||||
t.Run("search issues with order", searchIssueWithOrder)
|
||||
t.Run("search issues in project", searchIssueInProject)
|
||||
t.Run("search issues with paginator", searchIssueWithPaginator)
|
||||
t.Run("search issues with any assignee", searchIssueWithAnyAssignee)
|
||||
}
|
||||
|
||||
func searchIssueWithKeyword(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "issue2",
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{2},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "first",
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "for",
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{11, 5, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "good",
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.opts.Keyword, func(t *testing.T) {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueByIndex(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "1000",
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "2",
|
||||
RepoIDs: []int64{1, 2, 3, 32},
|
||||
},
|
||||
[]int64{17, 12, 7, 2},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
Keyword: "1",
|
||||
RepoIDs: []int64{58},
|
||||
},
|
||||
[]int64{19},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueInRepo(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
RepoIDs: []int64{1},
|
||||
},
|
||||
[]int64{11, 5, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
RepoIDs: []int64{2},
|
||||
},
|
||||
[]int64{7, 4},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
RepoIDs: []int64{3},
|
||||
},
|
||||
[]int64{12, 6},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
RepoIDs: []int64{4},
|
||||
},
|
||||
[]int64{},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
RepoIDs: []int64{5},
|
||||
},
|
||||
[]int64{15},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueByID(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
opts: SearchOptions{
|
||||
PosterID: "1",
|
||||
},
|
||||
expectedIDs: []int64{11, 6, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
opts: SearchOptions{
|
||||
AssigneeID: "1",
|
||||
},
|
||||
expectedIDs: []int64{6, 1},
|
||||
},
|
||||
{
|
||||
// NOTE: This tests no assignees filtering and also ToSearchOptions() to ensure it handles the filter correctly
|
||||
opts: *ToSearchOptions("", &issues.IssuesOptions{AssigneeID: "(none)"}),
|
||||
expectedIDs: []int64{22, 21, 16, 15, 14, 13, 12, 11, 20, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2},
|
||||
},
|
||||
{
|
||||
opts: SearchOptions{
|
||||
MentionID: optional.Some(int64(4)),
|
||||
},
|
||||
expectedIDs: []int64{1},
|
||||
},
|
||||
{
|
||||
opts: SearchOptions{
|
||||
ReviewedID: optional.Some(int64(1)),
|
||||
},
|
||||
expectedIDs: []int64{},
|
||||
},
|
||||
{
|
||||
opts: SearchOptions{
|
||||
ReviewRequestedID: optional.Some(int64(1)),
|
||||
},
|
||||
expectedIDs: []int64{12},
|
||||
},
|
||||
{
|
||||
opts: SearchOptions{
|
||||
SubscriberID: optional.Some(int64(1)),
|
||||
},
|
||||
expectedIDs: []int64{11, 6, 5, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
// issue 20 request user 15 and team 5 which user 15 belongs to
|
||||
// the review request number of issue 20 should be 1
|
||||
opts: SearchOptions{
|
||||
ReviewRequestedID: optional.Some(int64(15)),
|
||||
},
|
||||
expectedIDs: []int64{12, 20},
|
||||
},
|
||||
{
|
||||
// user 20 approved the issue 20, so return nothing
|
||||
opts: SearchOptions{
|
||||
ReviewRequestedID: optional.Some(int64(20)),
|
||||
},
|
||||
expectedIDs: []int64{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueIsPull(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
IsPull: optional.Some(false),
|
||||
},
|
||||
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
IsPull: optional.Some(true),
|
||||
},
|
||||
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueIsClosed(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
IsClosed: optional.Some(false),
|
||||
},
|
||||
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
IsClosed: optional.Some(true),
|
||||
},
|
||||
[]int64{5, 4},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueIsArchived(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
IsArchived: optional.Some(false),
|
||||
},
|
||||
[]int64{22, 21, 17, 16, 15, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
IsArchived: optional.Some(true),
|
||||
},
|
||||
[]int64{14},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueByMilestoneID(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
MilestoneIDs: []int64{1},
|
||||
},
|
||||
[]int64{2},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
MilestoneIDs: []int64{3},
|
||||
},
|
||||
[]int64{3},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueByLabelID(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
IncludedLabelIDs: []int64{1},
|
||||
},
|
||||
[]int64{2, 1},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
IncludedLabelIDs: []int64{4},
|
||||
},
|
||||
[]int64{2},
|
||||
},
|
||||
{
|
||||
SearchOptions{
|
||||
ExcludedLabelIDs: []int64{1},
|
||||
},
|
||||
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueByTime(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
UpdatedAfterUnix: optional.Some(int64(0)),
|
||||
},
|
||||
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueWithOrder(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
SortBy: internal.SortByCreatedAsc,
|
||||
},
|
||||
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueInProject(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
ProjectIDs: []int64{1},
|
||||
},
|
||||
[]int64{5, 3, 2, 1},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
}
|
||||
|
||||
// Test filtering for issues with no project assigned using dynamic validation
|
||||
t.Run("no project assigned", func(t *testing.T) {
|
||||
issueIDs, total, err := SearchIssues(t.Context(), &SearchOptions{
|
||||
ProjectIDs: []int64{db.NoConditionID},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.NotEmpty(t, issueIDs)
|
||||
assert.Equal(t, total, int64(len(issueIDs)))
|
||||
|
||||
// Verify each returned issue actually has no project
|
||||
for _, issueID := range issueIDs {
|
||||
issue, err := issues.GetIssueByID(t.Context(), issueID)
|
||||
require.NoError(t, err)
|
||||
err = issue.LoadProjects(t.Context())
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, issue.Projects, "Issue %d should have no projects", issueID)
|
||||
}
|
||||
|
||||
// Count total issues with no project to verify we got them all
|
||||
allIssues, err := issues.Issues(t.Context(), &issues.IssuesOptions{
|
||||
ProjectIDs: []int64{db.NoConditionID},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, issueIDs, len(allIssues), "Should return all issues with no project")
|
||||
})
|
||||
}
|
||||
|
||||
func searchIssueWithPaginator(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
expectedTotal int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
},
|
||||
[]int64{22, 21, 17, 16, 15},
|
||||
22,
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, total, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
assert.Equal(t, test.expectedTotal, total)
|
||||
}
|
||||
}
|
||||
|
||||
func searchIssueWithAnyAssignee(t *testing.T) {
|
||||
tests := []struct {
|
||||
opts SearchOptions
|
||||
expectedIDs []int64
|
||||
expectedTotal int64
|
||||
}{
|
||||
{
|
||||
SearchOptions{
|
||||
AssigneeID: "(any)",
|
||||
},
|
||||
[]int64{17, 6, 1},
|
||||
3,
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
issueIDs, total, err := SearchIssues(t.Context(), &test.opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||
assert.Equal(t, test.expectedTotal, total)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
)
|
||||
|
||||
// Indexer defines an interface to indexer issues contents
|
||||
type Indexer interface {
|
||||
internal.Indexer
|
||||
Index(ctx context.Context, issue ...*IndexerData) error
|
||||
Delete(ctx context.Context, ids ...int64) error
|
||||
Search(ctx context.Context, options *SearchOptions) (*SearchResult, error)
|
||||
SupportedSearchModes() []indexer.SearchMode
|
||||
}
|
||||
|
||||
// NewDummyIndexer returns a dummy indexer
|
||||
func NewDummyIndexer() Indexer {
|
||||
return &dummyIndexer{
|
||||
Indexer: internal.NewDummyIndexer(),
|
||||
}
|
||||
}
|
||||
|
||||
type dummyIndexer struct {
|
||||
internal.Indexer
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error {
|
||||
return errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Delete(_ context.Context, _ ...int64) error {
|
||||
return errors.New("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Search(_ context.Context, _ *SearchOptions) (*SearchResult, error) {
|
||||
return nil, errors.New("indexer is not ready")
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
"gitea.dev/modules/indexer"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/timeutil"
|
||||
)
|
||||
|
||||
// IndexerData data stored in the issue indexer
|
||||
type IndexerData struct {
|
||||
ID int64 `json:"id"`
|
||||
RepoID int64 `json:"repo_id"`
|
||||
IsPublic bool `json:"is_public"` // If the repo is public
|
||||
|
||||
// Fields used for keyword searching
|
||||
Title string `json:"title"`
|
||||
Content string `json:"content"`
|
||||
Comments []string `json:"comments"`
|
||||
|
||||
// Fields used for filtering
|
||||
IsPull bool `json:"is_pull"`
|
||||
IsClosed bool `json:"is_closed"`
|
||||
IsArchived bool `json:"is_archived"`
|
||||
LabelIDs []int64 `json:"label_ids"`
|
||||
NoLabel bool `json:"no_label"` // True if LabelIDs is empty
|
||||
MilestoneID int64 `json:"milestone_id"`
|
||||
ProjectIDs []int64 `json:"project_ids"`
|
||||
NoProject bool `json:"no_project"` // True if ProjectIDs is empty
|
||||
ProjectColumnMap map[int64]int64 `json:"project_column_map,omitempty"` // Maps project ID to column ID for each project the issue is in
|
||||
PosterID int64 `json:"poster_id"`
|
||||
AssigneeID int64 `json:"assignee_id"`
|
||||
MentionIDs []int64 `json:"mention_ids"`
|
||||
ReviewedIDs []int64 `json:"reviewed_ids"`
|
||||
ReviewRequestedIDs []int64 `json:"review_requested_ids"`
|
||||
SubscriberIDs []int64 `json:"subscriber_ids"`
|
||||
UpdatedUnix timeutil.TimeStamp `json:"updated_unix"`
|
||||
|
||||
// Fields used for sorting
|
||||
// UpdatedUnix is both used for filtering and sorting.
|
||||
// ID is used for sorting too, to make the sorting stable.
|
||||
CreatedUnix timeutil.TimeStamp `json:"created_unix"`
|
||||
DeadlineUnix timeutil.TimeStamp `json:"deadline_unix"`
|
||||
CommentCount int64 `json:"comment_count"`
|
||||
}
|
||||
|
||||
// Match represents on search result
|
||||
type Match struct {
|
||||
ID int64 `json:"id"`
|
||||
Score float64 `json:"score"`
|
||||
}
|
||||
|
||||
// SearchResult represents search results
|
||||
type SearchResult struct {
|
||||
Total int64
|
||||
Hits []Match
|
||||
}
|
||||
|
||||
// SearchOptions represents search options.
|
||||
//
|
||||
// It has a slightly different design from database query options.
|
||||
// In database query options, a field is never a pointer, so it could be confusing when it's zero value:
|
||||
// Do you want to find data with a field value of 0, or do you not specify the field in the options?
|
||||
// To avoid this confusion, db introduced db.NoConditionID(-1).
|
||||
// So zero value means the field is not specified in the search options, and db.NoConditionID means "== 0" or "id NOT IN (SELECT id FROM ...)"
|
||||
// It's still not ideal, it trapped developers many times.
|
||||
// And sometimes -1 could be a valid value, like issue ID, negative numbers indicate exclusion.
|
||||
// Since db.NoConditionID is for "db" (the package name is db), it makes sense not to use it in the indexer:
|
||||
// Why do bleve/elasticsearch/meilisearch indexers need to know about db.NoConditionID?
|
||||
// So in SearchOptions, we use pointer for fields which could be not specified,
|
||||
// and always use the value to filter if it's not nil, even if it's zero or negative.
|
||||
// It can handle almost all cases, if there is an exception, we can add a new field, like NoLabelOnly.
|
||||
// Unfortunately, we still use db for the indexer and have to convert between db.NoConditionID and nil for legacy reasons.
|
||||
type SearchOptions struct {
|
||||
Keyword string // keyword to search
|
||||
|
||||
SearchMode indexer.SearchModeType
|
||||
|
||||
RepoIDs []int64 // repository IDs which the issues belong to
|
||||
AllPublic bool // if include all public repositories
|
||||
|
||||
IsPull optional.Option[bool] // if the issues is a pull request
|
||||
IsClosed optional.Option[bool] // if the issues is closed
|
||||
IsArchived optional.Option[bool] // if the repo is archived
|
||||
|
||||
IncludedLabelIDs []int64 // labels the issues have
|
||||
ExcludedLabelIDs []int64 // labels the issues don't have
|
||||
IncludedAnyLabelIDs []int64 // labels the issues have at least one. It will be ignored if IncludedLabelIDs is not empty. It's an uncommon filter, but it has been supported accidentally by issues.IssuesOptions.IncludedLabelNames.
|
||||
NoLabelOnly bool // if the issues have no label, if true, IncludedLabelIDs and ExcludedLabelIDs, IncludedAnyLabelIDs will be ignored
|
||||
|
||||
MilestoneIDs []int64 // milestones the issues have
|
||||
|
||||
ProjectIDs []int64 // project the issues belong to. FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: no multiple project filter support yet. Search logic is wrong.
|
||||
NoProjectOnly bool // if the issues have no project, if true, ProjectIDs will be ignored
|
||||
|
||||
PosterID string // poster of the issues, "(none)" or "(any)" or a user ID
|
||||
AssigneeID string // assignee of the issues, "(none)" or "(any)" or a user ID
|
||||
|
||||
MentionID optional.Option[int64] // mentioned user of the issues
|
||||
|
||||
ReviewedID optional.Option[int64] // reviewer of the issues
|
||||
ReviewRequestedID optional.Option[int64] // requested reviewer of the issues
|
||||
|
||||
SubscriberID optional.Option[int64] // subscriber of the issues
|
||||
|
||||
UpdatedAfterUnix optional.Option[int64]
|
||||
UpdatedBeforeUnix optional.Option[int64]
|
||||
|
||||
Paginator *db.ListOptions
|
||||
|
||||
SortBy SortBy // sort by field
|
||||
}
|
||||
|
||||
// Copy returns a copy of the options.
|
||||
// Be careful, it's not a deep copy, so `SearchOptions.RepoIDs = {...}` is OK while `SearchOptions.RepoIDs[0] = ...` is not.
|
||||
func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOptions {
|
||||
if o == nil {
|
||||
return nil
|
||||
}
|
||||
v := *o
|
||||
for _, e := range edit {
|
||||
e(&v)
|
||||
}
|
||||
return &v
|
||||
}
|
||||
|
||||
// used for optimized issue index based search
|
||||
func (o *SearchOptions) IsKeywordNumeric() bool {
|
||||
_, err := strconv.Atoi(o.Keyword)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
type SortBy string
|
||||
|
||||
const (
|
||||
SortByCreatedDesc SortBy = "-created_unix"
|
||||
SortByUpdatedDesc SortBy = "-updated_unix"
|
||||
SortByCommentsDesc SortBy = "-comment_count"
|
||||
SortByDeadlineDesc SortBy = "-deadline_unix"
|
||||
SortByCreatedAsc SortBy = "created_unix"
|
||||
SortByUpdatedAsc SortBy = "updated_unix"
|
||||
SortByCommentsAsc SortBy = "comment_count"
|
||||
SortByDeadlineAsc SortBy = "deadline_unix"
|
||||
// Unsupported sort types which are supported by issues.IssuesOptions.SortType:
|
||||
//
|
||||
// - "priorityrepo":
|
||||
// It's impossible to support it in the indexer.
|
||||
// It is based on the specified repository in the request, so we cannot add static field to the indexer.
|
||||
// If we do something like that query the issues in the specified repository first then append other issues,
|
||||
// it will break the pagination.
|
||||
//
|
||||
// - "project-column-sorting":
|
||||
// Although it's possible to support it by adding project.ProjectIssue.Sorting to the indexer,
|
||||
// but what if the issue belongs to multiple projects?
|
||||
// Since it's unsupported to search issues with keyword in project page, we don't need to support it.
|
||||
)
|
||||
@@ -0,0 +1,739 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// This package contains tests for issues indexer modules.
|
||||
// All the code in this package is only used for testing.
|
||||
// Do not put any production code in this package to avoid it being included in the final binary.
|
||||
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/optional"
|
||||
"gitea.dev/modules/timeutil"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIndexer(t *testing.T, indexer internal.Indexer) {
|
||||
_, err := indexer.Init(t.Context())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, indexer.Ping(t.Context()))
|
||||
|
||||
var (
|
||||
ids []int64
|
||||
data = map[int64]*internal.IndexerData{}
|
||||
)
|
||||
{
|
||||
d := generateDefaultIndexerData()
|
||||
for _, v := range d {
|
||||
ids = append(ids, v.ID)
|
||||
data[v.ID] = v
|
||||
}
|
||||
require.NoError(t, indexer.Index(t.Context(), d...))
|
||||
waitData(t, indexer, int64(len(data)))
|
||||
}
|
||||
|
||||
defer func() {
|
||||
require.NoError(t, indexer.Delete(t.Context(), ids...))
|
||||
}()
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.Name, func(t *testing.T) {
|
||||
if len(c.ExtraData) > 0 {
|
||||
require.NoError(t, indexer.Index(t.Context(), c.ExtraData...))
|
||||
for _, v := range c.ExtraData {
|
||||
data[v.ID] = v
|
||||
}
|
||||
waitData(t, indexer, int64(len(data)))
|
||||
defer func() {
|
||||
for _, v := range c.ExtraData {
|
||||
require.NoError(t, indexer.Delete(t.Context(), v.ID))
|
||||
delete(data, v.ID)
|
||||
}
|
||||
waitData(t, indexer, int64(len(data)))
|
||||
}()
|
||||
}
|
||||
|
||||
result, err := indexer.Search(t.Context(), c.SearchOptions)
|
||||
require.NoError(t, err)
|
||||
|
||||
if c.Expected != nil {
|
||||
c.Expected(t, data, result)
|
||||
} else {
|
||||
ids := make([]int64, 0, len(result.Hits))
|
||||
for _, hit := range result.Hits {
|
||||
ids = append(ids, hit.ID)
|
||||
}
|
||||
assert.Equal(t, c.ExpectedIDs, ids)
|
||||
assert.Equal(t, c.ExpectedTotal, result.Total)
|
||||
}
|
||||
|
||||
// test counting
|
||||
c.SearchOptions.Paginator = &db.ListOptions{PageSize: 0}
|
||||
countResult, err := indexer.Search(t.Context(), c.SearchOptions)
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, countResult.Hits)
|
||||
assert.Equal(t, result.Total, countResult.Total)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var cases = []*testIndexerCase{
|
||||
{
|
||||
Name: "default",
|
||||
SearchOptions: &internal.SearchOptions{},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "empty",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "f1dfac73-fda6-4a6b-b8a4-2408fcb8ef69",
|
||||
},
|
||||
ExpectedIDs: []int64{},
|
||||
ExpectedTotal: 0,
|
||||
},
|
||||
{
|
||||
Name: "with limit",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
},
|
||||
},
|
||||
{
|
||||
// Exercises the single-doc Index/Delete fast path in backends that have one (e.g. Elasticsearch).
|
||||
Name: "single-doc index",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 999, Title: "solo-issue-marker"},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{Keyword: "solo-issue-marker"},
|
||||
ExpectedIDs: []int64{999},
|
||||
ExpectedTotal: 1,
|
||||
},
|
||||
{
|
||||
Name: "Keyword",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1000, Title: "hi hello world"},
|
||||
{ID: 1001, Content: "hi hello world"},
|
||||
{ID: 1002, Comments: []string{"hi", "hello world"}},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello",
|
||||
},
|
||||
ExpectedIDs: []int64{1002, 1001, 1000},
|
||||
ExpectedTotal: 3,
|
||||
},
|
||||
{
|
||||
Name: "RepoIDs",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
|
||||
{ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
|
||||
{ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
|
||||
{ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
|
||||
{ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
|
||||
{ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
|
||||
{ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello",
|
||||
RepoIDs: []int64{1, 4},
|
||||
},
|
||||
ExpectedIDs: []int64{1006, 1002, 1001},
|
||||
ExpectedTotal: 3,
|
||||
},
|
||||
{
|
||||
Name: "RepoIDs and AllPublic",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
|
||||
{ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
|
||||
{ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
|
||||
{ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
|
||||
{ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
|
||||
{ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
|
||||
{ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello",
|
||||
RepoIDs: []int64{1, 4},
|
||||
AllPublic: true,
|
||||
},
|
||||
ExpectedIDs: []int64{1006, 1005, 1004, 1003, 1002, 1001},
|
||||
ExpectedTotal: 6,
|
||||
},
|
||||
{
|
||||
Name: "issue only",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
IsPull: optional.Some(false),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.False(t, data[v.ID].IsPull)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsPull }), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "pull only",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
IsPull: optional.Some(true),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.True(t, data[v.ID].IsPull)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsPull }), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "opened only",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
IsClosed: optional.Some(false),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.False(t, data[v.ID].IsClosed)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsClosed }), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "closed only",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
IsClosed: optional.Some(true),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.True(t, data[v.ID].IsClosed)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsClosed }), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "labels",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
|
||||
{ID: 1001, Title: "hello b", LabelIDs: []int64{2000, 2001}},
|
||||
{ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
|
||||
{ID: 1003, Title: "hello d", LabelIDs: []int64{2000}},
|
||||
{ID: 1004, Title: "hello e", LabelIDs: []int64{}},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello",
|
||||
IncludedLabelIDs: []int64{2000, 2001},
|
||||
ExcludedLabelIDs: []int64{2003},
|
||||
},
|
||||
ExpectedIDs: []int64{1001, 1000},
|
||||
ExpectedTotal: 2,
|
||||
},
|
||||
{
|
||||
Name: "include any labels",
|
||||
ExtraData: []*internal.IndexerData{
|
||||
{ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
|
||||
{ID: 1001, Title: "hello b", LabelIDs: []int64{2001}},
|
||||
{ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
|
||||
{ID: 1003, Title: "hello d", LabelIDs: []int64{2002}},
|
||||
{ID: 1004, Title: "hello e", LabelIDs: []int64{}},
|
||||
},
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Keyword: "hello",
|
||||
IncludedAnyLabelIDs: []int64{2001, 2002},
|
||||
ExcludedLabelIDs: []int64{2003},
|
||||
},
|
||||
ExpectedIDs: []int64{1003, 1001, 1000},
|
||||
ExpectedTotal: 3,
|
||||
},
|
||||
{
|
||||
Name: "MilestoneIDs",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
MilestoneIDs: []int64{1, 2, 6},
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, []int64{1, 2, 6}, data[v.ID].MilestoneID)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.MilestoneID == 1 || v.MilestoneID == 2 || v.MilestoneID == 6
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "no MilestoneIDs",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
MilestoneIDs: []int64{0},
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Equal(t, int64(0), data[v.ID].MilestoneID)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.MilestoneID == 0
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ProjectIDs",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
ProjectIDs: []int64{1},
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, data[v.ID].ProjectIDs, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return slices.Contains(v.ProjectIDs, int64(1))
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "no ProjectIDs (empty array)",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 50,
|
||||
},
|
||||
NoProjectOnly: true,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
// Verify only issues with no projects are returned
|
||||
for _, v := range result.Hits {
|
||||
assert.Empty(t, data[v.ID].ProjectIDs, "Issue %d should have no projects", v.ID)
|
||||
}
|
||||
// Verify we got ALL issues with no projects
|
||||
expectedCount := countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return len(v.ProjectIDs) == 0
|
||||
})
|
||||
assert.Equal(t, expectedCount, result.Total, "Should return all %d issues with no project", expectedCount)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "PosterID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
PosterID: "1",
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Equal(t, int64(1), data[v.ID].PosterID)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.PosterID == 1
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "AssigneeID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
AssigneeID: "1",
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Equal(t, int64(1), data[v.ID].AssigneeID)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.AssigneeID == 1
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "no AssigneeID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
AssigneeID: "(none)",
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Equal(t, int64(0), data[v.ID].AssigneeID)
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.AssigneeID == 0
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "MentionID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
MentionID: optional.Some(int64(1)),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, data[v.ID].MentionIDs, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return slices.Contains(v.MentionIDs, 1)
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ReviewedID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
ReviewedID: optional.Some(int64(1)),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, data[v.ID].ReviewedIDs, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return slices.Contains(v.ReviewedIDs, 1)
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ReviewRequestedID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
ReviewRequestedID: optional.Some(int64(1)),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, data[v.ID].ReviewRequestedIDs, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return slices.Contains(v.ReviewRequestedIDs, 1)
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SubscriberID",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
SubscriberID: optional.Some(int64(1)),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.Contains(t, data[v.ID].SubscriberIDs, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return slices.Contains(v.SubscriberIDs, 1)
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "updated",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptions{
|
||||
PageSize: 5,
|
||||
},
|
||||
UpdatedAfterUnix: optional.Some(int64(20)),
|
||||
UpdatedBeforeUnix: optional.Some(int64(30)),
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 5)
|
||||
for _, v := range result.Hits {
|
||||
assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, int64(20))
|
||||
assert.LessOrEqual(t, data[v.ID].UpdatedUnix, int64(30))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return data[v.ID].UpdatedUnix >= 20 && data[v.ID].UpdatedUnix <= 30
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByCreatedDesc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByCreatedDesc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.GreaterOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByUpdatedDesc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByUpdatedDesc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByCommentsDesc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByCommentsDesc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.GreaterOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByDeadlineDesc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByDeadlineDesc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.GreaterOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByCreatedAsc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByCreatedAsc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.LessOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByUpdatedAsc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByUpdatedAsc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.LessOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByCommentsAsc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByCommentsAsc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.LessOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SortByDeadlineAsc",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
Paginator: &db.ListOptionsAll,
|
||||
SortBy: internal.SortByDeadlineAsc,
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, len(data))
|
||||
assert.Equal(t, len(data), int(result.Total))
|
||||
for i, v := range result.Hits {
|
||||
if i < len(result.Hits)-1 {
|
||||
assert.LessOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "SearchAnyAssignee",
|
||||
SearchOptions: &internal.SearchOptions{
|
||||
AssigneeID: "(any)",
|
||||
},
|
||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||
assert.Len(t, result.Hits, 180)
|
||||
for _, v := range result.Hits {
|
||||
assert.GreaterOrEqual(t, data[v.ID].AssigneeID, int64(1))
|
||||
}
|
||||
assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
|
||||
return v.AssigneeID >= 1
|
||||
}), result.Total)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
type testIndexerCase struct {
|
||||
Name string
|
||||
ExtraData []*internal.IndexerData
|
||||
|
||||
SearchOptions *internal.SearchOptions
|
||||
|
||||
Expected func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) // if nil, use ExpectedIDs, ExpectedTotal
|
||||
ExpectedIDs []int64
|
||||
ExpectedTotal int64
|
||||
}
|
||||
|
||||
func generateDefaultIndexerData() []*internal.IndexerData {
|
||||
var id int64
|
||||
var data []*internal.IndexerData
|
||||
for repoID := int64(1); repoID <= 10; repoID++ {
|
||||
for issueIndex := int64(1); issueIndex <= 20; issueIndex++ {
|
||||
id++
|
||||
|
||||
comments := make([]string, id%4)
|
||||
for i := range comments {
|
||||
comments[i] = fmt.Sprintf("comment%d", i)
|
||||
}
|
||||
|
||||
labelIDs := make([]int64, id%5)
|
||||
for i := range labelIDs {
|
||||
labelIDs[i] = int64(i) + 1 // LabelID should not be 0
|
||||
}
|
||||
mentionIDs := make([]int64, id%6)
|
||||
for i := range mentionIDs {
|
||||
mentionIDs[i] = int64(i) + 1 // MentionID should not be 0
|
||||
}
|
||||
reviewedIDs := make([]int64, id%7)
|
||||
for i := range reviewedIDs {
|
||||
reviewedIDs[i] = int64(i) + 1 // ReviewID should not be 0
|
||||
}
|
||||
reviewRequestedIDs := make([]int64, id%8)
|
||||
for i := range reviewRequestedIDs {
|
||||
reviewRequestedIDs[i] = int64(i) + 1 // ReviewRequestedID should not be 0
|
||||
}
|
||||
subscriberIDs := make([]int64, id%9)
|
||||
for i := range subscriberIDs {
|
||||
subscriberIDs[i] = int64(i) + 1 // SubscriberID should not be 0
|
||||
}
|
||||
projectIDs := make([]int64, id%5)
|
||||
for i := range projectIDs {
|
||||
projectIDs[i] = int64(i) + 1 // projectID should not be 0
|
||||
}
|
||||
|
||||
data = append(data, &internal.IndexerData{
|
||||
ID: id,
|
||||
RepoID: repoID,
|
||||
IsPublic: repoID%2 == 0,
|
||||
Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
|
||||
Content: fmt.Sprintf("content%d", issueIndex),
|
||||
Comments: comments,
|
||||
IsPull: issueIndex%2 == 0,
|
||||
IsClosed: issueIndex%3 == 0,
|
||||
LabelIDs: labelIDs,
|
||||
NoLabel: len(labelIDs) == 0,
|
||||
MilestoneID: issueIndex % 4,
|
||||
ProjectIDs: projectIDs,
|
||||
NoProject: len(projectIDs) == 0,
|
||||
PosterID: id%10 + 1, // PosterID should not be 0
|
||||
AssigneeID: issueIndex % 10,
|
||||
MentionIDs: mentionIDs,
|
||||
ReviewedIDs: reviewedIDs,
|
||||
ReviewRequestedIDs: reviewRequestedIDs,
|
||||
SubscriberIDs: subscriberIDs,
|
||||
UpdatedUnix: timeutil.TimeStamp(id + issueIndex),
|
||||
CreatedUnix: timeutil.TimeStamp(id),
|
||||
DeadlineUnix: timeutil.TimeStamp(id + issueIndex + repoID),
|
||||
CommentCount: int64(len(comments)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func countIndexerData(data map[int64]*internal.IndexerData, f func(v *internal.IndexerData) bool) int64 {
|
||||
var count int64
|
||||
for _, v := range data {
|
||||
if f(v) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// waitData waits for the indexer to index all data.
|
||||
// Some engines like Elasticsearch index data asynchronously, so we need to wait for a while.
|
||||
func waitData(t *testing.T, indexer internal.Indexer, total int64) {
|
||||
assert.Eventually(t, func() bool {
|
||||
result, err := indexer.Search(t.Context(), &internal.SearchOptions{Paginator: &db.ListOptions{}})
|
||||
require.NoError(t, err)
|
||||
return result.Total == total
|
||||
}, 10*time.Second, 100*time.Millisecond, "expected total=%d", total)
|
||||
}
|
||||
@@ -0,0 +1,314 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"gitea.dev/modules/indexer"
|
||||
indexer_internal "gitea.dev/modules/indexer/internal"
|
||||
inner_meilisearch "gitea.dev/modules/indexer/internal/meilisearch"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/json"
|
||||
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
)
|
||||
|
||||
const (
|
||||
issueIndexerLatestVersion = 5
|
||||
|
||||
// TODO: make this configurable if necessary
|
||||
maxTotalHits = 10000
|
||||
)
|
||||
|
||||
// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types.
|
||||
var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer implements Indexer interface
|
||||
type Indexer struct {
|
||||
inner *inner_meilisearch.Indexer
|
||||
indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new meilisearch indexer
|
||||
func NewIndexer(url, apiKey, indexerName string) *Indexer {
|
||||
settings := &meilisearch.Settings{
|
||||
// The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
|
||||
// So even if we specify the sort order, it could not be respected because the priority of "sort" is so low.
|
||||
// So we need to specify the ranking rules to make sure the sort order is respected.
|
||||
// See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
|
||||
RankingRules: []string{"sort", // make sure "sort" has the highest priority
|
||||
"words", "typo", "proximity", "attribute", "exactness"},
|
||||
|
||||
SearchableAttributes: []string{
|
||||
"title",
|
||||
"content",
|
||||
"comments",
|
||||
},
|
||||
DisplayedAttributes: []string{
|
||||
"id",
|
||||
"title",
|
||||
"content",
|
||||
"comments",
|
||||
},
|
||||
FilterableAttributes: []string{
|
||||
"repo_id",
|
||||
"is_public",
|
||||
"is_pull",
|
||||
"is_closed",
|
||||
"is_archived",
|
||||
"label_ids",
|
||||
"no_label",
|
||||
"milestone_id",
|
||||
"project_ids",
|
||||
"no_project",
|
||||
"poster_id",
|
||||
"assignee_id",
|
||||
"mention_ids",
|
||||
"reviewed_ids",
|
||||
"review_requested_ids",
|
||||
"subscriber_ids",
|
||||
"updated_unix",
|
||||
},
|
||||
SortableAttributes: []string{
|
||||
"updated_unix",
|
||||
"created_unix",
|
||||
"deadline_unix",
|
||||
"comment_count",
|
||||
"id",
|
||||
},
|
||||
Pagination: &meilisearch.Pagination{
|
||||
MaxTotalHits: maxTotalHits,
|
||||
},
|
||||
}
|
||||
|
||||
inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
|
||||
indexer := &Indexer{
|
||||
inner: inner,
|
||||
Indexer: inner,
|
||||
}
|
||||
return indexer
|
||||
}
|
||||
|
||||
// Index will save the index data
|
||||
func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
|
||||
if len(issues) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, issue := range issues {
|
||||
// use default primary key which should be "id"
|
||||
_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// TODO: bulk send index data
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete deletes indexes by ids
|
||||
func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
|
||||
if len(ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, id := range ids {
|
||||
_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// TODO: bulk send deletes
|
||||
return nil
|
||||
}
|
||||
|
||||
// Search searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
query := inner_meilisearch.FilterAnd{}
|
||||
|
||||
if len(options.RepoIDs) > 0 {
|
||||
q := &inner_meilisearch.FilterOr{}
|
||||
q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
|
||||
if options.AllPublic {
|
||||
q.Or(inner_meilisearch.NewFilterEq("is_public", true))
|
||||
}
|
||||
query.And(q)
|
||||
}
|
||||
|
||||
if options.IsPull.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value()))
|
||||
}
|
||||
if options.IsClosed.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value()))
|
||||
}
|
||||
if options.IsArchived.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("is_archived", options.IsArchived.Value()))
|
||||
}
|
||||
|
||||
if options.NoLabelOnly {
|
||||
query.And(inner_meilisearch.NewFilterEq("no_label", true))
|
||||
} else {
|
||||
if len(options.IncludedLabelIDs) > 0 {
|
||||
q := &inner_meilisearch.FilterAnd{}
|
||||
for _, labelID := range options.IncludedLabelIDs {
|
||||
q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
|
||||
}
|
||||
query.And(q)
|
||||
} else if len(options.IncludedAnyLabelIDs) > 0 {
|
||||
query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
|
||||
}
|
||||
if len(options.ExcludedLabelIDs) > 0 {
|
||||
q := &inner_meilisearch.FilterAnd{}
|
||||
for _, labelID := range options.ExcludedLabelIDs {
|
||||
q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
|
||||
}
|
||||
query.And(q)
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.MilestoneIDs) > 0 {
|
||||
query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
|
||||
}
|
||||
|
||||
if options.NoProjectOnly {
|
||||
query.And(inner_meilisearch.NewFilterEq("no_project", true))
|
||||
} else if len(options.ProjectIDs) > 0 {
|
||||
// FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: this logic is not right, it should use "AND" but not "OR"
|
||||
query.And(inner_meilisearch.NewFilterIn("project_ids", options.ProjectIDs...))
|
||||
}
|
||||
|
||||
if options.PosterID != "" {
|
||||
// "(none)" becomes 0, it means no poster
|
||||
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
|
||||
query.And(inner_meilisearch.NewFilterEq("poster_id", posterIDInt64))
|
||||
}
|
||||
|
||||
if options.AssigneeID != "" {
|
||||
if options.AssigneeID == "(any)" {
|
||||
query.And(inner_meilisearch.NewFilterGte("assignee_id", 1))
|
||||
} else {
|
||||
// "(none)" becomes 0, it means no assignee
|
||||
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
|
||||
query.And(inner_meilisearch.NewFilterEq("assignee_id", assigneeIDInt64))
|
||||
}
|
||||
}
|
||||
|
||||
if options.MentionID.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value()))
|
||||
}
|
||||
|
||||
if options.ReviewedID.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value()))
|
||||
}
|
||||
if options.ReviewRequestedID.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value()))
|
||||
}
|
||||
|
||||
if options.SubscriberID.Has() {
|
||||
query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value()))
|
||||
}
|
||||
|
||||
if options.UpdatedAfterUnix.Has() {
|
||||
query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value()))
|
||||
}
|
||||
if options.UpdatedBeforeUnix.Has() {
|
||||
query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value()))
|
||||
}
|
||||
|
||||
if options.SortBy == "" {
|
||||
options.SortBy = internal.SortByCreatedAsc
|
||||
}
|
||||
sortBy := []string{
|
||||
parseSortBy(options.SortBy),
|
||||
"id:desc",
|
||||
}
|
||||
|
||||
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
|
||||
|
||||
counting := limit == 0
|
||||
if counting {
|
||||
// If set limit to 0, it will be 20 by default, and -1 is not allowed.
|
||||
// See https://www.meilisearch.com/docs/reference/api/search#limit
|
||||
// So set limit to 1 to make the cost as low as possible, then clear the result before returning.
|
||||
limit = 1
|
||||
}
|
||||
|
||||
keyword := options.Keyword // default to match "words"
|
||||
if options.SearchMode == indexer.SearchModeExact {
|
||||
// https://www.meilisearch.com/docs/reference/api/search#phrase-search
|
||||
keyword = doubleQuoteKeyword(keyword)
|
||||
}
|
||||
|
||||
searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
|
||||
Filter: query.Statement(),
|
||||
Limit: int64(limit),
|
||||
Offset: int64(skip),
|
||||
Sort: sortBy,
|
||||
MatchingStrategy: "all",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if counting {
|
||||
searchRes.Hits = nil
|
||||
}
|
||||
|
||||
hits, err := convertHits(searchRes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &internal.SearchResult{
|
||||
Total: searchRes.EstimatedTotalHits,
|
||||
Hits: hits,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseSortBy(sortBy internal.SortBy) string {
|
||||
field := strings.TrimPrefix(string(sortBy), "-")
|
||||
if strings.HasPrefix(string(sortBy), "-") {
|
||||
return field + ":desc"
|
||||
}
|
||||
return field + ":asc"
|
||||
}
|
||||
|
||||
func doubleQuoteKeyword(k string) string {
|
||||
kp := strings.Split(k, " ")
|
||||
parts := 0
|
||||
for i := range kp {
|
||||
part := strings.Trim(kp[i], "\"")
|
||||
if part != "" {
|
||||
kp[parts] = fmt.Sprintf(`"%s"`, part)
|
||||
parts++
|
||||
}
|
||||
}
|
||||
return strings.Join(kp[:parts], " ")
|
||||
}
|
||||
|
||||
func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
|
||||
hits := make([]internal.Match, 0, len(searchRes.Hits))
|
||||
for _, hit := range searchRes.Hits {
|
||||
var issueID int64
|
||||
if err := json.Unmarshal(hit["id"], &issueID); err != nil {
|
||||
return nil, ErrMalformedResponse
|
||||
}
|
||||
|
||||
hits = append(hits, internal.Match{
|
||||
ID: issueID,
|
||||
})
|
||||
}
|
||||
return hits, nil
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/indexer/issues/internal/tests"
|
||||
"gitea.dev/modules/json"
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMeilisearchIndexer(t *testing.T) {
|
||||
// The meilisearch instance started by pull-db-tests.yml > test-unit > services > meilisearch
|
||||
url := test.ExternalServiceHTTP(t, "TEST_MEILISEARCH_URL", "http://meilisearch:7700")
|
||||
key := os.Getenv("TEST_MEILISEARCH_KEY")
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return resp.StatusCode == http.StatusOK
|
||||
}, time.Minute, time.Second, "Expected meilisearch to be up")
|
||||
|
||||
indexer := NewIndexer(url, key, fmt.Sprintf("test_meilisearch_indexer_%d", time.Now().Unix()))
|
||||
defer indexer.Close()
|
||||
|
||||
tests.TestIndexer(t, indexer)
|
||||
}
|
||||
|
||||
func TestConvertHits(t *testing.T) {
|
||||
convert := func(d any) []byte {
|
||||
b, _ := json.Marshal(d)
|
||||
return b
|
||||
}
|
||||
|
||||
_, err := convertHits(&meilisearch.SearchResponse{
|
||||
Hits: []meilisearch.Hit{
|
||||
{
|
||||
"aa": convert(1),
|
||||
"bb": convert(2),
|
||||
"cc": convert(3),
|
||||
"dd": convert(4),
|
||||
},
|
||||
},
|
||||
})
|
||||
assert.ErrorIs(t, err, ErrMalformedResponse)
|
||||
|
||||
validResponse := &meilisearch.SearchResponse{
|
||||
Hits: []meilisearch.Hit{
|
||||
{
|
||||
"id": convert(float64(11)),
|
||||
"title": convert("a title"),
|
||||
"content": convert("issue body with no match"),
|
||||
"comments": convert([]any{"hey whats up?", "I'm currently bowling", "nice"}),
|
||||
},
|
||||
{
|
||||
"id": convert(float64(22)),
|
||||
"title": convert("Bowling as title"),
|
||||
"content": convert(""),
|
||||
"comments": convert([]any{}),
|
||||
},
|
||||
{
|
||||
"id": convert(float64(33)),
|
||||
"title": convert("Bowl-ing as fuzzy match"),
|
||||
"content": convert(""),
|
||||
"comments": convert([]any{}),
|
||||
},
|
||||
},
|
||||
}
|
||||
hits, err := convertHits(validResponse)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
|
||||
}
|
||||
|
||||
func TestDoubleQuoteKeyword(t *testing.T) {
|
||||
assert.Empty(t, doubleQuoteKeyword(""))
|
||||
assert.Equal(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c"))
|
||||
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
|
||||
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
|
||||
assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`))
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package issues
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
issue_model "gitea.dev/models/issues"
|
||||
"gitea.dev/modules/container"
|
||||
"gitea.dev/modules/indexer/issues/internal"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/queue"
|
||||
)
|
||||
|
||||
// getIssueIndexerData returns the indexer data of an issue and a bool value indicating whether the issue exists.
|
||||
func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerData, bool, error) {
|
||||
issue, err := issue_model.GetIssueByID(ctx, issueID)
|
||||
if err != nil {
|
||||
if issue_model.IsErrIssueNotExist(err) {
|
||||
return nil, false, nil
|
||||
}
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
// FIXME: what if users want to search for a review comment of a pull request?
|
||||
// The comment type is CommentTypeCode or CommentTypeReview.
|
||||
// But LoadDiscussComments only loads CommentTypeComment.
|
||||
if err := issue.LoadDiscussComments(ctx); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
comments := make([]string, 0, len(issue.Comments))
|
||||
for _, comment := range issue.Comments {
|
||||
if comment.Content != "" {
|
||||
// what ever the comment type is, index the content if it is not empty.
|
||||
comments = append(comments, comment.Content)
|
||||
}
|
||||
}
|
||||
|
||||
if err := issue.LoadAttributes(ctx); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
labels := make([]int64, 0, len(issue.Labels))
|
||||
for _, label := range issue.Labels {
|
||||
labels = append(labels, label.ID)
|
||||
}
|
||||
|
||||
mentionIDs, err := issue_model.GetIssueMentionIDs(ctx, issueID)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
var (
|
||||
reviewedIDs []int64
|
||||
reviewRequestedIDs []int64
|
||||
)
|
||||
{
|
||||
reviews, err := issue_model.FindReviews(ctx, issue_model.FindReviewOptions{
|
||||
ListOptions: db.ListOptionsAll,
|
||||
IssueID: issueID,
|
||||
OfficialOnly: false,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
reviewedIDsSet := make(container.Set[int64], len(reviews))
|
||||
reviewRequestedIDsSet := make(container.Set[int64], len(reviews))
|
||||
for _, review := range reviews {
|
||||
if review.Type == issue_model.ReviewTypeRequest {
|
||||
reviewRequestedIDsSet.Add(review.ReviewerID)
|
||||
} else {
|
||||
reviewedIDsSet.Add(review.ReviewerID)
|
||||
}
|
||||
}
|
||||
reviewedIDs = reviewedIDsSet.Values()
|
||||
reviewRequestedIDs = reviewRequestedIDsSet.Values()
|
||||
}
|
||||
|
||||
subscriberIDs, err := issue_model.GetIssueWatchersIDs(ctx, issue.ID, true)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
projectIDs := make([]int64, 0, len(issue.Projects))
|
||||
for _, project := range issue.Projects {
|
||||
projectIDs = append(projectIDs, project.ID)
|
||||
}
|
||||
|
||||
if err := issue.Repo.LoadOwner(ctx); err != nil {
|
||||
return nil, false, fmt.Errorf("issue.Repo.LoadOwner: %w", err)
|
||||
}
|
||||
|
||||
return &internal.IndexerData{
|
||||
ID: issue.ID,
|
||||
RepoID: issue.RepoID,
|
||||
IsPublic: !issue.Repo.IsPrivate && issue.Repo.Owner.Visibility.IsPublic(),
|
||||
Title: issue.Title,
|
||||
Content: issue.Content,
|
||||
Comments: comments,
|
||||
IsPull: issue.IsPull,
|
||||
IsClosed: issue.IsClosed,
|
||||
IsArchived: issue.Repo.IsArchived,
|
||||
LabelIDs: labels,
|
||||
NoLabel: len(labels) == 0,
|
||||
MilestoneID: issue.MilestoneID,
|
||||
ProjectIDs: projectIDs,
|
||||
NoProject: len(projectIDs) == 0,
|
||||
PosterID: issue.PosterID,
|
||||
AssigneeID: issue.AssigneeID,
|
||||
MentionIDs: mentionIDs,
|
||||
ReviewedIDs: reviewedIDs,
|
||||
ReviewRequestedIDs: reviewRequestedIDs,
|
||||
SubscriberIDs: subscriberIDs,
|
||||
UpdatedUnix: issue.UpdatedUnix,
|
||||
CreatedUnix: issue.CreatedUnix,
|
||||
DeadlineUnix: issue.DeadlineUnix,
|
||||
CommentCount: int64(len(issue.Comments)),
|
||||
}, true, nil
|
||||
}
|
||||
|
||||
func updateRepoIndexer(ctx context.Context, repoID int64) error {
|
||||
ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
|
||||
}
|
||||
for _, id := range ids {
|
||||
if err := updateIssueIndexer(ctx, id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateIssueIndexer(ctx context.Context, issueID int64) error {
|
||||
return pushIssueIndexerQueue(ctx, &IndexerMetadata{ID: issueID})
|
||||
}
|
||||
|
||||
func deleteRepoIssueIndexer(ctx context.Context, repoID int64) error {
|
||||
var ids []int64
|
||||
ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
|
||||
}
|
||||
|
||||
if len(ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
return pushIssueIndexerQueue(ctx, &IndexerMetadata{
|
||||
IDs: ids,
|
||||
IsDelete: true,
|
||||
})
|
||||
}
|
||||
|
||||
type keepRetryKey struct{}
|
||||
|
||||
// contextWithKeepRetry returns a context with a key indicating that the indexer should keep retrying.
|
||||
// Please note that it's for background tasks only, and it should not be used for user requests, or it may cause blocking.
|
||||
func contextWithKeepRetry(ctx context.Context) context.Context {
|
||||
return context.WithValue(ctx, keepRetryKey{}, true)
|
||||
}
|
||||
|
||||
func pushIssueIndexerQueue(ctx context.Context, data *IndexerMetadata) error {
|
||||
if issueIndexerQueue == nil {
|
||||
// Some unit tests will trigger indexing, but the queue is not initialized.
|
||||
// It's OK to ignore it, but log a warning message in case it's not a unit test.
|
||||
log.Warn("Trying to push %+v to issue indexer queue, but the queue is not initialized, it's OK if it's a unit test", data)
|
||||
return nil
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
err := issueIndexerQueue.Push(data)
|
||||
if errors.Is(err, queue.ErrAlreadyInQueue) {
|
||||
return nil
|
||||
}
|
||||
if errors.Is(err, context.DeadlineExceeded) { // the queue is full
|
||||
log.Warn("It seems that issue indexer is slow and the queue is full. Please check the issue indexer or increase the queue size.")
|
||||
if ctx.Value(keepRetryKey{}) == nil {
|
||||
return err
|
||||
}
|
||||
// It will be better to increase the queue size instead of retrying, but users may ignore the previous warning message.
|
||||
// However, even it retries, it may still cause index loss when there's a deadline in the context.
|
||||
log.Debug("Retry to push %+v to issue indexer queue", data)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/git"
|
||||
"gitea.dev/modules/git/languagestats"
|
||||
"gitea.dev/modules/gitrepo"
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/process"
|
||||
"gitea.dev/modules/setting"
|
||||
)
|
||||
|
||||
// DBIndexer implements Indexer interface to use database's like search
|
||||
type DBIndexer struct{}
|
||||
|
||||
// Index repository status function
|
||||
func (db *DBIndexer) Index(id int64) error {
|
||||
ctx, _, finished := process.GetManager().AddContext(graceful.GetManager().ShutdownContext(), fmt.Sprintf("Stats.DB Index Repo[%d]", id))
|
||||
defer finished()
|
||||
|
||||
repo, err := repo_model.GetRepositoryByID(ctx, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if repo.IsEmpty {
|
||||
return nil
|
||||
}
|
||||
|
||||
status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeStats)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
gitRepo, err := gitrepo.OpenRepository(ctx, repo)
|
||||
if err != nil {
|
||||
if err.Error() == "no such file or directory" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer gitRepo.Close()
|
||||
|
||||
// Get latest commit for default branch
|
||||
commitID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch)
|
||||
if err != nil {
|
||||
if git.IsErrBranchNotExist(err) || git.IsErrNotExist(err) || setting.IsInTesting {
|
||||
log.Debug("Unable to get commit ID for default branch %s in %s ... skipping this repository", repo.DefaultBranch, repo.FullName())
|
||||
return nil
|
||||
}
|
||||
log.Error("Unable to get commit ID for default branch %s in %s. Error: %v", repo.DefaultBranch, repo.FullName(), err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Do not recalculate stats if already calculated for this commit
|
||||
if status.CommitSha == commitID {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Calculate and save language statistics to database
|
||||
stats, err := languagestats.GetLanguageStats(gitRepo, commitID)
|
||||
if err != nil {
|
||||
if !setting.IsInTesting {
|
||||
log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = repo_model.UpdateLanguageStats(ctx, repo, commitID, stats)
|
||||
if err != nil {
|
||||
log.Error("Unable to update language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug("DBIndexer completed language stats for ID %s for default branch %s in %s. stats count: %d", commitID, repo.DefaultBranch, repo.FullName(), len(stats))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close dummy function
|
||||
func (db *DBIndexer) Close() {
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
// Indexer defines an interface to index repository stats
|
||||
// TODO: this indexer is quite different from the others, maybe this package should be moved out from module/indexer
|
||||
type Indexer interface {
|
||||
Index(id int64) error
|
||||
Close()
|
||||
}
|
||||
|
||||
// indexer represents a indexer instance
|
||||
var indexer Indexer
|
||||
|
||||
// Init initialize the repo indexer
|
||||
func Init() error {
|
||||
indexer = &DBIndexer{}
|
||||
|
||||
if err := initStatsQueue(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go populateRepoIndexer(graceful.GetManager().ShutdownContext())
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// populateRepoIndexer populate the repo indexer with pre-existing data. This
|
||||
// should only be run when the indexer is created for the first time.
|
||||
func populateRepoIndexer(ctx context.Context) {
|
||||
log.Info("Populating the repo stats indexer with existing repositories")
|
||||
|
||||
isShutdown := graceful.GetManager().IsShutdown()
|
||||
|
||||
exist, err := db.IsTableNotEmpty("repository")
|
||||
if err != nil {
|
||||
log.Fatal("System error: %v", err)
|
||||
} else if !exist {
|
||||
return
|
||||
}
|
||||
|
||||
var maxRepoID int64
|
||||
if maxRepoID, err = db.GetMaxID("repository"); err != nil {
|
||||
log.Fatal("System error: %v", err)
|
||||
}
|
||||
|
||||
// start with the maximum existing repo ID and work backwards, so that we
|
||||
// don't include repos that are created after gitea starts; such repos will
|
||||
// already be added to the indexer, and we don't need to add them again.
|
||||
for maxRepoID > 0 {
|
||||
select {
|
||||
case <-isShutdown:
|
||||
log.Info("Repository Stats Indexer population shutdown before completion")
|
||||
return
|
||||
default:
|
||||
}
|
||||
ids, err := repo_model.GetUnindexedRepos(ctx, repo_model.RepoIndexerTypeStats, maxRepoID, 0, 50)
|
||||
if err != nil {
|
||||
log.Error("populateRepoIndexer: %v", err)
|
||||
return
|
||||
} else if len(ids) == 0 {
|
||||
break
|
||||
}
|
||||
for _, id := range ids {
|
||||
select {
|
||||
case <-isShutdown:
|
||||
log.Info("Repository Stats Indexer population shutdown before completion")
|
||||
return
|
||||
default:
|
||||
}
|
||||
if err := statsQueue.Push(id); err != nil {
|
||||
log.Error("statsQueue.Push: %v", err)
|
||||
}
|
||||
maxRepoID = id - 1
|
||||
}
|
||||
}
|
||||
log.Info("Done (re)populating the repo stats indexer with existing repositories")
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/models/unittest"
|
||||
"gitea.dev/modules/queue"
|
||||
"gitea.dev/modules/setting"
|
||||
|
||||
_ "gitea.dev/models"
|
||||
_ "gitea.dev/models/actions"
|
||||
_ "gitea.dev/models/activities"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
unittest.MainTest(m)
|
||||
}
|
||||
|
||||
func TestRepoStatsIndex(t *testing.T) {
|
||||
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||
setting.CfgProvider, _ = setting.NewConfigProviderFromData("")
|
||||
|
||||
setting.LoadQueueSettings()
|
||||
|
||||
err := Init()
|
||||
assert.NoError(t, err)
|
||||
|
||||
repo, err := repo_model.GetRepositoryByID(t.Context(), 1)
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = UpdateRepoIndexer(repo)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.NoError(t, queue.GetManager().FlushAll(t.Context(), 5*time.Second))
|
||||
|
||||
status, err := repo_model.GetIndexerStatus(t.Context(), repo, repo_model.RepoIndexerTypeStats)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
|
||||
langs, err := repo_model.GetTopLanguageStats(t.Context(), repo, 5)
|
||||
assert.NoError(t, err)
|
||||
assert.Empty(t, langs)
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
repo_model "gitea.dev/models/repo"
|
||||
"gitea.dev/modules/graceful"
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/queue"
|
||||
"gitea.dev/modules/setting"
|
||||
)
|
||||
|
||||
// statsQueue represents a queue to handle repository stats updates
|
||||
var statsQueue *queue.WorkerPoolQueue[int64]
|
||||
|
||||
// handle passed PR IDs and test the PRs
|
||||
func handler(items ...int64) []int64 {
|
||||
for _, opts := range items {
|
||||
if err := indexer.Index(opts); err != nil {
|
||||
if !setting.IsInTesting {
|
||||
log.Error("stats queue indexer.Index(%d) failed: %v", opts, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func initStatsQueue() error {
|
||||
statsQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo_stats_update", handler)
|
||||
if statsQueue == nil {
|
||||
return errors.New("unable to create repo_stats_update queue")
|
||||
}
|
||||
go graceful.GetManager().RunWithCancel(statsQueue)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateRepoIndexer update a repository's entries in the indexer
|
||||
func UpdateRepoIndexer(repo *repo_model.Repository) error {
|
||||
if err := statsQueue.Push(repo.ID); err != nil {
|
||||
if err != queue.ErrAlreadyInQueue {
|
||||
return err
|
||||
}
|
||||
log.Debug("Repo ID: %d already queued", repo.ID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user