初始提交: Gitea 项目代码
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Base36 formats i as a base-36 string.
func Base36(i int64) string {
	const radix = 36
	return strconv.FormatInt(i, radix)
}
|
||||
|
||||
// ParseBase36 parses s as a signed 64-bit integer written in base 36.
// On failure it returns 0 and an error that quotes the input and wraps the
// underlying strconv error.
func ParseBase36(s string) (int64, error) {
	const radix, bitSize = 36, 64
	parsed, parseErr := strconv.ParseInt(s, radix, bitSize)
	if parseErr == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("invalid base36 integer %q: %w", s, parseErr)
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
)
|
||||
|
||||
// FlushingBatch is a batch of operations that automatically flushes to the
|
||||
// underlying index once it reaches a certain size.
|
||||
type FlushingBatch struct {
|
||||
maxBatchSize int
|
||||
batch *bleve.Batch
|
||||
index bleve.Index
|
||||
}
|
||||
|
||||
// NewFlushingBatch creates a new flushing batch for the specified index. Once
|
||||
// the number of operations in the batch reaches the specified limit, the batch
|
||||
// automatically flushes its operations to the index.
|
||||
func NewFlushingBatch(index bleve.Index, maxBatchSize int) *FlushingBatch {
|
||||
return &FlushingBatch{
|
||||
maxBatchSize: maxBatchSize,
|
||||
batch: index.NewBatch(),
|
||||
index: index,
|
||||
}
|
||||
}
|
||||
|
||||
// Index add a new index to batch
|
||||
func (b *FlushingBatch) Index(id string, data any) error {
|
||||
if err := b.batch.Index(id, data); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.flushIfFull()
|
||||
}
|
||||
|
||||
// Delete add a delete index to batch
|
||||
func (b *FlushingBatch) Delete(id string) error {
|
||||
b.batch.Delete(id)
|
||||
return b.flushIfFull()
|
||||
}
|
||||
|
||||
func (b *FlushingBatch) flushIfFull() error {
|
||||
if b.batch.Size() < b.maxBatchSize {
|
||||
return nil
|
||||
}
|
||||
return b.Flush()
|
||||
}
|
||||
|
||||
// Flush submit the batch and create a new one
|
||||
func (b *FlushingBatch) Flush() error {
|
||||
err := b.index.Batch(b.batch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.batch = b.index.NewBatch()
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
"gitea.dev/modules/log"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/ethantkoenig/rupture"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer represents a basic bleve indexer implementation
|
||||
type Indexer struct {
|
||||
Indexer bleve.Index
|
||||
|
||||
indexDir string
|
||||
version int
|
||||
mappingGetter MappingGetter
|
||||
}
|
||||
|
||||
type MappingGetter func() (mapping.IndexMapping, error)
|
||||
|
||||
func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.IndexMapping, error)) *Indexer {
|
||||
return &Indexer{
|
||||
indexDir: indexDir,
|
||||
version: version,
|
||||
mappingGetter: mappingGetter,
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes the indexer
|
||||
func (i *Indexer) Init(_ context.Context) (bool, error) {
|
||||
if i == nil {
|
||||
return false, errors.New("cannot init nil indexer")
|
||||
}
|
||||
|
||||
if i.Indexer != nil {
|
||||
return false, errors.New("indexer is already initialized")
|
||||
}
|
||||
|
||||
indexer, version, err := openIndexer(i.indexDir, i.version)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if indexer != nil {
|
||||
i.Indexer = indexer
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if version != 0 {
|
||||
log.Warn("Found older bleve index with version %d, Gitea will remove it and rebuild", version)
|
||||
}
|
||||
|
||||
indexMapping, err := i.mappingGetter()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
indexer, err = bleve.New(i.indexDir, indexMapping)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if err = rupture.WriteIndexMetadata(i.indexDir, &rupture.IndexMetadata{
|
||||
Version: i.version,
|
||||
}); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
i.Indexer = indexer
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Ping checks if the indexer is available
|
||||
func (i *Indexer) Ping(_ context.Context) error {
|
||||
if i == nil {
|
||||
return errors.New("cannot ping nil indexer")
|
||||
}
|
||||
if i.Indexer == nil {
|
||||
return errors.New("indexer is not initialized")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil || i.Indexer == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err := i.Indexer.Close(); err != nil {
|
||||
log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err)
|
||||
}
|
||||
i.Indexer = nil
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"gitea.dev/modules/optional"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
)
|
||||
|
||||
// NumericEqualityQuery generates a numeric equality query for the given value and field
|
||||
func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
|
||||
f := float64(value)
|
||||
tru := true
|
||||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
|
||||
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
|
||||
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
|
||||
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
||||
q.FieldVal = field
|
||||
q.Analyzer = analyzer
|
||||
q.Fuzziness = fuzziness
|
||||
return q
|
||||
}
|
||||
|
||||
// MatchAndQuery generates a match query for the given phrase, field and analyzer
|
||||
func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery {
|
||||
q := bleve.NewMatchQuery(matchPhrase)
|
||||
q.FieldVal = field
|
||||
q.Analyzer = analyzer
|
||||
q.Fuzziness = fuzziness
|
||||
q.Operator = query.MatchQueryOperatorAnd
|
||||
return q
|
||||
}
|
||||
|
||||
// BoolFieldQuery generates a bool field query for the given value and field
|
||||
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
|
||||
q := bleve.NewBoolFieldQuery(value)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
|
||||
func NumericRangeInclusiveQuery(minOption, maxOption optional.Option[int64], field string) *query.NumericRangeQuery {
|
||||
var minF, maxF *float64
|
||||
var minI, maxI *bool
|
||||
if minOption.Has() {
|
||||
minF = new(float64)
|
||||
*minF = float64(minOption.Value())
|
||||
minI = new(bool)
|
||||
*minI = true
|
||||
}
|
||||
if maxOption.Has() {
|
||||
maxF = new(float64)
|
||||
*maxF = float64(maxOption.Value())
|
||||
maxI = new(bool)
|
||||
*maxI = true
|
||||
}
|
||||
q := bleve.NewNumericRangeInclusiveQuery(minF, maxF, minI, maxI)
|
||||
q.SetField(field)
|
||||
return q
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"unicode"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
unicode_tokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
"github.com/ethantkoenig/rupture"
|
||||
)
|
||||
|
||||
// maxFuzziness is the upper bound applied to every guessed fuzziness value.
const maxFuzziness = 2
|
||||
|
||||
// openIndexer open the index at the specified path, checking for metadata
|
||||
// updates and bleve version updates. If index needs to be created (or
|
||||
// re-created), returns (nil, nil)
|
||||
func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
|
||||
_, err := os.Stat(path)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
return nil, 0, nil
|
||||
} else if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
metadata, err := rupture.ReadIndexMetadata(path)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if metadata.Version < latestVersion {
|
||||
// the indexer is using a previous version, so we should delete it and
|
||||
// re-populate
|
||||
return nil, metadata.Version, util.RemoveAll(path)
|
||||
}
|
||||
|
||||
index, err := bleve.Open(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, upsidedown.IncompatibleVersion) {
|
||||
log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding")
|
||||
return nil, 0, util.RemoveAll(path)
|
||||
}
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
return index, 0, nil
|
||||
}
|
||||
|
||||
// GuessFuzzinessByKeyword guesses fuzziness based on the levenshtein distance and determines how many chars
|
||||
// may be different on two string, and they still be considered equivalent.
|
||||
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
|
||||
func GuessFuzzinessByKeyword(s string) int {
|
||||
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
|
||||
tokens := tokenizer.Tokenize([]byte(s))
|
||||
|
||||
if len(tokens) > 0 {
|
||||
fuzziness := maxFuzziness
|
||||
|
||||
for _, token := range tokens {
|
||||
fuzziness = min(fuzziness, guessFuzzinessByKeyword(string(token.Term)))
|
||||
}
|
||||
|
||||
return fuzziness
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func guessFuzzinessByKeyword(s string) int {
|
||||
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
|
||||
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
|
||||
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
|
||||
// Likewise, queries whose terms contains characters that are *not* letters should not use fuzziness
|
||||
|
||||
for _, r := range s {
|
||||
if r >= 128 || !unicode.IsLetter(r) {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
return min(min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness), len(s)/4)
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
|
||||
|
||||
scenarios := []struct {
|
||||
Input string
|
||||
Fuzziness int // See util.go for the definition of fuzziness in this particular context
|
||||
}{
|
||||
{
|
||||
Input: "",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "Avocado",
|
||||
Fuzziness: 1,
|
||||
},
|
||||
{
|
||||
Input: "Geschwindigkeit",
|
||||
Fuzziness: 2,
|
||||
},
|
||||
{
|
||||
Input: "non-exist",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "갃갃갃",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "repo1",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
{
|
||||
Input: "avocado.md",
|
||||
Fuzziness: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(fmt.Sprintf("Fuziniess:%s=%d", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
|
||||
assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer is the basic db indexer implementation. It holds no state.
type Indexer struct{}

// Init always reports that the index already existed (true, nil), so that the
// indexer will not try to populate the index: the data is already there.
func (*Indexer) Init(context.Context) (bool, error) {
	return true, nil
}

// Ping always returns nil. There is no need to ping the database to check
// whether it is available: if the database goes down, Gitea goes down, so
// nobody will care whether the indexer is available.
func (*Indexer) Ping(context.Context) error {
	return nil
}

// Close does nothing; there is nothing to release.
func (*Indexer) Close() {}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.dev/modules/indexer/internal"
|
||||
"gitea.dev/modules/json"
|
||||
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer is a narrow wrapper around an Elasticsearch/OpenSearch cluster.
// It targets the REST subset shared by Elasticsearch 7/8/9 and OpenSearch 3.
type Indexer struct {
	// client is created by Init and cleared by Close.
	client *http.Client
	// base is the base URL; Init rewrites it to end with a slash and to carry
	// no userinfo.
	base string
	// user and pass are the basic-auth credentials Init extracts from the
	// URL's userinfo, if present.
	user, pass string

	indexName string
	version   int
	mapping   string // request body sent when the index is created
}
|
||||
|
||||
// NewIndexer builds an Indexer. The connection is opened by Init.
|
||||
func NewIndexer(rawURL, indexName string, version int, mapping string) *Indexer {
|
||||
return &Indexer{
|
||||
base: rawURL,
|
||||
indexName: indexName,
|
||||
version: version,
|
||||
mapping: mapping,
|
||||
}
|
||||
}
|
||||
|
||||
// Init connects and creates the versioned index if missing, returning true if it already existed.
|
||||
func (i *Indexer) Init(ctx context.Context) (bool, error) {
|
||||
parsed, err := url.Parse(i.base)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("parse elasticsearch url: %w", err)
|
||||
}
|
||||
if parsed.User != nil {
|
||||
i.user = parsed.User.Username()
|
||||
i.pass, _ = parsed.User.Password()
|
||||
parsed.User = nil
|
||||
}
|
||||
base := parsed.String()
|
||||
if !strings.HasSuffix(base, "/") {
|
||||
base += "/"
|
||||
}
|
||||
i.base = base
|
||||
// No client-level Timeout: bulk/_delete_by_query can legitimately run for
|
||||
// minutes on large repos. Per-request deadlines come from the caller's ctx;
|
||||
// transport-level timeouts cover stalled connects/handshakes/headers so a
|
||||
// half-open server cannot wedge the indexer indefinitely.
|
||||
i.client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ResponseHeaderTimeout: 30 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
MaxIdleConns: 100,
|
||||
},
|
||||
}
|
||||
|
||||
exists, err := i.indexExists(ctx, i.VersionedIndexName())
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if exists {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if err := i.createIndex(ctx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Ping returns an error when the cluster is unusable (status != green/yellow).
|
||||
func (i *Indexer) Ping(ctx context.Context) error {
|
||||
var body struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
if err := i.doJSON(ctx, http.MethodGet, "_cluster/health", nil, &body); err != nil {
|
||||
return err
|
||||
}
|
||||
// Healthy = green; usable = yellow. Red is unusable.
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
|
||||
if body.Status != "green" && body.Status != "yellow" {
|
||||
return fmt.Errorf("status of elasticsearch cluster is %s", body.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close releases idle HTTP connections held by the client.
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil || i.client == nil {
|
||||
return
|
||||
}
|
||||
i.client.CloseIdleConnections()
|
||||
i.client = nil
|
||||
}
|
||||
|
||||
// Bulk submits index/delete ops. Returns the first item-level failure, if any.
|
||||
func (i *Indexer) Bulk(ctx context.Context, ops []BulkOp) error {
|
||||
if len(ops) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
index := i.VersionedIndexName()
|
||||
var buf bytes.Buffer
|
||||
buf.Grow(len(ops) * 256)
|
||||
for _, op := range ops {
|
||||
meta := map[string]any{op.action: map[string]any{"_index": index, "_id": op.id}}
|
||||
if err := writeJSONLine(&buf, meta); err != nil {
|
||||
return err
|
||||
}
|
||||
if op.action == bulkActionIndex {
|
||||
if err := writeJSONLine(&buf, op.doc); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res, err := i.do(ctx, http.MethodPost, urlPath(index, "_bulk"), "application/x-ndjson", bytes.NewReader(buf.Bytes()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
|
||||
var body struct {
|
||||
Errors bool `json:"errors"`
|
||||
Items []map[string]struct {
|
||||
Status int `json:"status"`
|
||||
Error json.Value `json:"error"`
|
||||
} `json:"items"`
|
||||
}
|
||||
if err := json.NewDecoder(res.Body).Decode(&body); err != nil {
|
||||
return err
|
||||
}
|
||||
if !body.Errors {
|
||||
return nil
|
||||
}
|
||||
return firstBulkError(body.Items)
|
||||
}
|
||||
|
||||
// firstBulkError returns the first item-level failure in a bulk response.
|
||||
// Each items entry is a single-key map ({"index": {...}} or {"delete": {...}}).
|
||||
// Delete-of-missing (404) is idempotent and not reported.
|
||||
func firstBulkError(items []map[string]struct {
|
||||
Status int `json:"status"`
|
||||
Error json.Value `json:"error"`
|
||||
},
|
||||
) error {
|
||||
for _, item := range items {
|
||||
for action, result := range item {
|
||||
if action == bulkActionDelete && result.Status == http.StatusNotFound {
|
||||
continue
|
||||
}
|
||||
if result.Status >= 300 {
|
||||
return fmt.Errorf("bulk %s failed (status %d): %s", action, result.Status, string(result.Error))
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Index writes a single document.
|
||||
func (i *Indexer) Index(ctx context.Context, id string, doc any) error {
|
||||
body, err := json.Marshal(doc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.doJSON(ctx, http.MethodPut, urlPath(i.VersionedIndexName(), "_doc", id), bytes.NewReader(body), nil)
|
||||
}
|
||||
|
||||
// Delete removes a single document by id. Missing ids are not an error.
|
||||
func (i *Indexer) Delete(ctx context.Context, id string) error {
|
||||
res, err := i.do(ctx, http.MethodDelete, urlPath(i.VersionedIndexName(), "_doc", id), "", nil, http.StatusNotFound)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
drainAndClose(res)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByQuery removes every document matching the query.
|
||||
func (i *Indexer) DeleteByQuery(ctx context.Context, query Query) error {
|
||||
body, err := json.Marshal(map[string]any{"query": query.querySource()})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.doJSON(ctx, http.MethodPost, urlPath(i.VersionedIndexName(), "_delete_by_query"), bytes.NewReader(body), nil)
|
||||
}
|
||||
|
||||
// Refresh forces a refresh so recent writes are searchable.
|
||||
func (i *Indexer) Refresh(ctx context.Context) error {
|
||||
return i.doJSON(ctx, http.MethodPost, urlPath(i.VersionedIndexName(), "_refresh"), nil, nil)
|
||||
}
|
||||
|
||||
// Search runs a search request and decodes the reply.
|
||||
func (i *Indexer) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
|
||||
body := map[string]any{}
|
||||
if req.Query != nil {
|
||||
body["query"] = req.Query.querySource()
|
||||
}
|
||||
if len(req.Sort) > 0 {
|
||||
sorts := make([]map[string]any, len(req.Sort))
|
||||
for idx, s := range req.Sort {
|
||||
sorts[idx] = s.source()
|
||||
}
|
||||
body["sort"] = sorts
|
||||
}
|
||||
if req.From > 0 {
|
||||
body["from"] = req.From
|
||||
}
|
||||
body["size"] = req.Size
|
||||
if len(req.Aggregations) > 0 {
|
||||
body["aggs"] = req.Aggregations
|
||||
}
|
||||
if len(req.Highlight) > 0 {
|
||||
body["highlight"] = req.Highlight
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Default track_total_hits is 10000 (capped count); send it explicitly so
|
||||
// callers can choose between exact totals (true) and skipping counting (false).
|
||||
path := urlPath(i.VersionedIndexName(), "_search") + "?track_total_hits=" + strconv.FormatBool(req.TrackTotal)
|
||||
res, err := i.do(ctx, http.MethodPost, path, "application/json", bytes.NewReader(payload))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
return decodeSearchResponse(res.Body)
|
||||
}
|
||||
|
||||
func (i *Indexer) indexExists(ctx context.Context, name string) (bool, error) {
|
||||
res, err := i.do(ctx, http.MethodHead, urlPath(name), "", nil, http.StatusNotFound)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
drainAndClose(res)
|
||||
return res.StatusCode == http.StatusOK, nil
|
||||
}
|
||||
|
||||
func (i *Indexer) createIndex(ctx context.Context) error {
|
||||
var body struct {
|
||||
Acknowledged bool `json:"acknowledged"`
|
||||
}
|
||||
if err := i.doJSON(ctx, http.MethodPut, urlPath(i.VersionedIndexName()), bytes.NewBufferString(i.mapping), &body); err != nil {
|
||||
return fmt.Errorf("create index %s: %w", i.VersionedIndexName(), err)
|
||||
}
|
||||
if !body.Acknowledged {
|
||||
return fmt.Errorf("create index %s not acknowledged", i.VersionedIndexName())
|
||||
}
|
||||
|
||||
i.checkOldIndexes(ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
// do sends a request and returns the response. Status >= 300 is turned into
|
||||
// an error unless the status appears in okStatus. The caller closes Body.
|
||||
func (i *Indexer) do(ctx context.Context, method, path, contentType string, body io.Reader, okStatus ...int) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, method, i.base+path, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if contentType != "" {
|
||||
req.Header.Set("Content-Type", contentType)
|
||||
}
|
||||
if i.user != "" || i.pass != "" {
|
||||
req.SetBasicAuth(i.user, i.pass)
|
||||
}
|
||||
res, err := i.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if res.StatusCode >= 300 && !slices.Contains(okStatus, res.StatusCode) {
|
||||
msg := readErrBody(res)
|
||||
res.Body.Close()
|
||||
return nil, fmt.Errorf("%s %s: %s", method, path, msg)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// doJSON sends a request with a JSON body and, when out is non-nil, decodes
|
||||
// the JSON response into it.
|
||||
func (i *Indexer) doJSON(ctx context.Context, method, path string, body io.Reader, out any) error {
|
||||
contentType := ""
|
||||
if body != nil {
|
||||
contentType = "application/json"
|
||||
}
|
||||
res, err := i.do(ctx, method, path, contentType, body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer drainAndClose(res)
|
||||
if out == nil {
|
||||
return nil
|
||||
}
|
||||
return json.NewDecoder(res.Body).Decode(out)
|
||||
}
|
||||
|
||||
// drainAndClose reads and discards whatever is left of the response body and
// then closes it, so the underlying TCP connection can be reused for
// keep-alive. Errors from both steps are ignored.
func drainAndClose(res *http.Response) {
	body := res.Body
	defer body.Close()
	_, _ = io.Copy(io.Discard, body)
}
|
||||
|
||||
// writeJSONLine appends the JSON encoding of v followed by a newline to buf.
// When v cannot be encoded the error is returned and buf is left untouched.
func writeJSONLine(buf *bytes.Buffer, v any) error {
	line, err := json.Marshal(v)
	if err == nil {
		buf.Write(line)
		buf.WriteByte('\n')
	}
	return err
}
|
||||
|
||||
// readErrBody returns "status <code>: <excerpt>", where the excerpt is at most
// the first 4 KiB of the response body with surrounding whitespace trimmed.
// The rest of the body is drained so the underlying connection can be reused
// (keep-alive needs Body fully read). Read errors are ignored.
func readErrBody(res *http.Response) string {
	const limit = 4 << 10
	head := &io.LimitedReader{R: res.Body, N: limit}
	excerpt, _ := io.ReadAll(head)
	_, _ = io.Copy(io.Discard, res.Body)
	return fmt.Sprintf("status %d: %s", res.StatusCode, bytes.TrimSpace(excerpt))
}
|
||||
|
||||
func decodeSearchResponse(r io.Reader) (*SearchResponse, error) {
|
||||
var raw struct {
|
||||
Hits struct {
|
||||
Total struct {
|
||||
Value int64 `json:"value"`
|
||||
} `json:"total"`
|
||||
Hits []struct {
|
||||
ID string `json:"_id"`
|
||||
Score float64 `json:"_score"`
|
||||
Source json.Value `json:"_source"`
|
||||
Highlight map[string][]string `json:"highlight"`
|
||||
} `json:"hits"`
|
||||
} `json:"hits"`
|
||||
Aggregations map[string]struct {
|
||||
Buckets []struct {
|
||||
Key any `json:"key"`
|
||||
DocCount int64 `json:"doc_count"`
|
||||
} `json:"buckets"`
|
||||
} `json:"aggregations"`
|
||||
}
|
||||
if err := json.NewDecoder(r).Decode(&raw); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := &SearchResponse{
|
||||
Total: raw.Hits.Total.Value,
|
||||
Hits: make([]SearchHit, 0, len(raw.Hits.Hits)),
|
||||
}
|
||||
for _, h := range raw.Hits.Hits {
|
||||
resp.Hits = append(resp.Hits, SearchHit{
|
||||
ID: h.ID,
|
||||
Score: h.Score,
|
||||
Source: h.Source,
|
||||
Highlight: h.Highlight,
|
||||
})
|
||||
}
|
||||
if len(raw.Aggregations) > 0 {
|
||||
resp.Aggregations = make(map[string][]AggBucket, len(raw.Aggregations))
|
||||
for name, agg := range raw.Aggregations {
|
||||
buckets := make([]AggBucket, len(agg.Buckets))
|
||||
for idx, b := range agg.Buckets {
|
||||
buckets[idx] = AggBucket{Key: b.Key, DocCount: b.DocCount}
|
||||
}
|
||||
resp.Aggregations[name] = buckets
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// urlPath percent-escapes every segment with url.PathEscape and joins the
// results with `/`. Without segments it returns the empty string.
func urlPath(segments ...string) string {
	escaped := make([]string, len(segments))
	for idx, segment := range segments {
		escaped[idx] = url.PathEscape(segment)
	}
	return strings.Join(escaped, "/")
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.dev/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func newRealIndexer(t *testing.T) *Indexer {
|
||||
t.Helper()
|
||||
esURL := test.ExternalServiceHTTP(t, "TEST_ELASTICSEARCH_URL", "http://elasticsearch:9200")
|
||||
indexName := "gitea_test_" + strings.ReplaceAll(strings.ToLower(t.Name()), "/", "_")
|
||||
ix := NewIndexer(esURL, indexName, 1, `{"mappings":{"properties":{"x":{"type":"keyword"}}}}`)
|
||||
_, err := ix.Init(t.Context())
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(ix.Close)
|
||||
return ix
|
||||
}
|
||||
|
||||
func TestPing(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Ping(t.Context()))
|
||||
}
|
||||
|
||||
func TestDeleteSwallows404(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Delete(t.Context(), "missing-id"))
|
||||
}
|
||||
|
||||
func TestBulkAcceptsDelete404(t *testing.T) {
|
||||
ix := newRealIndexer(t)
|
||||
require.NoError(t, ix.Bulk(t.Context(), []BulkOp{DeleteOp("missing-id")}))
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
// MultiMatch types used by the call sites. See
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
const (
	// MultiMatchTypeBestFields is the "best_fields" multi_match type.
	MultiMatchTypeBestFields = "best_fields"
	// MultiMatchTypePhrasePrefix is the "phrase_prefix" multi_match type.
	MultiMatchTypePhrasePrefix = "phrase_prefix"
)
|
||||
|
||||
// ToAnySlice copies the elements of s into a new []any of the same length,
// for use with variadic query arguments such as TermsQuery. The result is
// never nil, even for a nil or empty s.
func ToAnySlice[T any](s []T) []any {
	boxed := make([]any, 0, len(s))
	for _, elem := range s {
		boxed = append(boxed, elem)
	}
	return boxed
}
|
||||
|
||||
// Query is an Elasticsearch query DSL node. Its querySource is the JSON
// object expected by the ES query API.
type Query interface {
	querySource() map[string]any
}

// rawQuery is a Query whose source is the map itself.
type rawQuery map[string]any

// querySource returns the underlying map.
func (q rawQuery) querySource() map[string]any {
	return map[string]any(q)
}

// rawFieldQuery builds the single-field query {kind: {field: value}}.
func rawFieldQuery(kind, field string, value any) Query {
	return rawQuery{kind: map[string]any{field: value}}
}

// TermQuery matches documents whose `field` exactly equals `value`.
func TermQuery(field string, value any) Query {
	return rawFieldQuery("term", field, value)
}

// TermsQuery matches documents whose `field` equals any of `values`.
func TermsQuery(field string, values ...any) Query {
	return rawFieldQuery("terms", field, values)
}

// MatchQuery is a full-text match of `value` on the single field `field`.
func MatchQuery(field string, value any) Query {
	return rawFieldQuery("match", field, value)
}

// MatchPhraseQuery matches the exact phrase `value` on `field`.
func MatchPhraseQuery(field, value string) Query {
	return rawFieldQuery("match_phrase", field, value)
}
|
||||
|
||||
// MultiMatchQuery is the fluent builder for a multi_match query.
type MultiMatchQuery struct {
	query    any
	fields   []string
	typ      string
	operator string
}

// NewMultiMatchQuery creates a multi_match query for query over the given
// fields, with no type or operator set.
func NewMultiMatchQuery(query any, fields ...string) *MultiMatchQuery {
	m := new(MultiMatchQuery)
	m.query, m.fields = query, fields
	return m
}

// Type sets the multi_match type and returns the builder for chaining.
func (m *MultiMatchQuery) Type(t string) *MultiMatchQuery {
	m.typ = t
	return m
}

// Operator sets the multi_match operator and returns the builder for chaining.
func (m *MultiMatchQuery) Operator(op string) *MultiMatchQuery {
	m.operator = op
	return m
}

// querySource returns {"multi_match": {...}}. "query" is always present;
// "fields", "type" and "operator" are included only when non-empty.
func (m *MultiMatchQuery) querySource() map[string]any {
	inner := make(map[string]any, 4)
	inner["query"] = m.query
	if len(m.fields) != 0 {
		inner["fields"] = m.fields
	}
	if typ := m.typ; typ != "" {
		inner["type"] = typ
	}
	if operator := m.operator; operator != "" {
		inner["operator"] = operator
	}
	return map[string]any{"multi_match": inner}
}
|
||||
|
||||
// RangeQuery is the fluent builder for a range query on a single field.
type RangeQuery struct {
	field string
	body  map[string]any
}

// NewRangeQuery creates a range query on field with no bounds set.
func NewRangeQuery(field string) *RangeQuery {
	r := new(RangeQuery)
	r.field = field
	r.body = make(map[string]any)
	return r
}

// Gte sets the "gte" bound to v and returns the builder for chaining.
func (r *RangeQuery) Gte(v any) *RangeQuery {
	r.body["gte"] = v
	return r
}

// Lte sets the "lte" bound to v and returns the builder for chaining.
func (r *RangeQuery) Lte(v any) *RangeQuery {
	r.body["lte"] = v
	return r
}

// querySource returns {"range": {field: {bounds...}}}.
func (r *RangeQuery) querySource() map[string]any {
	perField := map[string]any{r.field: r.body}
	return map[string]any{"range": perField}
}
|
||||
|
||||
// BoolQuery is the fluent builder for a bool query.
|
||||
type BoolQuery struct {
|
||||
must []Query
|
||||
should []Query
|
||||
mustNot []Query
|
||||
}
|
||||
|
||||
func NewBoolQuery() *BoolQuery { return &BoolQuery{} }
|
||||
|
||||
func (b *BoolQuery) Must(q ...Query) *BoolQuery { b.must = append(b.must, q...); return b }
|
||||
func (b *BoolQuery) Should(q ...Query) *BoolQuery { b.should = append(b.should, q...); return b }
|
||||
func (b *BoolQuery) MustNot(q ...Query) *BoolQuery { b.mustNot = append(b.mustNot, q...); return b }
|
||||
|
||||
func (b *BoolQuery) querySource() map[string]any {
|
||||
body := map[string]any{}
|
||||
if len(b.must) > 0 {
|
||||
body["must"] = querySlice(b.must)
|
||||
}
|
||||
if len(b.should) > 0 {
|
||||
body["should"] = querySlice(b.should)
|
||||
}
|
||||
if len(b.mustNot) > 0 {
|
||||
body["must_not"] = querySlice(b.mustNot)
|
||||
}
|
||||
return map[string]any{"bool": body}
|
||||
}
|
||||
|
||||
func querySlice(queries []Query) []map[string]any {
|
||||
out := make([]map[string]any, len(queries))
|
||||
for idx, q := range queries {
|
||||
out[idx] = q.querySource()
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import "gitea.dev/modules/json"
|
||||
|
||||
// Action names carried by a BulkOp.
const (
	bulkActionIndex  = "index"
	bulkActionDelete = "delete"
)

// BulkOp describes one write of a Bulk call. Use IndexOp or DeleteOp to build one.
type BulkOp struct {
	action string // bulkActionIndex or bulkActionDelete
	id     string // document id the operation targets
	doc    any    // document body; left unset by DeleteOp
}

// IndexOp returns a bulk operation that indexes doc under id.
func IndexOp(id string, doc any) BulkOp {
	op := BulkOp{action: bulkActionIndex}
	op.id = id
	op.doc = doc
	return op
}

// DeleteOp returns a bulk operation that deletes the document with the given id.
func DeleteOp(id string) BulkOp {
	op := BulkOp{action: bulkActionDelete}
	op.id = id
	return op
}
|
||||
|
||||
// SortField is a single entry of the search sort array.
type SortField struct {
	Field string // name of the field to sort by
	Desc  bool   // true for descending order, false for ascending
}

// source renders the entry as {<Field>: {"order": "asc"|"desc"}}.
func (s SortField) source() map[string]any {
	spec := map[string]any{"order": "asc"}
	if s.Desc {
		spec["order"] = "desc"
	}
	return map[string]any{s.Field: spec}
}
|
||||
|
||||
// SearchRequest captures everything Gitea sends to the _search endpoint.
|
||||
// Aggregations and Highlight are raw ES JSON bodies — callers write them as
|
||||
// map[string]any since each has exactly one call site with a fixed shape.
|
||||
type SearchRequest struct {
|
||||
Query Query
|
||||
Sort []SortField
|
||||
From int
|
||||
Size int
|
||||
TrackTotal bool
|
||||
Aggregations map[string]any
|
||||
Highlight map[string]any
|
||||
}
|
||||
|
||||
// SearchHit is a single result row.
|
||||
type SearchHit struct {
|
||||
ID string
|
||||
Score float64
|
||||
Source json.Value
|
||||
Highlight map[string][]string
|
||||
}
|
||||
|
||||
// AggBucket is one bucket of a terms aggregation.
type AggBucket struct {
	Key      any   // bucket key
	DocCount int64 // number of documents in the bucket
}
|
||||
|
||||
// SearchResponse is Gitea's decoded view of the search reply.
|
||||
type SearchResponse struct {
|
||||
Total int64
|
||||
Hits []SearchHit
|
||||
Aggregations map[string][]AggBucket
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
// VersionedIndexName returns the full index name with version suffix.
|
||||
func (i *Indexer) VersionedIndexName() string {
|
||||
return versionedIndexName(i.indexName, i.version)
|
||||
}
|
||||
|
||||
func versionedIndexName(indexName string, version int) string {
|
||||
if version == 0 {
|
||||
// Old index name without version
|
||||
return indexName
|
||||
}
|
||||
return fmt.Sprintf("%s.v%d", indexName, version)
|
||||
}
|
||||
|
||||
func (i *Indexer) checkOldIndexes(ctx context.Context) {
|
||||
for v := range i.version {
|
||||
indexName := versionedIndexName(i.indexName, v)
|
||||
exists, err := i.indexExists(ctx, indexName)
|
||||
if err == nil && exists {
|
||||
log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Indexer defines a basic indexer interface.
type Indexer interface {
	// Init initializes the indexer.
	// It returns true if the index was opened/existed (with data populated),
	// and false if it was created/not-existed (with no data).
	Init(ctx context.Context) (bool, error)
	// Ping checks whether the indexer is available.
	Ping(ctx context.Context) error
	// Close closes the indexer.
	Close()
}

// NewDummyIndexer returns an Indexer whose Init and Ping always fail with
// "indexer is not ready".
func NewDummyIndexer() Indexer {
	return new(dummyIndexer)
}

// dummyIndexer is a stateless Indexer that is never ready.
type dummyIndexer struct{}

// Init always reports failure.
func (*dummyIndexer) Init(_ context.Context) (bool, error) {
	return false, errors.New("indexer is not ready")
}

// Ping always reports failure.
func (*dummyIndexer) Ping(_ context.Context) error {
	return errors.New("indexer is not ready")
}

// Close does nothing.
func (*dummyIndexer) Close() {}
|
||||
@@ -0,0 +1,119 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Filter is a filter expression for meilisearch queries.
// It is only a thin wrapper around the statement string;
// DO NOT assume that it is a complete implementation.
type Filter interface {
	// Statement returns the filter expression as a string.
	Statement() string
}
|
||||
|
||||
type FilterAnd struct {
|
||||
filters []Filter
|
||||
}
|
||||
|
||||
func (f *FilterAnd) Statement() string {
|
||||
var statements []string
|
||||
for _, filter := range f.filters {
|
||||
if s := filter.Statement(); s != "" {
|
||||
statements = append(statements, fmt.Sprintf("(%s)", s))
|
||||
}
|
||||
}
|
||||
return strings.Join(statements, " AND ")
|
||||
}
|
||||
|
||||
func (f *FilterAnd) And(filter Filter) *FilterAnd {
|
||||
f.filters = append(f.filters, filter)
|
||||
return f
|
||||
}
|
||||
|
||||
type FilterOr struct {
|
||||
filters []Filter
|
||||
}
|
||||
|
||||
func (f *FilterOr) Statement() string {
|
||||
var statements []string
|
||||
for _, filter := range f.filters {
|
||||
if s := filter.Statement(); s != "" {
|
||||
statements = append(statements, fmt.Sprintf("(%s)", s))
|
||||
}
|
||||
}
|
||||
return strings.Join(statements, " OR ")
|
||||
}
|
||||
|
||||
func (f *FilterOr) Or(filter Filter) *FilterOr {
|
||||
f.filters = append(f.filters, filter)
|
||||
return f
|
||||
}
|
||||
|
||||
// FilterIn is a "<field> IN [...]" filter statement.
type FilterIn string

// NewFilterIn creates a new FilterIn of the form "<field> IN [v1, v2, ...]".
// It returns an empty filter when no values are given.
// Only int64 is supported, which avoids the extra work of handling strings
// with special characters.
func NewFilterIn[T int64](field string, values ...T) FilterIn {
	if len(values) == 0 {
		return ""
	}
	items := make([]string, 0, len(values))
	for _, value := range values {
		items = append(items, fmt.Sprint(value))
	}
	return FilterIn(field + " IN [" + strings.Join(items, ", ") + "]")
}

// Statement returns the filter expression as a string.
func (f FilterIn) Statement() string {
	return string(f)
}
|
||||
|
||||
// FilterEq is a "<field> = <value>" filter statement.
type FilterEq string

// NewFilterEq creates a new FilterEq of the form "<field> = <value>".
// Only int64 and bool are supported, which avoids the extra work of handling
// strings with special characters.
func NewFilterEq[T bool | int64](field string, value T) FilterEq {
	stmt := field + " = " + fmt.Sprint(value)
	return FilterEq(stmt)
}

// Statement returns the filter expression as a string.
func (f FilterEq) Statement() string {
	return string(f)
}
|
||||
|
||||
type FilterNot string
|
||||
|
||||
func NewFilterNot(filter Filter) FilterNot {
|
||||
return FilterNot(fmt.Sprintf("NOT (%s)", filter.Statement()))
|
||||
}
|
||||
|
||||
func (f FilterNot) Statement() string {
|
||||
return string(f)
|
||||
}
|
||||
|
||||
// FilterGte is a "<field> >= <value>" filter statement.
type FilterGte string

// NewFilterGte creates a new FilterGte of the form "<field> >= <value>".
// Only int64 is supported, which avoids the extra work of handling strings
// with special characters.
func NewFilterGte[T int64](field string, value T) FilterGte {
	stmt := field + " >= " + fmt.Sprint(value)
	return FilterGte(stmt)
}

// Statement returns the filter expression as a string.
func (f FilterGte) Statement() string {
	return string(f)
}
|
||||
|
||||
// FilterLte is a "<field> <= <value>" filter statement.
type FilterLte string

// NewFilterLte creates a new FilterLte of the form "<field> <= <value>".
// Only int64 is supported, which avoids the extra work of handling strings
// with special characters.
func NewFilterLte[T int64](field string, value T) FilterLte {
	stmt := field + " <= " + fmt.Sprint(value)
	return FilterLte(stmt)
}

// Statement returns the filter expression as a string.
func (f FilterLte) Statement() string {
	return string(f)
}
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
)
|
||||
|
||||
// Indexer represents a basic meilisearch indexer implementation
|
||||
type Indexer struct {
|
||||
Client meilisearch.ServiceManager
|
||||
|
||||
url, apiKey string
|
||||
indexName string
|
||||
version int
|
||||
settings *meilisearch.Settings
|
||||
}
|
||||
|
||||
func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearch.Settings) *Indexer {
|
||||
return &Indexer{
|
||||
url: url,
|
||||
apiKey: apiKey,
|
||||
indexName: indexName,
|
||||
version: version,
|
||||
settings: settings,
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes the indexer
|
||||
func (i *Indexer) Init(_ context.Context) (bool, error) {
|
||||
if i == nil {
|
||||
return false, errors.New("cannot init nil indexer")
|
||||
}
|
||||
|
||||
if i.Client != nil {
|
||||
return false, errors.New("indexer is already initialized")
|
||||
}
|
||||
|
||||
i.Client = meilisearch.New(i.url, meilisearch.WithAPIKey(i.apiKey))
|
||||
_, err := i.Client.GetIndex(i.VersionedIndexName())
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
_, err = i.Client.CreateIndex(&meilisearch.IndexConfig{
|
||||
Uid: i.VersionedIndexName(),
|
||||
PrimaryKey: "id",
|
||||
})
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
i.checkOldIndexes()
|
||||
|
||||
_, err = i.Client.Index(i.VersionedIndexName()).UpdateSettings(i.settings)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Ping checks if the indexer is available
|
||||
func (i *Indexer) Ping(ctx context.Context) error {
|
||||
if i == nil {
|
||||
return errors.New("cannot ping nil indexer")
|
||||
}
|
||||
if i.Client == nil {
|
||||
return errors.New("indexer is not initialized")
|
||||
}
|
||||
resp, err := i.Client.Health()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.Status != "available" {
|
||||
// See https://docs.meilisearch.com/reference/api/health.html#status
|
||||
return fmt.Errorf("status of meilisearch is not available: %s", resp.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the indexer
|
||||
func (i *Indexer) Close() {
|
||||
if i == nil {
|
||||
return
|
||||
}
|
||||
i.Client = nil
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"gitea.dev/modules/log"
|
||||
)
|
||||
|
||||
// VersionedIndexName returns the full index name with version
|
||||
func (i *Indexer) VersionedIndexName() string {
|
||||
return versionedIndexName(i.indexName, i.version)
|
||||
}
|
||||
|
||||
func versionedIndexName(indexName string, version int) string {
|
||||
if version == 0 {
|
||||
// Old index name without version
|
||||
return indexName
|
||||
}
|
||||
|
||||
// The format of the index name is <index_name>_v<version>, not <index_name>.v<version> like elasticsearch.
|
||||
// Because meilisearch does not support "." in index name, it should contain only alphanumeric characters, hyphens (-) and underscores (_).
|
||||
// See https://www.meilisearch.com/docs/learn/core_concepts/indexes#index-uid
|
||||
|
||||
return fmt.Sprintf("%s_v%d", indexName, version)
|
||||
}
|
||||
|
||||
func (i *Indexer) checkOldIndexes() {
|
||||
for v := 0; v < i.version; v++ {
|
||||
indexName := versionedIndexName(i.indexName, v)
|
||||
_, err := i.Client.GetIndex(indexName)
|
||||
if err == nil {
|
||||
log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
)
|
||||
|
||||
// ParsePaginator parses a db.Paginator into a skip and limit
|
||||
func ParsePaginator(paginator *db.ListOptions, maxNums ...int) (int, int) {
|
||||
// Use a very large number to indicate no limit
|
||||
unlimited := math.MaxInt32
|
||||
if len(maxNums) > 0 {
|
||||
// Some indexer engines have a limit on the page size, respect that
|
||||
unlimited = maxNums[0]
|
||||
}
|
||||
|
||||
if paginator == nil || paginator.IsListAll() {
|
||||
// It shouldn't happen. In actual usage scenarios, there should not be requests to search all.
|
||||
// But if it does happen, respect it and return "unlimited".
|
||||
// And it's also useful for testing.
|
||||
return 0, unlimited
|
||||
}
|
||||
|
||||
if paginator.PageSize == 0 {
|
||||
// Do not return any results when searching, it's used to get the total count only.
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
return paginator.GetSkipTake()
|
||||
}
|
||||
Reference in New Issue
Block a user