初始提交: Gitea 项目代码

2026-05-30 22:47:36 +08:00
commit f288f76350
6116 changed files with 776822 additions and 0 deletions
@@ -0,0 +1,21 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+	"fmt"
+	"strconv"
+)
+
+func Base36(i int64) string {
+	return strconv.FormatInt(i, 36)
+}
+
+// ParseBase36 parses s as a signed base-36 integer that fits in 64 bits.
+// On failure it returns 0 and an error that quotes the input and wraps the
+// underlying strconv error.
+func ParseBase36(s string) (int64, error) {
+	parsed, parseErr := strconv.ParseInt(s, 36, 64)
+	if parseErr == nil {
+		return parsed, nil
+	}
+	return 0, fmt.Errorf("invalid base36 integer %q: %w", s, parseErr)
+}
@@ -0,0 +1,58 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+	"github.com/blevesearch/bleve/v2"
+)
+
+// FlushingBatch is a batch of operations that automatically flushes to the
+// underlying index once it reaches a certain size.
+type FlushingBatch struct {
+	// maxBatchSize is the batch size at which flushIfFull submits the batch.
+	maxBatchSize int
+	// batch collects the operations queued since the last flush.
+	batch        *bleve.Batch
+	// index is the bleve index the batch is submitted to.
+	index        bleve.Index
+}
+
+// NewFlushingBatch returns a FlushingBatch bound to index. As soon as the
+// pending batch holds at least maxBatchSize operations, it is flushed to the
+// index automatically.
+func NewFlushingBatch(index bleve.Index, maxBatchSize int) *FlushingBatch {
+	fb := new(FlushingBatch)
+	fb.maxBatchSize = maxBatchSize
+	fb.batch = index.NewBatch()
+	fb.index = index
+	return fb
+}
+
+// Index queues an index operation for document id with the given data, then
+// flushes the batch if it has become full. An error from queueing the
+// operation is returned without attempting a flush.
+func (b *FlushingBatch) Index(id string, data any) error {
+	err := b.batch.Index(id, data)
+	if err != nil {
+		return err
+	}
+	return b.flushIfFull()
+}
+
+// Delete queues a delete operation for document id, then flushes the batch
+// if it has become full.
+func (b *FlushingBatch) Delete(id string) error {
+	b.batch.Delete(id)
+	flushErr := b.flushIfFull()
+	return flushErr
+}
+
+// flushIfFull flushes the batch once its size has reached maxBatchSize and
+// is a no-op otherwise.
+func (b *FlushingBatch) flushIfFull() error {
+	if b.batch.Size() >= b.maxBatchSize {
+		return b.Flush()
+	}
+	return nil
+}
+
+// Flush submits the pending batch to the index and, on success, starts a new
+// empty batch. When the submission fails the pending batch is kept as is.
+func (b *FlushingBatch) Flush() error {
+	if err := b.index.Batch(b.batch); err != nil {
+		return err
+	}
+	b.batch = b.index.NewBatch()
+	return nil
+}
@@ -0,0 +1,103 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+	"context"
+	"errors"
+
+	"gitea.dev/modules/indexer/internal"
+	"gitea.dev/modules/log"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/mapping"
+	"github.com/ethantkoenig/rupture"
+)
+
+// Compile-time check that *Indexer satisfies internal.Indexer.
+var _ internal.Indexer = &Indexer{}
+
+// Indexer represents a basic bleve indexer implementation
+type Indexer struct {
+	// Indexer is the underlying bleve index. It is nil until Init succeeds
+	// and is reset to nil by Close.
+	Indexer bleve.Index
+
+	// indexDir is the directory the bleve index is opened from / created in.
+	indexDir      string
+	// version is the index version expected by this build; Init writes it
+	// into the index metadata when it creates a new index.
+	version       int
+	// mappingGetter supplies the index mapping when a new index is created.
+	mappingGetter MappingGetter
+}
+
+// MappingGetter returns the bleve index mapping used to create a new index.
+type MappingGetter func() (mapping.IndexMapping, error)
+
+// NewIndexer builds a bleve Indexer for the index stored in indexDir.
+// version is the expected index version and mappingGetter provides the
+// mapping for a newly created index. The underlying bleve index is left
+// unset here; Init is what opens or creates it.
+func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.IndexMapping, error)) *Indexer {
+	idx := new(Indexer)
+	idx.indexDir = indexDir
+	idx.version = version
+	idx.mappingGetter = mappingGetter
+	return idx
+}
+
+// Init initializes the indexer by opening the bleve index in indexDir, or by
+// creating a new one when openIndexer reports that none is usable.
+//
+// It returns true when an existing index was opened and false when a new,
+// empty index was created. An error is returned for a nil receiver, for an
+// indexer that is already initialized, and for any failure while opening,
+// mapping, creating, or writing the metadata of the index.
+func (i *Indexer) Init(_ context.Context) (bool, error) {
+	if i == nil {
+		return false, errors.New("cannot init nil indexer")
+	}
+
+	if i.Indexer != nil {
+		return false, errors.New("indexer is already initialized")
+	}
+
+	indexer, version, err := openIndexer(i.indexDir, i.version)
+	if err != nil {
+		return false, err
+	}
+	if indexer != nil {
+		i.Indexer = indexer
+		return true, nil
+	}
+
+	// A non-zero version here means openIndexer found an outdated index.
+	if version != 0 {
+		log.Warn("Found older bleve index with version %d, Gitea will remove it and rebuild", version)
+	}
+
+	indexMapping, err := i.mappingGetter()
+	if err != nil {
+		return false, err
+	}
+
+	indexer, err = bleve.New(i.indexDir, indexMapping)
+	if err != nil {
+		return false, err
+	}
+
+	if err = rupture.WriteIndexMetadata(i.indexDir, &rupture.IndexMetadata{
+		Version: i.version,
+	}); err != nil {
+		// The freshly created index is not stored on the receiver in this
+		// path, so nobody else could ever close it: release it here instead
+		// of leaking it. The metadata error stays the one that is returned.
+		if closeErr := indexer.Close(); closeErr != nil {
+			log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, closeErr)
+		}
+		return false, err
+	}
+
+	i.Indexer = indexer
+
+	return false, nil
+}
+
+// Ping reports whether the indexer can be used: it returns an error for a
+// nil receiver or when no bleve index has been set yet, and nil otherwise.
+func (i *Indexer) Ping(_ context.Context) error {
+	switch {
+	case i == nil:
+		return errors.New("cannot ping nil indexer")
+	case i.Indexer == nil:
+		return errors.New("indexer is not initialized")
+	default:
+		return nil
+	}
+}
+
+// Close closes the underlying bleve index, if any, and clears it from the
+// indexer. A close failure is logged rather than returned. Calling Close on
+// a nil or uninitialized indexer does nothing.
+func (i *Indexer) Close() {
+	if i == nil {
+		return
+	}
+	opened := i.Indexer
+	if opened == nil {
+		return
+	}
+
+	if closeErr := opened.Close(); closeErr != nil {
+		log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, closeErr)
+	}
+	i.Indexer = nil
+}
@@ -0,0 +1,66 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+	"gitea.dev/modules/optional"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/search/query"
+)
+
+// NumericEqualityQuery builds a numeric range query on field whose lower and
+// upper bounds are both value and both inclusive, i.e. an equality match.
+func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
+	bound := float64(value)
+	inclusive := true
+	rangeQuery := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
+	rangeQuery.SetField(field)
+	return rangeQuery
+}
+
+// MatchPhraseQuery builds a match-phrase query for matchPhrase, restricted
+// to field, using the given analyzer and fuzziness.
+func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
+	phraseQuery := bleve.NewMatchPhraseQuery(matchPhrase)
+	phraseQuery.Fuzziness = fuzziness
+	phraseQuery.Analyzer = analyzer
+	phraseQuery.FieldVal = field
+	return phraseQuery
+}
+
+// MatchAndQuery builds a match query for matchPhrase on field with the given
+// analyzer and fuzziness, using the AND operator between terms.
+func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery {
+	matchQuery := bleve.NewMatchQuery(matchPhrase)
+	matchQuery.Operator = query.MatchQueryOperatorAnd
+	matchQuery.Fuzziness = fuzziness
+	matchQuery.Analyzer = analyzer
+	matchQuery.FieldVal = field
+	return matchQuery
+}
+
+// BoolFieldQuery builds a query matching documents whose boolean field
+// equals value.
+func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
+	boolQuery := bleve.NewBoolFieldQuery(value)
+	boolQuery.SetField(field)
+	return boolQuery
+}
+
+// NumericRangeInclusiveQuery builds a numeric range query on field. Each
+// bound that is present in its option is applied as an inclusive bound; a
+// bound whose option is empty is passed to bleve as nil.
+func NumericRangeInclusiveQuery(minOption, maxOption optional.Option[int64], field string) *query.NumericRangeQuery {
+	var lower, upper *float64
+	var lowerInclusive, upperInclusive *bool
+	if minOption.Has() {
+		bound, inclusive := float64(minOption.Value()), true
+		lower, lowerInclusive = &bound, &inclusive
+	}
+	if maxOption.Has() {
+		bound, inclusive := float64(maxOption.Value()), true
+		upper, upperInclusive = &bound, &inclusive
+	}
+	rangeQuery := bleve.NewNumericRangeInclusiveQuery(lower, upper, lowerInclusive, upperInclusive)
+	rangeQuery.SetField(field)
+	return rangeQuery
+}
@@ -0,0 +1,90 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+	"errors"
+	"os"
+	"unicode"
+
+	"gitea.dev/modules/log"
+	"gitea.dev/modules/setting"
+	"gitea.dev/modules/util"
+
+	"github.com/blevesearch/bleve/v2"
+	unicode_tokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+	"github.com/blevesearch/bleve/v2/index/upsidedown"
+	"github.com/ethantkoenig/rupture"
+)
+
+const (
+	// maxFuzziness is the upper bound applied to every fuzziness value
+	// computed in this file.
+	maxFuzziness = 2
+)
+
+// openIndexer opens the index at the specified path, checking for metadata
+// updates and bleve version updates.
+//
+// Return values:
+//   - (index, 0, nil): the existing index was opened.
+//   - (nil, 0, nil): nothing exists at path, the index has to be created.
+//   - (nil, oldVersion, err): the stored metadata version is lower than
+//     latestVersion; path has been removed (err is the result of that
+//     removal) and the index has to be re-created.
+//   - (nil, 0, err) after a bleve incompatible-version failure: path has been
+//     removed (err is the result of that removal) and the index has to be
+//     re-created.
+//   - (nil, 0, err) with a non-nil err for every other failure.
+func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
+	if _, err := os.Stat(path); err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil, 0, nil
+		}
+		return nil, 0, err
+	}
+
+	metadata, err := rupture.ReadIndexMetadata(path)
+	if err != nil {
+		return nil, 0, err
+	}
+	if metadata.Version < latestVersion {
+		// the indexer is using a previous version, so we should delete it and
+		// re-populate
+		return nil, metadata.Version, util.RemoveAll(path)
+	}
+
+	index, err := bleve.Open(path)
+	if err != nil {
+		if errors.Is(err, upsidedown.IncompatibleVersion) {
+			log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding")
+			return nil, 0, util.RemoveAll(path)
+		}
+		return nil, 0, err
+	}
+
+	return index, 0, nil
+}
+
+// GuessFuzzinessByKeyword guesses the fuzziness (how many characters may
+// differ, in terms of levenshtein distance, while two strings are still
+// considered equivalent) to use for the phrase s.
+// The phrase is split with the unicode tokenizer and the smallest per-token
+// fuzziness wins, capped at maxFuzziness. If a phrase uses CJK (eg: `갃갃갃`
+// `啊啊啊`), the fuzziness is zero; a phrase without tokens also yields zero.
+func GuessFuzzinessByKeyword(s string) int {
+	tokens := unicode_tokenizer.NewUnicodeTokenizer().Tokenize([]byte(s))
+	if len(tokens) == 0 {
+		return 0
+	}
+
+	result := maxFuzziness
+	for _, tok := range tokens {
+		result = min(result, guessFuzzinessByKeyword(string(tok.Term)))
+	}
+	return result
+}
+
+// guessFuzzinessByKeyword computes the fuzziness for a single keyword.
+func guessFuzzinessByKeyword(s string) int {
+	// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
+	// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
+	// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
+	// Likewise, queries whose terms contain characters that are *not* letters should not use fuzziness
+
+	for _, r := range s {
+		isASCIILetter := r < 128 && unicode.IsLetter(r)
+		if !isASCIILetter {
+			return 0
+		}
+	}
+	limit := min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness)
+	return min(limit, len(s)/4)
+}
@@ -0,0 +1,58 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+	"fmt"
+	"testing"
+
+	"gitea.dev/modules/setting"
+	"gitea.dev/modules/test"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
+	defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
+
+	scenarios := []struct {
+		Input     string
+		Fuzziness int // See util.go for the definition of fuzziness in this particular context
+	}{
+		{
+			Input:     "",
+			Fuzziness: 0,
+		},
+		{
+			Input:     "Avocado",
+			Fuzziness: 1,
+		},
+		{
+			Input:     "Geschwindigkeit",
+			Fuzziness: 2,
+		},
+		{
+			Input:     "non-exist",
+			Fuzziness: 0,
+		},
+		{
+			Input:     "갃갃갃",
+			Fuzziness: 0,
+		},
+		{
+			Input:     "repo1",
+			Fuzziness: 0,
+		},
+		{
+			Input:     "avocado.md",
+			Fuzziness: 0,
+		},
+	}
+
+	for _, scenario := range scenarios {
+		t.Run(fmt.Sprintf("Fuziniess:%s=%d", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
+			assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
+		})
+	}
+}
@@ -0,0 +1,34 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package db
+
+import (
+	"context"
+
+	"gitea.dev/modules/indexer/internal"
+)
+
+// Compile-time check that *Indexer satisfies internal.Indexer.
+var _ internal.Indexer = &Indexer{}
+
+// Indexer represents a basic db indexer implementation.
+// It carries no state of its own.
+type Indexer struct{}
+
+// Init initializes the indexer.
+// It always reports (true, nil).
+func (i *Indexer) Init(_ context.Context) (bool, error) {
+	// Return true to indicate that the index was opened/existed.
+	// So that the indexer will not try to populate the index, the data is already there.
+	return true, nil
+}
+
+// Ping checks if the indexer is available.
+// It always returns nil.
+func (i *Indexer) Ping(_ context.Context) error {
+	// No need to ping database to check if it is available.
+	// If the database goes down, Gitea will go down, so nobody will care if the indexer is available.
+	return nil
+}
+
+// Close closes the indexer.
+// It is a no-op.
+func (i *Indexer) Close() {
+	// nothing to do
+}
@@ -0,0 +1,409 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"net/url"
+	"slices"
+	"strconv"
+	"strings"
+	"time"
+
+	"gitea.dev/modules/indexer/internal"
+	"gitea.dev/modules/json"
+)
+
+// Compile-time check that *Indexer satisfies internal.Indexer.
+var _ internal.Indexer = &Indexer{}
+
+// Indexer is a narrow wrapper around an Elasticsearch/OpenSearch cluster.
+// It targets the REST subset shared by Elasticsearch 7/8/9 and OpenSearch 3.
+type Indexer struct {
+	// client is created by Init and set back to nil by Close.
+	client *http.Client
+	// base holds the raw URL given to NewIndexer until Init replaces it with
+	// the normalized form described below.
+	base   string // base URL with trailing slash, no userinfo
+	// user and pass are the basic-auth credentials Init extracts from the
+	// URL's userinfo, if any.
+	user   string
+	pass   string
+
+	// indexName is the index name without the version suffix.
+	indexName string
+	// version is combined with indexName by VersionedIndexName.
+	version   int
+	// mapping is the request body sent when the index is created.
+	mapping   string
+}
+
+// NewIndexer builds an Indexer from the cluster URL, the unversioned index
+// name, the index version and the index-creation body. No connection is made
+// here; Init is what parses the URL and creates the HTTP client.
+func NewIndexer(rawURL, indexName string, version int, mapping string) *Indexer {
+	ix := new(Indexer)
+	ix.base = rawURL
+	ix.indexName = indexName
+	ix.version = version
+	ix.mapping = mapping
+	return ix
+}
+
+// Init connects and creates the versioned index if missing, returning true if it already existed.
+//
+// It parses the configured URL, moves any userinfo into the user/pass fields,
+// normalizes the base URL to end with a slash, builds the HTTP client, and
+// then checks for (and if necessary creates) the versioned index.
+func (i *Indexer) Init(ctx context.Context) (bool, error) {
+	parsed, err := url.Parse(i.base)
+	if err != nil {
+		return false, fmt.Errorf("parse elasticsearch url: %w", err)
+	}
+	// Credentials are stripped from the URL and kept separately so that they
+	// are sent via basic auth (see do) instead of being part of i.base.
+	if parsed.User != nil {
+		i.user = parsed.User.Username()
+		i.pass, _ = parsed.User.Password()
+		parsed.User = nil
+	}
+	// Request paths are appended directly to i.base, hence the trailing slash.
+	base := parsed.String()
+	if !strings.HasSuffix(base, "/") {
+		base += "/"
+	}
+	i.base = base
+	// No client-level Timeout: bulk/_delete_by_query can legitimately run for
+	// minutes on large repos. Per-request deadlines come from the caller's ctx;
+	// transport-level timeouts cover stalled connects/handshakes/headers so a
+	// half-open server cannot wedge the indexer indefinitely.
+	i.client = &http.Client{
+		Transport: &http.Transport{
+			Proxy:                 http.ProxyFromEnvironment,
+			DialContext:           (&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
+			TLSHandshakeTimeout:   10 * time.Second,
+			ResponseHeaderTimeout: 30 * time.Second,
+			ExpectContinueTimeout: 1 * time.Second,
+			IdleConnTimeout:       90 * time.Second,
+			MaxIdleConns:          100,
+		},
+	}
+
+	exists, err := i.indexExists(ctx, i.VersionedIndexName())
+	if err != nil {
+		return false, err
+	}
+	if exists {
+		return true, nil
+	}
+
+	// The versioned index is missing: create it and report "not existed".
+	if err := i.createIndex(ctx); err != nil {
+		return false, err
+	}
+
+	return false, nil
+}
+
+// Ping queries the cluster health endpoint and returns an error when the
+// request fails or when the reported status is neither green nor yellow.
+func (i *Indexer) Ping(ctx context.Context) error {
+	var health struct {
+		Status string `json:"status"`
+	}
+	err := i.doJSON(ctx, http.MethodGet, "_cluster/health", nil, &health)
+	if err != nil {
+		return err
+	}
+	// Healthy = green; usable = yellow. Red is unusable.
+	// https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
+	switch health.Status {
+	case "green", "yellow":
+		return nil
+	default:
+		return fmt.Errorf("status of elasticsearch cluster is %s", health.Status)
+	}
+}
+
+// Close closes the client's idle HTTP connections and drops the client.
+// It does nothing for a nil indexer or when no client is set.
+func (i *Indexer) Close() {
+	if i == nil {
+		return
+	}
+	if client := i.client; client != nil {
+		client.CloseIdleConnections()
+		i.client = nil
+	}
+}
+
+// Bulk submits index/delete ops. Returns the first item-level failure, if any.
+// An empty ops slice is a no-op that sends no request.
+func (i *Indexer) Bulk(ctx context.Context, ops []BulkOp) error {
+	if len(ops) == 0 {
+		return nil
+	}
+
+	index := i.VersionedIndexName()
+	var buf bytes.Buffer
+	// Pre-size the buffer with a rough per-operation guess.
+	buf.Grow(len(ops) * 256)
+	// The request body is newline-delimited JSON: one action/metadata line
+	// per op, followed by the document line for index operations only.
+	for _, op := range ops {
+		meta := map[string]any{op.action: map[string]any{"_index": index, "_id": op.id}}
+		if err := writeJSONLine(&buf, meta); err != nil {
+			return err
+		}
+		if op.action == bulkActionIndex {
+			if err := writeJSONLine(&buf, op.doc); err != nil {
+				return err
+			}
+		}
+	}
+
+	res, err := i.do(ctx, http.MethodPost, urlPath(index, "_bulk"), "application/x-ndjson", bytes.NewReader(buf.Bytes()))
+	if err != nil {
+		return err
+	}
+	defer drainAndClose(res)
+
+	// Only the fields needed to detect item-level failures are decoded.
+	var body struct {
+		Errors bool `json:"errors"`
+		Items  []map[string]struct {
+			Status int        `json:"status"`
+			Error  json.Value `json:"error"`
+		} `json:"items"`
+	}
+	if err := json.NewDecoder(res.Body).Decode(&body); err != nil {
+		return err
+	}
+	// The top-level flag lets the per-item scan be skipped on full success.
+	if !body.Errors {
+		return nil
+	}
+	return firstBulkError(body.Items)
+}
+
+// firstBulkError scans the items of a bulk response and returns an error for
+// the first entry whose status is 300 or above, or nil if there is none.
+// Each items entry is a single-key map ({"index": {...}} or {"delete": {...}}).
+// A delete that answered 404 (document already missing) is idempotent and is
+// not reported.
+func firstBulkError(items []map[string]struct {
+	Status int        `json:"status"`
+	Error  json.Value `json:"error"`
+},
+) error {
+	for _, entry := range items {
+		for action, outcome := range entry {
+			missingDelete := action == bulkActionDelete && outcome.Status == http.StatusNotFound
+			if missingDelete || outcome.Status < 300 {
+				continue
+			}
+			return fmt.Errorf("bulk %s failed (status %d): %s", action, outcome.Status, string(outcome.Error))
+		}
+	}
+	return nil
+}
+
+// Index stores doc, encoded as JSON, under id in the versioned index using a
+// PUT on the _doc endpoint.
+func (i *Indexer) Index(ctx context.Context, id string, doc any) error {
+	payload, err := json.Marshal(doc)
+	if err != nil {
+		return err
+	}
+	target := urlPath(i.VersionedIndexName(), "_doc", id)
+	return i.doJSON(ctx, http.MethodPut, target, bytes.NewReader(payload), nil)
+}
+
+// Delete removes the document stored under id. A 404 response (the id does
+// not exist) is accepted and not treated as an error.
+func (i *Indexer) Delete(ctx context.Context, id string) error {
+	target := urlPath(i.VersionedIndexName(), "_doc", id)
+	res, err := i.do(ctx, http.MethodDelete, target, "", nil, http.StatusNotFound)
+	if err == nil {
+		drainAndClose(res)
+	}
+	return err
+}
+
+// DeleteByQuery posts query to the _delete_by_query endpoint of the
+// versioned index, removing every matching document.
+func (i *Indexer) DeleteByQuery(ctx context.Context, query Query) error {
+	request := map[string]any{"query": query.querySource()}
+	payload, err := json.Marshal(request)
+	if err != nil {
+		return err
+	}
+	target := urlPath(i.VersionedIndexName(), "_delete_by_query")
+	return i.doJSON(ctx, http.MethodPost, target, bytes.NewReader(payload), nil)
+}
+
+// Refresh posts to the _refresh endpoint of the versioned index so that
+// recent writes become searchable.
+func (i *Indexer) Refresh(ctx context.Context) error {
+	target := urlPath(i.VersionedIndexName(), "_refresh")
+	return i.doJSON(ctx, http.MethodPost, target, nil, nil)
+}
+
+// Search runs a search request and decodes the reply.
+// Optional parts of req (query, sort, from, aggregations, highlight) are only
+// added to the request body when set; size is always sent.
+func (i *Indexer) Search(ctx context.Context, req SearchRequest) (*SearchResponse, error) {
+	body := map[string]any{}
+	if req.Query != nil {
+		body["query"] = req.Query.querySource()
+	}
+	if len(req.Sort) > 0 {
+		sorts := make([]map[string]any, len(req.Sort))
+		for idx, s := range req.Sort {
+			sorts[idx] = s.source()
+		}
+		body["sort"] = sorts
+	}
+	if req.From > 0 {
+		body["from"] = req.From
+	}
+	// size is sent unconditionally, including a zero value.
+	body["size"] = req.Size
+	if len(req.Aggregations) > 0 {
+		body["aggs"] = req.Aggregations
+	}
+	if len(req.Highlight) > 0 {
+		body["highlight"] = req.Highlight
+	}
+
+	payload, err := json.Marshal(body)
+	if err != nil {
+		return nil, err
+	}
+
+	// Default track_total_hits is 10000 (capped count); send it explicitly so
+	// callers can choose between exact totals (true) and skipping counting (false).
+	path := urlPath(i.VersionedIndexName(), "_search") + "?track_total_hits=" + strconv.FormatBool(req.TrackTotal)
+	res, err := i.do(ctx, http.MethodPost, path, "application/json", bytes.NewReader(payload))
+	if err != nil {
+		return nil, err
+	}
+	defer drainAndClose(res)
+	return decodeSearchResponse(res.Body)
+}
+
+// indexExists issues a HEAD request for the index called name and reports
+// whether the answer was 200 OK. A 404 is accepted and reported as false.
+func (i *Indexer) indexExists(ctx context.Context, name string) (bool, error) {
+	res, err := i.do(ctx, http.MethodHead, urlPath(name), "", nil, http.StatusNotFound)
+	if err != nil {
+		return false, err
+	}
+	defer drainAndClose(res)
+	return res.StatusCode == http.StatusOK, nil
+}
+
+// createIndex creates the versioned index with the configured mapping as
+// request body and fails when the request errors or is not acknowledged.
+// After a successful creation it checks for indexes of older versions.
+func (i *Indexer) createIndex(ctx context.Context) error {
+	indexName := i.VersionedIndexName()
+
+	var ack struct {
+		Acknowledged bool `json:"acknowledged"`
+	}
+	err := i.doJSON(ctx, http.MethodPut, urlPath(indexName), bytes.NewBufferString(i.mapping), &ack)
+	if err != nil {
+		return fmt.Errorf("create index %s: %w", indexName, err)
+	}
+	if !ack.Acknowledged {
+		return fmt.Errorf("create index %s not acknowledged", indexName)
+	}
+
+	i.checkOldIndexes(ctx)
+	return nil
+}
+
+// do sends a request and returns the response. Status >= 300 is turned into
+// an error unless the status appears in okStatus. The caller closes Body.
+//
+// path is appended to the base URL as is. contentType, when non-empty, is
+// sent as the Content-Type header, and basic auth is added when a user or a
+// password is configured. When the indexer has no HTTP client (Init has not
+// run yet, or Close has already been called) an error is returned instead of
+// dereferencing the nil client.
+func (i *Indexer) do(ctx context.Context, method, path, contentType string, body io.Reader, okStatus ...int) (*http.Response, error) {
+	if i == nil || i.client == nil {
+		return nil, fmt.Errorf("%s %s: elasticsearch indexer is not initialized", method, path)
+	}
+	req, err := http.NewRequestWithContext(ctx, method, i.base+path, body)
+	if err != nil {
+		return nil, err
+	}
+	if contentType != "" {
+		req.Header.Set("Content-Type", contentType)
+	}
+	if i.user != "" || i.pass != "" {
+		req.SetBasicAuth(i.user, i.pass)
+	}
+	res, err := i.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	if res.StatusCode >= 300 && !slices.Contains(okStatus, res.StatusCode) {
+		// The error body is read for the message before the response is
+		// closed; the caller never sees the response in this path.
+		msg := readErrBody(res)
+		res.Body.Close()
+		return nil, fmt.Errorf("%s %s: %s", method, path, msg)
+	}
+	return res, nil
+}
+
+// doJSON sends a request with a JSON body and, when out is non-nil, decodes
+// the JSON response into it.
+func (i *Indexer) doJSON(ctx context.Context, method, path string, body io.Reader, out any) error {
+	contentType := ""
+	if body != nil {
+		contentType = "application/json"
+	}
+	res, err := i.do(ctx, method, path, contentType, body)
+	if err != nil {
+		return err
+	}
+	defer drainAndClose(res)
+	if out == nil {
+		return nil
+	}
+	return json.NewDecoder(res.Body).Decode(out)
+}
+
+// drainAndClose reads whatever is left of the response body into
+// io.Discard and then closes it, so the underlying TCP connection can be
+// reused for keep-alive. Errors from either step are ignored.
+func drainAndClose(res *http.Response) {
+	body := res.Body
+	_, _ = io.Copy(io.Discard, body)
+	body.Close()
+}
+
+// writeJSONLine appends the JSON encoding of v followed by a newline to buf.
+// Nothing is written when v cannot be marshalled.
+func writeJSONLine(buf *bytes.Buffer, v any) error {
+	line, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
+	buf.Write(append(line, '\n'))
+	return nil
+}
+
+// readErrBody builds the message for an error response from its status code
+// and at most 4 KiB of its body (surrounding whitespace trimmed). Whatever
+// remains of the body is drained so the underlying connection can be reused
+// (keep-alive needs Body fully read). Read errors are ignored.
+func readErrBody(res *http.Response) string {
+	const maxErrBody = 4 << 10
+	snippet, _ := io.ReadAll(io.LimitReader(res.Body, maxErrBody))
+	_, _ = io.Copy(io.Discard, res.Body)
+	trimmed := bytes.TrimSpace(snippet)
+	return fmt.Sprintf("status %d: %s", res.StatusCode, trimmed)
+}
+
+// decodeSearchResponse decodes the JSON search reply read from r into a
+// SearchResponse, copying the total, the hits and the aggregation buckets.
+func decodeSearchResponse(r io.Reader) (*SearchResponse, error) {
+	// raw mirrors only the parts of the reply that are copied below.
+	var raw struct {
+		Hits struct {
+			Total struct {
+				Value int64 `json:"value"`
+			} `json:"total"`
+			Hits []struct {
+				ID        string              `json:"_id"`
+				Score     float64             `json:"_score"`
+				Source    json.Value          `json:"_source"`
+				Highlight map[string][]string `json:"highlight"`
+			} `json:"hits"`
+		} `json:"hits"`
+		Aggregations map[string]struct {
+			Buckets []struct {
+				Key      any   `json:"key"`
+				DocCount int64 `json:"doc_count"`
+			} `json:"buckets"`
+		} `json:"aggregations"`
+	}
+	if err := json.NewDecoder(r).Decode(&raw); err != nil {
+		return nil, err
+	}
+
+	// Hits is always a non-nil slice, even when the reply has no hits.
+	resp := &SearchResponse{
+		Total: raw.Hits.Total.Value,
+		Hits:  make([]SearchHit, 0, len(raw.Hits.Hits)),
+	}
+	for _, h := range raw.Hits.Hits {
+		resp.Hits = append(resp.Hits, SearchHit{
+			ID:        h.ID,
+			Score:     h.Score,
+			Source:    h.Source,
+			Highlight: h.Highlight,
+		})
+	}
+	// Aggregations stays nil when the reply carries none.
+	if len(raw.Aggregations) > 0 {
+		resp.Aggregations = make(map[string][]AggBucket, len(raw.Aggregations))
+		for name, agg := range raw.Aggregations {
+			buckets := make([]AggBucket, len(agg.Buckets))
+			for idx, b := range agg.Buckets {
+				buckets[idx] = AggBucket{Key: b.Key, DocCount: b.DocCount}
+			}
+			resp.Aggregations[name] = buckets
+		}
+	}
+	return resp, nil
+}
+
+// urlPath joins path segments with `/` and percent-escapes each.
+func urlPath(segments ...string) string {
+	var b bytes.Buffer
+	for idx, s := range segments {
+		if idx > 0 {
+			b.WriteByte('/')
+		}
+		b.WriteString(url.PathEscape(s))
+	}
+	return b.String()
+}
@@ -0,0 +1,39 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import (
+	"strings"
+	"testing"
+
+	"gitea.dev/modules/test"
+
+	"github.com/stretchr/testify/require"
+)
+
+func newRealIndexer(t *testing.T) *Indexer {
+	t.Helper()
+	esURL := test.ExternalServiceHTTP(t, "TEST_ELASTICSEARCH_URL", "http://elasticsearch:9200")
+	indexName := "gitea_test_" + strings.ReplaceAll(strings.ToLower(t.Name()), "/", "_")
+	ix := NewIndexer(esURL, indexName, 1, `{"mappings":{"properties":{"x":{"type":"keyword"}}}}`)
+	_, err := ix.Init(t.Context())
+	require.NoError(t, err)
+	t.Cleanup(ix.Close)
+	return ix
+}
+
+// TestPing checks that Ping succeeds against a freshly initialized indexer.
+func TestPing(t *testing.T) {
+	ix := newRealIndexer(t)
+	require.NoError(t, ix.Ping(t.Context()))
+}
+
+// TestDeleteSwallows404 checks that deleting an id that was never indexed
+// is not reported as an error.
+func TestDeleteSwallows404(t *testing.T) {
+	ix := newRealIndexer(t)
+	require.NoError(t, ix.Delete(t.Context(), "missing-id"))
+}
+
+// TestBulkAcceptsDelete404 checks that a bulk delete of an id that was never
+// indexed is not reported as an error.
+func TestBulkAcceptsDelete404(t *testing.T) {
+	ix := newRealIndexer(t)
+	require.NoError(t, ix.Bulk(t.Context(), []BulkOp{DeleteOp("missing-id")}))
+}
@@ -0,0 +1,132 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+// MultiMatch types used by the call sites; these are the string values
+// passed to MultiMatchQuery.Type. See
+// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
+const (
+	MultiMatchTypeBestFields   = "best_fields"
+	MultiMatchTypePhrasePrefix = "phrase_prefix"
+)
+
+// ToAnySlice copies the elements of s into a new []any of the same length,
+// e.g. to feed variadic query args like those of TermsQuery.
+func ToAnySlice[T any](s []T) []any {
+	converted := make([]any, 0, len(s))
+	for _, elem := range s {
+		converted = append(converted, elem)
+	}
+	return converted
+}
+
+// Query is an Elasticsearch query DSL node. It marshals to the JSON
+// object expected by the ES query API.
+type Query interface {
+	// querySource returns the query as a map ready to be JSON-encoded.
+	querySource() map[string]any
+}
+
+// rawQuery is a Query that already is its own JSON-ready map.
+type rawQuery map[string]any
+
+// querySource returns the map itself.
+func (q rawQuery) querySource() map[string]any { return q }
+
+// TermQuery builds a `term` query: documents whose `field` exactly equals
+// `value`.
+func TermQuery(field string, value any) Query {
+	clause := map[string]any{field: value}
+	return rawQuery{"term": clause}
+}
+
+// TermsQuery builds a `terms` query: documents whose `field` equals any of
+// `values`.
+func TermsQuery(field string, values ...any) Query {
+	clause := map[string]any{field: values}
+	return rawQuery{"terms": clause}
+}
+
+// MatchQuery builds a `match` query: a full-text match of `value` on a
+// single `field`.
+func MatchQuery(field string, value any) Query {
+	clause := map[string]any{field: value}
+	return rawQuery{"match": clause}
+}
+
+// MatchPhraseQuery builds a `match_phrase` query: the exact phrase `value`
+// on `field`.
+func MatchPhraseQuery(field, value string) Query {
+	clause := map[string]any{field: value}
+	return rawQuery{"match_phrase": clause}
+}
+
+// MultiMatchQuery is the fluent builder for a multi_match query.
+type MultiMatchQuery struct {
+	query    any
+	fields   []string
+	typ      string
+	operator string
+}
+
+// NewMultiMatchQuery starts a multi_match query for query over the given
+// fields. Type and operator are left unset.
+func NewMultiMatchQuery(query any, fields ...string) *MultiMatchQuery {
+	mm := new(MultiMatchQuery)
+	mm.query = query
+	mm.fields = fields
+	return mm
+}
+
+// Type sets the multi_match type and returns the builder for chaining.
+func (m *MultiMatchQuery) Type(t string) *MultiMatchQuery {
+	m.typ = t
+	return m
+}
+
+// Operator sets the multi_match operator and returns the builder for chaining.
+func (m *MultiMatchQuery) Operator(op string) *MultiMatchQuery {
+	m.operator = op
+	return m
+}
+
+// querySource renders the builder as {"multi_match": {...}}. "query" is
+// always present; "fields", "type" and "operator" only when they are set.
+func (m *MultiMatchQuery) querySource() map[string]any {
+	inner := map[string]any{"query": m.query}
+	if len(m.fields) != 0 {
+		inner["fields"] = m.fields
+	}
+	if typ := m.typ; typ != "" {
+		inner["type"] = typ
+	}
+	if op := m.operator; op != "" {
+		inner["operator"] = op
+	}
+	return map[string]any{"multi_match": inner}
+}
+
+// RangeQuery is the fluent builder for a range query.
+type RangeQuery struct {
+	field string
+	body  map[string]any
+}
+
+// NewRangeQuery starts a range query on field with no bounds set.
+func NewRangeQuery(field string) *RangeQuery {
+	rq := new(RangeQuery)
+	rq.field = field
+	rq.body = map[string]any{}
+	return rq
+}
+
+// Gte sets the "gte" bound and returns the builder for chaining.
+func (r *RangeQuery) Gte(v any) *RangeQuery {
+	r.body["gte"] = v
+	return r
+}
+
+// Lte sets the "lte" bound and returns the builder for chaining.
+func (r *RangeQuery) Lte(v any) *RangeQuery {
+	r.body["lte"] = v
+	return r
+}
+
+// querySource renders the builder as {"range": {field: bounds}}.
+func (r *RangeQuery) querySource() map[string]any {
+	perField := map[string]any{r.field: r.body}
+	return map[string]any{"range": perField}
+}
+
+// BoolQuery is the fluent builder for a bool query.
+type BoolQuery struct {
+	must    []Query
+	should  []Query
+	mustNot []Query
+}
+
+// NewBoolQuery starts an empty bool query.
+func NewBoolQuery() *BoolQuery { return new(BoolQuery) }
+
+// Must appends q to the "must" clauses and returns the builder for chaining.
+func (b *BoolQuery) Must(q ...Query) *BoolQuery {
+	b.must = append(b.must, q...)
+	return b
+}
+
+// Should appends q to the "should" clauses and returns the builder for chaining.
+func (b *BoolQuery) Should(q ...Query) *BoolQuery {
+	b.should = append(b.should, q...)
+	return b
+}
+
+// MustNot appends q to the "must_not" clauses and returns the builder for chaining.
+func (b *BoolQuery) MustNot(q ...Query) *BoolQuery {
+	b.mustNot = append(b.mustNot, q...)
+	return b
+}
+
+// querySource renders the builder as {"bool": {...}}, emitting only the
+// clause lists that are non-empty.
+func (b *BoolQuery) querySource() map[string]any {
+	clauses := []struct {
+		key     string
+		queries []Query
+	}{
+		{"must", b.must},
+		{"should", b.should},
+		{"must_not", b.mustNot},
+	}
+
+	inner := map[string]any{}
+	for _, clause := range clauses {
+		if len(clause.queries) > 0 {
+			inner[clause.key] = querySlice(clause.queries)
+		}
+	}
+	return map[string]any{"bool": inner}
+}
+
+// querySlice renders every query in queries via querySource, preserving
+// order.
+func querySlice(queries []Query) []map[string]any {
+	rendered := make([]map[string]any, 0, len(queries))
+	for _, q := range queries {
+		rendered = append(rendered, q.querySource())
+	}
+	return rendered
+}
@@ -0,0 +1,76 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import "gitea.dev/modules/json"
+
+// Action names used as the key of a bulk request's metadata line and
+// compared against the keys of the bulk response items.
+const (
+	bulkActionIndex  = "index"
+	bulkActionDelete = "delete"
+)
+
+// BulkOp is a single write inside a Bulk call. Construct with IndexOp or DeleteOp.
+type BulkOp struct {
+	action string
+	id     string
+	doc    any
+}
+
+// IndexOp returns the bulk operation that indexes doc under id.
+func IndexOp(id string, doc any) BulkOp {
+	var op BulkOp
+	op.action = bulkActionIndex
+	op.id = id
+	op.doc = doc
+	return op
+}
+
+// DeleteOp returns the bulk operation that deletes the document stored under id.
+func DeleteOp(id string) BulkOp {
+	var op BulkOp
+	op.action = bulkActionDelete
+	op.id = id
+	return op
+}
+
+// SortField is one entry of the search sort array.
+type SortField struct {
+	Field string
+	Desc  bool
+}
+
+// source renders the entry as {Field: {"order": "asc"|"desc"}}; the order is
+// "desc" when Desc is set and "asc" otherwise.
+func (s SortField) source() map[string]any {
+	if s.Desc {
+		return map[string]any{s.Field: map[string]any{"order": "desc"}}
+	}
+	return map[string]any{s.Field: map[string]any{"order": "asc"}}
+}
+
+// SearchRequest captures everything Gitea sends to the _search endpoint.
+// Aggregations and Highlight are raw ES JSON bodies — callers write them as
+// map[string]any since each has exactly one call site with a fixed shape.
+type SearchRequest struct {
+	// Query is the query DSL node to run.
+	Query        Query
+	// Sort lists the sort entries in order.
+	Sort         []SortField
+	// From is the offset of the first hit to return.
+	From         int
+	// Size is the number of hits to return.
+	Size         int
+	// TrackTotal selects between exact totals (true) and skipping counting (false).
+	TrackTotal   bool
+	// Aggregations is the raw aggregations body.
+	Aggregations map[string]any
+	// Highlight is the raw highlight body.
+	Highlight    map[string]any
+}
+
+// SearchHit is a single result row.
+type SearchHit struct {
+	// ID is the hit's "_id".
+	ID        string
+	// Score is the hit's "_score".
+	Score     float64
+	// Source is the hit's "_source", kept as undecoded JSON.
+	Source    json.Value
+	// Highlight is the hit's "highlight" object.
+	Highlight map[string][]string
+}
+
+// AggBucket is a terms-aggregation bucket.
+type AggBucket struct {
+	// Key is the bucket's "key" as decoded from JSON.
+	Key      any
+	// DocCount is the bucket's "doc_count".
+	DocCount int64
+}
+
+// SearchResponse is Gitea's decoded view of the search reply.
+type SearchResponse struct {
+	// Total is the reply's "hits.total.value".
+	Total        int64
+	// Hits holds the returned result rows.
+	Hits         []SearchHit
+	// Aggregations maps an aggregation name to its buckets.
+	Aggregations map[string][]AggBucket
+}
@@ -0,0 +1,34 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import (
+	"context"
+	"fmt"
+
+	"gitea.dev/modules/log"
+)
+
+// VersionedIndexName returns the indexer's index name combined with its
+// version, as produced by versionedIndexName.
+func (i *Indexer) VersionedIndexName() string {
+	name := versionedIndexName(i.indexName, i.version)
+	return name
+}
+
+// versionedIndexName returns "<indexName>.v<version>", except for version 0
+// which maps to the bare indexName (the old index name without version).
+func versionedIndexName(indexName string, version int) string {
+	if version != 0 {
+		return fmt.Sprintf("%s.v%d", indexName, version)
+	}
+	// Old index name without version
+	return indexName
+}
+
+// checkOldIndexes looks for indexes of every version below the current one
+// and logs a warning for each that still exists. Nothing is deleted, and a
+// failed existence check is skipped silently.
+func (i *Indexer) checkOldIndexes(ctx context.Context) {
+	for oldVersion := range i.version {
+		oldName := versionedIndexName(i.indexName, oldVersion)
+		found, err := i.indexExists(ctx, oldName)
+		if err != nil || !found {
+			continue
+		}
+		log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", oldName)
+	}
+}
@@ -0,0 +1,37 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+	"context"
+	"errors"
+)
+
+// Indexer defines a basic indexer interface
+type Indexer interface {
+	// Init initializes the indexer
+	// returns true if the index was opened/existed (with data populated), false if it was created/not-existed (with no data)
+	Init(ctx context.Context) (bool, error)
+	// Ping checks if the indexer is available
+	Ping(ctx context.Context) error
+	// Close closes the indexer
+	Close()
+}
+
+// NewDummyIndexer returns a placeholder Indexer whose Init and Ping always
+// fail with "indexer is not ready".
+func NewDummyIndexer() Indexer {
+	return new(dummyIndexer)
+}
+
+// dummyIndexer is an Indexer that is never ready.
+type dummyIndexer struct{}
+
+// Init always fails with "indexer is not ready".
+func (d *dummyIndexer) Init(ctx context.Context) (bool, error) {
+	return false, errors.New("indexer is not ready")
+}
+
+// Ping always fails with "indexer is not ready".
+func (d *dummyIndexer) Ping(ctx context.Context) error {
+	return errors.New("indexer is not ready")
+}
+
+// Close does nothing.
+func (d *dummyIndexer) Close() {}
@@ -0,0 +1,119 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Filter represents a filter for meilisearch queries.
+// It's just a simple wrapper around a string.
+// DO NOT assume that it is a complete implementation.
+type Filter interface {
+	// Statement returns the filter expression as a string.
+	// FilterAnd and FilterOr skip any filter whose Statement is empty.
+	Statement() string
+}
+
+// FilterAnd combines filters so that all of them must match.
+type FilterAnd struct {
+	filters []Filter
+}
+
+// Statement wraps every non-empty child statement in parentheses and joins them with " AND ".
+// Children with an empty statement are skipped; the result is "" when nothing remains.
+func (f *FilterAnd) Statement() string {
+	var clauses []string
+	for _, child := range f.filters {
+		stmt := child.Statement()
+		if stmt == "" {
+			continue
+		}
+		clauses = append(clauses, "("+stmt+")")
+	}
+	return strings.Join(clauses, " AND ")
+}
+
+// And appends filter to the conjunction and returns the receiver so calls can be chained.
+func (f *FilterAnd) And(filter Filter) *FilterAnd {
+	f.filters = append(f.filters, filter)
+	return f
+}
+
+// FilterOr combines filters so that at least one of them must match.
+type FilterOr struct {
+	filters []Filter
+}
+
+// Statement wraps every non-empty child statement in parentheses and joins them with " OR ".
+// Children with an empty statement are skipped; the result is "" when nothing remains.
+func (f *FilterOr) Statement() string {
+	var clauses []string
+	for _, child := range f.filters {
+		stmt := child.Statement()
+		if stmt == "" {
+			continue
+		}
+		clauses = append(clauses, "("+stmt+")")
+	}
+	return strings.Join(clauses, " OR ")
+}
+
+// Or appends filter to the disjunction and returns the receiver so calls can be chained.
+func (f *FilterOr) Or(filter Filter) *FilterOr {
+	f.filters = append(f.filters, filter)
+	return f
+}
+
+// FilterIn is a rendered "field IN [v1, v2, ...]" filter expression.
+type FilterIn string
+
+// NewFilterIn creates a new FilterIn for the given field and values.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+// With no values it returns an empty FilterIn.
+func NewFilterIn[T int64](field string, values ...T) FilterIn {
+	if len(values) == 0 {
+		return ""
+	}
+	var list strings.Builder
+	for idx, value := range values {
+		if idx > 0 {
+			list.WriteString(", ")
+		}
+		fmt.Fprintf(&list, "%v", value)
+	}
+	return FilterIn(field + " IN [" + list.String() + "]")
+}
+
+// Statement returns the filter expression as a plain string.
+func (f FilterIn) Statement() string {
+	return string(f)
+}
+
+// FilterEq is a rendered "field = value" filter expression.
+type FilterEq string
+
+// NewFilterEq creates a new FilterEq comparing field with value.
+// It supports int64 and bool only, to avoid extra work to handle strings with special characters.
+func NewFilterEq[T bool | int64](field string, value T) FilterEq {
+	rendered := fmt.Sprint(value)
+	return FilterEq(field + " = " + rendered)
+}
+
+// Statement returns the filter expression as a plain string.
+func (f FilterEq) Statement() string {
+	return string(f)
+}
+
+// FilterNot is a rendered "NOT (...)" filter expression.
+type FilterNot string
+
+// NewFilterNot creates a FilterNot that negates the given filter.
+// If the filter's statement is empty (for example a FilterIn built without values),
+// the result is empty as well instead of the malformed expression "NOT ()".
+// This follows the convention used by the other filters in this file, where an
+// empty statement means "no constraint" and is skipped by FilterAnd and FilterOr.
+func NewFilterNot(filter Filter) FilterNot {
+	inner := filter.Statement()
+	if inner == "" {
+		// Nothing to negate; do not emit an expression with empty parentheses.
+		return ""
+	}
+	return FilterNot(fmt.Sprintf("NOT (%s)", inner))
+}
+
+// Statement returns the filter expression as a plain string.
+func (f FilterNot) Statement() string {
+	return string(f)
+}
+
+// FilterGte is a rendered "field >= value" filter expression.
+type FilterGte string
+
+// NewFilterGte creates a new FilterGte requiring field to be greater than or equal to value.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+func NewFilterGte[T int64](field string, value T) FilterGte {
+	rendered := fmt.Sprint(value)
+	return FilterGte(field + " >= " + rendered)
+}
+
+// Statement returns the filter expression as a plain string.
+func (f FilterGte) Statement() string {
+	return string(f)
+}
+
+// FilterLte is a rendered "field <= value" filter expression.
+type FilterLte string
+
+// NewFilterLte creates a new FilterLte requiring field to be less than or equal to value.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+func NewFilterLte[T int64](field string, value T) FilterLte {
+	rendered := fmt.Sprint(value)
+	return FilterLte(field + " <= " + rendered)
+}
+
+// Statement returns the filter expression as a plain string.
+func (f FilterLte) Statement() string {
+	return string(f)
+}
@@ -0,0 +1,88 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	"github.com/meilisearch/meilisearch-go"
+)
+
+// Indexer represents a basic meilisearch indexer implementation.
+//
+// url and apiKey are passed to meilisearch.New by Init; indexName and version
+// are combined by VersionedIndexName; settings are sent to the index by Init
+// right after it creates the index.
+type Indexer struct {
+	// Client is nil until Init sets it, and is reset to nil by Close.
+	Client meilisearch.ServiceManager
+
+	url, apiKey string
+	indexName   string
+	version     int
+	settings    *meilisearch.Settings
+}
+
+// NewIndexer stores the connection and index parameters in a new Indexer.
+// No client is created here; Client stays nil until Init is called.
+func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearch.Settings) *Indexer {
+	indexer := new(Indexer)
+	indexer.url, indexer.apiKey = url, apiKey
+	indexer.indexName, indexer.version = indexName, version
+	indexer.settings = settings
+	return indexer
+}
+
+// Init creates the meilisearch client and makes sure the versioned index exists.
+//
+// It returns true when the index could already be fetched. Otherwise it creates
+// the index, checks for older index versions, applies the settings, and returns
+// false together with any error from applying the settings. It also returns
+// false with an error when the receiver is nil, when the indexer has already
+// been initialized, or when creating the index fails. The context argument is
+// not used.
+func (i *Indexer) Init(_ context.Context) (bool, error) {
+	switch {
+	case i == nil:
+		return false, errors.New("cannot init nil indexer")
+	case i.Client != nil:
+		return false, errors.New("indexer is already initialized")
+	}
+
+	uid := i.VersionedIndexName()
+	i.Client = meilisearch.New(i.url, meilisearch.WithAPIKey(i.apiKey))
+
+	// A successful lookup means the index is already there.
+	if _, err := i.Client.GetIndex(uid); err == nil {
+		return true, nil
+	}
+
+	// The lookup failed, so try to create the index.
+	if _, err := i.Client.CreateIndex(&meilisearch.IndexConfig{
+		Uid:        uid,
+		PrimaryKey: "id",
+	}); err != nil {
+		return false, err
+	}
+
+	i.checkOldIndexes()
+
+	_, err := i.Client.Index(uid).UpdateSettings(i.settings)
+	return false, err
+}
+
+// Ping checks if the indexer is available.
+// It fails when the receiver is nil, when Init has not set the client yet, when the
+// health request fails, or when the reported status is anything other than "available".
+// The ctx argument is not passed on to the client.
+func (i *Indexer) Ping(ctx context.Context) error {
+	switch {
+	case i == nil:
+		return errors.New("cannot ping nil indexer")
+	case i.Client == nil:
+		return errors.New("indexer is not initialized")
+	}
+
+	health, err := i.Client.Health()
+	if err != nil {
+		return err
+	}
+	if health.Status == "available" {
+		return nil
+	}
+	// See https://docs.meilisearch.com/reference/api/health.html#status
+	return fmt.Errorf("status of meilisearch is not available: %s", health.Status)
+}
+
+// Close closes the indexer by resetting Client to nil.
+// Calling it on a nil Indexer is a no-op.
+func (i *Indexer) Close() {
+	if i != nil {
+		i.Client = nil
+	}
+}
@@ -0,0 +1,38 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+	"fmt"
+
+	"gitea.dev/modules/log"
+)
+
+// VersionedIndexName returns the full index name with version.
+// It delegates to versionedIndexName using the indexer's indexName and version fields.
+func (i *Indexer) VersionedIndexName() string {
+	return versionedIndexName(i.indexName, i.version)
+}
+
+// versionedIndexName builds the meilisearch index name for the given version.
+// Version 0 denotes the old naming scheme, so indexName is returned unchanged;
+// any other version yields "<indexName>_v<version>".
+func versionedIndexName(indexName string, version int) string {
+	// The format of the index name is <index_name>_v<version>, not <index_name>.v<version> like elasticsearch.
+	// Because meilisearch does not support "." in index name, it should contain only alphanumeric characters, hyphens (-) and underscores (_).
+	// See https://www.meilisearch.com/docs/learn/core_concepts/indexes#index-uid
+	if version != 0 {
+		return fmt.Sprintf("%s_v%d", indexName, version)
+	}
+
+	// Old index name without version
+	return indexName
+}
+
+// checkOldIndexes looks for indexes belonging to versions below the current one
+// and logs a warning for each one that can be fetched. Nothing is deleted, and
+// versions whose lookup returns an error are silently skipped.
+func (i *Indexer) checkOldIndexes() {
+	for oldVersion := range i.version {
+		oldName := versionedIndexName(i.indexName, oldVersion)
+		if _, err := i.Client.GetIndex(oldName); err != nil {
+			continue
+		}
+		log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", oldName)
+	}
+}
@@ -0,0 +1,34 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+	"math"
+
+	"gitea.dev/models/db"
+)
+
+// ParsePaginator converts list options into a (skip, limit) pair.
+//
+// When paginator is nil or asks to list everything, skip is 0 and limit is the
+// first element of maxNums if one is given, otherwise math.MaxInt32.
+// When the page size is 0, both values are 0.
+// In every other case the result of paginator.GetSkipTake is returned.
+func ParsePaginator(paginator *db.ListOptions, maxNums ...int) (int, int) {
+	switch {
+	case paginator == nil || paginator.IsListAll():
+		// It shouldn't happen. In actual usage scenarios, there should not be requests to search all.
+		// But if it does happen, respect it and return "unlimited".
+		// And it's also useful for testing.
+		if len(maxNums) > 0 {
+			// Some indexer engines have a limit on the page size, respect that
+			return 0, maxNums[0]
+		}
+		// Use a very large number to indicate no limit
+		return 0, math.MaxInt32
+	case paginator.PageSize == 0:
+		// Do not return any results when searching, it's used to get the total count only.
+		return 0, 0
+	default:
+		return paginator.GetSkipTake()
+	}
+}