初始提交: Gitea 项目代码

This commit is contained in:
root
2026-05-30 22:47:36 +08:00
commit f288f76350
6116 changed files with 776822 additions and 0 deletions
@@ -0,0 +1,105 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package path
import (
"slices"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const (
Name = "gitea/path"
)
type TokenFilter struct{}
func NewTokenFilter() *TokenFilter {
return &TokenFilter{}
}
func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewTokenFilter(), nil
}
func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
if len(input) == 1 {
// if there is only one token, we don't need to generate the reversed chain
return generatePathTokens(input, false)
}
normal := generatePathTokens(input, false)
reversed := generatePathTokens(input, true)
return append(normal, reversed...)
}
// Generates path tokens from the input tokens.
// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component
// in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
//
// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
// to efficiently search for filenames without supplying the fullpath.
func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
terms := make([]string, 0, len(input))
longestTerm := 0
if reversed {
slices.Reverse(input)
}
for i := range input {
var sb strings.Builder
sb.Write(input[0].Term)
for j := 1; j < i; j++ {
sb.WriteString("/")
sb.Write(input[j].Term)
}
term := sb.String()
if longestTerm < len(term) {
longestTerm = len(term)
}
terms = append(terms, term)
}
output := make(analysis.TokenStream, 0, len(terms))
for _, term := range terms {
var start, end int
if reversed {
start = 0
end = len(term)
} else {
start = longestTerm - len(term)
end = longestTerm
}
token := analysis.Token{
Position: 1,
Start: start,
End: end,
Type: analysis.AlphaNumeric,
Term: []byte(term),
}
output = append(output, &token)
}
return output
}
func init() {
// FIXME: move it to the bleve's init function, but do not call it in global init
err := registry.RegisterTokenFilter(Name, TokenFilterConstructor)
if err != nil {
panic(err)
}
}
@@ -0,0 +1,76 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package path
import (
"fmt"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/stretchr/testify/assert"
)
type Scenario struct {
Input string
Tokens []string
}
func TestTokenFilter(t *testing.T) {
scenarios := []struct {
Input string
Terms []string
}{
{
Input: "Dockerfile",
Terms: []string{"Dockerfile"},
},
{
Input: "Dockerfile.rootless",
Terms: []string{"Dockerfile.rootless"},
},
{
Input: "a/b/c/Dockerfile.rootless",
Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
},
{
Input: "",
Terms: []string{},
},
}
for _, scenario := range scenarios {
t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
terms := extractTerms(scenario.Input)
assert.Len(t, terms, len(scenario.Terms))
for _, term := range terms {
assert.Contains(t, scenario.Terms, term)
}
})
}
}
func extractTerms(input string) []string {
tokens := tokenize(input)
filteredTokens := filter(tokens)
terms := make([]string, 0, len(filteredTokens))
for _, token := range filteredTokens {
terms = append(terms, string(token.Term))
}
return terms
}
func filter(input analysis.TokenStream) analysis.TokenStream {
filter := NewTokenFilter()
return filter.Filter(input)
}
func tokenize(input string) analysis.TokenStream {
tokenizer := unicode.NewUnicodeTokenizer()
return tokenizer.Tokenize([]byte(input))
}