初始提交: Gitea 项目代码

2026-05-30 22:47:36 +08:00
commit f288f76350
6116 changed files with 776822 additions and 0 deletions
@@ -0,0 +1,163 @@
+// Copyright 2015 The Gogs Authors. All rights reserved.
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"bytes"
+	gohtml "html"
+	"html/template"
+	"sync"
+
+	"gitea.dev/modules/log"
+	"gitea.dev/modules/setting"
+	"gitea.dev/modules/util"
+
+	"github.com/alecthomas/chroma/v2"
+	chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
+	"github.com/alecthomas/chroma/v2/styles"
+)
+
+// sizeLimit is the largest content size (in bytes) that still gets syntax highlighting,
+// larger content is only HTML-escaped for performance purposes
+const sizeLimit = 1 << 20
+
+// globalVarsType holds the package-wide highlighting state which is created lazily by globalVars()
+type globalVarsType struct {
+	// highlightMapping is the custom mapping loaded from setting.GetHighlightMapping()
+	highlightMapping map[string]string
+	// githubStyles is the chroma "github" style, it is handed to the HTML formatter
+	githubStyles *chroma.Style
+}
+
+// globalVarsMu serializes the lazy initialization done in globalVars()
+var globalVarsMu sync.Mutex
+
+// globalVarsPtr is nil until globalVars() is called for the first time
+var globalVarsPtr *globalVarsType
+
+// globalVars returns the shared highlighting state and creates it on the first call.
+// A mutex is used instead of sync.Once, because the state might need to be re-initialized when settings change in the future.
+func globalVars() *globalVarsType {
+	globalVarsMu.Lock()
+	defer globalVarsMu.Unlock()
+	if globalVarsPtr != nil {
+		return globalVarsPtr
+	}
+	vars := &globalVarsType{}
+	globalVarsPtr = vars
+	vars.githubStyles = styles.Get("github")
+	vars.highlightMapping = setting.GetHighlightMapping()
+	return vars
+}
+
+// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
+// Every line keeps its '\n' if it has one: the '\n' is either the last byte of the line,
+// or it is followed by the closing tag which was written right after it
+// The '\n' is necessary for copying code from web UI to preserve original code lines
+// ATTENTION: It uses the unsafe conversion between string and []byte for performance reason
+// DO NOT make any modification to the returned [][]byte slice items
+func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
+	remaining := util.UnsafeStringToBytes(string(code))
+	ret = make([][]byte, 0, bytes.Count(remaining, []byte("\n"))+1)
+	newlineThenClosingTag := []byte("\n</")
+	for len(remaining) > 0 {
+		end := bytes.IndexByte(remaining, '\n')
+		if end < 0 {
+			// no more EOL: the rest is the last line
+			return append(ret, remaining)
+		}
+		// Chroma highlighting output sometimes have "</span>" right after \n, sometimes before.
+		// * "<span>text\n</span>"
+		// * "<span>text</span>\n"
+		// In the first form, the closing tag still belongs to the current line
+		if bytes.HasPrefix(remaining[end:], newlineThenClosingTag) {
+			if tagEnd := bytes.IndexByte(remaining[end:], '>'); tagEnd >= 0 {
+				end += tagEnd
+			}
+		}
+		ret = append(ret, remaining[:end+1])
+		remaining = remaining[end+1:]
+	}
+	return ret
+}
+
+// htmlEscape escapes the special HTML characters of code and marks the result as safe HTML
+func htmlEscape(code string) template.HTML {
+	escaped := gohtml.EscapeString(code)
+	return template.HTML(escaped)
+}
+
+// RenderCodeSlowGuess tries to get a lexer by file name and language first,
+// if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds).
+// It returns the rendered HTML, the lexer which was used, and the display name of that lexer.
+// Empty code and code larger than sizeLimit are not highlighted: the returned lexer is nil and the display name is empty.
+func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) {
+	switch {
+	case code == "", code == "\n":
+		// diff view newline will be passed as empty, change to literal '\n' so it can be copied
+		// preserve literal newline in blame view
+		return "\n", nil, ""
+	case len(code) > sizeLimit:
+		// too large to highlight, only escape it
+		return htmlEscape(code), nil, ""
+	}
+
+	// the detection is also slow
+	lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code))
+	output = RenderCodeByLexer(lexer, code)
+	lexerDisplayName = formatLexerName(lexer.Config().Name)
+	return output, lexer, lexerDisplayName
+}
+
+// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes.
+// The output uses CSS classes, has no line numbers and no surrounding <pre> element.
+// If lexer is nil, or if tokenizing or formatting fails, the code is returned HTML-escaped without highlighting
+// (failures are written to the error log).
+func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
+	if lexer == nil {
+		// RenderCodeSlowGuess returns a nil lexer for empty or oversized code, a caller might pass it back here,
+		// calling Tokenise on a nil interface would panic
+		return htmlEscape(code)
+	}
+
+	formatter := chromahtml.New(chromahtml.WithClasses(true),
+		chromahtml.WithLineNumbers(false),
+		chromahtml.PreventSurroundingPre(true),
+	)
+
+	iterator, err := lexer.Tokenise(nil, code)
+	if err != nil {
+		log.Error("Can't tokenize code: %v", err)
+		return htmlEscape(code)
+	}
+
+	htmlBuf := &bytes.Buffer{}
+	// style not used for live site but need to pass something
+	err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
+	if err != nil {
+		log.Error("Can't format code: %v", err)
+		return htmlEscape(code)
+	}
+	// the buffer isn't used after this point, so its bytes can be converted without a copy
+	return template.HTML(util.UnsafeBytesToString(htmlBuf.Bytes()))
+}
+
+// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
+// If the language is LanguagePlaintext or the code is larger than sizeLimit, the lines are only HTML-escaped
+func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string) {
+	skipHighlight := language == LanguagePlaintext || len(code) > sizeLimit
+	if skipHighlight {
+		return renderPlainText(code), formatLexerName(LanguagePlaintext)
+	}
+
+	lexer := detectChromaLexerWithAnalyze(fileName, language, code)
+	displayName := formatLexerName(lexer.Config().Name)
+	highlighted := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
+
+	// the split result shares the memory of the highlighted string, it is only converted, never modified
+	split := UnsafeSplitHighlightedLines(highlighted)
+	htmlLines := make([]template.HTML, 0, len(split))
+	for _, raw := range split {
+		htmlLines = append(htmlLines, template.HTML(util.UnsafeBytesToString(raw)))
+	}
+	return htmlLines, displayName
+}
+
+// renderPlainText returns non-highlighted HTML for code
+func renderPlainText(code []byte) []template.HTML {
+	lines := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
+	pos := 0
+	for pos < len(code) {
+		var content []byte
+		nextPos := bytes.IndexByte(code[pos:], '\n')
+		if nextPos == -1 {
+			content = code[pos:]
+			pos = len(code)
+		} else {
+			content = code[pos : pos+nextPos+1]
+			pos += nextPos + 1
+		}
+		lines = append(lines, htmlEscape(util.UnsafeBytesToString(content)))
+	}
+	return lines
+}
+
+// formatLexerName converts a lexer name to the name for displaying:
+// the plaintext language and the fallback lexer are displayed as "Plaintext",
+// other names are converted by util.ToTitleCaseNoLower
+func formatLexerName(name string) string {
+	switch name {
+	case LanguagePlaintext, chromaLexerFallback:
+		return "Plaintext"
+	default:
+		return util.ToTitleCaseNoLower(name)
+	}
+}
@@ -0,0 +1,218 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"html/template"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// lines builds the expected result for the tests from a readable (multi-line) literal:
+// the surrounding white space and all real line breaks are removed, then every literal `\n` (backslash + n) becomes an EOL
+// "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"] (each line always includes EOL "\n" if it exists)
+func lines(s string) (out []template.HTML) {
+	compact := strings.ReplaceAll(strings.TrimSpace(s), "\n", "")
+	rest := strings.ReplaceAll(compact, `\n`, "\n")
+	out = make([]template.HTML, 0)
+	for {
+		line, remaining, found := strings.Cut(rest, "\n")
+		if !found {
+			break
+		}
+		out = append(out, template.HTML(line+"\n"))
+		rest = remaining
+	}
+	if rest != "" {
+		// the last line has no EOL
+		out = append(out, template.HTML(rest))
+	}
+	return out
+}
+
+// TestFile checks RenderFullFile: the file name of each case (the language is always passed as empty)
+// decides the lexer, then the highlighted lines and the lexer display name are compared with the expectation.
+func TestFile(t *testing.T) {
+	// "want" is mostly built by lines(), which removes the real line breaks of the raw strings
+	// and converts every literal `\n` to an EOL, so each expected line ends with its own "\n"
+	tests := []struct {
+		name      string
+		code      string
+		want      []template.HTML
+		lexerName string
+	}{
+		// empty content produces no line at all, but the lexer is still detected by the file name
+		{
+			name:      "empty.py",
+			code:      "",
+			want:      lines(""),
+			lexerName: "Python",
+		},
+		{
+			name:      "empty.js",
+			code:      "",
+			want:      lines(""),
+			lexerName: "JavaScript",
+		},
+		{
+			name:      "empty.yaml",
+			code:      "",
+			want:      lines(""),
+			lexerName: "YAML",
+		},
+		// the special HTML characters must be escaped, with or without highlighting
+		{
+			name:      "tags.txt",
+			code:      "<>",
+			want:      lines("&lt;&gt;"),
+			lexerName: "Plaintext",
+		},
+		{
+			name:      "tags.py",
+			code:      "<>",
+			want:      lines(`<span class="o">&lt;&gt;</span>`),
+			lexerName: "Python",
+		},
+		// different EOL situations at the end of the content
+		{
+			name:      "eol-no.py",
+			code:      "a=1",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
+			lexerName: "Python",
+		},
+		{
+			name:      "eol-newline1.py",
+			code:      "a=1\n",
+			want:      lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
+			lexerName: "Python",
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+			`,
+			),
+			lexerName: "Python",
+		},
+		// "{space}" is replaced by 4 spaces to get a line which only contains white space
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+    a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", "    "),
+			want: lines(`
+<span class="n">def</span><span class="p">:</span>\n
+    <span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
+\n
+<span class="n">b</span><span class="o">=</span><span class="s1">&#39;&#39;</span>\n
+    \n
+<span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
+			),
+			lexerName: "Python",
+		},
+		// here the "\n" is inside the comment span, so the closing tag after it must stay in the first line
+		{
+			name:      "test.sql",
+			code:      "--\nSELECT",
+			want:      []template.HTML{"<span class=\"c1\">--\n</span>", `<span class="k">SELECT</span>`},
+			lexerName: "SQL",
+		},
+		{
+			name: "test.http",
+			code: `HTTP/1.0 400 Bad request
+Content-Type: text/html
+
+<html></html>`,
+			want: lines(`<span class="kr">HTTP</span><span class="o">/</span><span class="m">1.0</span> <span class="m">400</span> <span class="ne">Bad request</span>\n
+<span class="n">Content-Type</span><span class="o">:</span> <span class="l">text/html</span>\n
+\n
+<span class="p">&lt;</span><span class="nt">html</span><span class="p">&gt;&lt;/</span><span class="nt">html</span><span class="p">&gt;</span>`),
+			lexerName: "HTTP",
+		},
+	}
+
+	// one sub-test per case, named by the file name
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out, lexerName := RenderFullFile(tt.name, "", []byte(tt.code))
+			assert.Equal(t, tt.want, out)
+			assert.Equal(t, tt.lexerName, lexerName)
+		})
+	}
+}
+
+// TestPlainText checks renderPlainText: the content is only split into lines and HTML-escaped.
+// The "name" of a case is only the sub-test name, it isn't passed to renderPlainText.
+func TestPlainText(t *testing.T) {
+	tests := []struct {
+		name string
+		code string
+		want []template.HTML
+	}{
+		// empty content produces no line at all
+		{
+			name: "empty.py",
+			code: "",
+			want: lines(""),
+		},
+		// the special HTML characters must be escaped
+		{
+			name: "tags.py",
+			code: "<>",
+			want: lines("&lt;&gt;"),
+		},
+		// different EOL situations at the end of the content
+		{
+			name: "eol-no.py",
+			code: "a=1",
+			want: lines(`a=1`),
+		},
+		{
+			name: "eol-newline1.py",
+			code: "a=1\n",
+			want: lines(`a=1\n`),
+		},
+		{
+			name: "eol-newline2.py",
+			code: "a=1\n\n",
+			want: lines(`
+a=1\n
+\n
+			`),
+		},
+		// "{space}" is replaced by 4 spaces to get a line which only contains white space
+		{
+			name: "empty-line-with-space.py",
+			code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+    a=1
+
+b=''
+{space}
+c=2
+			`), "{space}", "    "),
+			want: lines(`
+def:\n
+    a=1\n
+\n
+b=&#39;&#39;\n
+    \n
+c=2`),
+		},
+	}
+
+	// one sub-test per case
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out := renderPlainText([]byte(tt.code))
+			assert.Equal(t, tt.want, out)
+		})
+	}
+}
+
+// TestUnsafeSplitHighlightedLines checks the line splitting for empty input, for input without EOL,
+// for a single EOL, and for a closing tag which follows the EOL
+func TestUnsafeSplitHighlightedLines(t *testing.T) {
+	cases := []struct {
+		input template.HTML
+		want  []string
+	}{
+		{input: "", want: nil},
+		{input: "a", want: []string{"a"}},
+		{input: "\n", want: []string{"\n"}},
+		{input: "<span>a</span>\n<span>b\n</span>", want: []string{"<span>a</span>\n", "<span>b\n</span>"}},
+	}
+	for _, c := range cases {
+		got := UnsafeSplitHighlightedLines(c.input)
+		if len(c.want) == 0 {
+			assert.Empty(t, got)
+			continue
+		}
+		assert.Len(t, got, len(c.want))
+		for i, wantLine := range c.want {
+			assert.Equal(t, wantLine, string(got[i]))
+		}
+	}
+}
@@ -0,0 +1,312 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"path"
+	"strings"
+	"sync"
+
+	"gitea.dev/modules/analyze"
+	"gitea.dev/modules/log"
+
+	"github.com/alecthomas/chroma/v2"
+	"github.com/alecthomas/chroma/v2/lexers"
+	"github.com/go-enry/go-enry/v2"
+)
+
+// mapKeyLowerPrefix is the key prefix of the lower-cased entries in the file name lookup maps,
+// it is used to do the case-insensitive lookup after the exact lookup fails
+const mapKeyLowerPrefix = "lower/"
+
+// LanguagePlaintext is the language name for content which should not be highlighted
+const LanguagePlaintext = "plaintext"
+
+// chromaLexerFallback is the lexer name which is displayed as "Plaintext" by formatLexerName
+const chromaLexerFallback = "fallback"
+
+// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
+// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)
+//
+// The lookup tables are built once (on the first call) from lexers.GlobalLexerRegistry:
+// * lowerNameMap: lower-cased lexer names and aliases
+// * fileBaseMap: full base names (patterns without any wildcard)
+// * fileExtMap: extensions (patterns like "*.js")
+// * fileParts: name parts (patterns which end with "*"), matched by "contains"
+// The file name maps contain every key twice: the exact key, and the lower-cased key with mapKeyLowerPrefix.
+// The initialization panics if the chroma lexers contain a pattern or a conflict which isn't handled here,
+// then the preset mappings in this function need to be updated.
+var chromaLexers = sync.OnceValue(func() (ret struct {
+	conflictingExtLangMap   map[string]string
+	conflictingAliasLangMap map[string]string
+
+	lowerNameMap map[string]chroma.Lexer // lexer name (lang name) in lower-case
+	fileBaseMap  map[string]chroma.Lexer
+	fileExtMap   map[string]chroma.Lexer
+	fileParts    []struct {
+		part  string
+		lexer chroma.Lexer
+	}
+},
+) {
+	ret.lowerNameMap = make(map[string]chroma.Lexer)
+	ret.fileBaseMap = make(map[string]chroma.Lexer)
+	ret.fileExtMap = make(map[string]chroma.Lexer)
+
+	// Chroma has conflicts in file extension for different languages,
+	// When we need to do fast render, there is no way to detect the language by content,
+	// So we can only choose some default languages for the conflicted file extensions.
+	// The value is the chosen lexer name, the trailing comments list the other lexers which claim the same extension.
+	ret.conflictingExtLangMap = map[string]string{
+		".as":      "ActionScript 3", // ActionScript
+		".asm":     "NASM",           // TASM, NASM, RGBDS Assembly, Z80 Assembly
+		".ASM":     "NASM",
+		".bas":     "VB.net",       // QBasic
+		".bf":      "Beef",         // Brainfuck
+		".fs":      "FSharp",       // Forth
+		".gd":      "GDScript",     // GDScript3
+		".h":       "C",            // Objective-C
+		".hcl":     "Terraform",    // HCL
+		".hh":      "C++",          // HolyC
+		".inc":     "PHP",          // ObjectPascal, POVRay, SourcePawn, PHTML
+		".m":       "Objective-C",  // Matlab, Mathematica, Mason
+		".mc":      "Mason",        // MonkeyC
+		".mod":     "AMPL",         // Modula-2
+		".network": "SYSTEMD",      // INI
+		".php":     "PHP",          // PHTML
+		".php3":    "PHP",          // PHTML
+		".php4":    "PHP",          // PHTML
+		".php5":    "PHP",          // PHTML
+		".pl":      "Perl",         // Prolog, Raku
+		".pm":      "Perl",         // Promela, Raku
+		".pp":      "ObjectPascal", // Puppet
+		".s":       "ArmAsm",       // GAS
+		".S":       "ArmAsm",       // R, GAS
+		".service": "SYSTEMD",      // INI
+		".socket":  "SYSTEMD",      // INI
+		".sql":     "SQL",          // MySQL
+		".t":       "Perl",         // Raku
+		".ts":      "TypeScript",   // TypoScript
+		".v":       "V",            // verilog
+		".xslt":    "HTML",         // XML
+	}
+	// use widely used language names as the default mapping to resolve name alias conflict
+	ret.conflictingAliasLangMap = map[string]string{
+		"hcl": "HCL", // Terraform
+		"v":   "V",   // verilog
+	}
+
+	// isPlainPattern reports whether key contains no glob meta character
+	isPlainPattern := func(key string) bool {
+		return !strings.ContainsAny(key, "*?[]") // only support simple patterns
+	}
+
+	// setFileNameMapWithLowerKey registers the lexer for the exact key and for the lower-cased key.
+	// Only a duplicate exact key is treated as a conflict, a lower-cased key is silently overwritten.
+	setFileNameMapWithLowerKey := func(m map[string]chroma.Lexer, key string, lexer chroma.Lexer) {
+		if _, conflict := m[key]; conflict {
+			panic("duplicate key in lexer map: " + key + ", need to add it to conflictingExtLangMap")
+		}
+		m[key] = lexer
+		m[mapKeyLowerPrefix+strings.ToLower(key)] = lexer
+	}
+
+	// processFileName puts one file name pattern into the matching lookup table,
+	// it returns false if the pattern is not supported
+	processFileName := func(fileName string, lexer chroma.Lexer) bool {
+		if isPlainPattern(fileName) {
+			// full base name match
+			setFileNameMapWithLowerKey(ret.fileBaseMap, fileName, lexer)
+			return true
+		}
+		if strings.HasPrefix(fileName, "*") {
+			// ext name match: "*.js"
+			// Only the leading "*" is removed: a pattern which also ends with "*" (like "*.env.*") isn't an extension,
+			// it stays non-plain here and falls through to the part match below
+			fileExt := strings.TrimPrefix(fileName, "*")
+			if isPlainPattern(fileExt) {
+				// for a conflicting extension, only the preset lexer is registered, the others are skipped
+				presetName := ret.conflictingExtLangMap[fileExt]
+				if presetName == "" || lexer.Config().Name == presetName {
+					setFileNameMapWithLowerKey(ret.fileExtMap, fileExt, lexer)
+				}
+				return true
+			}
+		}
+		if strings.HasSuffix(fileName, "*") {
+			// part match: "*.env.*"
+			filePart := strings.Trim(fileName, "*")
+			if isPlainPattern(filePart) {
+				ret.fileParts = append(ret.fileParts, struct {
+					part  string
+					lexer chroma.Lexer
+				}{
+					part:  filePart,
+					lexer: lexer,
+				})
+				return true
+			}
+		}
+		return false
+	}
+
+	expandGlobPatterns := func(patterns []string) []string {
+		// expand patterns like "file.[ch]" to "file.c" and "file.h", only one pair of "[]" is supported, enough for current Chroma lexers
+		// Work on a copy: the input is the Filenames/AliasFilenames slice of the lexer's config,
+		// rewriting it in place would change the patterns which chroma itself uses
+		patterns = append([]string(nil), patterns...)
+		for idx, s := range patterns {
+			idx1 := strings.IndexByte(s, '[')
+			idx2 := strings.IndexByte(s, ']')
+			if idx1 != -1 && idx2 != -1 && idx2 > idx1+1 {
+				left, mid, right := s[:idx1], s[idx1+1:idx2], s[idx2+1:]
+				// the first character replaces the pattern itself, the other characters are appended as new patterns
+				patterns[idx] = left + mid[0:1] + right
+				for i := 1; i < len(mid); i++ {
+					patterns = append(patterns, left+mid[i:i+1]+right)
+				}
+			}
+		}
+		return patterns
+	}
+
+	// processLexerNameAliases registers the lexer by its lower-cased name and by its lower-cased aliases
+	processLexerNameAliases := func(lexer chroma.Lexer) {
+		cfg := lexer.Config()
+		lowerName := strings.ToLower(cfg.Name)
+		if _, conflicted := ret.lowerNameMap[lowerName]; conflicted {
+			panic("duplicate language name in lexer map: " + lowerName)
+		}
+		ret.lowerNameMap[lowerName] = lexer
+
+		for _, name := range cfg.Aliases {
+			lowerName := strings.ToLower(name)
+			// a conflicting alias is reserved for its preset lexer, skip it for the other lexers
+			if overriddenName, overridden := ret.conflictingAliasLangMap[lowerName]; overridden && overriddenName != cfg.Name {
+				continue
+			}
+			if existingLexer, conflict := ret.lowerNameMap[lowerName]; conflict && existingLexer.Config().Name != cfg.Name {
+				panic("duplicate alias in lexer map: " + name + ", conflict between " + existingLexer.Config().Name + " and " + cfg.Name)
+			}
+			ret.lowerNameMap[lowerName] = lexer
+		}
+	}
+
+	// the main loop: build our lookup maps for lexers
+	for _, lexer := range lexers.GlobalLexerRegistry.Lexers {
+		cfg := lexer.Config()
+		processLexerNameAliases(lexer)
+		for _, s := range expandGlobPatterns(cfg.Filenames) {
+			if !processFileName(s, lexer) {
+				panic("unsupported file name pattern in lexer: " + s)
+			}
+		}
+		for _, s := range expandGlobPatterns(cfg.AliasFilenames) {
+			if !processFileName(s, lexer) {
+				panic("unsupported alias file name pattern in lexer: " + s)
+			}
+		}
+	}
+
+	// final check: make sure the default overriding mapping is correct, nothing is missing
+	for lowerName, lexerName := range ret.conflictingAliasLangMap {
+		if lexer, ok := ret.lowerNameMap[lowerName]; !ok || lexer.Config().Name != lexerName {
+			panic("missing default name-lang mapping for: " + lowerName)
+		}
+	}
+	for ext, lexerName := range ret.conflictingExtLangMap {
+		if lexer, ok := ret.fileExtMap[ext]; !ok || lexer.Config().Name != lexerName {
+			panic("missing default ext-lang mapping for: " + ext)
+		}
+	}
+	return ret
+})
+
+// normalizeFileNameLang prepares the file name and the language name for the lexer lookup.
+// It returns the base name of fileName (an empty fileName stays empty),
+// and fileLang without its "?" parameters, renamed for the languages which are handled below.
+func normalizeFileNameLang(fileName, fileLang string) (string, string) {
+	if fileName != "" {
+		// path.Base returns "." for an empty path, so an empty name must be kept as it is,
+		// otherwise the caller can't detect "no file name" anymore
+		fileName = path.Base(fileName)
+	}
+	fileLang, _, _ = strings.Cut(fileLang, "?") // maybe, the value from gitattributes might contain `?` parameters?
+	ext := path.Ext(fileName)
+	// the "lang" might come from enry or gitattributes, it has different naming for some languages
+	switch fileLang {
+	case "F#":
+		fileLang = "FSharp"
+	case "Pascal":
+		fileLang = "ObjectPascal"
+	case "C":
+		// the upper-case extensions are treated as C++
+		if ext == ".C" || ext == ".H" {
+			fileLang = "C++"
+		}
+	}
+	return fileName, fileLang
+}
+
+// DetectChromaLexerByFileName returns the chroma lexer for the given file name and language name.
+// It only uses the fast lookup by name, the file content is not analyzed.
+// If no lexer matches, the fallback lexer is returned.
+func DetectChromaLexerByFileName(fileName, fileLang string) chroma.Lexer {
+	detected, _ := detectChromaLexerByFileName(fileName, fileLang)
+	return detected
+}
+
+// detectChromaLexerByFileName looks up a chroma lexer by name only, in this order:
+// the custom extension mapping, the language name, the full base name, the extension, the file name part.
+// The fallback lexer is returned if nothing matches.
+// byLang is true only if the lexer was found by the language name.
+func detectChromaLexerByFileName(fileName, fileLang string) (_ chroma.Lexer, byLang bool) {
+	fileName, fileLang = normalizeFileNameLang(fileName, fileLang)
+	fileExt := path.Ext(fileName)
+
+	// apply custom mapping for file extension, highest priority, for example:
+	// * ".my-js" -> ".js"
+	// * ".my-html" -> "HTML"
+	if fileExt != "" {
+		if val, ok := globalVars().highlightMapping[fileExt]; ok {
+			if strings.HasPrefix(val, ".") {
+				fileName = "dummy" + val
+				// the lookups below must use the mapped extension, not the original one,
+				// otherwise the mapped extension is never found
+				fileExt = path.Ext(fileName)
+				fileLang = ""
+			} else {
+				fileLang = val
+			}
+		}
+	}
+
+	// try to use language for lexer name
+	if fileLang != "" {
+		lexer := chromaLexers().lowerNameMap[strings.ToLower(fileLang)]
+		if lexer != nil {
+			return lexer, true
+		}
+	}
+
+	if fileName == "" {
+		return lexers.Fallback, false
+	}
+
+	// try base name: the exact name first, then case-insensitive
+	{
+		baseName := path.Base(fileName)
+		if lexer, ok := chromaLexers().fileBaseMap[baseName]; ok {
+			return lexer, false
+		} else if lexer, ok = chromaLexers().fileBaseMap[mapKeyLowerPrefix+strings.ToLower(baseName)]; ok {
+			return lexer, false
+		}
+	}
+
+	if fileExt == "" {
+		return lexers.Fallback, false
+	}
+
+	// try ext name: the exact extension first, then case-insensitive
+	{
+		if lexer, ok := chromaLexers().fileExtMap[fileExt]; ok {
+			return lexer, false
+		} else if lexer, ok = chromaLexers().fileExtMap[mapKeyLowerPrefix+strings.ToLower(fileExt)]; ok {
+			return lexer, false
+		}
+	}
+
+	// try file part match, for example: ".env.local" for "*.env.*"
+	// it assumes that there must be a dot in filename (fileExt isn't empty)
+	for _, item := range chromaLexers().fileParts {
+		if strings.Contains(fileName, item.part) {
+			return item.lexer, false
+		}
+	}
+	return lexers.Fallback, false
+}
+
+// detectChromaLexerWithAnalyze returns a chroma lexer by given file name, language and code content. All parameters can be optional.
+// The lookup by language and by file name is tried first. The code content is only analyzed if that lookup finds no lexer,
+// or if the file extension is shared by several languages. The analysis is slow.
+// If no lexer is found, it will return the fallback lexer.
+func detectChromaLexerWithAnalyze(fileName, lang string, code []byte) chroma.Lexer {
+	detected, matchedByLang := detectChromaLexerByFileName(fileName, lang)
+	if matchedByLang {
+		// the provided language matches a lexer, use it directly
+		return chroma.Coalesce(detected)
+	}
+
+	// a lexer which was found by the file name is only reliable if its extension isn't shared by several languages
+	_, extIsAmbiguous := chromaLexers().conflictingExtLangMap[path.Ext(fileName)]
+	if detected != lexers.Fallback && !extIsAmbiguous {
+		return chroma.Coalesce(detected)
+	}
+
+	// try to detect language by content, for best guessing for the language
+	// when using "code" to detect, analyze.GetCodeLanguage is slow, it iterates many rules to detect language from content
+	analyzed := analyze.GetCodeLanguage(fileName, code)
+	detected, _ = detectChromaLexerByFileName(fileName, analyzed)
+	if detected == lexers.Fallback && analyzed != enry.OtherLanguage {
+		log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", analyzed, fileName)
+	}
+	return chroma.Coalesce(detected)
+}
@@ -0,0 +1,116 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/alecthomas/chroma/v2/lexers"
+	"github.com/stretchr/testify/assert"
+)
+
+func BenchmarkDetectChromaLexerByFileName(b *testing.B) {
+	for b.Loop() {
+		// BenchmarkDetectChromaLexerByFileName-12    	18214717	        61.35 ns/op
+		DetectChromaLexerByFileName("a.sql", "")
+	}
+}
+
+// BenchmarkDetectChromaLexerWithAnalyze measures the lexer detection by content for a file name without extension
+// Sample result (it is labelled with another benchmark name, presumably recorded before a rename):
+// BenchmarkRenderCodeSlowGuess-12    	   87946	     13310 ns/op
+func BenchmarkDetectChromaLexerWithAnalyze(b *testing.B) {
+	// preparing the content is not measured
+	b.StopTimer()
+	content := []byte(strings.Repeat("SELECT * FROM table;\n", 1000))
+	b.StartTimer()
+	for b.Loop() {
+		detectChromaLexerWithAnalyze("a", "", content)
+	}
+}
+
+// BenchmarkChromaAnalyze measures the content analysis of Chroma itself, for comparison:
+// comparing to detectChromaLexerWithAnalyze (go-enry), "chroma/lexers.Analyse" is very slow
+// Sample result: BenchmarkChromaAnalyze-12    	     519	   2247104 ns/op
+func BenchmarkChromaAnalyze(b *testing.B) {
+	// preparing the content is not measured
+	b.StopTimer()
+	content := strings.Repeat("SELECT * FROM table;\n", 1000)
+	b.StartTimer()
+	for b.Loop() {
+		lexers.Analyse(content)
+	}
+}
+
+// BenchmarkRenderCodeByLexer measures the highlighting of 1000 SQL lines with a lexer which is detected in advance
+// Really slow ....... the regexp2 used by Chroma takes most of the time
+// Sample result: BenchmarkRenderCodeByLexer-12    	      22	  47159038 ns/op
+func BenchmarkRenderCodeByLexer(b *testing.B) {
+	// preparing the content and detecting the lexer are not measured
+	b.StopTimer()
+	content := strings.Repeat("SELECT * FROM table;\n", 1000)
+	sqlLexer := DetectChromaLexerByFileName("a.sql", "")
+	b.StartTimer()
+	for b.Loop() {
+		RenderCodeByLexer(sqlLexer, content)
+	}
+}
+
+func TestDetectChromaLexer(t *testing.T) {
+	globalVars().highlightMapping[".my-html"] = "HTML"
+	t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
+
+	casesWithContent := []struct {
+		fileName string
+		language string
+		content  string
+		expected string
+	}{
+		{"test.v", "", "", "V"},
+		{"test.v", "any-lang-name", "", "V"},
+
+		{"any-file", "javascript", "", "JavaScript"},
+		{"any-file", "", "/* vim: set filetype=python */", "Python"},
+		{"any-file", "", "", "fallback"},
+
+		{"test.fs", "", "", "FSharp"},
+		{"test.fs", "F#", "", "FSharp"},
+		{"test.fs", "", "let x = 1", "FSharp"},
+
+		{"test.c", "", "", "C"},
+		{"test.C", "", "", "C++"},
+		{"OLD-CODE.PAS", "", "", "ObjectPascal"},
+		{"test.my-html", "", "", "HTML"},
+
+		{"a.php", "", "", "PHP"},
+		{"a.sql", "", "", "SQL"},
+		{"dhcpd.conf", "", "", "ISCdhcpd"},
+		{".env.my-production", "", "", "Bash"},
+
+		{"a.hcl", "", "", "HCL"}, // not the same as Chroma, enry detects "*.hcl" as "HCL"
+		{"a.hcl", "HCL", "", "HCL"},
+		{"a.hcl", "Terraform", "", "Terraform"},
+	}
+	for _, c := range casesWithContent {
+		lexer := detectChromaLexerWithAnalyze(c.fileName, c.language, []byte(c.content))
+		if assert.NotNil(t, lexer, "case: %+v", c) {
+			assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
+		}
+	}
+
+	casesNameLang := []struct {
+		fileName string
+		language string
+		expected string
+		byLang   bool
+	}{
+		{"a.v", "", "V", false},
+		{"a.v", "V", "V", true},
+		{"a.v", "verilog", "verilog", true},
+		{"a.v", "any-lang-name", "V", false},
+
+		{"a.hcl", "", "Terraform", false}, // not the same as enry
+		{"a.hcl", "HCL", "HCL", true},
+		{"a.hcl", "Terraform", "Terraform", true},
+	}
+	for _, c := range casesNameLang {
+		lexer, byLang := detectChromaLexerByFileName(c.fileName, c.language)
+		assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
+		assert.Equal(t, c.byLang, byLang, "case: %+v", c)
+	}
+}