初始提交: Gitea 项目代码

This commit is contained in:
root
2026-05-30 22:47:36 +08:00
commit f288f76350
6116 changed files with 776822 additions and 0 deletions
+137
View File
@@ -0,0 +1,137 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package incoming
import (
"regexp"
"strings"
"sync"
"gitea.dev/modules/util"
)
// Regular-expression fragments that are concatenated into the larger patterns
// compiled in this file. They are plain strings, not compiled expressions.
const (
yearToken = `\b\d{4}\b` // 4-digit year
timeToken = `\b\d{1,2}[:.]\d{2}\b` // HH:MM or HH.MM
// Alternation of "wrote" verbs that end an attribution line in Latin- and
// Cyrillic-script languages. The CJK verbs are kept in a separate alternation
// so they can be matched without a preceding word separator, since those
// scripts don't put spaces between words.
wroteVerbs = `wrote|writes|schrieb|skrev|napisał|escreveu|escribió|написал|пише|a écrit`
cjkWroteVerbs = `写道|寫道|書きました|작성`
// Device names that must appear in a CJK mobile signature, so ordinary prose
// is not mistaken for one. Written in lowercase; the patterns using this
// fragment are compiled case-insensitively.
cjkDevice = `iphone|ipad|ipod|android|galaxy|手机|手機|平板`
)
// Forwarded-mail header field names across the common mail clients and locales.
//
// headerFromFields holds the "From"-equivalents; one of them must begin a header
// block. headerFields is the full set allowed on the following line and also
// includes every entry of headerFromFields. Both are matched as a prefix by
// headerLine, which lowercases the line first, so entries must be lowercase and
// adding a locale is a one-line change.
var (
headerFromFields = []string{
"from", "fra", "de", "von", "da", "van", "från", "expéditeur",
"发件人", "寄件者", "差出人", "보낸사람",
}
// the non-"From" fields, followed by all of headerFromFields
headerFields = append([]string{
"to", "cc", "bcc", "sent", "date", "subject", "reply-to",
"til", "emne", "an", "betreff", "gesendet", "para", "assunto", "asunto",
"risposta", "inviato", "oggetto", "destinataire", "objet", "répondre à",
"aan", "onderwerp", "beantwoorden", "skickat", "till", "ämne",
"收件人", "主题", "主旨", "主題", "收件者", "抄送", "日期", "宛先", "件名", "받는사람", "제목",
}, headerFromFields...)
)
// patterns returns the compiled boundary patterns used to cut a reply body.
// Compilation happens once, on first call (sync.OnceValue), so the
// incoming-mail feature adds nothing to startup. All three expressions are
// anchored with ^...$ and case-insensitive; signature has no allowance for
// surrounding whitespace, so callers should match against a trimmed line.
var patterns = sync.OnceValue(func() (ret struct {
	signature, attribution, separator *regexp.Regexp
},
) {
	// Colon that terminates an attribution line: ASCII ":" or the CJK fullwidth
	// colon U+FF1A. The fullwidth form is spelled as an escape so it cannot be
	// lost to encoding or normalization issues; without it, lines such as
	// "... 写道：" would not be recognized as attributions.
	const colon = `[:\x{FF1A}]`

	// "--" delimiter (the trimmed form of "-- ") and common mobile footers with
	// frequent localizations. The CJK forms require a device name so ordinary
	// prose like "发自我的内心" or "会議から送信" is not mistaken for a signature.
	ret.signature = regexp.MustCompile(`(?i)^(--|__|—` +
		`|sent (from|via|with) .+|get outlook for .+` +
		`|envoyé depuis mon .+|sendt fra min .+|von meinem .+|verzonden (met|vanaf) .+` +
		`|(發|发)自我的.*(` + cjkDevice + `).*` +
		`|.*(` + cjkDevice + `).*(から送信|에서 보냄|傳送|发送))$`)

	// Attribution introducing quoted history, optionally behind ">" quote
	// markers. Alternatives, in order:
	//   1. a line ending in a Latin/Cyrillic "wrote" verb plus a colon,
	//   2. a line ending in a CJK "wrote" verb plus a colon,
	//   3. a "Name <email> wrote" line (no colon required),
	//   4. a lead word directly followed by a day number or weekday, then a
	//      year and a time,
	//   5. an ISO-date-led line containing a time.
	// The date phrasing, trailing colon and the email before the verb guard
	// against prose (so "On the 2024 roadmap … at 10:00" is not an attribution).
	ret.attribution = regexp.MustCompile(`(?i)^>*\s*(` +
		`.*[\s">'](` + wroteVerbs + `)\s*` + colon +
		`|.*(` + cjkWroteVerbs + `)\s*` + colon +
		`|.*<\S+@\S+>\s+(` + wroteVerbs + `)\b.*` +
		`|(on|at|le|am|el|em|den|il|op|dnia|w dniu)\b[\s,]*(\d|(?:mon|tue|wed|thu|fri|sat|sun)\b).*` + yearToken + `.*` + timeToken + `.*` +
		`|\d{4}-\d{2}-\d{2}\b.*` + timeToken + `.*` +
		`)$`)

	// A dash/underscore rule line, or text fenced by dashes such as
	// "-------- Original Message --------" or "-----Mensaje original-----".
	ret.separator = regexp.MustCompile(`(?i)^\s*\*?\s*([-_]{5,}|-{2,}.+-{2,}|original message|forwarded message)\s*\*?\s*$`)
	return ret
})
// extractReply strips everything that is not the author's own text from a
// plain-text email body: quoted history, the reply attribution line, signatures
// and forwarded-mail headers. It is a slim, dependency-free reimplementation
// based on github.com/dimiro1/reply (MIT), covering the common mail-client
// formats and languages; bottom posting and forwarded bodies are not handled.
func extractReply(text string) string {
	re := patterns()
	body := strings.Split(util.NormalizeStringEOL(text), "\n")

	// Truncate at the first boundary line: a signature, an attribution, a
	// separator, or the start of a forwarded header block.
	for idx, raw := range body {
		line := strings.TrimSpace(raw)
		boundary := re.signature.MatchString(line) ||
			re.attribution.MatchString(line) ||
			re.separator.MatchString(line) ||
			headerBlock(line, body[idx+1:])
		if boundary {
			body = body[:idx]
			break
		}
	}

	// Walk back over trailing blank or ">"-quoted lines. "ᐧ" is the trailing
	// marker some mobile clients (Mailbox) append and is skipped as well.
	keep := len(body)
	for ; keep > 0; keep-- {
		last := strings.TrimSpace(body[keep-1])
		if last != "" && last != "ᐧ" && !strings.HasPrefix(last, ">") {
			break
		}
	}
	// If nothing but quoted/blank lines remains, the whole body is quoted:
	// keep it as is rather than returning an empty reply.
	if keep > 0 {
		body = body[:keep]
	}
	return strings.TrimSpace(strings.Join(body, "\n"))
}
// headerBlock reports whether first opens a forwarded-mail header block.
// first must already be trimmed and be a "From"-type field, and the next
// non-blank line in rest must be a header field too; this keeps a lone
// "Subject:"-like sentence from being treated as a boundary. It returns false
// when rest holds no non-blank line.
func headerBlock(first string, rest []string) bool {
	if headerLine(first, headerFromFields) {
		for _, candidate := range rest {
			trimmed := strings.TrimSpace(candidate)
			if trimmed == "" {
				continue // skip blank lines between the fields
			}
			// only the first non-blank follower decides
			return headerLine(trimmed, headerFields)
		}
	}
	return false
}
// headerLine reports whether line is a "Field:" header for one of fields.
//
// line must already be trimmed. It is lowercased before comparison, so fields
// are expected in lowercase. A field matches when it is a prefix of the line and
// is followed either by an ASCII colon plus a space (so prose like "To:do this"
// is ignored) or by the CJK fullwidth colon (U+FF1A), which needs no space.
func headerLine(line string, fields []string) bool {
	// Spelled as an escape so the character cannot be silently dropped by an
	// encoding or normalization step; an empty string here would make every
	// prefix match count as a header.
	const fullwidthColon = "\uff1a"

	lower := strings.ToLower(line)
	for _, field := range fields {
		rest, ok := strings.CutPrefix(lower, field)
		if !ok {
			continue
		}
		if strings.HasPrefix(rest, ": ") || strings.HasPrefix(rest, fullwidthColon) {
			return true
		}
	}
	return false
}