Refactor Git Attribute & performance optimization (#34154)

This PR moved git attributes related code to `modules/git/attribute` sub
package and moved language stats related code to
`modules/git/languagestats` sub package to make it easier to maintain.

And it also introduced a performance improvement which use the `git
check-attr --source` which can be run in a bare git repository so that
we don't need to create a git index file. The new parameter need a git
version >= 2.40 . If git version less than 2.40, it will fall back to
previous implementation.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: yp05327 <576951401@qq.com>
This commit is contained in:
Lunny Xiao 2025-04-11 06:41:29 -07:00 committed by GitHub
parent d725b78824
commit ae0af8ea5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 875 additions and 587 deletions

View File

@ -1,35 +0,0 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"code.gitea.io/gitea/modules/optional"
)
const (
AttributeLinguistVendored = "linguist-vendored"
AttributeLinguistGenerated = "linguist-generated"
AttributeLinguistDocumentation = "linguist-documentation"
AttributeLinguistDetectable = "linguist-detectable"
AttributeLinguistLanguage = "linguist-language"
AttributeGitlabLanguage = "gitlab-language"
)
// true if "set"/"true", false if "unset"/"false", none otherwise
func AttributeToBool(attr map[string]string, name string) optional.Option[bool] {
switch attr[name] {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
return optional.None[bool]()
}
func AttributeToString(attr map[string]string, name string) optional.Option[string] {
if value, has := attr[name]; has && value != "unspecified" {
return optional.Some(value)
}
return optional.None[string]()
}

View File

@ -0,0 +1,114 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"strings"
"code.gitea.io/gitea/modules/optional"
)
type Attribute string
const (
LinguistVendored = "linguist-vendored"
LinguistGenerated = "linguist-generated"
LinguistDocumentation = "linguist-documentation"
LinguistDetectable = "linguist-detectable"
LinguistLanguage = "linguist-language"
GitlabLanguage = "gitlab-language"
Lockable = "lockable"
Filter = "filter"
)
var LinguistAttributes = []string{
LinguistVendored,
LinguistGenerated,
LinguistDocumentation,
LinguistDetectable,
LinguistLanguage,
GitlabLanguage,
}
func (a Attribute) IsUnspecified() bool {
return a == "" || a == "unspecified"
}
func (a Attribute) ToString() optional.Option[string] {
if !a.IsUnspecified() {
return optional.Some(string(a))
}
return optional.None[string]()
}
// ToBool converts the attribute value to optional boolean: true if "set"/"true", false if "unset"/"false", none otherwise
func (a Attribute) ToBool() optional.Option[bool] {
switch a {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
return optional.None[bool]()
}
type Attributes struct {
m map[string]Attribute
}
func NewAttributes() *Attributes {
return &Attributes{m: make(map[string]Attribute)}
}
func (attrs *Attributes) Get(name string) Attribute {
if value, has := attrs.m[name]; has {
return value
}
return ""
}
func (attrs *Attributes) GetVendored() optional.Option[bool] {
return attrs.Get(LinguistVendored).ToBool()
}
func (attrs *Attributes) GetGenerated() optional.Option[bool] {
return attrs.Get(LinguistGenerated).ToBool()
}
func (attrs *Attributes) GetDocumentation() optional.Option[bool] {
return attrs.Get(LinguistDocumentation).ToBool()
}
func (attrs *Attributes) GetDetectable() optional.Option[bool] {
return attrs.Get(LinguistDetectable).ToBool()
}
func (attrs *Attributes) GetLinguistLanguage() optional.Option[string] {
return attrs.Get(LinguistLanguage).ToString()
}
func (attrs *Attributes) GetGitlabLanguage() optional.Option[string] {
attrStr := attrs.Get(GitlabLanguage).ToString()
if attrStr.Has() {
raw := attrStr.Value()
// gitlab-language may have additional parameters after the language
// ignore them and just use the main language
// https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
if idx := strings.IndexByte(raw, '?'); idx >= 0 {
return optional.Some(raw[:idx])
}
}
return attrStr
}
func (attrs *Attributes) GetLanguage() optional.Option[string] {
// prefer linguist-language over gitlab-language
// if linguist-language is not set, use gitlab-language
// if both are not set, return none
language := attrs.GetLinguistLanguage()
if language.Value() == "" {
language = attrs.GetGitlabLanguage()
}
return language
}

View File

@ -0,0 +1,37 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"testing"
"github.com/stretchr/testify/assert"
)
func Test_Attribute(t *testing.T) {
assert.Empty(t, Attribute("").ToString().Value())
assert.Empty(t, Attribute("unspecified").ToString().Value())
assert.Equal(t, "python", Attribute("python").ToString().Value())
assert.Equal(t, "Java", Attribute("Java").ToString().Value())
attributes := Attributes{
m: map[string]Attribute{
LinguistGenerated: "true",
LinguistDocumentation: "false",
LinguistDetectable: "set",
LinguistLanguage: "Python",
GitlabLanguage: "Java",
"filter": "unspecified",
"test": "",
},
}
assert.Empty(t, attributes.Get("test").ToString().Value())
assert.Empty(t, attributes.Get("filter").ToString().Value())
assert.Equal(t, "Python", attributes.Get(LinguistLanguage).ToString().Value())
assert.Equal(t, "Java", attributes.Get(GitlabLanguage).ToString().Value())
assert.True(t, attributes.Get(LinguistGenerated).ToBool().Value())
assert.False(t, attributes.Get(LinguistDocumentation).ToBool().Value())
assert.True(t, attributes.Get(LinguistDetectable).ToBool().Value())
}

View File

@ -0,0 +1,216 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
)
// BatchChecker provides a reader for check-attribute content that can be long running
type BatchChecker struct {
attributesNum int
repo *git.Repository
stdinWriter *os.File
stdOut *nulSeparatedAttributeWriter
ctx context.Context
cancel context.CancelFunc
cmd *git.Command
}
// NewBatchChecker creates a check attribute reader for the current repository and provided commit ID
// If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo
func NewBatchChecker(repo *git.Repository, treeish string, attributes []string) (checker *BatchChecker, returnedErr error) {
ctx, cancel := context.WithCancel(repo.Ctx)
defer func() {
if returnedErr != nil {
cancel()
}
}()
cmd, envs, cleanup, err := checkAttrCommand(repo, treeish, nil, attributes)
if err != nil {
return nil, err
}
defer func() {
if returnedErr != nil {
cleanup()
}
}()
cmd.AddArguments("--stdin")
checker = &BatchChecker{
attributesNum: len(attributes),
repo: repo,
ctx: ctx,
cmd: cmd,
cancel: func() {
cancel()
cleanup()
},
}
stdinReader, stdinWriter, err := os.Pipe()
if err != nil {
return nil, err
}
checker.stdinWriter = stdinWriter
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, len(attributes))
lw.closed = make(chan struct{})
checker.stdOut = lw
go func() {
defer func() {
_ = stdinReader.Close()
_ = lw.Close()
}()
stdErr := new(bytes.Buffer)
err := cmd.Run(ctx, &git.RunOpts{
Env: envs,
Dir: repo.Path,
Stdin: stdinReader,
Stdout: lw,
Stderr: stdErr,
})
if err != nil && !git.IsErrCanceledOrKilled(err) {
log.Error("Attribute checker for commit %s exits with error: %v", treeish, err)
}
checker.cancel()
}()
return checker, nil
}
// CheckPath check attr for given path
func (c *BatchChecker) CheckPath(path string) (rs *Attributes, err error) {
defer func() {
if err != nil && err != c.ctx.Err() {
log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.repo.Path), err)
}
}()
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
default:
}
if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.Close()
return nil, err
}
reportTimeout := func() error {
stdOutClosed := false
select {
case <-c.stdOut.closed:
stdOutClosed = true
default:
}
debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.repo.Path))
debugMsg += fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, stdOutClosed)
if c.cmd != nil {
debugMsg += fmt.Sprintf(", process state: %q", c.cmd.ProcessState())
}
_ = c.Close()
return fmt.Errorf("CheckPath timeout: %s", debugMsg)
}
rs = NewAttributes()
for i := 0; i < c.attributesNum; i++ {
select {
case <-time.After(5 * time.Second):
// there is no "hang" problem now. This code is just used to catch other potential problems.
return nil, reportTimeout()
case attr, ok := <-c.stdOut.ReadAttribute():
if !ok {
return nil, c.ctx.Err()
}
rs.m[attr.Attribute] = Attribute(attr.Value)
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
func (c *BatchChecker) Close() error {
c.cancel()
err := c.stdinWriter.Close()
return err
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
closed chan struct{}
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return l, nil
}
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error {
select {
case <-wr.closed:
return nil
default:
}
close(wr.attributes)
close(wr.closed)
return nil
}

View File

@ -1,13 +1,19 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
package attribute
import (
"path/filepath"
"testing"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
@ -24,7 +30,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, AttributeLinguistVendored, attr.Attribute)
assert.Equal(t, LinguistVendored, attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.FailNow(t, "took too long to read an attribute from the list")
@ -38,7 +44,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, AttributeLinguistVendored, attr.Attribute)
assert.Equal(t, LinguistVendored, attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.FailNow(t, "took too long to read an attribute from the list")
@ -77,21 +83,90 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: AttributeLinguistVendored,
Attribute: LinguistVendored,
Value: "set",
}, attr)
attr = <-wr.ReadAttribute()
assert.NoError(t, err)
assert.Equal(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: AttributeLinguistGenerated,
Attribute: LinguistGenerated,
Value: "unspecified",
}, attr)
attr = <-wr.ReadAttribute()
assert.NoError(t, err)
assert.Equal(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: AttributeLinguistLanguage,
Attribute: LinguistLanguage,
Value: "unspecified",
}, attr)
}
func expectedAttrs() *Attributes {
return &Attributes{
m: map[string]Attribute{
LinguistGenerated: "unspecified",
LinguistDetectable: "unspecified",
LinguistDocumentation: "unspecified",
LinguistVendored: "unspecified",
LinguistLanguage: "Python",
GitlabLanguage: "unspecified",
},
}
}
func Test_BatchChecker(t *testing.T) {
setting.AppDataPath = t.TempDir()
repoPath := "../tests/repos/language_stats_repo"
gitRepo, err := git.OpenRepository(t.Context(), repoPath)
require.NoError(t, err)
defer gitRepo.Close()
commitID := "8fee858da5796dfb37704761701bb8e800ad9ef3"
t.Run("Create index file to run git check-attr", func(t *testing.T) {
defer test.MockVariableValue(&git.DefaultFeatures().SupportCheckAttrOnBare, false)()
checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes)
assert.NoError(t, err)
defer checker.Close()
attributes, err := checker.CheckPath("i-am-a-python.p")
assert.NoError(t, err)
assert.Equal(t, expectedAttrs(), attributes)
})
// run git check-attr on work tree
t.Run("Run git check-attr on git work tree", func(t *testing.T) {
dir := filepath.Join(t.TempDir(), "test-repo")
err := git.Clone(t.Context(), repoPath, dir, git.CloneRepoOptions{
Shared: true,
Branch: "master",
})
assert.NoError(t, err)
tempRepo, err := git.OpenRepository(t.Context(), dir)
assert.NoError(t, err)
defer tempRepo.Close()
checker, err := NewBatchChecker(tempRepo, "", LinguistAttributes)
assert.NoError(t, err)
defer checker.Close()
attributes, err := checker.CheckPath("i-am-a-python.p")
assert.NoError(t, err)
assert.Equal(t, expectedAttrs(), attributes)
})
if !git.DefaultFeatures().SupportCheckAttrOnBare {
t.Skip("git version 2.40 is required to support run check-attr on bare repo")
return
}
t.Run("Run git check-attr in bare repository", func(t *testing.T) {
checker, err := NewBatchChecker(gitRepo, commitID, LinguistAttributes)
assert.NoError(t, err)
defer checker.Close()
attributes, err := checker.CheckPath("i-am-a-python.p")
assert.NoError(t, err)
assert.Equal(t, expectedAttrs(), attributes)
})
}

View File

@ -0,0 +1,96 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"bytes"
"context"
"errors"
"fmt"
"os"
"code.gitea.io/gitea/modules/git"
)
func checkAttrCommand(gitRepo *git.Repository, treeish string, filenames, attributes []string) (*git.Command, []string, func(), error) {
cancel := func() {}
envs := []string{"GIT_FLUSH=1"}
cmd := git.NewCommand("check-attr", "-z")
if len(attributes) == 0 {
cmd.AddArguments("--all")
}
// there is treeish, read from bare repo or temp index created by "read-tree"
if treeish != "" {
if git.DefaultFeatures().SupportCheckAttrOnBare {
cmd.AddArguments("--source")
cmd.AddDynamicArguments(treeish)
} else {
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(treeish)
if err != nil {
return nil, nil, nil, err
}
cmd.AddArguments("--cached")
envs = append(envs,
"GIT_INDEX_FILE="+indexFilename,
"GIT_WORK_TREE="+worktree,
)
cancel = deleteTemporaryFile
}
} // else: no treeish, assume it is a not a bare repo, read from working directory
cmd.AddDynamicArguments(attributes...)
if len(filenames) > 0 {
cmd.AddDashesAndList(filenames...)
}
return cmd, envs, cancel, nil
}
type CheckAttributeOpts struct {
Filenames []string
Attributes []string
}
// CheckAttributes return the attributes of the given filenames and attributes in the given treeish.
// If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo
func CheckAttributes(ctx context.Context, gitRepo *git.Repository, treeish string, opts CheckAttributeOpts) (map[string]*Attributes, error) {
cmd, envs, cancel, err := checkAttrCommand(gitRepo, treeish, opts.Filenames, opts.Attributes)
if err != nil {
return nil, err
}
defer cancel()
stdOut := new(bytes.Buffer)
stdErr := new(bytes.Buffer)
if err := cmd.Run(ctx, &git.RunOpts{
Env: append(os.Environ(), envs...),
Dir: gitRepo.Path,
Stdout: stdOut,
Stderr: stdErr,
}); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
}
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
return nil, errors.New("wrong number of fields in return from check-attr")
}
attributesMap := make(map[string]*Attributes)
for i := 0; i < (len(fields) / 3); i++ {
filename := string(fields[3*i])
attribute := string(fields[3*i+1])
info := string(fields[3*i+2])
attribute2info, ok := attributesMap[filename]
if !ok {
attribute2info = NewAttributes()
attributesMap[filename] = attribute2info
}
attribute2info.m[attribute] = Attribute(info)
}
return attributesMap, nil
}

View File

@ -0,0 +1,74 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"path/filepath"
"testing"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func Test_Checker(t *testing.T) {
setting.AppDataPath = t.TempDir()
repoPath := "../tests/repos/language_stats_repo"
gitRepo, err := git.OpenRepository(t.Context(), repoPath)
require.NoError(t, err)
defer gitRepo.Close()
commitID := "8fee858da5796dfb37704761701bb8e800ad9ef3"
t.Run("Create index file to run git check-attr", func(t *testing.T) {
defer test.MockVariableValue(&git.DefaultFeatures().SupportCheckAttrOnBare, false)()
attrs, err := CheckAttributes(t.Context(), gitRepo, commitID, CheckAttributeOpts{
Filenames: []string{"i-am-a-python.p"},
Attributes: LinguistAttributes,
})
assert.NoError(t, err)
assert.Len(t, attrs, 1)
assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"])
})
// run git check-attr on work tree
t.Run("Run git check-attr on git work tree", func(t *testing.T) {
dir := filepath.Join(t.TempDir(), "test-repo")
err := git.Clone(t.Context(), repoPath, dir, git.CloneRepoOptions{
Shared: true,
Branch: "master",
})
assert.NoError(t, err)
tempRepo, err := git.OpenRepository(t.Context(), dir)
assert.NoError(t, err)
defer tempRepo.Close()
attrs, err := CheckAttributes(t.Context(), tempRepo, "", CheckAttributeOpts{
Filenames: []string{"i-am-a-python.p"},
Attributes: LinguistAttributes,
})
assert.NoError(t, err)
assert.Len(t, attrs, 1)
assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"])
})
if !git.DefaultFeatures().SupportCheckAttrOnBare {
t.Skip("git version 2.40 is required to support run check-attr on bare repo")
return
}
t.Run("Run git check-attr in bare repository", func(t *testing.T) {
attrs, err := CheckAttributes(t.Context(), gitRepo, commitID, CheckAttributeOpts{
Filenames: []string{"i-am-a-python.p"},
Attributes: LinguistAttributes,
})
assert.NoError(t, err)
assert.Len(t, attrs, 1)
assert.Equal(t, expectedAttrs(), attrs["i-am-a-python.p"])
})
}

View File

@ -0,0 +1,41 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package attribute
import (
"context"
"fmt"
"os"
"testing"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
)
func testRun(m *testing.M) error {
gitHomePath, err := os.MkdirTemp(os.TempDir(), "git-home")
if err != nil {
return fmt.Errorf("unable to create temp dir: %w", err)
}
defer util.RemoveAll(gitHomePath)
setting.Git.HomePath = gitHomePath
if err = git.InitFull(context.Background()); err != nil {
return fmt.Errorf("failed to call Init: %w", err)
}
exitCode := m.Run()
if exitCode != 0 {
return fmt.Errorf("run test failed, ExitCode=%d", exitCode)
}
return nil
}
func TestMain(m *testing.M) {
if err := testRun(m); err != nil {
_, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err)
os.Exit(1)
}
}

View File

@ -80,6 +80,13 @@ func (c *Command) LogString() string {
return strings.Join(a, " ")
}
func (c *Command) ProcessState() string {
if c.cmd == nil {
return ""
}
return c.cmd.ProcessState.String()
}
// NewCommand creates and returns a new Git Command based on given command and arguments.
// Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead.
func NewCommand(args ...internal.CmdArg) *Command {

View File

@ -30,6 +30,7 @@ type Features struct {
SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an experimental curiosity
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
}
var (
@ -77,6 +78,7 @@ func loadGitVersionFeatures() (*Features, error) {
if features.SupportHashSha256 {
features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat)
}
features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40")
return features, nil
}

View File

@ -1,13 +1,15 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
package languagestats
import (
"context"
"strings"
"unicode"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
)
const (
@ -49,19 +51,15 @@ func mergeLanguageStats(stats map[string]int64) map[string]int64 {
return res
}
func TryReadLanguageAttribute(attrs map[string]string) optional.Option[string] {
language := AttributeToString(attrs, AttributeLinguistLanguage)
if language.Value() == "" {
language = AttributeToString(attrs, AttributeGitlabLanguage)
if language.Has() {
raw := language.Value()
// gitlab-language may have additional parameters after the language
// ignore them and just use the main language
// https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
if idx := strings.IndexByte(raw, '?'); idx >= 0 {
language = optional.Some(raw[:idx])
}
}
// GetFileLanguage tries to get the (linguist) language of the file content
func GetFileLanguage(ctx context.Context, gitRepo *git.Repository, treeish, treePath string) (string, error) {
attributesMap, err := attribute.CheckAttributes(ctx, gitRepo, treeish, attribute.CheckAttributeOpts{
Attributes: []string{attribute.LinguistLanguage, attribute.GitlabLanguage},
Filenames: []string{treePath},
})
if err != nil {
return "", err
}
return language
return attributesMap[treePath].GetLanguage().Value(), nil
}

View File

@ -3,13 +3,15 @@
//go:build gogit
package git
package languagestats
import (
"bytes"
"io"
"code.gitea.io/gitea/modules/analyze"
git_module "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/optional"
"github.com/go-enry/go-enry/v2"
@ -19,7 +21,7 @@ import (
)
// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
func GetLanguageStats(repo *git_module.Repository, commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
if err != nil {
return nil, err
@ -40,8 +42,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
checker, deferable := repo.CheckAttributeReader(commitID)
defer deferable()
checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes)
if err != nil {
return nil, err
}
defer checker.Close()
// sizes contains the current calculated size of all files by language
sizes := make(map[string]int64)
@ -62,43 +67,41 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()
if checker != nil {
attrs, err := checker.CheckPath(f.Name)
if err == nil {
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
if isVendored.ValueOrDefault(false) {
return nil
attrs, err := checker.CheckPath(f.Name)
if err == nil {
isVendored = attrs.GetVendored()
if isVendored.ValueOrDefault(false) {
return nil
}
isGenerated = attrs.GetGenerated()
if isGenerated.ValueOrDefault(false) {
return nil
}
isDocumentation = attrs.GetDocumentation()
if isDocumentation.ValueOrDefault(false) {
return nil
}
isDetectable = attrs.GetDetectable()
if !isDetectable.ValueOrDefault(true) {
return nil
}
hasLanguage := attrs.GetLanguage()
if hasLanguage.Value() != "" {
language := hasLanguage.Value()
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
if isGenerated.ValueOrDefault(false) {
return nil
}
isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
if isDocumentation.ValueOrDefault(false) {
return nil
}
isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
if !isDetectable.ValueOrDefault(true) {
return nil
}
hasLanguage := TryReadLanguageAttribute(attrs)
if hasLanguage.Value() != "" {
language := hasLanguage.Value()
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
// this language will always be added to the size
sizes[language] += f.Size
return nil
}
// this language will always be added to the size
sizes[language] += f.Size
return nil
}
}

View File

@ -3,13 +3,15 @@
//go:build !gogit
package git
package languagestats
import (
"bytes"
"io"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
@ -17,7 +19,7 @@ import (
)
// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx)
@ -34,19 +36,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if err := writeID(commitID); err != nil {
return nil, err
}
shaBytes, typ, size, err := ReadBatchLine(batchReader)
shaBytes, typ, size, err := git.ReadBatchLine(batchReader)
if typ != "commit" {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, ErrNotExist{commitID, ""}
return nil, git.ErrNotExist{ID: commitID}
}
sha, err := NewIDFromString(string(shaBytes))
sha, err := git.NewIDFromString(string(shaBytes))
if err != nil {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, ErrNotExist{commitID, ""}
return nil, git.ErrNotExist{ID: commitID}
}
commit, err := CommitFromReader(repo, sha, io.LimitReader(batchReader, size))
commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size))
if err != nil {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, err
@ -62,8 +64,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
checker, deferable := repo.CheckAttributeReader(commitID)
defer deferable()
checker, err := attribute.NewBatchChecker(repo, commitID, attribute.LinguistAttributes)
if err != nil {
return nil, err
}
defer checker.Close()
contentBuf := bytes.Buffer{}
var content []byte
@ -96,43 +101,36 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()
if checker != nil {
attrs, err := checker.CheckPath(f.Name())
if err == nil {
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
if isVendored.ValueOrDefault(false) {
continue
attrs, err := checker.CheckPath(f.Name())
if err == nil {
if isVendored = attrs.GetVendored(); isVendored.ValueOrDefault(false) {
continue
}
if isGenerated = attrs.GetGenerated(); isGenerated.ValueOrDefault(false) {
continue
}
if isDocumentation = attrs.GetDocumentation(); isDocumentation.ValueOrDefault(false) {
continue
}
if isDetectable = attrs.GetDetectable(); !isDetectable.ValueOrDefault(true) {
continue
}
if hasLanguage := attrs.GetLanguage(); hasLanguage.Value() != "" {
language := hasLanguage.Value()
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
if isGenerated.ValueOrDefault(false) {
continue
}
isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
if isDocumentation.ValueOrDefault(false) {
continue
}
isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
if !isDetectable.ValueOrDefault(true) {
continue
}
hasLanguage := TryReadLanguageAttribute(attrs)
if hasLanguage.Value() != "" {
language := hasLanguage.Value()
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
// this language will always be added to the size
sizes[language] += f.Size()
continue
}
// this language will always be added to the size
sizes[language] += f.Size()
continue
}
}
@ -149,7 +147,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if err := writeID(f.ID.String()); err != nil {
return nil, err
}
_, _, size, err := ReadBatchLine(batchReader)
_, _, size, err := git.ReadBatchLine(batchReader)
if err != nil {
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
return nil, err
@ -167,7 +165,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
content = contentBuf.Bytes()
if err := DiscardFull(batchReader, discard); err != nil {
if err := git.DiscardFull(batchReader, discard); err != nil {
return nil, err
}
}

View File

@ -3,12 +3,12 @@
//go:build !gogit
package git
package languagestats
import (
"path/filepath"
"testing"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
@ -17,13 +17,12 @@ import (
func TestRepository_GetLanguageStats(t *testing.T) {
setting.AppDataPath = t.TempDir()
repoPath := filepath.Join(testReposDir, "language_stats_repo")
gitRepo, err := openRepositoryWithDefaultContext(repoPath)
repoPath := "../tests/repos/language_stats_repo"
gitRepo, err := git.OpenRepository(t.Context(), repoPath)
require.NoError(t, err)
defer gitRepo.Close()
stats, err := gitRepo.GetLanguageStats("8fee858da5796dfb37704761701bb8e800ad9ef3")
stats, err := GetLanguageStats(gitRepo, "8fee858da5796dfb37704761701bb8e800ad9ef3")
require.NoError(t, err)
assert.Equal(t, map[string]int64{

View File

@ -0,0 +1,41 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package languagestats
import (
"context"
"fmt"
"os"
"testing"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
)
func testRun(m *testing.M) error {
gitHomePath, err := os.MkdirTemp(os.TempDir(), "git-home")
if err != nil {
return fmt.Errorf("unable to create temp dir: %w", err)
}
defer util.RemoveAll(gitHomePath)
setting.Git.HomePath = gitHomePath
if err = git.InitFull(context.Background()); err != nil {
return fmt.Errorf("failed to call Init: %w", err)
}
exitCode := m.Run()
if exitCode != 0 {
return fmt.Errorf("run test failed, ExitCode=%d", exitCode)
}
return nil
}
func TestMain(m *testing.M) {
if err := testRun(m); err != nil {
_, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err)
os.Exit(1)
}
}

View File

@ -1,341 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"time"
"code.gitea.io/gitea/modules/log"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
type CheckAttributeOpts struct {
CachedOnly bool
AllAttributes bool
Attributes []string
Filenames []string
IndexFile string
WorkTree string
}
// CheckAttribute return the Blame object of file
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
env := []string{}
if len(opts.IndexFile) > 0 {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
}
if len(opts.WorkTree) > 0 {
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
}
if len(env) > 0 {
env = append(os.Environ(), env...)
}
stdOut := new(bytes.Buffer)
stdErr := new(bytes.Buffer)
cmd := NewCommand("check-attr", "-z")
if opts.AllAttributes {
cmd.AddArguments("-a")
} else {
for _, attribute := range opts.Attributes {
if attribute != "" {
cmd.AddDynamicArguments(attribute)
}
}
}
if opts.CachedOnly {
cmd.AddArguments("--cached")
}
cmd.AddDashesAndList(opts.Filenames...)
if err := cmd.Run(repo.Ctx, &RunOpts{
Env: env,
Dir: repo.Path,
Stdout: stdOut,
Stderr: stdErr,
}); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
}
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
return nil, errors.New("wrong number of fields in return from check-attr")
}
name2attribute2info := make(map[string]map[string]string)
for i := 0; i < (len(fields) / 3); i++ {
filename := string(fields[3*i])
attribute := string(fields[3*i+1])
info := string(fields[3*i+2])
attribute2info := name2attribute2info[filename]
if attribute2info == nil {
attribute2info = make(map[string]string)
}
attribute2info[attribute] = info
name2attribute2info[filename] = attribute2info
}
return name2attribute2info, nil
}
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Attributes []string
Repo *Repository
IndexFile string
WorkTree string
stdinReader io.ReadCloser
stdinWriter *os.File
stdOut *nulSeparatedAttributeWriter
cmd *Command
env []string
ctx context.Context
cancel context.CancelFunc
}
// Init initializes the CheckAttributeReader
func (c *CheckAttributeReader) Init(ctx context.Context) error {
if len(c.Attributes) == 0 {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple)
lw.closed = make(chan struct{})
c.stdOut = lw
c.stdOut.Close()
return errors.New("no provided Attributes to check")
}
c.ctx, c.cancel = context.WithCancel(ctx)
c.cmd = NewCommand("check-attr", "--stdin", "-z")
if len(c.IndexFile) > 0 {
c.cmd.AddArguments("--cached")
c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile)
}
if len(c.WorkTree) > 0 {
c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree)
}
c.env = append(c.env, "GIT_FLUSH=1")
c.cmd.AddDynamicArguments(c.Attributes...)
var err error
c.stdinReader, c.stdinWriter, err = os.Pipe()
if err != nil {
c.cancel()
return err
}
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
lw.closed = make(chan struct{})
c.stdOut = lw
return nil
}
func (c *CheckAttributeReader) Run() error {
defer func() {
_ = c.stdinReader.Close()
_ = c.stdOut.Close()
}()
stdErr := new(bytes.Buffer)
err := c.cmd.Run(c.ctx, &RunOpts{
Env: c.env,
Dir: c.Repo.Path,
Stdin: c.stdinReader,
Stdout: c.stdOut,
Stderr: stdErr,
})
if err != nil && !IsErrCanceledOrKilled(err) {
return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
return nil
}
// CheckPath check attr for given path
func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err error) {
defer func() {
if err != nil && err != c.ctx.Err() {
log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.Repo.Path), err)
}
}()
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
default:
}
if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.Close()
return nil, err
}
reportTimeout := func() error {
stdOutClosed := false
select {
case <-c.stdOut.closed:
stdOutClosed = true
default:
}
debugMsg := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.Repo.Path))
debugMsg += fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, stdOutClosed)
if c.cmd.cmd != nil {
debugMsg += fmt.Sprintf(", process state: %q", c.cmd.cmd.ProcessState.String())
}
_ = c.Close()
return fmt.Errorf("CheckPath timeout: %s", debugMsg)
}
rs = make(map[string]string)
for range c.Attributes {
select {
case <-time.After(5 * time.Second):
// There is a strange "hang" problem in gitdiff.GetDiff -> CheckPath
// So add a timeout here to mitigate the problem, and output more logs for debug purpose
// In real world, if CheckPath runs long than seconds, it blocks the end user's operation,
// and at the moment the CheckPath result is not so important, so we can just ignore it.
return nil, reportTimeout()
case attr, ok := <-c.stdOut.ReadAttribute():
if !ok {
return nil, c.ctx.Err()
}
rs[attr.Attribute] = attr.Value
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
func (c *CheckAttributeReader) Close() error {
c.cancel()
err := c.stdinWriter.Close()
return err
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
closed chan struct{}
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return l, nil
}
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error {
select {
case <-wr.closed:
return nil
default:
}
close(wr.attributes)
close(wr.closed)
return nil
}
// CheckAttributeReader creates a check attribute reader for the current repository and provided commit ID
func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeReader, context.CancelFunc) {
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err != nil {
return nil, func() {}
}
checker := &CheckAttributeReader{
Attributes: []string{
AttributeLinguistVendored,
AttributeLinguistGenerated,
AttributeLinguistDocumentation,
AttributeLinguistDetectable,
AttributeLinguistLanguage,
AttributeGitlabLanguage,
},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
}
ctx, cancel := context.WithCancel(repo.Ctx)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open attribute checker for commit %s, error: %v", commitID, err)
} else {
go func() {
err := checker.Run()
if err != nil && !IsErrCanceledOrKilled(err) {
log.Error("Attribute checker for commit %s exits with error: %v", commitID, err)
}
cancel()
}()
}
deferrable := func() {
_ = checker.Close()
cancel()
deleteTemporaryFile()
}
return checker, deferrable
}

View File

@ -1,5 +1,5 @@
[core]
repositoryformatversion = 0
filemode = true
bare = false
bare = true
logallrefupdates = true

View File

@ -1,7 +1,7 @@
[core]
repositoryformatversion = 0
filemode = false
bare = false
bare = true
logallrefupdates = true
symlinks = false
ignorecase = true

View File

@ -1,7 +1,7 @@
[core]
repositoryformatversion = 0
filemode = false
bare = false
bare = true
logallrefupdates = true
symlinks = false
ignorecase = true

View File

@ -8,6 +8,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/languagestats"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
@ -62,7 +63,7 @@ func (db *DBIndexer) Index(id int64) error {
}
// Calculate and save language statistics to database
stats, err := gitRepo.GetLanguageStats(commitID)
stats, err := languagestats.GetLanguageStats(gitRepo, commitID)
if err != nil {
if !setting.IsInTesting {
log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err)

View File

@ -15,13 +15,13 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/languagestats"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/services/context"
files_service "code.gitea.io/gitea/services/repository/files"
)
type blameRow struct {
@ -234,7 +234,7 @@ func processBlameParts(ctx *context.Context, blameParts []*git.BlamePart) map[st
func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) {
repoLink := ctx.Repo.RepoLink
language, err := files_service.TryGetContentLanguage(ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath)
language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath)
if err != nil {
log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
}

View File

@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/git/pipeline"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
@ -134,39 +135,24 @@ func LFSLocks(ctx *context.Context) {
}
defer gitRepo.Close()
filenames := make([]string, len(lfsLocks))
for i, lock := range lfsLocks {
filenames[i] = lock.Path
}
if err := gitRepo.ReadTreeToIndex(ctx.Repo.Repository.DefaultBranch); err != nil {
log.Error("Unable to read the default branch to the index: %s (%v)", ctx.Repo.Repository.DefaultBranch, err)
ctx.ServerError("LFSLocks", fmt.Errorf("unable to read the default branch to the index: %s (%w)", ctx.Repo.Repository.DefaultBranch, err))
return
}
name2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"lockable"},
Filenames: filenames,
CachedOnly: true,
})
checker, err := attribute.NewBatchChecker(gitRepo, ctx.Repo.Repository.DefaultBranch, []string{attribute.Lockable})
if err != nil {
log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err)
ctx.ServerError("LFSLocks", err)
return
}
defer checker.Close()
lockables := make([]bool, len(lfsLocks))
filenames := make([]string, len(lfsLocks))
for i, lock := range lfsLocks {
attribute2info, has := name2attribute2info[lock.Path]
if !has {
filenames[i] = lock.Path
attrs, err := checker.CheckPath(lock.Path)
if err != nil {
log.Error("Unable to check attributes in %s: %s (%v)", tmpBasePath, lock.Path, err)
continue
}
if attribute2info["lockable"] != "set" {
continue
}
lockables[i] = true
lockables[i] = attrs.Get(attribute.Lockable).ToBool().Value()
}
ctx.Data["Lockables"] = lockables

View File

@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/actions"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
@ -25,7 +26,6 @@ import (
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/services/context"
issue_service "code.gitea.io/gitea/services/issue"
files_service "code.gitea.io/gitea/services/repository/files"
"github.com/nektos/act/pkg/model"
)
@ -147,6 +147,23 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) {
ctx.Data["EditFileTooltip"] = ctx.Tr("repo.editor.cannot_edit_non_text_files")
}
// read all needed attributes which will be used later
// there should be no performance different between reading 2 or 4 here
attrsMap, err := attribute.CheckAttributes(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, attribute.CheckAttributeOpts{
Filenames: []string{ctx.Repo.TreePath},
Attributes: []string{attribute.LinguistGenerated, attribute.LinguistVendored, attribute.LinguistLanguage, attribute.GitlabLanguage},
})
if err != nil {
ctx.ServerError("attribute.CheckAttributes", err)
return
}
attrs := attrsMap[ctx.Repo.TreePath]
if attrs == nil {
// this case shouldn't happen, just in case.
setting.PanicInDevOrTesting("no attributes found for %s", ctx.Repo.TreePath)
attrs = attribute.NewAttributes()
}
switch {
case isRepresentableAsText:
if fInfo.fileSize >= setting.UI.MaxDisplayFileSize {
@ -209,11 +226,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) {
ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1
}
language, err := files_service.TryGetContentLanguage(ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath)
if err != nil {
log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
}
language := attrs.GetLanguage().Value()
fileContent, lexerName, err := highlight.File(blob.Name(), language, buf)
ctx.Data["LexerName"] = lexerName
if err != nil {
@ -283,17 +296,7 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) {
}
}
if ctx.Repo.GitRepo != nil {
checker, deferable := ctx.Repo.GitRepo.CheckAttributeReader(ctx.Repo.CommitID)
if checker != nil {
defer deferable()
attrs, err := checker.CheckPath(ctx.Repo.TreePath)
if err == nil {
ctx.Data["IsVendored"] = git.AttributeToBool(attrs, git.AttributeLinguistVendored).Value()
ctx.Data["IsGenerated"] = git.AttributeToBool(attrs, git.AttributeLinguistGenerated).Value()
}
}
}
ctx.Data["IsVendored"], ctx.Data["IsGenerated"] = attrs.GetVendored().Value(), attrs.GetGenerated().Value()
if fInfo.st.IsImage() && !fInfo.st.IsSvgImage() {
img, _, err := image.DecodeConfig(bytes.NewReader(buf))

View File

@ -25,6 +25,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
@ -1237,24 +1238,21 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp
return nil, err
}
checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
defer deferrable()
checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage})
if err != nil {
return nil, err
}
defer checker.Close()
for _, diffFile := range diff.Files {
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
if checker != nil {
attrs, err := checker.CheckPath(diffFile.Name)
if err == nil {
isVendored = git.AttributeToBool(attrs, git.AttributeLinguistVendored)
isGenerated = git.AttributeToBool(attrs, git.AttributeLinguistGenerated)
language := git.TryReadLanguageAttribute(attrs)
if language.Has() {
diffFile.Language = language.Value()
}
} else {
checker = nil // CheckPath fails, it's not impossible to "check" anymore
attrs, err := checker.CheckPath(diffFile.Name)
if err == nil {
isVendored, isGenerated = attrs.GetVendored(), attrs.GetGenerated()
language := attrs.GetLanguage()
if language.Has() {
diffFile.Language = language.Value()
}
}

View File

@ -14,13 +14,13 @@ import (
"code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git/languagestats"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
gitea_context "code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/repository/files"
)
func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePreviewOptions) (template.HTML, error) {
@ -61,7 +61,7 @@ func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePrevie
return "", err
}
language, _ := files.TryGetContentLanguage(gitRepo, opts.CommitID, opts.FilePath)
language, _ := languagestats.GetFileLanguage(ctx, gitRepo, opts.CommitID, opts.FilePath)
blob, err := commit.GetBlobByPath(opts.FilePath)
if err != nil {
return "", err

View File

@ -277,28 +277,3 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
Content: content,
}, nil
}
// TryGetContentLanguage tries to get the (linguist) language of the file content
func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) {
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(commitID)
if err != nil {
return "", err
}
defer deleteTemporaryFile()
filename2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{
CachedOnly: true,
Attributes: []string{git.AttributeLinguistLanguage, git.AttributeGitlabLanguage},
Filenames: []string{treePath},
IndexFile: indexFilename,
WorkTree: worktree,
})
if err != nil {
return "", err
}
language := git.TryReadLanguageAttribute(filename2attribute2info[treePath])
return language.Value(), nil
}

View File

@ -15,6 +15,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
@ -488,16 +489,15 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
var lfsMetaObject *git_model.LFSMetaObject
if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos
filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{
Attributes: []string{attribute.Filter},
Filenames: []string{file.Options.treePath},
CachedOnly: true,
})
if err != nil {
return err
}
if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" {
if attributesMap[file.Options.treePath] != nil && attributesMap[file.Options.treePath].Get(attribute.Filter).ToString().Value() == "lfs" {
// OK so we are supposed to LFS this data!
pointer, err := lfs.GeneratePointer(treeObjectContentReader)
if err != nil {

View File

@ -14,6 +14,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
)
@ -105,12 +106,11 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
}
}
var filename2attribute2info map[string]map[string]string
var attributesMap map[string]*attribute.Attributes
if setting.LFS.StartServer {
filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{
Attributes: []string{attribute.Filter},
Filenames: names,
CachedOnly: true,
})
if err != nil {
return err
@ -119,7 +119,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
// Copy uploaded files into repository.
for i := range infos {
if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], filename2attribute2info, t, opts.TreePath); err != nil {
if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], attributesMap, t, opts.TreePath); err != nil {
return err
}
}
@ -176,7 +176,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
return repo_model.DeleteUploads(ctx, uploads...)
}
func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error {
func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, attributesMap map[string]*attribute.Attributes, t *TemporaryUploadRepository, treePath string) error {
file, err := os.Open(info.upload.LocalPath())
if err != nil {
return err
@ -184,7 +184,7 @@ func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, fi
defer file.Close()
var objectHash string
if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" {
if setting.LFS.StartServer && attributesMap[info.upload.Name] != nil && attributesMap[info.upload.Name].Get(attribute.Filter).ToString().Value() == "lfs" {
// Handle LFS
// FIXME: Inefficient! this should probably happen in models.Upload
pointer, err := lfs.GeneratePointer(file)