forkjo/routers/web/repo/blame.go
wxiaoguang 6bc3079c00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replaces #22592)

## Review without space diff

https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1

## Purpose of this PR

1. Make the git module's commands completely safe (risky user input can
no longer be passed as a command-line option)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command

## The main idea of this PR

* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
    * Before: `AddArguments("-m").AddDynamicArguments(message)`
    * After: `AddOptionValues("-m", message)`
    * -
    * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))`
    * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`

## FAQ

### Why were these changes not done in #21535?

#21535 is mainly a search&replace, it did its best to not change too
much logic.

Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.


### The naming of `AddOptionXxx`

According to git's manual, the `--xxx` part is called `option`.

### How can it guarantee that `internal.CmdArg` won't be misused?

Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.

And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.

### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?

Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.

### Why was there a `CmdArgCheck`, and why is it removed?

At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.


### Why is so much code handling `signArg == ""` deleted?

Because in the old code, `signArg` could never be an empty string; it was
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` branch was
just dead code.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00

303 lines
8.2 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repo
import (
"fmt"
gotemplate "html/template"
"net/http"
"net/url"
"strings"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
// tplBlame is the template name passed to ctx.HTML when rendering the blame page.
const tplBlame base.TplName = "repo/home"
// blameRow holds the data for one rendered line of the blame view.
// renderBlame fills the commit-related fields (Avatar, RepoLink, PartSha,
// PreviousSha, PreviousShaURL, CommitURL, CommitMessage, CommitSince) only on
// the first line of each blame part; on the remaining lines of the part they
// keep their zero values.
type blameRow struct {
// RowNumber is the 1-based position of the line across all blame parts.
RowNumber int
// Avatar is the pre-rendered avatar HTML of the commit author.
Avatar gotemplate.HTML
// RepoLink is the repository link taken from the request context.
RepoLink string
// PartSha is the SHA of the commit the blame part is attributed to.
PartSha string
// PreviousSha is the SHA recorded for this commit in the previousCommits map;
// it is empty when no entry exists.
PreviousSha string
// PreviousShaURL is the blame URL of the file at PreviousSha.
PreviousShaURL string
// IsFirstCommit is not assigned anywhere in the visible part of this file.
// NOTE(review): presumably consumed by the template — confirm before removing.
IsFirstCommit bool
// CommitURL is the link to the commit page for PartSha.
CommitURL string
// CommitMessage is the message of the blamed commit.
CommitMessage string
// CommitSince is the rendered relative time of the commit author date.
CommitSince gotemplate.HTML
// Code is the highlighted line after control-character escaping.
Code gotemplate.HTML
// EscapeStatus is the status returned by charset.EscapeControlHTML for this line.
EscapeStatus *charset.EscapeStatus
}
// RefBlame renders the blame page for the file at ctx.Repo.TreePath.
//
// It answers "not found" when the tree path is empty, when the blob's line
// count cannot be determined, or when the blame output cannot be read. A
// failed tree-entry lookup is reported as "not found" or as a server error
// depending on git.IsErrNotExist. On success the view data is stored in
// ctx.Data and the tplBlame template is rendered.
func RefBlame(ctx *context.Context) {
	treePath := ctx.Repo.TreePath
	if treePath == "" {
		ctx.NotFound("Blame FileName", nil)
		return
	}

	ownerName := ctx.Repo.Owner.Name
	repoName := ctx.Repo.Repository.Name
	commitID := ctx.Repo.CommitID

	branchLink := ctx.Repo.RepoLink + "/src/" + ctx.Repo.BranchNameSubURL()
	rawLink := ctx.Repo.RepoLink + "/raw/" + ctx.Repo.BranchNameSubURL()

	// treePath is known to be non-empty from here on, so the links and the
	// breadcrumb data can be built unconditionally.
	treeLink := branchLink + "/" + util.PathEscapeSegments(treePath)

	// paths[k] is the tree path truncated after its (k+1)-th segment.
	treeNames := strings.Split(treePath, "/")
	paths := make([]string, 0, 5)
	for end := 1; end <= len(treeNames); end++ {
		paths = append(paths, strings.Join(treeNames[:end], "/"))
	}

	ctx.Data["HasParentPath"] = true
	if len(paths) >= 2 {
		// NOTE(review): the last element of paths is the file's own path, not
		// its parent directory — confirm whether len(paths)-2 was intended.
		ctx.Data["ParentPath"] = "/" + paths[len(paths)-1]
	}

	// Look up the tree entry the user is currently viewing.
	entry, err := ctx.Repo.Commit.GetTreeEntryByPath(treePath)
	if err != nil {
		ctx.NotFoundOrServerError("Repo.Commit.GetTreeEntryByPath", git.IsErrNotExist, err)
		return
	}
	blob := entry.Blob()

	ctx.Data["Paths"] = paths
	ctx.Data["TreeLink"] = treeLink
	ctx.Data["TreeNames"] = treeNames
	ctx.Data["BranchLink"] = branchLink
	ctx.Data["RawFileLink"] = rawLink + "/" + util.PathEscapeSegments(treePath)
	ctx.Data["PageIsViewCode"] = true
	ctx.Data["IsBlame"] = true
	ctx.Data["FileSize"] = blob.Size()
	ctx.Data["FileName"] = blob.Name()

	numLines, err := blob.GetBlobLineCount()
	ctx.Data["NumLines"] = numLines
	ctx.Data["NumLinesSet"] = true
	if err != nil {
		ctx.NotFound("GetBlobLineCount", err)
		return
	}

	blameReader, err := git.CreateBlameReader(ctx, repo_model.RepoPath(ownerName, repoName), commitID, treePath)
	if err != nil {
		ctx.NotFound("CreateBlameReader", err)
		return
	}
	defer blameReader.Close()

	// Drain the reader; a nil part without an error marks the end of the output.
	blameParts := []git.BlamePart{}
	for {
		part, readErr := blameReader.NextPart()
		if readErr != nil {
			ctx.NotFound("NextPart", readErr)
			return
		}
		if part == nil {
			break
		}
		blameParts = append(blameParts, *part)
	}

	// Load the topics of this repository.
	renderRepoTopics(ctx)
	if ctx.Written() {
		return
	}

	commitNames, previousCommits := processBlameParts(ctx, blameParts)
	if ctx.Written() {
		return
	}

	renderBlame(ctx, blameParts, commitNames, previousCommits)

	ctx.HTML(http.StatusOK, tplBlame)
}
// processBlameParts resolves the commits referenced by blameParts.
//
// It returns two maps keyed by commit SHA:
//   - the user/commit information produced by user_model.ValidateCommitsWithEmails,
//     used later to look up avatars and commit metadata;
//   - the SHA of the commit's first parent, recorded only when that parent also
//     contains ctx.Repo.TreePath.
//
// When a blamed commit cannot be loaded, a not-found or server-error response
// is written to ctx and both results are nil; callers should check
// ctx.Written() before using them.
func processBlameParts(ctx *context.Context, blameParts []git.BlamePart) (map[string]*user_model.UserCommit, map[string]string) {
	// store commit data by SHA to look up avatar info etc
	commitNames := make(map[string]*user_model.UserCommit)
	// previousCommits contains links from SHA to parent SHA,
	// if parent also contains the current TreePath.
	previousCommits := make(map[string]string)

	// Blame parts can reference the same commit many times, so loaded commits
	// (including parents) are cached locally.
	commits := make([]*git.Commit, 0, len(blameParts))
	commitCache := map[string]*git.Commit{}
	commitCache[ctx.Repo.Commit.ID.String()] = ctx.Repo.Commit

	// seen tracks the SHAs that were already handled. commitNames cannot serve
	// this purpose because it is only populated after the loop, and commitCache
	// cannot either because it also holds parents and the current commit.
	seen := make(map[string]struct{}, len(blameParts))

	for _, part := range blameParts {
		sha := part.Sha
		if _, done := seen[sha]; done {
			continue
		}
		seen[sha] = struct{}{}

		// find the blamePart commit, to look up parent & email address for avatars
		commit, cached := commitCache[sha]
		if !cached {
			var err error
			commit, err = ctx.Repo.GitRepo.GetCommit(sha)
			if err != nil {
				ctx.NotFoundOrServerError("Repo.GitRepo.GetCommit", git.IsErrNotExist, err)
				return nil, nil
			}
			commitCache[sha] = commit
		}

		// find parent commit
		if commit.ParentCount() > 0 {
			psha := commit.Parents[0].String()
			previousCommit, ok := commitCache[psha]
			if !ok {
				// Best effort: a parent that cannot be loaded simply yields no
				// "previous" link for this commit.
				previousCommit, _ = commit.Parent(0)
				if previousCommit != nil {
					commitCache[psha] = previousCommit
				}
			}
			// only store the parent commit if it has the file
			if previousCommit != nil {
				// Best effort: an error from HasFile is treated like "file absent".
				if hasFile, _ := previousCommit.HasFile(ctx.Repo.TreePath); hasFile {
					previousCommits[commit.ID.String()] = previousCommit.ID.String()
				}
			}
		}

		commits = append(commits, commit)
	}

	// populate commit email addresses to later look up avatars.
	for _, c := range user_model.ValidateCommitsWithEmails(commits) {
		commitNames[c.ID.String()] = c
	}

	return commitNames, previousCommits
}
// renderBlame converts blameParts into one blameRow per source line and stores
// the result in ctx.Data under "BlameRows", together with "EscapeStatus" (the
// combined escape status of all lines), "CommitCnt" (the number of blame parts
// that contain at least one line) and "LexerName" (the first non-empty lexer
// name reported by the highlighter).
//
// commitNames and previousCommits are the maps returned by processBlameParts;
// commitNames must hold an entry for every part that has lines.
func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames map[string]*user_model.UserCommit, previousCommits map[string]string) {
	repoLink := ctx.Repo.RepoLink
	treePath := ctx.Repo.TreePath

	// Determine the highlighting language from the git attributes of the file.
	// A failure to build the temporary index leaves the language empty.
	language := ""
	indexFilename, worktree, deleteTemporaryFile, err := ctx.Repo.GitRepo.ReadTreeToTemporaryIndex(ctx.Repo.CommitID)
	if err == nil {
		defer deleteTemporaryFile()

		filename2attribute2info, err := ctx.Repo.GitRepo.CheckAttribute(git.CheckAttributeOpts{
			CachedOnly: true,
			Attributes: []string{"linguist-language", "gitlab-language"},
			Filenames:  []string{treePath},
			IndexFile:  indexFilename,
			WorkTree:   worktree,
		})
		if err != nil {
			log.Error("Unable to load attributes for %-v:%s. Error: %v", ctx.Repo.Repository, treePath, err)
		}

		// "linguist-language" wins; "gitlab-language" is the fallback.
		language = filename2attribute2info[treePath]["linguist-language"]
		if language == "" || language == "unspecified" {
			language = filename2attribute2info[treePath]["gitlab-language"]
		}
		if language == "unspecified" {
			language = ""
		}
	}

	totalLines := 0
	for i := range blameParts {
		totalLines += len(blameParts[i].Lines)
	}
	rows := make([]*blameRow, 0, totalLines)

	// These values are identical for every row, so compute them once.
	fileName := fmt.Sprintf("%v", ctx.Data["FileName"])
	escapedTreePath := util.PathEscapeSegments(treePath)

	escapeStatus := &charset.EscapeStatus{}
	var lexerName string

	rowNumber := 0
	commitCnt := 0
	for _, part := range blameParts {
		commit := commitNames[part.Sha]
		previousSha := previousCommits[part.Sha]

		for index, line := range part.Lines {
			rowNumber++
			br := &blameRow{
				RowNumber: rowNumber,
			}

			// Commit metadata is attached only to the first line of a part.
			if index == 0 {
				// Count commit number
				commitCnt++

				// User avatar image
				commitSince := timeutil.TimeSinceUnix(timeutil.TimeStamp(commit.Author.When.Unix()), ctx.Locale)

				var avatar string
				if commit.User != nil {
					avatar = string(templates.Avatar(commit.User, 18, "mr-3"))
				} else {
					avatar = string(templates.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18, "mr-3"))
				}

				br.Avatar = gotemplate.HTML(avatar)
				br.RepoLink = repoLink
				br.PartSha = part.Sha
				br.PreviousSha = previousSha
				br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(previousSha), escapedTreePath)
				br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha))
				br.CommitMessage = commit.CommitMessage
				br.CommitSince = commitSince
			}

			// Every line, including the last one, is highlighted with a
			// trailing newline.
			line += "\n"

			highlighted, lexerNameForLine := highlight.Code(fileName, language, line)

			// set lexer name to the first detected lexer. this is certainly suboptimal and
			// we should instead highlight the whole file at once
			if lexerName == "" {
				lexerName = lexerNameForLine
			}

			br.EscapeStatus, highlighted = charset.EscapeControlHTML(highlighted, ctx.Locale)
			br.Code = gotemplate.HTML(highlighted)
			rows = append(rows, br)
			escapeStatus = escapeStatus.Or(br.EscapeStatus)
		}
	}

	ctx.Data["EscapeStatus"] = escapeStatus
	ctx.Data["BlameRows"] = rows
	ctx.Data["CommitCnt"] = commitCnt
	ctx.Data["LexerName"] = lexerName
}