2018-11-27 22:52:20 +01:00
|
|
|
package git
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
"unicode/utf8"
|
|
|
|
|
|
|
|
"gopkg.in/src-d/go-git.v4/plumbing"
|
|
|
|
"gopkg.in/src-d/go-git.v4/plumbing/object"
|
|
|
|
"gopkg.in/src-d/go-git.v4/utils/diff"
|
|
|
|
)
|
|
|
|
|
|
|
|
// BlameResult represents the result of a Blame operation.
|
|
|
|
type BlameResult struct {
|
|
|
|
// Path is the path of the File that we're blaming.
|
|
|
|
Path string
|
|
|
|
// Rev (Revision) is the hash of the specified Commit used to generate this result.
|
|
|
|
Rev plumbing.Hash
|
|
|
|
// Lines contains every line with its authorship.
|
|
|
|
Lines []*Line
|
|
|
|
}
|
|
|
|
|
|
|
|
// Blame returns a BlameResult with the information about the last author of
|
|
|
|
// each line from file `path` at commit `c`.
|
|
|
|
func Blame(c *object.Commit, path string) (*BlameResult, error) {
|
|
|
|
// The file to blame is identified by the input arguments:
|
|
|
|
// commit and path. commit is a Commit object obtained from a Repository. Path
|
|
|
|
// represents a path to a specific file contained into the repository.
|
|
|
|
//
|
|
|
|
// Blaming a file is a two step process:
|
|
|
|
//
|
|
|
|
// 1. Create a linear history of the commits affecting a file. We use
|
|
|
|
// revlist.New for that.
|
|
|
|
//
|
|
|
|
// 2. Then build a graph with a node for every line in every file in
|
|
|
|
// the history of the file.
|
|
|
|
//
|
|
|
|
// Each node is assigned a commit: Start by the nodes in the first
|
|
|
|
// commit. Assign that commit as the creator of all its lines.
|
|
|
|
//
|
|
|
|
// Then jump to the nodes in the next commit, and calculate the diff
|
|
|
|
// between the two files. Newly created lines get
|
|
|
|
// assigned the new commit as its origin. Modified lines also get
|
|
|
|
// this new commit. Untouched lines retain the old commit.
|
|
|
|
//
|
|
|
|
// All this work is done in the assignOrigin function which holds all
|
|
|
|
// the internal relevant data in a "blame" struct, that is not
|
|
|
|
// exported.
|
|
|
|
//
|
|
|
|
// TODO: ways to improve the efficiency of this function:
|
|
|
|
// 1. Improve revlist
|
|
|
|
// 2. Improve how to traverse the history (example a backward traversal will
|
|
|
|
// be much more efficient)
|
|
|
|
//
|
|
|
|
// TODO: ways to improve the function in general:
|
|
|
|
// 1. Add memoization between revlist and assign.
|
|
|
|
// 2. It is using much more memory than needed, see the TODOs below.
|
|
|
|
|
|
|
|
b := new(blame)
|
|
|
|
b.fRev = c
|
|
|
|
b.path = path
|
|
|
|
|
|
|
|
// get all the file revisions
|
|
|
|
if err := b.fillRevs(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// calculate the line tracking graph and fill in
|
|
|
|
// file contents in data.
|
|
|
|
if err := b.fillGraphAndData(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
file, err := b.fRev.File(b.path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
finalLines, err := file.Lines()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Each node (line) holds the commit where it was introduced or
|
|
|
|
// last modified. To achieve that we use the FORWARD algorithm
|
|
|
|
// described in Zimmermann, et al. "Mining Version Archives for
|
|
|
|
// Co-changed Lines", in proceedings of the Mining Software
|
|
|
|
// Repositories workshop, Shanghai, May 22-23, 2006.
|
|
|
|
lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &BlameResult{
|
|
|
|
Path: path,
|
|
|
|
Rev: c.Hash,
|
|
|
|
Lines: lines,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line values represent the contents and author of a line in BlamedResult values.
|
|
|
|
type Line struct {
|
|
|
|
// Author is the email address of the last author that modified the line.
|
|
|
|
Author string
|
|
|
|
// Text is the original text of the line.
|
|
|
|
Text string
|
|
|
|
// Date is when the original text of the line was introduced
|
|
|
|
Date time.Time
|
|
|
|
// Hash is the commit hash that introduced the original line
|
|
|
|
Hash plumbing.Hash
|
|
|
|
}
|
|
|
|
|
|
|
|
func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line {
|
|
|
|
return &Line{
|
|
|
|
Author: author,
|
|
|
|
Text: text,
|
|
|
|
Hash: hash,
|
|
|
|
Date: date,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newLines(contents []string, commits []*object.Commit) ([]*Line, error) {
|
|
|
|
lcontents := len(contents)
|
|
|
|
lcommits := len(commits)
|
|
|
|
|
|
|
|
if lcontents != lcommits {
|
|
|
|
if lcontents == lcommits-1 && contents[lcontents-1] != "\n" {
|
|
|
|
contents = append(contents, "\n")
|
|
|
|
} else {
|
|
|
|
return nil, errors.New("contents and commits have different length")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
result := make([]*Line, 0, lcontents)
|
|
|
|
for i := range contents {
|
|
|
|
result = append(result, newLine(
|
|
|
|
commits[i].Author.Email, contents[i],
|
|
|
|
commits[i].Author.When, commits[i].Hash,
|
|
|
|
))
|
|
|
|
}
|
|
|
|
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// this struct is internally used by the blame function to hold its
|
|
|
|
// inputs, outputs and state.
|
|
|
|
type blame struct {
|
|
|
|
// the path of the file to blame
|
|
|
|
path string
|
|
|
|
// the commit of the final revision of the file to blame
|
|
|
|
fRev *object.Commit
|
|
|
|
// the chain of revisions affecting the the file to blame
|
|
|
|
revs []*object.Commit
|
|
|
|
// the contents of the file across all its revisions
|
|
|
|
data []string
|
|
|
|
// the graph of the lines in the file across all the revisions
|
|
|
|
graph [][]*object.Commit
|
|
|
|
}
|
|
|
|
|
|
|
|
// calculate the history of a file "path", starting from commit "from", sorted by commit date.
|
|
|
|
func (b *blame) fillRevs() error {
|
|
|
|
var err error
|
|
|
|
|
|
|
|
b.revs, err = references(b.fRev, b.path)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// build graph of a file from its revision history
|
|
|
|
func (b *blame) fillGraphAndData() error {
|
|
|
|
//TODO: not all commits are needed, only the current rev and the prev
|
|
|
|
b.graph = make([][]*object.Commit, len(b.revs))
|
|
|
|
b.data = make([]string, len(b.revs)) // file contents in all the revisions
|
|
|
|
// for every revision of the file, starting with the first
|
|
|
|
// one...
|
|
|
|
for i, rev := range b.revs {
|
|
|
|
// get the contents of the file
|
|
|
|
file, err := rev.File(b.path)
|
|
|
|
if err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
b.data[i], err = file.Contents()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
nLines := countLines(b.data[i])
|
|
|
|
// create a node for each line
|
|
|
|
b.graph[i] = make([]*object.Commit, nLines)
|
|
|
|
// assign a commit to each node
|
|
|
|
// if this is the first revision, then the node is assigned to
|
|
|
|
// this first commit.
|
|
|
|
if i == 0 {
|
|
|
|
for j := 0; j < nLines; j++ {
|
2019-07-31 18:45:42 +02:00
|
|
|
b.graph[i][j] = b.revs[i]
|
2018-11-27 22:52:20 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// if this is not the first commit, then assign to the old
|
|
|
|
// commit or to the new one, depending on what the diff
|
|
|
|
// says.
|
|
|
|
b.assignOrigin(i, i-1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// sliceGraph returns a slice of commits (one per line) for a particular
|
|
|
|
// revision of a file (0=first revision).
|
|
|
|
func (b *blame) sliceGraph(i int) []*object.Commit {
|
|
|
|
fVs := b.graph[i]
|
|
|
|
result := make([]*object.Commit, 0, len(fVs))
|
|
|
|
for _, v := range fVs {
|
2019-07-31 18:45:42 +02:00
|
|
|
c := *v
|
2018-11-27 22:52:20 +01:00
|
|
|
result = append(result, &c)
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assigns origin to vertexes in current (c) rev from data in its previous (p)
|
|
|
|
// revision
|
|
|
|
func (b *blame) assignOrigin(c, p int) {
|
|
|
|
// assign origin based on diff info
|
|
|
|
hunks := diff.Do(b.data[p], b.data[c])
|
|
|
|
sl := -1 // source line
|
|
|
|
dl := -1 // destination line
|
|
|
|
for h := range hunks {
|
|
|
|
hLines := countLines(hunks[h].Text)
|
|
|
|
for hl := 0; hl < hLines; hl++ {
|
|
|
|
switch {
|
|
|
|
case hunks[h].Type == 0:
|
|
|
|
sl++
|
|
|
|
dl++
|
|
|
|
b.graph[c][dl] = b.graph[p][sl]
|
|
|
|
case hunks[h].Type == 1:
|
|
|
|
dl++
|
2019-07-31 18:45:42 +02:00
|
|
|
b.graph[c][dl] = b.revs[c]
|
2018-11-27 22:52:20 +01:00
|
|
|
case hunks[h].Type == -1:
|
|
|
|
sl++
|
|
|
|
default:
|
|
|
|
panic("unreachable")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GoString prints the results of a Blame using git-blame's style.
|
|
|
|
func (b *blame) GoString() string {
|
|
|
|
var buf bytes.Buffer
|
|
|
|
|
|
|
|
file, err := b.fRev.File(b.path)
|
|
|
|
if err != nil {
|
|
|
|
panic("PrettyPrint: internal error in repo.Data")
|
|
|
|
}
|
|
|
|
contents, err := file.Contents()
|
|
|
|
if err != nil {
|
|
|
|
panic("PrettyPrint: internal error in repo.Data")
|
|
|
|
}
|
|
|
|
|
|
|
|
lines := strings.Split(contents, "\n")
|
|
|
|
// max line number length
|
|
|
|
mlnl := len(strconv.Itoa(len(lines)))
|
|
|
|
// max author length
|
|
|
|
mal := b.maxAuthorLength()
|
|
|
|
format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
|
|
|
|
mal, mlnl)
|
|
|
|
|
|
|
|
fVs := b.graph[len(b.graph)-1]
|
|
|
|
for ln, v := range fVs {
|
|
|
|
fmt.Fprintf(&buf, format, v.Hash.String()[:8],
|
|
|
|
prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
|
|
|
|
}
|
|
|
|
return buf.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
// utility function to pretty print the author.
|
|
|
|
func prettyPrintAuthor(c *object.Commit) string {
|
|
|
|
return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
|
|
|
|
}
|
|
|
|
|
|
|
|
// utility function to calculate the number of runes needed
|
|
|
|
// to print the longest author name in the blame of a file.
|
|
|
|
func (b *blame) maxAuthorLength() int {
|
|
|
|
memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1)
|
|
|
|
fVs := b.graph[len(b.graph)-1]
|
|
|
|
m := 0
|
|
|
|
for ln := range fVs {
|
|
|
|
if _, ok := memo[fVs[ln].Hash]; ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
memo[fVs[ln].Hash] = struct{}{}
|
|
|
|
m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
|
|
|
|
}
|
|
|
|
return m
|
|
|
|
}
|
|
|
|
|
|
|
|
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|