package git import ( "bytes" "errors" "fmt" "strconv" "strings" "time" "unicode/utf8" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/utils/diff" ) // BlameResult represents the result of a Blame operation. type BlameResult struct { // Path is the path of the File that we're blaming. Path string // Rev (Revision) is the hash of the specified Commit used to generate this result. Rev plumbing.Hash // Lines contains every line with its authorship. Lines []*Line } // Blame returns a BlameResult with the information about the last author of // each line from file `path` at commit `c`. func Blame(c *object.Commit, path string) (*BlameResult, error) { // The file to blame is identified by the input arguments: // commit and path. commit is a Commit object obtained from a Repository. Path // represents a path to a specific file contained into the repository. // // Blaming a file is a two step process: // // 1. Create a linear history of the commits affecting a file. We use // revlist.New for that. // // 2. Then build a graph with a node for every line in every file in // the history of the file. // // Each node is assigned a commit: Start by the nodes in the first // commit. Assign that commit as the creator of all its lines. // // Then jump to the nodes in the next commit, and calculate the diff // between the two files. Newly created lines get // assigned the new commit as its origin. Modified lines also get // this new commit. Untouched lines retain the old commit. // // All this work is done in the assignOrigin function which holds all // the internal relevant data in a "blame" struct, that is not // exported. // // TODO: ways to improve the efficiency of this function: // 1. Improve revlist // 2. Improve how to traverse the history (example a backward traversal will // be much more efficient) // // TODO: ways to improve the function in general: // 1. Add memoization between revlist and assign. // 2. It is using much more memory than needed, see the TODOs below. b := new(blame) b.fRev = c b.path = path // get all the file revisions if err := b.fillRevs(); err != nil { return nil, err } // calculate the line tracking graph and fill in // file contents in data. if err := b.fillGraphAndData(); err != nil { return nil, err } file, err := b.fRev.File(b.path) if err != nil { return nil, err } finalLines, err := file.Lines() if err != nil { return nil, err } // Each node (line) holds the commit where it was introduced or // last modified. To achieve that we use the FORWARD algorithm // described in Zimmermann, et al. "Mining Version Archives for // Co-changed Lines", in proceedings of the Mining Software // Repositories workshop, Shanghai, May 22-23, 2006. lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1)) if err != nil { return nil, err } return &BlameResult{ Path: path, Rev: c.Hash, Lines: lines, }, nil } // Line values represent the contents and author of a line in BlamedResult values. type Line struct { // Author is the email address of the last author that modified the line. Author string // Text is the original text of the line. Text string // Date is when the original text of the line was introduced Date time.Time // Hash is the commit hash that introduced the original line Hash plumbing.Hash } func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line { return &Line{ Author: author, Text: text, Hash: hash, Date: date, } } func newLines(contents []string, commits []*object.Commit) ([]*Line, error) { lcontents := len(contents) lcommits := len(commits) if lcontents != lcommits { if lcontents == lcommits-1 && contents[lcontents-1] != "\n" { contents = append(contents, "\n") } else { return nil, errors.New("contents and commits have different length") } } result := make([]*Line, 0, lcontents) for i := range contents { result = append(result, newLine( commits[i].Author.Email, contents[i], commits[i].Author.When, commits[i].Hash, )) } return result, nil } // this struct is internally used by the blame function to hold its // inputs, outputs and state. type blame struct { // the path of the file to blame path string // the commit of the final revision of the file to blame fRev *object.Commit // the chain of revisions affecting the the file to blame revs []*object.Commit // the contents of the file across all its revisions data []string // the graph of the lines in the file across all the revisions graph [][]*object.Commit } // calculate the history of a file "path", starting from commit "from", sorted by commit date. func (b *blame) fillRevs() error { var err error b.revs, err = references(b.fRev, b.path) return err } // build graph of a file from its revision history func (b *blame) fillGraphAndData() error { //TODO: not all commits are needed, only the current rev and the prev b.graph = make([][]*object.Commit, len(b.revs)) b.data = make([]string, len(b.revs)) // file contents in all the revisions // for every revision of the file, starting with the first // one... for i, rev := range b.revs { // get the contents of the file file, err := rev.File(b.path) if err != nil { return nil } b.data[i], err = file.Contents() if err != nil { return err } nLines := countLines(b.data[i]) // create a node for each line b.graph[i] = make([]*object.Commit, nLines) // assign a commit to each node // if this is the first revision, then the node is assigned to // this first commit. if i == 0 { for j := 0; j < nLines; j++ { b.graph[i][j] = b.revs[i] } } else { // if this is not the first commit, then assign to the old // commit or to the new one, depending on what the diff // says. b.assignOrigin(i, i-1) } } return nil } // sliceGraph returns a slice of commits (one per line) for a particular // revision of a file (0=first revision). func (b *blame) sliceGraph(i int) []*object.Commit { fVs := b.graph[i] result := make([]*object.Commit, 0, len(fVs)) for _, v := range fVs { c := *v result = append(result, &c) } return result } // Assigns origin to vertexes in current (c) rev from data in its previous (p) // revision func (b *blame) assignOrigin(c, p int) { // assign origin based on diff info hunks := diff.Do(b.data[p], b.data[c]) sl := -1 // source line dl := -1 // destination line for h := range hunks { hLines := countLines(hunks[h].Text) for hl := 0; hl < hLines; hl++ { switch { case hunks[h].Type == 0: sl++ dl++ b.graph[c][dl] = b.graph[p][sl] case hunks[h].Type == 1: dl++ b.graph[c][dl] = b.revs[c] case hunks[h].Type == -1: sl++ default: panic("unreachable") } } } } // GoString prints the results of a Blame using git-blame's style. func (b *blame) GoString() string { var buf bytes.Buffer file, err := b.fRev.File(b.path) if err != nil { panic("PrettyPrint: internal error in repo.Data") } contents, err := file.Contents() if err != nil { panic("PrettyPrint: internal error in repo.Data") } lines := strings.Split(contents, "\n") // max line number length mlnl := len(strconv.Itoa(len(lines))) // max author length mal := b.maxAuthorLength() format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n", mal, mlnl) fVs := b.graph[len(b.graph)-1] for ln, v := range fVs { fmt.Fprintf(&buf, format, v.Hash.String()[:8], prettyPrintAuthor(fVs[ln]), ln+1, lines[ln]) } return buf.String() } // utility function to pretty print the author. func prettyPrintAuthor(c *object.Commit) string { return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02")) } // utility function to calculate the number of runes needed // to print the longest author name in the blame of a file. func (b *blame) maxAuthorLength() int { memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1) fVs := b.graph[len(b.graph)-1] m := 0 for ln := range fVs { if _, ok := memo[fVs[ln].Hash]; ok { continue } memo[fVs[ln].Hash] = struct{}{} m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln]))) } return m } func max(a, b int) int { if a > b { return a } return b }