Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit c22c181

Browse files
committed
Merge pull request #7 from alcortesm/blame
Blame
2 parents 5c8fff7 + c347e97 commit c22c181

17 files changed

+2101
-12
lines changed

blame/blame.go

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
// Package blame contains blaming functionality for files in the repo.
2+
//
3+
// Blaming a file is finding what commit was the last to modify each of
4+
// the lines in the file, therefore the output of a blaming operation is
5+
// usually a slice of commits, one commit per line in the file.
6+
//
7+
// This package also provides a pretty print function to output the
8+
// results of a blame in a similar format to the git-blame command.
9+
package blame
10+
11+
import (
12+
"bytes"
13+
"errors"
14+
"fmt"
15+
"strconv"
16+
"strings"
17+
"unicode/utf8"
18+
19+
"gopkg.in/src-d/go-git.v2"
20+
"gopkg.in/src-d/go-git.v2/core"
21+
"gopkg.in/src-d/go-git.v2/diff"
22+
"gopkg.in/src-d/go-git.v2/revlist"
23+
)
24+
25+
// Blame returns the last commit that modified each line of a file in
26+
// a repository.
27+
//
28+
// The file to blame is identified by the input arguments: repo, commit and path.
29+
// The output is a slice of commits, one for each line in the file.
30+
//
31+
// Blaming a file is a two step process:
32+
//
33+
// 1. Create a linear history of the commits affecting a file. We use
34+
// revlist.NewRevs for that.
35+
//
36+
// 2. Then build a graph with a node for every line in every file in
37+
// the history of the file.
38+
//
39+
// Each node (line) holds the commit where it was introduced or
40+
// last modified. To achieve that we use the FORWARD algorithm
41+
// described in Zimmermann, et al. "Mining Version Archives for
42+
// Co-changed Lines", in proceedings of the Mining Software
43+
// Repositories workshop, Shanghai, May 22-23, 2006.
44+
//
45+
// Each node is asigned a commit: Start by the nodes in the first
46+
// commit. Assign that commit as the creator of all its lines.
47+
//
48+
// Then jump to the nodes in the next commit, and calculate the diff
49+
// between the two files. Newly created lines get
50+
// assigned the new commit as its origin. Modified lines also get
51+
// this new commit. Untouched lines retain the old commit.
52+
//
53+
// All this work is done in the assignOrigin function which holds all
54+
// the internal relevant data in a "blame" struct, that is not
55+
// exported.
56+
//
57+
// TODO: ways to improve the efficiency of this function:
58+
//
59+
// 1. Improve revlist
60+
//
61+
// 2. Improve how to traverse the history (example a backward
62+
// traversal will be much more efficient)
63+
//
64+
// TODO: ways to improve the function in general:
65+
//
66+
// 1. Add memoization between revlist and assign.
67+
//
68+
// 2. It is using much more memory than needed, see the TODOs below.
69+
70+
type Blame struct {
71+
Repo string
72+
Path string
73+
Rev string
74+
Lines []*line
75+
}
76+
77+
func New(repo *git.Repository, path string, commit *git.Commit) (*Blame, error) {
78+
// init the internal blame struct
79+
b := new(blame)
80+
b.repo = repo
81+
b.fRev = commit
82+
b.path = path
83+
84+
// get all the file revisions
85+
if err := b.fillRevs(); err != nil {
86+
return nil, err
87+
}
88+
89+
// calculate the line tracking graph and fill in
90+
// file contents in data.
91+
if err := b.fillGraphAndData(); err != nil {
92+
return nil, err
93+
}
94+
95+
file, err := b.fRev.File(b.path)
96+
if err != nil {
97+
return nil, err
98+
}
99+
finalLines := file.Lines()
100+
101+
lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
102+
if err != nil {
103+
return nil, err
104+
}
105+
106+
return &Blame{
107+
Repo: repo.URL,
108+
Path: path,
109+
Rev: commit.Hash.String(),
110+
Lines: lines,
111+
}, nil
112+
}
113+
114+
// line is one line of a blamed file together with whoever is blamed
// for it.
type line struct {
	// author identifies who is blamed for the line.
	author string
	// text is the content of the line.
	text string
}

// newLine returns a pointer to a freshly allocated line holding the
// given author and text.
func newLine(author, text string) *line {
	l := new(line)
	l.author = author
	l.text = text
	return l
}
125+
126+
func newLines(contents []string, commits []*git.Commit) ([]*line, error) {
127+
if len(contents) != len(commits) {
128+
return nil, errors.New("contents and commits have different length")
129+
}
130+
result := make([]*line, 0, len(contents))
131+
for i := range contents {
132+
l := newLine(commits[i].Author.Email, contents[i])
133+
result = append(result, l)
134+
}
135+
return result, nil
136+
}
137+
138+
// this struct is internally used by the blame function to hold its
139+
// inputs, outputs and state.
140+
type blame struct {
141+
repo *git.Repository // the repo holding the history of the file to blame
142+
path string // the path of the file to blame
143+
fRev *git.Commit // the commit of the final revision of the file to blame
144+
revs revlist.Revs // the chain of revisions affecting the the file to blame
145+
data []string // the contents of the file across all its revisions
146+
graph [][]*git.Commit // the graph of the lines in the file across all the revisions TODO: not all commits are needed, only the current rev and the prev
147+
}
148+
149+
// calculte the history of a file "path", starting from commit "from", sorted by commit date.
150+
func (b *blame) fillRevs() error {
151+
var err error
152+
b.revs, err = revlist.NewRevs(b.repo, b.fRev, b.path)
153+
if err != nil {
154+
return err
155+
}
156+
return nil
157+
}
158+
159+
// build graph of a file from its revision history
160+
func (b *blame) fillGraphAndData() error {
161+
b.graph = make([][]*git.Commit, len(b.revs))
162+
b.data = make([]string, len(b.revs)) // file contents in all the revisions
163+
// for every revision of the file, starting with the first
164+
// one...
165+
for i, rev := range b.revs {
166+
// get the contents of the file
167+
file, err := rev.File(b.path)
168+
if err != nil {
169+
return nil
170+
}
171+
b.data[i] = file.Contents()
172+
nLines := git.CountLines(b.data[i])
173+
// create a node for each line
174+
b.graph[i] = make([]*git.Commit, nLines)
175+
// assign a commit to each node
176+
// if this is the first revision, then the node is assigned to
177+
// this first commit.
178+
if i == 0 {
179+
for j := 0; j < nLines; j++ {
180+
b.graph[i][j] = (*git.Commit)(b.revs[i])
181+
}
182+
} else {
183+
// if this is not the first commit, then assign to the old
184+
// commit or to the new one, depending on what the diff
185+
// says.
186+
b.assignOrigin(i, i-1)
187+
}
188+
}
189+
return nil
190+
}
191+
192+
// sliceGraph returns a slice of commits (one per line) for a particular
193+
// revision of a file (0=first revision).
194+
func (b *blame) sliceGraph(i int) []*git.Commit {
195+
fVs := b.graph[i]
196+
result := make([]*git.Commit, 0, len(fVs))
197+
for _, v := range fVs {
198+
c := git.Commit(*v)
199+
result = append(result, &c)
200+
}
201+
return result
202+
}
203+
204+
// Assigns origin to vertexes in current (c) rev from data in its previous (p)
205+
// revision
206+
func (b *blame) assignOrigin(c, p int) {
207+
// assign origin based on diff info
208+
hunks := diff.Do(b.data[p], b.data[c])
209+
sl := -1 // source line
210+
dl := -1 // destination line
211+
for h := range hunks {
212+
hLines := git.CountLines(hunks[h].Text)
213+
for hl := 0; hl < hLines; hl++ {
214+
switch {
215+
case hunks[h].Type == 0:
216+
sl++
217+
dl++
218+
b.graph[c][dl] = b.graph[p][sl]
219+
case hunks[h].Type == 1:
220+
dl++
221+
b.graph[c][dl] = (*git.Commit)(b.revs[c])
222+
case hunks[h].Type == -1:
223+
sl++
224+
default:
225+
panic("unreachable")
226+
}
227+
}
228+
}
229+
}
230+
231+
// GoString prints the results of a Blame using git-blame's style.
232+
func (b *blame) GoString() string {
233+
var buf bytes.Buffer
234+
235+
file, err := b.fRev.File(b.path)
236+
if err != nil {
237+
panic("PrettyPrint: internal error in repo.Data")
238+
}
239+
contents := file.Contents()
240+
241+
lines := strings.Split(contents, "\n")
242+
// max line number length
243+
mlnl := len(fmt.Sprintf("%s", strconv.Itoa(len(lines))))
244+
// max author length
245+
mal := b.maxAuthorLength()
246+
format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
247+
mal, mlnl)
248+
249+
fVs := b.graph[len(b.graph)-1]
250+
for ln, v := range fVs {
251+
fmt.Fprintf(&buf, format, v.Hash.String()[:8],
252+
prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
253+
}
254+
return buf.String()
255+
}
256+
257+
// utility function to pretty print the author.
258+
func prettyPrintAuthor(c *git.Commit) string {
259+
return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
260+
}
261+
262+
// utility function to calculate the number of runes needed
263+
// to print the longest author name in the blame of a file.
264+
func (b *blame) maxAuthorLength() int {
265+
memo := make(map[core.Hash]struct{}, len(b.graph)-1)
266+
fVs := b.graph[len(b.graph)-1]
267+
m := 0
268+
for ln := range fVs {
269+
if _, ok := memo[fVs[ln].Hash]; ok {
270+
continue
271+
}
272+
memo[fVs[ln].Hash] = struct{}{}
273+
m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
274+
}
275+
return m
276+
}
277+
278+
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

blame/blame2humantest.bash

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/bash
#
# Prints, in Go syntax, a test fixture entry describing the blame of a
# file: the repository URL, the branch, the commit, the path and the
# full hash of the commit blamed for every line, with consecutive lines
# blamed on the same commit collapsed into a single repeat(hash, count).
#
# Usage: blame2humantest.bash [commit] path
#
# When commit is omitted, the current HEAD is used.

set -e

repo=$(git remote show origin | grep Fetch | cut -d' ' -f5)
branch="master"
if [ "$#" -eq 1 ] ; then
    commit=$(git rev-parse HEAD)
    path=$1
elif [ "$#" -eq 2 ] ; then
    commit=$1
    path=$2
else
    echo "bad number of parameters" >&2
    echo >&2
    echo " try with: [commit] path" >&2
    # a usage error must not look like success to the caller
    exit 1
fi

# Blame the file as it was in the requested commit, so the hashes match
# the commit printed in the fixture; the first column of every output
# line is the abbreviated hash of the blamed commit.
blames=$(git blame --root "$commit" -- "$path" | cut -d' ' -f1)

# expand every abbreviated hash into its full form
declare -a blame
for shortBlame in $blames ; do
    blame+=("$(git rev-parse --verify "${shortBlame}^{commit}")")
done

# some remotes have the .git, other don't,
# repoDot makes sure all have
repoDot="${repo%.git}.git"

echo -e "\t{\"${repoDot}\", \"${branch}\", \"${commit}\", \"${path}\", concat(&[]string{},"
prev=""
count=1
for hash in "${blame[@]}" ; do
    if [ "${prev}" == "" ] ; then
        prev=$hash
    elif [ "${prev}" == "${hash}" ] ; then
        count=$((count + 1))
    else
        echo -e "\t\trepeat(\"${prev}\", $count),"
        count=1
        prev=$hash
    fi
done
# flush the last run; an empty file has no run to print
if [ -n "${prev}" ] ; then
    echo -e "\t\trepeat(\"${prev}\", $count),"
fi
echo -e "\t)},"

0 commit comments

Comments
 (0)