git_blame is a scoreboard

This commit is contained in:
Ben Straub 2013-09-20 15:51:22 -07:00
parent ef03d040cf
commit 0a0f0558a4
4 changed files with 132 additions and 152 deletions

View File

@ -210,10 +210,10 @@ static git_blame_hunk *split_hunk_in_vector(
* To allow quick access to the contents of nth line in the
* final image, prepare an index in the scoreboard.
*/
static int prepare_lines(git_blame__scoreboard *sb)
static int prepare_lines(git_blame *blame)
{
const char *buf = sb->final_buf;
git_off_t len = sb->final_buf_size;
const char *buf = blame->final_buf;
git_off_t len = blame->final_buf_size;
int num = 0, incomplete = 0, bol = 1;
if (len && buf[len-1] != '\n')
@ -227,8 +227,8 @@ static int prepare_lines(git_blame__scoreboard *sb)
bol = 1;
}
}
sb->num_lines = num + incomplete;
return sb->num_lines;
blame->num_lines = num + incomplete;
return blame->num_lines;
}
static git_blame_hunk* hunk_from_entry(git_blame__entry *e)
@ -244,21 +244,20 @@ static int walk_and_mark(git_blame *blame)
{
int error;
git_blame__scoreboard sb = {0};
git_blame__entry *ent = NULL;
git_blob *blob = NULL;
git_blame__origin *o;
if ((error = git_commit_lookup(&sb.final, blame->repository, &blame->options.newest_commit)) < 0 ||
(error = git_object_lookup_bypath((git_object**)&blob, (git_object*)sb.final, blame->path, GIT_OBJ_BLOB)) < 0)
if ((error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit)) < 0 ||
(error = git_object_lookup_bypath((git_object**)&blob, (git_object*)blame->final, blame->path, GIT_OBJ_BLOB)) < 0)
goto cleanup;
sb.final_buf = git_blob_rawcontent(blob);
sb.final_buf_size = git_blob_rawsize(blob);
if ((error = get_origin(&o, &sb, sb.final, blame->path)) < 0)
blame->final_buf = git_blob_rawcontent(blob);
blame->final_buf_size = git_blob_rawsize(blob);
if ((error = get_origin(&o, blame, blame->final, blame->path)) < 0)
goto cleanup;
ent = git__calloc(1, sizeof(*ent));
ent->num_lines = prepare_lines(&sb);
ent->num_lines = prepare_lines(blame);
ent->lno = blame->options.min_line - 1;
ent->num_lines = ent->num_lines - blame->options.min_line + 1;
if (blame->options.max_line > 0) {
@ -267,15 +266,14 @@ static int walk_and_mark(git_blame *blame)
ent->s_lno = ent->lno;
ent->suspect = o;
sb.ent = ent;
sb.path = blame->path;
sb.blame = blame;
blame->ent = ent;
blame->path = blame->path;
assign_blame(&sb, blame->options.flags);
coalesce(&sb);
assign_blame(blame, blame->options.flags);
coalesce(blame);
cleanup:
for (ent = sb.ent; ent; ) {
for (ent = blame->ent; ent; ) {
git_blame__entry *e = ent->next;
git_blame__origin *o = ent->suspect;

View File

@ -8,6 +8,61 @@
#include "array.h"
#include "git2/oid.h"
/*
 * One blob in a commit that is being suspected: a commit, a path string
 * and an associated blob, shared between entries by reference counting.
 */
typedef struct git_blame__origin {
/* reference count; origin_incref()/origin_decref() are the helpers used
 * by callers to take and drop a reference.
 */
int refcnt;
/* pass_blame() stores a parent origin here after taking a reference
 * on it.
 */
struct git_blame__origin *previous;
/* commit this origin refers to; get_origin() matches existing origins by
 * comparing this pointer together with the path.
 */
git_commit *commit;
/* blob for this origin; may still be NULL, in which case
 * pass_whole_blame() looks it up on demand.
 */
git_blob *blob;
/* path string stored inline as a flexible array member; compared with
 * strcmp() when matching origins.
 */
char path[];
} git_blame__origin;
/*
 * Each group of lines is described by a git_blame__entry; it can be split
 * as we pass blame to the parents. They form a linked list headed by the
 * `ent` field of struct git_blame, sorted by the target line number.
 */
typedef struct git_blame__entry {
/* neighbouring entries in the linked list; add_blame_entry() updates
 * these links when it inserts an entry in `lno` order.
 */
struct git_blame__entry *prev;
struct git_blame__entry *next;
/* the first line of this group in the final image;
 * internally all line numbers are 0 based.
 */
int lno;
/* how many lines this group has */
int num_lines;
/* the commit that introduced this group into the final image */
git_blame__origin *suspect;
/* true if the suspect is truly guilty; false while we have not
 * checked if the group came from one of its parents.
 */
char guilty;
/* true if the entry has been scanned for copies in the current parent
 */
char scanned;
/* the line number of the first line of this group in the
 * suspect's file; internally all line numbers are 0 based.
 */
int s_lno;
/* how significant this entry is -- cached to avoid
 * scanning the lines over and over.
 */
unsigned score;
/* Whether this entry has been tracked to a boundary commit.
 * assign_blame() sets it when the suspect's commit id equals
 * options.oldest_commit.
 */
bool is_boundary;
} git_blame__entry;
struct git_blame {
const char *path;
git_repository *repository;
@ -20,6 +75,13 @@ struct git_blame {
size_t current_diff_line;
git_blame_hunk *current_hunk;
/* Scoreboard fields */
git_commit *final;
git_blame__entry *ent;
int num_lines;
const char *final_buf;
git_off_t final_buf_size;
};
git_blame *git_blame__alloc(

View File

@ -7,15 +7,16 @@
#include "blame_git.h"
#include "commit.h"
#include "xdiff/xinclude.h"
/*
* Locate an existing origin or create a new one.
*/
int get_origin(git_blame__origin **out, git_blame__scoreboard *sb, git_commit *commit, const char *path)
int get_origin(git_blame__origin **out, git_blame *blame, git_commit *commit, const char *path)
{
git_blame__entry *e;
for (e = sb->ent; e; e = e->next) {
for (e = blame->ent; e; e = e->next) {
if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
*out = origin_incref(e->suspect);
}
@ -50,7 +51,7 @@ int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
}
struct blame_chunk_cb_data {
git_blame__scoreboard *sb;
git_blame *blame;
git_blame__origin *target;
git_blame__origin *parent;
long tlno;
@ -66,13 +67,13 @@ static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
return 0 == strcmp(a->path, b->path);
}
/* Find the line number of the last line the target is suspected for */
static int find_last_in_target(git_blame__scoreboard *sb, git_blame__origin *target)
/* find the line number of the last line the target is suspected for */
static int find_last_in_target(git_blame *blame, git_blame__origin *target)
{
git_blame__entry *e;
int last_in_target = -1;
for (e=sb->ent; e; e=e->next) {
for (e=blame->ent; e; e=e->next) {
if (e->guilty || !same_suspect(e->suspect, target))
continue;
if (last_in_target < e->s_lno + e->num_lines)
@ -138,13 +139,13 @@ static void split_overlap(git_blame__entry *split, git_blame__entry *e,
* Link in a new blame entry to the scoreboard. Entries that cover the same
* line range have been removed from the scoreboard previously.
*/
static void add_blame_entry(git_blame__scoreboard *sb, git_blame__entry *e)
static void add_blame_entry(git_blame *blame, git_blame__entry *e)
{
git_blame__entry *ent, *prev = NULL;
origin_incref(e->suspect);
for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
prev = ent;
/* prev, if not NULL, is the last one that is below e */
@ -153,8 +154,8 @@ static void add_blame_entry(git_blame__scoreboard *sb, git_blame__entry *e)
e->next = prev->next;
prev->next = e;
} else {
e->next = sb->ent;
sb->ent = e;
e->next = blame->ent;
blame->ent = e;
}
if (e->next)
e->next->prev = e;
@ -183,7 +184,7 @@ static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
* split_overlap() divided an existing blame e into up to three parts in split.
* Adjust the linked list of blames in the scoreboard to reflect the split.
*/
static void split_blame(git_blame__scoreboard *sb, git_blame__entry *split, git_blame__entry *e)
static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
{
git_blame__entry *new_entry;
@ -194,12 +195,12 @@ static void split_blame(git_blame__scoreboard *sb, git_blame__entry *split, git_
/* The last part -- me */
new_entry = git__malloc(sizeof(*new_entry));
memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
add_blame_entry(sb, new_entry);
add_blame_entry(blame, new_entry);
/* ... and the middle part -- parent */
new_entry = git__malloc(sizeof(*new_entry));
memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
add_blame_entry(sb, new_entry);
add_blame_entry(blame, new_entry);
} else if (!split[0].suspect && !split[2].suspect) {
/*
* The parent covers the entire area; reuse storage for e and replace it
@ -211,13 +212,13 @@ static void split_blame(git_blame__scoreboard *sb, git_blame__entry *split, git_
dup_entry(e, &split[0]);
new_entry = git__malloc(sizeof(*new_entry));
memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
add_blame_entry(sb, new_entry);
add_blame_entry(blame, new_entry);
} else {
/* parent and then me */
dup_entry(e, &split[1]);
new_entry = git__malloc(sizeof(*new_entry));
memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
add_blame_entry(sb, new_entry);
add_blame_entry(blame, new_entry);
}
}
@ -236,13 +237,13 @@ static void decref_split(git_blame__entry *split)
* Helper for blame_chunk(). blame_entry e is known to overlap with the patch
* hunk; split it and pass blame to the parent.
*/
static void blame_overlap(git_blame__scoreboard *sb, git_blame__entry *e, int tlno, int plno, int same, git_blame__origin *parent)
static void blame_overlap(git_blame *blame, git_blame__entry *e, int tlno, int plno, int same, git_blame__origin *parent)
{
git_blame__entry split[3] = {{0}};
split_overlap(split, e, tlno, plno, same, parent);
if (split[1].suspect)
split_blame(sb, split, e);
split_blame(blame, split, e);
decref_split(split);
}
@ -251,17 +252,17 @@ static void blame_overlap(git_blame__scoreboard *sb, git_blame__entry *e, int tl
* e and its parent. Find and split the overlap, and pass blame to the
* overlapping part to the parent.
*/
static void blame_chunk(git_blame__scoreboard *sb, int tlno, int plno, int same, git_blame__origin *target, git_blame__origin *parent)
static void blame_chunk(git_blame *blame, int tlno, int plno, int same, git_blame__origin *target, git_blame__origin *parent)
{
git_blame__entry *e;
for (e = sb->ent; e; e = e->next) {
for (e = blame->ent; e; e = e->next) {
if (e->guilty || !same_suspect(e->suspect, target))
continue;
if (same <= e->s_lno)
continue;
if (tlno < e->s_lno + e->num_lines) {
blame_overlap(sb, e, tlno, plno, same, parent);
blame_overlap(blame, e, tlno, plno, same, parent);
}
}
}
@ -269,7 +270,7 @@ static void blame_chunk(git_blame__scoreboard *sb, int tlno, int plno, int same,
static void blame_chunk_cb(long start_a, long count_a, long start_b, long count_b, void *data)
{
struct blame_chunk_cb_data *d = data;
blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent);
blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent);
d->plno = start_a + count_a;
d->tlno = start_b + count_b;
}
@ -341,15 +342,15 @@ static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
}
}
static int pass_blame_to_parent(git_blame__scoreboard *sb,
static int pass_blame_to_parent(git_blame *blame,
git_blame__origin *target,
git_blame__origin *parent)
{
int last_in_target;
mmfile_t file_p, file_o;
struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 };
struct blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
last_in_target = find_last_in_target(sb, target);
last_in_target = find_last_in_target(blame, target);
if (last_in_target < 0)
return 1; /* nothing remains for this target */
@ -358,7 +359,7 @@ static int pass_blame_to_parent(git_blame__scoreboard *sb,
diff_hunks(&file_p, &file_o, &d);
/* The rest (i.e. anything after tlno) is the same as the parent */
blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent);
return 0;
}
@ -369,7 +370,7 @@ static int paths_on_dup(void **old, void *new)
git__free(new);
return -1;
}
static git_blame__origin* find_origin(git_blame__scoreboard *sb, git_commit *parent,
static git_blame__origin* find_origin(git_blame *blame, git_commit *parent,
git_blame__origin *origin)
{
git_blame__origin *porigin = NULL;
@ -387,14 +388,14 @@ static git_blame__origin* find_origin(git_blame__scoreboard *sb, git_commit *par
diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
/* Check to see if files we're interested in have changed */
diffopts.pathspec.count = sb->blame->paths.length;
diffopts.pathspec.strings = (char**)sb->blame->paths.contents;
if (0 != git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts))
diffopts.pathspec.count = blame->paths.length;
diffopts.pathspec.strings = (char**)blame->paths.contents;
if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
goto cleanup;
if (!git_diff_num_deltas(difflist)) {
/* No changes; copy data */
get_origin(&porigin, sb, parent, origin->path);
get_origin(&porigin, blame, parent, origin->path);
} else {
git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
int i;
@ -402,7 +403,7 @@ static git_blame__origin* find_origin(git_blame__scoreboard *sb, git_commit *par
/* Generate a full diff between the two trees */
git_diff_list_free(difflist);
diffopts.pathspec.count = 0;
if (0 != git_diff_tree_to_tree(&difflist, sb->blame->repository, ptree, otree, &diffopts))
if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
goto cleanup;
/* Let diff find renames */
@ -414,10 +415,10 @@ static git_blame__origin* find_origin(git_blame__scoreboard *sb, git_commit *par
for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
const git_diff_delta *delta;
git_diff_get_patch(NULL, &delta, difflist, i);
if (git_vector_bsearch(NULL, &sb->blame->paths, delta->new_file.path) != 0)
if (git_vector_bsearch(NULL, &blame->paths, delta->new_file.path) != 0)
continue;
git_vector_insert_sorted(&sb->blame->paths, (void*)git__strdup(delta->old_file.path), paths_on_dup);
git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path), paths_on_dup);
make_origin(&porigin, parent, delta->old_file.path);
}
}
@ -433,15 +434,15 @@ cleanup:
* The blobs of origin and porigin exactly match, so everything origin is
* suspected for can be blamed on the parent.
*/
static void pass_whole_blame(git_blame__scoreboard *sb,
static void pass_whole_blame(git_blame *blame,
git_blame__origin *origin, git_blame__origin *porigin)
{
git_blame__entry *e;
if (!porigin->blob)
git_object_lookup((git_object**)&porigin->blob, sb->blame->repository, git_blob_id(origin->blob),
git_object_lookup((git_object**)&porigin->blob, blame->repository, git_blob_id(origin->blob),
GIT_OBJ_BLOB);
for (e=sb->ent; e; e=e->next) {
for (e=blame->ent; e; e=e->next) {
if (!same_suspect(e->suspect, origin))
continue;
origin_incref(porigin);
@ -450,7 +451,7 @@ static void pass_whole_blame(git_blame__scoreboard *sb,
}
}
static void pass_blame(git_blame__scoreboard *sb, git_blame__origin *origin, uint32_t opt)
static void pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
{
git_commit *commit = origin->commit;
int i, num_sg;
@ -460,10 +461,10 @@ static void pass_blame(git_blame__scoreboard *sb, git_blame__origin *origin, uin
GIT_UNUSED(opt);
num_sg = git_commit_parentcount(commit);
if (!git_oid_cmp(git_commit_id(commit), &sb->blame->options.oldest_commit))
if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
num_sg = 0;
if (!num_sg) {
git_oid_cpy(&sb->blame->options.oldest_commit, git_commit_id(commit));
git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
goto finish;
}
else if (num_sg < (int)ARRAY_SIZE(sg_buf))
@ -479,13 +480,13 @@ static void pass_blame(git_blame__scoreboard *sb, git_blame__origin *origin, uin
continue;
git_commit_parent(&p, origin->commit, i);
porigin = find_origin(sb, p, origin);
porigin = find_origin(blame, p, origin);
if (!porigin)
continue;
if (porigin->blob && origin->blob &&
!git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
pass_whole_blame(sb, origin, porigin);
pass_whole_blame(blame, origin, porigin);
origin_decref(porigin);
goto finish;
}
@ -509,7 +510,7 @@ static void pass_blame(git_blame__scoreboard *sb, git_blame__origin *origin, uin
origin_incref(porigin);
origin->previous = porigin;
}
if (pass_blame_to_parent(sb, origin, porigin))
if (pass_blame_to_parent(blame, origin, porigin))
goto finish;
}
@ -548,14 +549,14 @@ void origin_decref(git_blame__origin *o)
}
}
void assign_blame(git_blame__scoreboard *sb, uint32_t opt)
void assign_blame(git_blame *blame, uint32_t opt)
{
while (true) {
git_blame__entry *ent;
git_blame__origin *suspect = NULL;
/* Find a suspect to break down */
for (ent = sb->ent; !suspect && ent; ent = ent->next)
for (ent = blame->ent; !suspect && ent; ent = ent->next)
if (!ent->guilty)
suspect = ent->suspect;
if (!suspect)
@ -563,26 +564,26 @@ void assign_blame(git_blame__scoreboard *sb, uint32_t opt)
/* We'll use this suspect later in the loop, so hold on to it for now. */
origin_incref(suspect);
pass_blame(sb, suspect, opt);
pass_blame(blame, suspect, opt);
/* Take responsibility for the remaining entries */
for (ent = sb->ent; ent; ent = ent->next) {
for (ent = blame->ent; ent; ent = ent->next) {
if (same_suspect(ent->suspect, suspect)) {
ent->guilty = 1;
ent->is_boundary = !git_oid_cmp(
git_commit_id(suspect->commit),
&sb->blame->options.oldest_commit);
&blame->options.oldest_commit);
}
}
origin_decref(suspect);
}
}
void coalesce(git_blame__scoreboard *sb)
void coalesce(git_blame *blame)
{
git_blame__entry *ent, *next;
for (ent=sb->ent; ent && (next = ent->next); ent = next) {
for (ent=blame->ent; ent && (next = ent->next); ent = next) {
if (same_suspect(ent->suspect, next->suspect) &&
ent->guilty == next->guilty &&
ent->s_lno + ent->num_lines == next->s_lno)

View File

@ -3,95 +3,14 @@
#define INCLUDE_blame_git__
#include "git2.h"
#include "xdiff/xinclude.h"
#include "blame.h"
#include "xdiff/xinclude.h"
/*
 * One blob in a commit that is being suspected: a commit, a path string
 * and an associated blob, shared between entries by reference counting.
 */
typedef struct git_blame__origin {
/* reference count; origin_incref()/origin_decref() are the helpers used
 * by callers to take and drop a reference.
 */
int refcnt;
/* pass_blame() stores a parent origin here after taking a reference
 * on it.
 */
struct git_blame__origin *previous;
/* commit this origin refers to; get_origin() matches existing origins by
 * comparing this pointer together with the path.
 */
git_commit *commit;
/* blob for this origin; may still be NULL, in which case
 * pass_whole_blame() looks it up on demand.
 */
git_blob *blob;
/* path string stored inline as a flexible array member; compared with
 * strcmp() when matching origins.
 */
char path[];
} git_blame__origin;
/*
 * Each group of lines is described by a git_blame__entry; it can be split
 * as we pass blame to the parents. They form a linked list in the
 * scoreboard structure, sorted by the target line number.
 */
typedef struct git_blame__entry {
/* neighbouring entries in the linked list; add_blame_entry() updates
 * these links when it inserts an entry in `lno` order.
 */
struct git_blame__entry *prev;
struct git_blame__entry *next;
/* the first line of this group in the final image;
 * internally all line numbers are 0 based.
 */
int lno;
/* how many lines this group has */
int num_lines;
/* the commit that introduced this group into the final image */
git_blame__origin *suspect;
/* true if the suspect is truly guilty; false while we have not
 * checked if the group came from one of its parents.
 */
char guilty;
/* true if the entry has been scanned for copies in the current parent
 */
char scanned;
/* the line number of the first line of this group in the
 * suspect's file; internally all line numbers are 0 based.
 */
int s_lno;
/* how significant this entry is -- cached to avoid
 * scanning the lines over and over.
 */
unsigned score;
/* Whether this entry has been tracked to a boundary commit.
 * assign_blame() sets it when the suspect's commit id equals
 * options.oldest_commit.
 */
bool is_boundary;
} git_blame__entry;
/*
 * The current state of the blame assignment.
 */
typedef struct git_blame__scoreboard {
/* the final commit (i.e. where we started digging from) */
git_commit *final;
/* path being blamed; walk_and_mark() copies it from the git_blame */
const char *path;
/*
 * The contents in the final image.
 * Used by many functions to obtain contents of the nth line,
 * indexed with scoreboard.lineno[blame_entry.lno].
 * NOTE(review): no `lineno` member exists in this struct -- the sentence
 * above looks stale; confirm before relying on it.
 */
const char *final_buf;
git_off_t final_buf_size;
/* linked list of blames */
git_blame__entry *ent;
/* number of lines in the final image, as computed and stored by
 * prepare_lines()
 */
int num_lines;
/* the git_blame this scoreboard works for; used to reach its repository,
 * options and tracked paths
 */
git_blame *blame;
} git_blame__scoreboard;
int get_origin(git_blame__origin **out, git_blame__scoreboard *sb, git_commit *commit, const char *path);
int get_origin(git_blame__origin **out, git_blame *sb, git_commit *commit, const char *path);
int make_origin(git_blame__origin **out, git_commit *commit, const char *path);
git_blame__origin *origin_incref(git_blame__origin *o);
void origin_decref(git_blame__origin *o);
void assign_blame(git_blame__scoreboard *sb, uint32_t flags);
void coalesce(git_blame__scoreboard *sb);
void assign_blame(git_blame *sb, uint32_t flags);
void coalesce(git_blame *sb);
#endif