From f6ed5e2d055fec0ed6cc36d613362f733860ccdd Mon Sep 17 00:00:00 2001 From: Philip Kelley Date: Mon, 7 Jan 2013 09:53:43 -0500 Subject: [PATCH] Revert changes from git/git diff-delta.c by dak@gnu.org, proski@gnu.org --- src/delta.c | 126 ++++++++++++---------------------------------------- 1 file changed, 28 insertions(+), 98 deletions(-) diff --git a/src/delta.c b/src/delta.c index 2514dccaf..3db319cd8 100644 --- a/src/delta.c +++ b/src/delta.c @@ -1,16 +1,8 @@ /* - * diff-delta.c: generate a delta between two buffers + * Copyright (C) 2009-2012 the libgit2 contributors * - * This code was greatly inspired by parts of LibXDiff from Davide Libenzi - * http://www.xmailserver.org/xdiff-lib.html - * - * Rewritten for GIT by Nicolas Pitre , (C) 2005-2007 - * - * Modified for libgit2 by Michael Schubert , (C) 2012 - * - * This code is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. */ #include "delta.h" @@ -116,11 +108,7 @@ static const unsigned int U[256] = { struct index_entry { const unsigned char *ptr; unsigned int val; -}; - -struct unpacked_index_entry { - struct index_entry entry; - struct unpacked_index_entry *next; + struct index_entry *next; }; struct git_delta_index { @@ -137,8 +125,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) unsigned int i, hsize, hmask, entries, prev_val, *hash_count; const unsigned char *data, *buffer = buf; struct git_delta_index *index; - struct unpacked_index_entry *entry, **hash; - struct index_entry *packed_entry, **packed_hash; + struct index_entry *entry, **hash; void *mem; unsigned long memsize; @@ -146,7 +133,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) return NULL; /* Determine index hash size. Note that indexing skips the - first byte to allow for optimizing the Rabin's polynomial + first byte to allow for optimizing the rabin polynomial initialization in create_delta(). */ entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW; if (bufsize >= 0xffffffffUL) { @@ -162,21 +149,28 @@ git_delta_create_index(const void *buf, unsigned long bufsize) hmask = hsize - 1; /* allocate lookup index */ - memsize = sizeof(*hash) * hsize + + memsize = sizeof(*index) + + sizeof(*hash) * hsize + sizeof(*entry) * entries; mem = git__malloc(memsize); if (!mem) return NULL; + index = mem; + mem = index->hash; hash = mem; mem = hash + hsize; entry = mem; + index->memsize = memsize; + index->src_buf = buf; + index->src_size = bufsize; + index->hash_mask = hmask; memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash entries */ hash_count = calloc(hsize, sizeof(*hash_count)); if (!hash_count) { - git__free(hash); + git__free(index); return NULL; } @@ -190,13 +184,12 @@ git_delta_create_index(const void *buf, unsigned long bufsize) val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; if (val == prev_val) { /* keep the lowest of consecutive identical blocks */ - entry[-1].entry.ptr = data + RABIN_WINDOW; - --entries; + entry[-1].ptr = data + RABIN_WINDOW; } else { prev_val = val; i = val & hmask; - entry->entry.ptr = data + RABIN_WINDOW; - entry->entry.val = val; + entry->ptr = data + RABIN_WINDOW; + entry->val = val; entry->next = hash[i]; hash[i] = entry++; hash_count[i]++; @@ -205,7 +198,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) /* * Determine a limit on the number of entries in the same hash - * bucket. This guards us against pathological data sets causing + * bucket. This guard us against patological data sets causing * really bad hash distribution with most entries in the same hash * bucket that would bring us to O(m*n) computing costs (m and n * corresponding to reference and target buffer sizes). @@ -216,84 +209,21 @@ git_delta_create_index(const void *buf, unsigned long bufsize) * the reference buffer. */ for (i = 0; i < hsize; i++) { - int acc; - - if (hash_count[i] <= HASH_LIMIT) + if (hash_count[i] < HASH_LIMIT) continue; - /* We leave exactly HASH_LIMIT entries in the bucket */ - entries -= hash_count[i] - HASH_LIMIT; - entry = hash[i]; - acc = 0; - - /* - * Assume that this loop is gone through exactly - * HASH_LIMIT times and is entered and left with - * acc==0. So the first statement in the loop - * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT - * to the accumulator, and the inner loop consequently - * is run (hash_count[i]-HASH_LIMIT) times, removing - * one element from the list each time. Since acc - * balances out to 0 at the final run, the inner loop - * body can't be left with entry==NULL. So we indeed - * encounter entry==NULL in the outer loop only. - */ do { - acc += hash_count[i] - HASH_LIMIT; - if (acc > 0) { - struct unpacked_index_entry *keep = entry; - do { - entry = entry->next; - acc -= HASH_LIMIT; - } while (acc > 0); - keep->next = entry->next; - } - entry = entry->next; + struct index_entry *keep = entry; + int skip = hash_count[i] / HASH_LIMIT / 2; + do { + entry = entry->next; + } while(--skip && entry); + keep->next = entry; } while (entry); } git__free(hash_count); - /* - * Now create the packed index in array form - * rather than linked lists. - */ - memsize = sizeof(*index) - + sizeof(*packed_hash) * (hsize+1) - + sizeof(*packed_entry) * entries; - mem = git__malloc(memsize); - if (!mem) { - git__free(hash); - return NULL; - } - - index = mem; - index->memsize = memsize; - index->src_buf = buf; - index->src_size = bufsize; - index->hash_mask = hmask; - - mem = index->hash; - packed_hash = mem; - mem = packed_hash + (hsize+1); - packed_entry = mem; - - for (i = 0; i < hsize; i++) { - /* - * Coalesce all entries belonging to one linked list - * into consecutive array entries. - */ - packed_hash[i] = packed_entry; - for (entry = hash[i]; entry; entry = entry->next) - *packed_entry++ = entry->entry; - } - - /* Sentinel value to indicate the length of the last hash bucket */ - packed_hash[hsize] = packed_entry; - - assert(packed_entry - (struct index_entry *)mem == entries); - git__free(hash); - return index; } @@ -312,7 +242,7 @@ unsigned long git_delta_sizeof_index(struct git_delta_index *index) /* * The maximum size for any opcode sequence, including the initial header - * plus Rabin window plus biggest copy. + * plus rabin window plus biggest copy. */ #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) @@ -377,7 +307,7 @@ git_delta_create( val ^= U[data[-RABIN_WINDOW]]; val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; i = val & index->hash_mask; - for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) { + for (entry = index->hash[i]; entry; entry = entry->next) { const unsigned char *ref = entry->ptr; const unsigned char *src = data; unsigned int ref_size = (unsigned int)(ref_top - ref);