From e0646b38c7acfcfc1802c987ceaa2c275b4ae226 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 30 Dec 2010 00:31:58 +0200 Subject: [PATCH] Add generic hash function to util.c It's MurmurHash3 slightly edited to make it cross-platform. Fast and neat. Use this for hashing strings on hash tables instead of a full SHA1 hash. It's very fast and well distributed. Obviously not crypto-secure. Signed-off-by: Vicent Marti --- src/util.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/util.h | 9 +++++ 2 files changed, 105 insertions(+) diff --git a/src/util.c b/src/util.c index a3b62effa..ef97f68a4 100644 --- a/src/util.c +++ b/src/util.c @@ -141,3 +141,99 @@ void git__hexdump(const char *buffer, size_t len) printf("\n"); } + +#ifdef GIT_LEGACY_HASH +uint32_t git__hash(const void *key, int len, unsigned int seed) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + uint32_t h = seed ^ len; + + const unsigned char *data = (const unsigned char *)key; + + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} +#else +/* + Cross-platform version of Murmurhash3 + http://code.google.com/p/smhasher/wiki/MurmurHash3 + by Austin Appleby (aappleby@gmail.com) + + This code is on the public domain. +*/ +uint32_t git__hash(const void *key, int len, uint32_t seed) +{ + +#define MURMUR_BLOCK() {\ + k1 *= c1; \ + k1 = git__rotl(k1,11);\ + k1 *= c2;\ + h1 ^= k1;\ + h1 = h1*3 + 0x52dce729;\ + c1 = c1*5 + 0x7b7d159c;\ + c2 = c2*5 + 0x6bce6396;\ +} + + const uint8_t *data = (const uint8_t*)key; + const int nblocks = len / 4; + + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t h1 = 0x971e137b ^ seed; + uint32_t k1; + + uint32_t c1 = 0x95543787; + uint32_t c2 = 0x2ad7eb25; + + int i; + + for (i = -nblocks; i; i++) { + k1 = blocks[i]; + MURMUR_BLOCK(); + } + + k1 = 0; + + switch(len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + MURMUR_BLOCK(); + } + + h1 ^= len; + h1 ^= h1 >> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >> 16; + + return h1; +} +#endif diff --git a/src/util.h b/src/util.h index 808b20da4..5e6c7d28b 100644 --- a/src/util.h +++ b/src/util.h @@ -23,6 +23,8 @@ extern int git__dirname(char *dir, size_t n, char *path); extern int git__basename(char *base, size_t n, char *path); extern void git__hexdump(const char *buffer, size_t n); +extern uint32_t git__hash(const void *key, int len, uint32_t seed); + /** @return true if p fits into the range of a size_t */ GIT_INLINE(int) git__is_sizet(off_t p) @@ -31,6 +33,13 @@ GIT_INLINE(int) git__is_sizet(off_t p) return p == (off_t)r; } +/* 32-bit cross-platform rotl */ +#ifdef _MSC_VER /* use built-in method in MSVC */ +# define git__rotl(v, s) (uint32_t)_rotl(v, s) +#else /* use bitops in GCC; with o2 this gets optimized to a rotl instruction */ +# define git__rotl(v, s) (uint32_t)(((uint32_t)(v) << (s)) | ((uint32_t)(v) >> (32 - (s)))) +#endif + /* * Realloc the buffer pointed at by variable 'x' so that it can hold * at least 'nr' entries; the number of entries currently allocated