Sometimes it’s useful (e.g. for algorithms like Cuckoo hashing) to generate a large hash value because then it can be conveniently split up into smaller hashes rather than calling n different hash functions, or the same hash function n times using n different seeds. A cryptographic hash function such as SHA1 would be a good choice but run-time performance is slow. With this in mind, and also wanting to fix the hash collision vulnerabilities of the 32bit MurmurHash2, I came up with the fast 160bit MurmurHash2_160, et voilà:
// MurmurHash2_160, by Simon Hardy-Francis
// based upon...
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
void
MurmurHash2_160 (
const void * key,
int len,
unsigned int seed,
unsigned int hash32[5])
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m1 = 0x5bd1e995;
const unsigned int m2 = 0x19D699A5;
const unsigned int m3 = 0xB11924E1;
const unsigned int m4 = 0x16118B03;
const unsigned int m5 = 0x53C93455;
const int r = 24;
// Initialize the hash to a 'random' value
unsigned int h1 = seed ^ len; /* original */
unsigned int h2 = seed ^ len ^ m2;
unsigned int h3 = seed ^ len ^ m3;
unsigned int h4 = seed ^ len ^ m4;
unsigned int h5 = seed ^ len ^ m5;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
unsigned int k1 = *(unsigned int *)data;
k1 *= m1;
k1 ^= k1 >> r;
k1 *= m1;
h1 *= m1; h1 ^= k1; h1 ^= h2;
h2 *= m2; h2 ^= k1; h2 ^= h3;
h3 *= m3; h3 ^= k1; h3 ^= h4;
h4 *= m4; h4 ^= k1; h4 ^= h5;
h5 *= m5; h5 ^= k1; h5 ^= h1;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h1 ^= data[2] << 16; h2 ^= data[2] << 16; h3 ^= data[2] << 16; h4 ^= data[2] << 16; h5 ^= data[2] << 16;
case 2: h1 ^= data[1] << 8 ; h2 ^= data[1] << 8 ; h3 ^= data[1] << 8 ; h4 ^= data[1] << 8 ; h5 ^= data[1] << 8 ;
case 1: h1 ^= data[0] ; h2 ^= data[0] ; h3 ^= data[0] ; h4 ^= data[0] ; h5 ^= data[0] ;
h1 *= m1 ; h2 *= m2 ; h3 *= m3 ; h4 *= m4 ; h5 *= m5 ;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h1 ^= h1 >> 13; h1 *= m1; h1 ^= h1 >> 15;
h2 ^= h2 >> 13; h2 *= m2; h2 ^= h2 >> 15;
h3 ^= h3 >> 13; h3 *= m3; h3 ^= h3 >> 15;
h4 ^= h4 >> 13; h4 *= m4; h4 ^= h4 >> 15;
h5 ^= h5 >> 13; h5 *= m5; h5 ^= h5 >> 15;
hash32[0] = h1 ^ h2 ^ h3 ^ h4 ^ h5;
hash32[1] = h2 ^ h1;
hash32[2] = h3 ^ h1;
hash32[3] = h4 ^ h1;
hash32[4] = h5 ^ h1;
}
[...] hash functions to be tested are FNV (32bit), lookup3 (32bit), lookup3 (64bit), murmurhash2 (32bit), murmurhash2_160 (160bits), SHA1 (160bits), SuperFastHash (32bits), and SuperFastHash_64 (64bits). We test each hash [...]