Commit f574ae66 authored by Tomas Janousek

k-universal hash family using polynomials over prime fields

parent b9764029
#pragma once
#include <stdint.h>
#include "util/array.h"
/*! @brief Carter Wegman trick for polynomial k-universal hashing using
* Mersenne primes.
*
* Code from:
* Mikkel Thorup, Yin Zhang. 2010. Tabulation Based 5-Universal Hashing
* and Linear Probing. ALENEX 2010.
* http://www.siam.org/proceedings/alenex/2010/alx10_007_thorupm.pdf
*
* Idea and proof in:
* M. Wegman and J. Carter. New hash functions and their use in
* authentication and set equality. J. Comp. Syst. Sci., 22:265--279,
* 1981.
* (section 1, paragraph 3)
*/
namespace cw_trick {
/*! @brief 96-bit unsigned integer stored as three 32-bit limbs, with the
 * least significant limb at index 0.
 *
 * The code of Thorup, Zhang (2010) incorrectly defines INT96 as
 * uint64_t[3]; with that definition CWtrick89 computes wrong results. */
typedef array< uint32_t, 3> INT96;
/*! @brief Return the least significant 32 bits of a 64-bit word.
 *  @param x Word to take the lower half of.
 *  @return Bits 0..31 of x, zero-extended to 64 bits. */
inline uint64_t LOW( uint64_t x ) {
    // Narrowing to 32 bits keeps exactly the low half; the return
    // conversion widens it again with zeroes.
    return static_cast< uint32_t >( x );
}
/*! @brief Return the most significant 32 bits of a 64-bit word.
 *  @param x Word to take the upper half of.
 *  @return Bits 32..63 of x, moved down to bits 0..31. */
inline uint64_t HIGH( uint64_t x ) {
    const uint64_t upperHalf = x >> 32;
    return upperHalf;
}
//! @brief The Mersenne prime 2^61 - 1 (61 one bits); also usable as a mask.
static const uint64_t Prime61 = ( static_cast< uint64_t >( 1 ) << 61 ) - 1;
/*! @brief Compute a*x + b modulo Prime61, without the final reduction.
 *
 * The product a*x is split at bit 61 and its two parts are added; because
 * 2^61 == 1 (mod Prime61) this keeps the value modulo Prime61 while
 * staying within 64-bit arithmetic. The result is only congruent to
 * a*x + b: it may be the reduced value plus up to 2*Prime61, so the
 * caller has to finish the reduction.
 *
 * @param x 32-bit multiplier.
 * @param a 64-bit multiplicand.
 * @param b Addend.
 * @return A value congruent to a*x + b modulo Prime61.
 */
inline uint64_t MultAddPrime( uint32_t x, uint64_t a, uint64_t b )
{
    // 32x32 -> 64 bit partial products of the two halves of a with x.
    const uint64_t lowProduct = LOW( a ) * x;
    const uint64_t highProduct = HIGH( a ) * x;

    // Bits 0..63 of a*x (the shift discards what does not fit).
    const uint64_t productLow64 = lowProduct + ( highProduct << 32 );
    // a*x shifted right by 32 bits.
    const uint64_t productFrom32 = ( lowProduct >> 32 ) + highProduct;

    // (a*x mod 2^61) + (a*x div 2^61) + b
    return ( productLow64 & Prime61 ) + ( productFrom32 >> 29 ) + b;
}
/*! @brief k-universal CW trick for a 32-bit key x with prime 2^61 - 1.
 *
 * Evaluates, by Horner's rule, the polynomial with coefficients
 * A[ 0 ] .. A[ k-1 ] (A[ 0 ] being the leading one) at the point x,
 * modulo Prime61.
 *
 * The use of MultAddPrime here differs from that of Thorup, Zhang (2010):
 * the arguments x and h are swapped. Their order is a mistake in the
 * published code and makes the family not k-universal. Their code for
 * CWtrick89 does not contain this mistake.
 *
 * @tparam k Number of coefficients (the universality parameter).
 * @param x Key to hash.
 * @param A The k coefficients; the final reduction below assumes they are
 *          smaller than Prime61.
 * @return The hash value after one folding step and one conditional
 *         subtraction of Prime61.
 */
template < unsigned k >
inline uint64_t CWtrick_k( uint32_t x, const uint64_t A[ k ] )
{
    uint64_t acc = A[ 0 ];
    unsigned i = 1;
    while ( i < k ) {
        acc = MultAddPrime( x, acc, A[ i ] );
        ++i;
    }

    // MultAddPrime leaves the value only partially reduced: fold the bits
    // above bit 60 back in, then subtract the prime once if still needed.
    acc = ( acc & Prime61 ) + ( acc >> 61 );
    return ( acc >= Prime61 ) ? acc - Prime61 : acc;
}
/* The Mersenne prime 2^89 - 1, cut into the pieces that match the 32-bit
 * limbs of INT96. */
//! @brief Limb 0 of 2^89 - 1: 32 one bits.
static const uint64_t Prime89_0 = ( static_cast< uint64_t >( 1 ) << 32 ) - 1;
//! @brief Limb 1 of 2^89 - 1: 32 one bits.
static const uint64_t Prime89_1 = ( static_cast< uint64_t >( 1 ) << 32 ) - 1;
//! @brief Limb 2 of 2^89 - 1: the remaining 25 one bits.
static const uint64_t Prime89_2 = ( static_cast< uint64_t >( 1 ) << 25 ) - 1;
//! @brief Limbs 2 and 1 of 2^89 - 1 taken together: 57 one bits.
static const uint64_t Prime89_21 = ( static_cast< uint64_t >( 1 ) << 57 ) - 1;
/*! @brief Compute (r mod Prime89) mod 2^64, exploiting the structure of
 * Prime89 = 2^89 - 1.
 *
 * @param r 96-bit value, least significant limb first.
 * @return The low 64 bits of r reduced modulo Prime89.
 */
inline uint64_t Mod64Prime89( const INT96 &r )
{
    const uint64_t top = r[2];
    const uint64_t mid = r[1];

    // Bits 89..95 of r are the upper 7 bits of limb 2. Since
    // 2^89 == 1 (mod Prime89) they are added onto limb 0; a carry out of
    // the low 32 bits stays in this 64-bit word and is absorbed by the
    // additions below.
    const uint64_t low = r[0] + ( top >> 25 );
    const uint64_t topMasked = top & Prime89_2;

    // The folded value is >= Prime89 exactly when the upper two limbs are
    // all ones and the low word has reached the prime's low limb.
    const bool reachesPrime = topMasked == Prime89_2
                           && mid == Prime89_1
                           && low >= Prime89_0;
    if ( reachesPrime )
        return low - Prime89_0;
    return low + ( mid << 32 );
}
/*! @brief Compute a 96-bit r such that
 * r mod Prime89 == (a*x + b) mod Prime89,
 * exploiting the structure of Prime89 = 2^89 - 1.
 *
 * The six 32x32 -> 64 bit partial products of the limbs of a with the
 * halves of x are cut at bit 89 of the full product; whatever lies above
 * is added back at the bottom, because 2^89 == 1 (mod Prime89).
 *
 * @param x 64-bit multiplier.
 * @param a 96-bit multiplicand, least significant limb first.
 * @param b 96-bit addend, least significant limb first.
 * @return A value congruent to a*x + b modulo Prime89; it is not fully
 *         reduced.
 */
inline INT96 MultAddPrime89( uint64_t x, const INT96 &a, const INT96 &b )
{
    const uint64_t xHigh = HIGH( x );
    const uint64_t xLow = LOW( x );

    // pIJ = a[ I ] * (J ? xHigh : xLow); its weight in the full product
    // is 2^(32 * (I + J)).
    const uint64_t p21 = a[2] * xHigh;
    const uint64_t p20 = a[2] * xLow;
    const uint64_t p11 = a[1] * xHigh;
    const uint64_t p10 = a[1] * xLow;
    const uint64_t p01 = a[0] * xHigh;
    const uint64_t p00 = a[0] * xLow;

    // Everything at or above 2^89, wrapped around to weight 2^0.
    const uint64_t wrapped = ( p20 >> 25 ) + ( p11 >> 25 )
                           + ( p10 >> 57 ) + ( p01 >> 57 );
    // p21 has weight 2^96 == 2^7 (mod Prime89); the bits this shift drops
    // are picked up by ( p21 >> 57 ) below.
    const uint64_t wrappedP21 = p21 << 7;
    // Contributions of weight 2^32 that stay below 2^89.
    const uint64_t weight32 = ( p10 & Prime89_21 ) + ( p01 & Prime89_21 );
    // Contributions of weight 2^64 that stay below 2^89.
    const uint64_t weight64 = ( p20 & Prime89_2 ) + ( p11 & Prime89_2 )
                            + ( p21 >> 57 );

    // Sum limb by limb; the high half of each 64-bit sum is the carry
    // into the next limb.
    INT96 r;
    const uint64_t sum0 = b[0] + LOW( p00 ) + LOW( wrapped )
                        + LOW( wrappedP21 );
    r[0] = LOW( sum0 );
    const uint64_t sum1 = b[1] + HIGH( p00 ) + HIGH( wrapped )
                        + HIGH( wrappedP21 ) + LOW( weight32 )
                        + HIGH( sum0 );
    r[1] = LOW( sum1 );
    r[2] = b[2] + HIGH( weight32 ) + weight64 + HIGH( sum1 );
    return r;
}
/*! @brief k-universal CW trick for 64-bit key x with prime 2ˆ89-1. */
template < unsigned k >
inline uint64_t CWtrick89_k( uint64_t x, const INT96 A[ k ] )
{
INT96 r = A[ 0 ];
for ( unsigned i = 1; i < k; ++i )
r = MultAddPrime89( x, r, A[ i ] );
return Mod64Prime89( r );
}
}
#pragma once
#include <stdint.h>
#include "util/static_assert.h"
#include "hash/RNG.h"
#include "struct/RandomVectors.h"
#include "hash/CWTrick.h"
/* error messages */
/*! @brief Empty tag type whose name serves as the error message: the
 * generic KUniversalPolynomialHash template passes it to Assert<> so that
 * it shows up when an unsupported KEY_TYPE is used. */
struct KEY_TYPE_not_supported_by_CW_trick {};
/*!
 * @brief Generator of random values strictly below Prime61 = 2^61 - 1.
 *
 * Each call takes a value from gen_u64(), keeps its low 61 bits and tries
 * again if the result equals Prime61, so the returned values lie in
 * [0, 2^61 - 2].
 */
struct RNGFunP61 : RNG {
    RNGFunP61() : RNG() {}
    /*! @param s Forwarded unchanged to the RNG base class (presumably the
     *           seed -- confirm against RNG). */
    RNGFunP61( uint64_t s ) : RNG( s ) {}

    /*! @return A value in [0, Prime61). */
    uint64_t operator () () {
        for ( ;; ) {
            const uint64_t candidate = gen_u64() & cw_trick::Prime61;
            // Reject the single 61-bit pattern that is not below the prime.
            if ( candidate != cw_trick::Prime61 )
                return candidate;
        }
    }
};
/*!
 * @brief Generator of random values strictly below Prime89 = 2^89 - 1.
 *
 * Each call assembles an 89-bit value from three gen_u32() draws (limb 0
 * first) and tries again if all 89 bits are set, so the returned values
 * lie in [0, 2^89 - 2].
 */
struct RNGFunP89 : RNG {
    RNGFunP89() : RNG() {}
    /*! @param s Forwarded unchanged to the RNG base class (presumably the
     *           seed -- confirm against RNG). */
    RNGFunP89( uint64_t s ) : RNG( s ) {}

    /*! @return A value in [0, Prime89), least significant limb first. */
    cw_trick::INT96 operator () () {
        cw_trick::INT96 limbs;
        bool allOnes;
        do {
            limbs[0] = gen_u32() & cw_trick::Prime89_0;
            limbs[1] = gen_u32() & cw_trick::Prime89_1;
            limbs[2] = gen_u32() & cw_trick::Prime89_2;
            // All ones in every limb is Prime89 itself, which is out of
            // range.
            allOnes = limbs[0] == cw_trick::Prime89_0
                   && limbs[1] == cw_trick::Prime89_1
                   && limbs[2] == cw_trick::Prime89_2;
        } while ( allOnes );
        return limbs;
    }
};
/*!
 * @headerfile KUniversalPolynomialHash.h "hash/KUniversalPolynomialHash.h"
 * @brief Stub for unsupported KEY_TYPEs.
 * @tparam K Universality parameter.
 * @tparam KEY_TYPE Type of the hashed keys; only uint32_t and uint64_t
 *         have specializations in this header.
 *
 * This generic template has no members. It derives from
 * Assert< false, KEY_TYPE_not_supported_by_CW_trick >, presumably so that
 * instantiating it fails to compile with the tag's name in the message
 * (see util/static_assert.h to confirm).
 * For a description of the hash itself, see the specialized classes.
 */
template < unsigned K, typename KEY_TYPE >
class KUniversalPolynomialHash
: Assert< false, KEY_TYPE_not_supported_by_CW_trick >
{
// XXX: Shall we perhaps provide a generic implementation using big
// numbers (gmp), or would that be utterly useless?
};
/*!
 * @headerfile KUniversalPolynomialHash.h "hash/KUniversalPolynomialHash.h"
 * @brief k-universal hashing system for 32-bit integers.
 * @tparam K Universality parameter.
 * @tparam KEY_TYPE = uint32_t
 *
 * Each hash function is a row of K values taken from mTable and handed to
 * cw_trick::CWtrick_k, which evaluates them as a polynomial modulo
 * 2^61 - 1. For the algorithm, see cw_trick.
 */
template < unsigned K >
class KUniversalPolynomialHash< K, uint32_t >
{
public:
    /*!
     * @brief Hash a key with the function selected by index.
     * @param index Which hash function of the system to use.
     * @param key Key to hash.
     * @return The hash of the key, as computed by cw_trick::CWtrick_k.
     */
    uint64_t hash( unsigned index, const uint32_t key )
    {
        // Row of mTable belonging to the selected function.
        const uint64_t *coefficients = mTable.get( index );
        return cw_trick::CWtrick_k< K >( key, coefficients );
    }

    /*!
     * @brief Function-call form of hash().
     * @param index Which hash function of the system to use.
     * @param key Key to hash.
     * @return The same value as hash( index, key ).
     */
    uint64_t operator() ( unsigned index, const uint32_t key )
    { return hash( index, key ); }

private:
    /*! @brief Table of values, filled through RNGFunP61, from which the
     *  hash functions are taken. */
    RandomVectors< uint64_t, K, RNGFunP61 > mTable;
};
/*!
* @headerfile KUniversalPolynomialHash.h "hash/KUniversalPolynomialHash.h"
* @brief k-universal hashing system for 64-bit integers.
* @tparam FUNCTIONS Number of hash functions in the system.
* @tparam K Universality parameter.
* @tparam KEY_TYPE = uint64_t
*
* Implemented algorithm:
* See cw_trick.
*/
template < unsigned K >
class KUniversalPolynomialHash< K, uint64_t >
{
public:
/*!
* @brief Compute hash of the key, using function identified by index.
* @param index Hash function to use.
* @param key Key to hash.
* @return Computed hash of the key.
*
* Wrapper for the hash() member function.
*/
uint64_t operator() ( unsigned index, const uint64_t key )
{ return hash( index, key ); }
/*!
* @brief Compute hash of the key, using function identified by index.
* @param index Hash function to use.
* @param key Key to hash.
* @return Computed hash of the key.
*/
uint64_t hash( unsigned index, const uint64_t key )
{
const cw_trick::INT96 *line = mTable.get( index );
return cw_trick::CWtrick89_k< K >( key, line );
}
private:
/*! @brief The table used for hash computing. */
RandomVectors< cw_trick::INT96, K, RNGFunP89 > mTable;
};
#pragma once
#include <cstddef> /* for size_t */
/*!
 * @brief Thin wrapper around a C array of sz elements of type T, so that
 * a fixed-size array can be used as a value type in templates etc.
 *
 * The storage is private; elements are reached through the implicit
 * conversions to a pointer to the first element.
 * FIXME: Drop in favor of C++0x array.
 */
template < typename T, size_t sz >
struct array {
    //! @brief Read-only access: pointer to the first element.
    operator const T * () const { return mData; }
    //! @brief Read-write access: pointer to the first element.
    operator T * () { return mData; }

private:
    T mData[ sz ];
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment