Commit 2da03d5d authored by Lubos Slovak's avatar Lubos Slovak

First commit - server with cuckoo hash prototype

Classic (2-ary) cuckoo hash table with char* keys.
  (Insert, find, delete implemented.)
Simple bitset (not used right now).
Several hash functions downloaded from web.
Universal system of hash functions (according to J. Katajainen & M. Lykke
  of University of Copenhagen).
Several basic data structures for DNS data - not finished yet.
parents
### Colours ###
COL_BLACK = \033[01;30m
COL_GREEN = \033[01;32m
COL_BLUE = \033[01;34m
COL_RED = \033[01;31m
COL_YELLOW = \033[01;33m
COL_VIOLET = \033[01;35m
COL_CYAN = \033[01;36m
COL_WHITE = \033[01;37m
COL_END = \033[0m
# Directory layout: headers, sources, objects and the binary all live in the
# current directory.
INC_DIRS = ./
SRC_DIRS = ./
OBJ_DIR = ./
BIN_DIR = ./
# Let make look for prerequisites in the source and object directories.
VPATH += ${SRC_DIRS} ${OBJ_DIR}
# Every *.c file that `find` locates under SRC_DIRS.
SRC_FILES = $(shell find $(SRC_DIRS) -name "*.c" )
# One object per source: drop directory and extension, append .o, prefix with OBJ_DIR.
OBJS = $(addprefix $(OBJ_DIR)/, $(addsuffix .o, $(basename $(notdir $(SRC_FILES)))))
CC = gcc
CFLAGS += -Wall
# First rule in the file, hence the default goal: build the cuckoo-hash binary.
all:cuckoo-hash
### Dependencies ###
# DEPEND is a shell command: it runs the compiler with -MM over all sources
# (stderr discarded) and uses sed to prefix the leading `*.o` target of each
# emitted rule with $(OBJ_DIR)/.
DEPEND = $(CC) $(addprefix -I ,$(INC_DIRS)) -MM $(SRC_FILES) 2>/dev/null | sed "s%^\([^\ \t\n]*\.o\)%$(OBJ_DIR)/\1%"
# Writes the output of DEPEND into Makefile.depend.
Makefile.depend:
# @echo ${DEPEND}
@$(DEPEND) > Makefile.depend
# cuckoo hash
# Links all objects into ${BIN_DIR}cuckoo-hash. Makefile.depend is listed as a
# prerequisite, so it is (re)generated before the link step when missing.
cuckoo-hash: Makefile.depend $(OBJS)
@echo "$(COL_WHITE)Linking... $(COL_YELLOW)${BIN_DIR}$@$(COL_END) <-- $(COL_CYAN)$(OBJS)$(COL_END)"
@$(CC) $(LDFLAGS) $(OBJS) -o ${BIN_DIR}$@
#.PHONY: Makefile.depend
# Treat the generated dependency file as an intermediate file (GNU make
# removes such files once they are no longer needed).
.INTERMEDIATE: Makefile.depend
# Pull in the generated dependencies; the leading '-' suppresses the error
# when the file does not exist yet.
-include Makefile.depend
# An empty .SUFFIXES clears the suffix list, disabling built-in suffix rules.
.SUFFIXES:
### Generic Rules ###
# Pattern rule: compile a single C source ($<) into its object file ($@)
# using CFLAGS, printing a coloured progress line first.
$(OBJ_DIR)/%.o : %.c
@echo "$(COL_WHITE)Compiling $(COL_CYAN)$@: $(COL_BLUE)$< $(COL_END)"
@$(CC) $(CFLAGS) -c -o $@ $<
### Cleaning ###
# `clean` is phony: it never produces a file named "clean".
.PHONY: clean
# Removes every object file in OBJ_DIR (verbosely, ignoring missing files).
clean:
@echo "$(COL_WHITE)Cleaning object files...$(COL_RED)"
@rm -vf ${OBJ_DIR}/*.o
@echo "$(COL_WHITE)done$(COL_END)"
#include "bitset.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#define WORD_SIZE 32
#define WORD_CHECK 31
static unsigned int size = 0;
//static bitset_t clear = NULL;
/*
 * Allocates the storage for a bitset that can hold at least n bits and stores
 * the pointer in *bitset.
 *
 * The byte size is remembered in the file-scope variable `size`, which is
 * shared by all bitsets handled by this module. The allocated words are not
 * initialised; *bitset receives whatever malloc() returns.
 */
void BITSET_CREATE( bitset_t *bitset, unsigned int n )
{
    /* One word per 2^WORD_EXP bits, plus one to cover the remainder. */
    const unsigned int word_count = (n >> WORD_EXP) + 1;

    size = word_count * sizeof(uint32_t);
    *bitset = (bitset_t)malloc(size);
}
/*
 * Releases the storage of a bitset obtained from BITSET_CREATE().
 * Asserts that a bitset has been created before (size > 0).
 */
void BITSET_DESTROY( bitset_t bitset )
{
    assert(size > 0);

    free(bitset);
}
/*
 * Sets bit i of the bitset.
 *
 * The word index is i / 2^WORD_EXP and the bit inside the word is
 * i & WORD_CHECK. No bounds check is done on i.
 */
void BITSET_SET( bitset_t bitset, unsigned int i )
{
    assert(size > 0);
    /* Shift an unsigned 32-bit one: `1 << 31` on a signed int is undefined. */
    bitset[i >> WORD_EXP] |= ((uint32_t)1 << (i & WORD_CHECK));
}
/*
 * Clears bit i of the bitset.
 *
 * The word index is i / 2^WORD_EXP and the bit inside the word is
 * i & WORD_CHECK. No bounds check is done on i.
 */
void BITSET_UNSET( bitset_t bitset, unsigned int i )
{
    assert(size > 0);
    /* Build the mask in uint32_t: `1 << 31` on a signed int is undefined. */
    bitset[i >> WORD_EXP] &= ~((uint32_t)1 << (i & WORD_CHECK));
}
/*
 * Returns the word containing bit i masked down to that single bit:
 * zero when the bit is clear, otherwise the bit's value within its word
 * (not normalised to 1 - use BITSET_ISSET() for a 0/1 answer).
 */
unsigned int BITSET_GET( bitset_t bitset, unsigned int i )
{
    assert(size > 0);
    /* Build the mask in uint32_t: `1 << 31` on a signed int is undefined. */
    return bitset[i >> WORD_EXP] & ((uint32_t)1 << (i & WORD_CHECK));
}
/*
 * Tells whether bit i of the bitset is set.
 * Returns 1 when it is set and 0 otherwise.
 */
unsigned int BITSET_ISSET( bitset_t bitset, unsigned int i )
{
    assert(size > 0);

    if (BITSET_GET(bitset, i) == 0) {
        return 0;
    }
    return 1;
}
/*
 * Resets every bit of the bitset to zero.
 *
 * The number of bytes cleared is the module-wide `size` recorded by
 * BITSET_CREATE(); the parameter n is not used.
 */
void BITSET_CLEAR( bitset_t bitset, unsigned int n )
{
    const unsigned int byte_count = size;

    assert(byte_count > 0);
    memset(bitset, 0, byte_count);
}
#ifndef BITSET
#define BITSET
#include <stdint.h> /* uint32_t */
/* log2 of the number of bits per bitset word (2^5 = 32). */
#define WORD_EXP 5
/* A bitset is a plain array of 32-bit words. */
typedef uint32_t* bitset_t;
/*
 * The functions below are defined (without `inline`) in bitset.c, so they are
 * declared as ordinary external functions here: an `inline` declaration of a
 * function with external linkage requires a definition in the same
 * translation unit, which including files do not have.
 */
/* Allocates storage for at least n bits and stores the pointer in *bitset. */
void BITSET_CREATE( bitset_t *bitset, unsigned int n );
/* Frees the storage of a bitset. */
void BITSET_DESTROY( bitset_t bitset );
/* Sets bit i. */
void BITSET_SET( bitset_t bitset, unsigned int i );
/* Clears bit i. */
void BITSET_UNSET( bitset_t bitset, unsigned int i );
/* Returns the word holding bit i masked to that bit (zero or non-zero). */
unsigned int BITSET_GET( bitset_t bitset, unsigned int i );
/* Returns 1 when bit i is set, 0 otherwise. */
unsigned int BITSET_ISSET( bitset_t bitset, unsigned int i );
/* Zeroes the whole bitset; n is currently not used by the implementation. */
void BITSET_CLEAR( bitset_t bitset, unsigned int n );
#endif /* BITSET */
This diff is collapsed.
#ifndef CUCKOO_HASH_TABLE
#define CUCKOO_HASH_TABLE
#include <stdint.h> /* uint32_t */
#include <stdlib.h> /* size_t */
typedef unsigned int uint;
//typedef unsigned long ulong;
/* Number of slots for a size exponent n, i.e. 2^n computed as uint32_t. */
#define hashsize(n) ((uint32_t)1<<(n))
/* Mask selecting the low n bits: hashsize(n) - 1. */
#define hashmask(n) (hashsize(n)-1)
/* NOTE(review): presumably the capacity of ck_hash_table.buffer; its use is
   not visible in this header - confirm against the implementation. */
#define BUFFER_SIZE 1000
/* Bit flags of a generation byte: two generation bits and one bit marking a
   rehash in progress. */
#define GENERATION_FLAG_1 0x1 // 00000001
#define GENERATION_FLAG_2 0x2 // 00000010
#define REHASH_FLAG 0x4 // 00000100
/*----------------------------------------------------------------------------*/
/**
 * @brief One item stored in the cuckoo hash table.
 */
typedef struct ck_hash_table_item {
    const char *key;    /* key data; its length is kept in key_length */
    size_t key_length;  /* length of the key */
    void *value;        /* user value associated with the key */
    uint8_t timestamp;  /* 000000xy; xy = generation, either 01 or 10 */
} ck_hash_table_item;
/*----------------------------------------------------------------------------*/
/**
 * @brief The cuckoo hash table: two item tables plus a buffer of items.
 */
typedef struct ck_hash_table {
    /* Exponent of the table size (2^table_size_exp slots);
       -1 if not initialized. */
    int table_size_exp;
    ck_hash_table_item *table1;
    ck_hash_table_item *table2;
    ck_hash_table_item *buffer;
    uint buf_i;
    /* 00000xyz: x == 1 means rehashing is in progress,
       yz is the generation and may be 01 or 10. */
    uint8_t generation;
} ck_hash_table;
/*----------------------------------------------------------------------------*/
/**
 * @brief Creates a hash table.
 *
 * NOTE(review): the implementation is not visible from this header;
 * presumably @a items is the expected number of items used to size the
 * tables - confirm.
 */
ck_hash_table *ck_create_table( uint items );
/*----------------------------------------------------------------------------*/
/**
 * @brief Destroys a hash table.
 *
 * NOTE(review): whether keys and values of stored items are released too is
 * not visible here - confirm before relying on it.
 */
void ck_destroy_table( ck_hash_table *table );
/*----------------------------------------------------------------------------*/
/**
 * @brief Inserts item into the hash table.
 *
 * @param table      Table to insert into.
 * @param key        Key of the item.
 * @param length     Length of the key.
 * @param value      Value to associate with the key.
 * @param collisions NOTE(review): presumably an output counter of collisions
 *                   met during insertion - confirm.
 *
 * @retval 0 No error.
 * @retval -1 Insertion failed.
 */
int ck_insert_item( ck_hash_table *table, const char *key,
size_t length, void *value, unsigned long *collisions );
/*----------------------------------------------------------------------------*/
/**
 * @brief Rehashes the table.
 *
 * NOTE(review): the meaning of the return value is not documented in this
 * header - confirm against the implementation.
 */
int ck_rehash( ck_hash_table *table );
/*----------------------------------------------------------------------------*/
/**
 * @brief Finds item in table.
 *
 * @param table  Table to search.
 * @param key    Key to look up.
 * @param length Length of the key.
 *
 * @return Pointer to the item; NOTE(review): presumably NULL when the key is
 *         not present - confirm.
 */
const ck_hash_table_item *ck_find_item(
ck_hash_table *table, const char *key, size_t length );
/*----------------------------------------------------------------------------*/
/**
 * @brief Dumps the whole hash table.
 */
void ck_dump_table( ck_hash_table *table );
/*----------------------------------------------------------------------------*/
/**
 * @todo Check the size of integers; the table size may be larger than
 *       unsigned int.
 * @todo Generalize to k-ary cuckoo hashing (not dependent on the number of
 *       tables).
 */
#endif
This diff is collapsed.
#include "dns-simple.h"

#include <stdlib.h> /* malloc, free */
#include <string.h> /* memcpy */
/*
 * Helpers operating on the 16-bit `flags` word of a DNS header (host byte
 * order). The argument must be a modifiable lvalue; it is parenthesised so
 * that any lvalue expression can be passed safely.
 */
/* Sets the QR bit (bit 15): marks the message as a response. */
#define HEADER_SET_QR(flags) ((flags) |= (1u << 15))
/* Sets the AA bit (bit 10): marks the answer as authoritative. */
#define HEADER_SET_AA(flags) ((flags) |= (1u << 10))
/*----------------------------------------------------------------------------*/
/**
 * @brief Creates a resource record with default type, class and TTL that
 *        holds a copy of the given RDATA.
 *
 * The RDATA copy is stored directly behind the dnss_rr structure, so the
 * memory used must be at least sizeof(dnss_rr) + length bytes.
 *
 * @param data   RDATA bytes to copy (may be NULL only when length is 0).
 * @param length Number of RDATA bytes; must fit into the 16-bit rdlength.
 * @param place  Memory to build the record in, or NULL to allocate it with
 *               malloc() (the caller then owns the record and frees it).
 *
 * @return The record, or NULL when length does not fit into rdlength or the
 *         allocation fails.
 */
dnss_rr *dnss_create_rr( unsigned char *data, uint length, void *place )
{
    dnss_rr *rr;
    unsigned char *rdata;

    if (length > UINT16_MAX) {
        return NULL; /* rdlength could not represent the length */
    }

    rr = (place == NULL) ? malloc(sizeof(dnss_rr) + length) : place;
    if (rr == NULL) {
        return NULL;
    }

    /* Step in bytes: `rr + sizeof(dnss_rr)` would advance by whole structures
       and point far outside the allocated memory. */
    rdata = (unsigned char *)rr + sizeof(dnss_rr);
    if (length > 0) {
        memcpy(rdata, data, length);
    }

    rr->rrtype = RRTYPE_DEFAULT;
    rr->rrclass = RRCLASS_DEFAULT;
    rr->ttl = TTL_DEFAULT;
    rr->rdlength = length;
    rr->rdata = rdata;
    return rr;
}
/*----------------------------------------------------------------------------*/
/**
 * @brief Creates a question with default class and type holding a copy of
 *        the given name.
 *
 * The structure and the name copy share one malloc() block (the name is
 * stored directly behind the structure); a single free() releases both.
 *
 * @param qname  Name bytes to copy (may be NULL only when length is 0).
 * @param length Number of bytes of @a qname to copy.
 *
 * @return The new question, or NULL when the allocation fails.
 */
dnss_question *dnss_create_question( unsigned char *qname, uint length )
{
    dnss_question *question = malloc(sizeof(dnss_question) + length);
    if (question == NULL) {
        return NULL;
    }

    /* Step in bytes: `question + sizeof(dnss_question)` would advance by
       whole structures and point outside the allocated block. */
    question->qname = (unsigned char *)question + sizeof(dnss_question);
    if (length > 0) {
        memcpy(question->qname, qname, length);
    }

    question->qclass = RRCLASS_DEFAULT;
    question->qtype = RRTYPE_DEFAULT;
    return question;
}
/*----------------------------------------------------------------------------*/
/**
 * @brief Builds a response packet for a query.
 *
 * The header is copied from the query and the QR and AA flags are set. The
 * question array is duplicated (shallow copy: the qname pointers still refer
 * to the query's names, which must therefore outlive the response). The
 * answers array is referenced, not copied. Authority and additional sections
 * are left empty.
 *
 * @param query   Query being answered.
 * @param answers Answer records to attach to the response.
 * @param count   Number of records in @a answers (stored in the 16-bit
 *                ancount field).
 *
 * @return The new packet, or NULL when an allocation fails.
 */
dnss_packet *dnss_create_response( dnss_packet *query, dnss_rr *answers,
                                   uint count )
{
    size_t questions_size;
    dnss_packet *packet = malloc(sizeof(dnss_packet));
    if (packet == NULL) {
        return NULL;
    }

    // header: copy it and mark the packet as an authoritative response
    memcpy(&packet->header, &query->header, sizeof(dnss_header));
    HEADER_SET_AA(packet->header.flags);
    HEADER_SET_QR(packet->header.flags);

    // questions; assuming that the domain names will not be deleted
    questions_size = packet->header.qdcount * sizeof(dnss_question);
    packet->questions = NULL;
    if (questions_size > 0) {
        packet->questions = malloc(questions_size);
        if (packet->questions == NULL) {
            free(packet);
            return NULL;
        }
        memcpy(packet->questions, query->questions, questions_size);
    }

    // answers
    packet->header.ancount = count;
    packet->answers = answers;

    // no authority and additional records
    packet->header.nscount = 0;
    packet->authority = NULL;
    packet->header.arcount = 0;
    packet->additional = NULL;
    return packet;
}
/*----------------------------------------------------------------------------*/
/**
 * @brief Converts a packet to wire format.
 *
 * @todo Not implemented yet; always returns NULL so that callers get a
 *       well-defined value instead of an indeterminate one.
 */
unsigned char *dnss_wire_format( dnss_packet *packet )
{
    (void)packet;
    return NULL;
}
#ifndef DNS_SIMPLE
#define DNS_SIMPLE
#include <stdint.h>
#define RRTYPE_DEFAULT 1 // A
#define RRCLASS_DEFAULT 1 // IN
#define TTL_DEFAULT 3600
typedef unsigned int uint;
/*----------------------------------------------------------------------------*/
/**
 * @brief Simplified DNS resource record (without the owner name).
 */
typedef struct dnss_rr {
    uint16_t rrtype;       /* record type */
    uint16_t rrclass;      /* record class */
    uint32_t ttl;          /* time to live */
    uint16_t rdlength;     /* number of bytes in rdata */
    unsigned char *rdata;  /* record data */
} dnss_rr;
/*----------------------------------------------------------------------------*/
/**
 * @brief DNS message header: identifier, flags and the four section counts.
 */
typedef struct dnss_header {
    uint16_t id;
    uint16_t flags;
    uint16_t qdcount;  /* number of questions */
    uint16_t ancount;  /* number of answer records */
    uint16_t nscount;  /* number of authority records */
    uint16_t arcount;  /* number of additional records */
} dnss_header;
/*----------------------------------------------------------------------------*/
/**
 * @brief One entry of the question section.
 */
typedef struct dnss_question {
    unsigned char *qname;  /* queried name */
    uint16_t qtype;        /* queried type */
    uint16_t qclass;       /* queried class */
} dnss_question;
/*----------------------------------------------------------------------------*/
/**
 * @brief A DNS message: the header followed by pointers to its sections.
 */
typedef struct dnss_packet {
    dnss_header header;
    dnss_question *questions;  /* question section */
    dnss_rr *answers;          /* answer section */
    dnss_rr *authority;        /* authority section */
    dnss_rr *additional;       /* additional section */
} dnss_packet;
/*----------------------------------------------------------------------------*/
/**
 * @brief Creates a resource record with default type, class and TTL holding
 *        a copy of @a length bytes of @a data.
 *
 * @param place Memory to build the record in, or NULL to allocate it.
 */
dnss_rr *dnss_create_rr( unsigned char *data, uint length, void *place );
/**
 * @brief Creates a question with default type and class holding a copy of
 *        @a length bytes of @a qname.
 */
dnss_question *dnss_create_question( unsigned char *qname, uint length );
/**
 * @brief Creates a response to @a query carrying @a count records from
 *        @a answers.
 *
 * The prototype matches the definition in dns-simple.c (query packet,
 * answers, answer count).
 */
dnss_packet *dnss_create_response( dnss_packet *query, dnss_rr *answers,
                                   uint count );
/**
 * @brief Converts a packet to wire format.
 */
unsigned char *dnss_wire_format( dnss_packet *packet );
#endif /* DNS_SIMPLE */
#include "hash-functions.h"
/*--------------------------------- FNV HASH ---------------------------------*/
/*
 * 32-bit FNV-1 hash (multiply, then xor) of `size` bytes of `data`.
 *
 * The running hash is reduced to 32 bits after every step, so the result is
 * the same whether `unsigned long` is 32 or 64 bits wide, and every input
 * byte is treated as unsigned regardless of the signedness of `char`.
 *
 * bits: for values 1..31 the hash is folded as
 *       (hash ^ (hash >> (32 - bits))) & ((1 << (32 - bits)) - 1);
 *       for any other value (including -1 and 32) the full 32-bit hash is
 *       returned.
 *
 * NOTE(review): the folded value occupies (32 - bits) bits rather than `bits`
 * bits; this is kept as in the original implementation - confirm that it is
 * what callers expect.
 */
unsigned long int fnv_hash( const char *data, int size, int bits )
{
    const unsigned long int word_mask = 0xFFFFFFFFUL;
    unsigned long int hash = 2166136261UL; /* FNV offset basis */
    unsigned long int mask;
    int shift, i;

    for (i = 0; i < size; i++) {
        hash = ((hash * 16777619UL) ^ (unsigned char)data[i]) & word_mask;
    }

    /* Shifting by a negative amount or by 32 and more is undefined, so only
       fold for exponents that yield a shift of 1..31. */
    if (bits < 1 || bits > 31) {
        return hash;
    }

    shift = 32 - bits;
    mask = (1UL << shift) - 1UL;
    return (hash ^ (hash >> shift)) & mask;
}
/*
 * 32-bit FNV-1a style hash (xor, then multiply) of `size` bytes of `data`,
 * followed by a sequence of shift/add/xor steps applied to the result.
 *
 * All arithmetic is reduced to 32 bits, so the result does not depend on the
 * width of `unsigned long`, and input bytes are treated as unsigned
 * regardless of the signedness of `char`.
 *
 * bits: not used.
 */
unsigned long int fnv_hash2( char *data, int size, int bits )
{
    const unsigned long int word_mask = 0xFFFFFFFFUL;
    const unsigned long int p = 16777619UL; /* FNV prime */
    unsigned long int hash = 2166136261UL;  /* FNV offset basis */
    int i;

    (void)bits;

    for (i = 0; i < size; i++) {
        hash = ((hash ^ (unsigned char)data[i]) * p) & word_mask;
    }

    /* Final mixing; each addition is truncated back to 32 bits. */
    hash = (hash + (hash << 13)) & word_mask;
    hash ^= hash >> 7;
    hash = (hash + (hash << 3)) & word_mask;
    hash ^= hash >> 17;
    hash = (hash + (hash << 5)) & word_mask;
    return hash;
}
/*------------------------------- JENKINS HASH -------------------------------*/
/*
 * The mixing step.
 *
 * Mixes the three state words a, b and c in place with nine rounds of
 * subtract / subtract / xor-with-shift. The arguments must be modifiable
 * lvalues; they are evaluated many times and are not parenthesised, so pass
 * plain variables only. The expansion is a brace-enclosed block.
 */
#define mix(a,b,c) \
{ \
a=a-b; a=a-c; a=a^(c>>13); \
b=b-c; b=b-a; b=b^(a<<8); \
c=c-a; c=c-b; c=c^(b>>13); \
a=a-b; a=a-c; a=a^(c>>12); \
b=b-c; b=b-a; b=b^(a<<16); \
c=c-a; c=c-b; c=c^(b>>5); \
a=a-b; a=a-c; a=a^(c>>3); \
b=b-c; b=b-a; b=b^(a<<10); \
c=c-a; c=c-b; c=c^(b>>15); \
}
/*
 * The whole new hash function.
 *
 * Hashes `length` bytes starting at `k`. The key is consumed in 12-byte
 * blocks that are added to the three state words and mixed; the remaining
 * 0..11 bytes are added afterwards (the lowest byte of c is reserved for the
 * key length) and mixed once more.
 *
 * k:       the key
 * length:  the length of the key in bytes
 * initval: the previous hash, or an arbitrary value
 *
 * Returns the state word c.
 *
 * NOTE(review): u4 is a typedef of `unsigned long`; where that type is wider
 * than 32 bits the shifts are not truncated to 32 bits - confirm whether a
 * 32-bit result is required.
 */
u4 jhash( register u1 *k, u4 length, u4 initval )
{
    register u4 a, b, c; /* the internal state */
    u4 len;              /* how many key bytes still need mixing */

    /* Set up the internal state */
    len = length;
    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
    c = initval;         /* variable initialization of internal state */

    /*---------------------------------------- handle most of the key */
    while (len >= 12) {
        a = a + (k[0] + ((u4)k[1] << 8) + ((u4)k[2] << 16) + ((u4)k[3] << 24));
        b = b + (k[4] + ((u4)k[5] << 8) + ((u4)k[6] << 16) + ((u4)k[7] << 24));
        c = c + (k[8] + ((u4)k[9] << 8) + ((u4)k[10] << 16) + ((u4)k[11] << 24));
        mix(a, b, c);
        k = k + 12;
        len = len - 12;
    }

    /*------------------------------------- handle the last 11 bytes */
    c = c + length;
    switch (len) { /* every case deliberately falls through to the next */
    case 11: c = c + ((u4)k[10] << 24); /* fall through */
    case 10: c = c + ((u4)k[9] << 16);  /* fall through */
    case 9:  c = c + ((u4)k[8] << 8);   /* fall through */
    /* the first byte of c is reserved for the length */
    case 8:  b = b + ((u4)k[7] << 24);  /* fall through */
    case 7:  b = b + ((u4)k[6] << 16);  /* fall through */
    case 6:  b = b + ((u4)k[5] << 8);   /* fall through */
    case 5:  b = b + k[4];              /* fall through */
    case 4:  a = a + ((u4)k[3] << 24);  /* fall through */
    case 3:  a = a + ((u4)k[2] << 16);  /* fall through */
    case 2:  a = a + ((u4)k[1] << 8);   /* fall through */
    case 1:  a = a + k[0];
        break;
    default: /* case 0: nothing left to add */
        break;
    }
    mix(a, b, c);

    /*-------------------------------------------- report the result */
    return c;
}
/*--------------------------------- SDBM HASH --------------------------------*/
/*
 * SDBM hash of `size` bytes of `key`:
 * for every byte, h = byte + (h << 6) + (h << 16) - h, starting from 0.
 * A non-positive size yields 0.
 */
unsigned long sdbm_hash( const unsigned char *key, int size )
{
    unsigned long acc = 0;
    int pos;

    for (pos = 0; pos < size; ++pos) {
        const unsigned long byte = key[pos];
        acc = byte + (acc << 6) + (acc << 16) - acc;
    }
    return acc;
}
/*--------------------------------- DJB HASH ---------------------------------*/
/*
 * DJB-style hash of `size` bytes of `key`:
 * for every byte, h = (33 * h) ^ byte, starting from 0.
 * A non-positive size yields 0.
 */
unsigned long djb_hash( const unsigned char *key, int size )
{
    unsigned long acc = 0;
    int pos = 0;

    while (pos < size) {
        acc = (acc * 33) ^ key[pos];
        ++pos;
    }
    return acc;
}
/*--------------------------------- JSW HASH ---------------------------------*/
// TODO: needs table of random numbers
//unsigned long jsw_hash( const unsigned char *key, int size )
//{
// unsigned long h = 16777551;
// int i;
//
// for ( i = 0; i < size; i++ )
// h = ( h << 1 | h >> 31 ) ^ tab[key[i]];
//
// return h;
//
//}
/*--------------------------------- ELF HASH ---------------------------------*/
/*
 * ELF hash of `size` bytes of `key`.
 *
 * Every byte is shifted into the hash four bits at a time; whenever bits
 * 28..31 become set they are xor-ed back in (shifted right by 24) and then
 * cleared.
 */
unsigned long elf_hash( const unsigned char *key, int size )
{
    unsigned long hash = 0;
    int pos = 0;

    while (pos < size) {
        unsigned long top;

        hash = (hash << 4) + key[pos++];
        top = hash & 0xf0000000L;
        /* With top == 0 both operations below leave the hash unchanged. */
        hash = (hash ^ (top >> 24)) & ~top;
    }
    return hash;
}
#ifndef HASH_FUNCTIONS
#define HASH_FUNCTIONS
/* NOTE(review): `unsigned long` is only guaranteed to be at least 32 bits
   wide and is 64 bits wide on LP64 platforms, so u4 is not necessarily a
   4-byte type - confirm whether exactly 32 bits are required. */
typedef unsigned long int u4; /* unsigned 4-byte type */
typedef unsigned char u1; /* unsigned 1-byte type */
/* FNV hash (multiply, then xor) of `size` bytes; `bits` selects folding,
   -1 returns the hash unfolded. */
unsigned long int fnv_hash( const char *data, int size, int bits );
/* FNV hash variant (xor, then multiply) with extra final mixing; `bits` is
   not used by the implementation. */
unsigned long int fnv_hash2( char *data, int size, int bits );
/* Jenkins hash of `length` bytes at `k`; `initval` is the previous hash or
   an arbitrary value. */
u4 jhash( register u1 *k, u4 length, u4 initval );
/* SDBM hash of `size` bytes of `key`. */
unsigned long sdbm_hash( const unsigned char *key, int size );
/* DJB hash of `size` bytes of `key`. */
unsigned long djb_hash( const unsigned char *key, int size );
//unsigned long jsw_hash( const unsigned char *key, int size );
/* ELF hash of `size` bytes of `key`. */
unsigned long elf_hash( const unsigned char *key, int size );
#endif
/**
 * @todo What if all numbers have been tried and a rehash is still needed?
 *       (That would mean about 2 billion rehashes - we can live with that ;)
 */
#ifndef UNIVERSAL_SYSTEM
#define UNIVERSAL_SYSTEM
#include <limits.h>
#include <stdint.h>
#include <time.h>
/* Number of bits of the hashed values; us_hash() shifts by
   MAX_UINT_EXP - table_exp. */
#define MAX_UINT_EXP 32
/* Expected value of UINT_MAX (2^32 - 1); checked by an assert in
   us_initialize(). */
#define MAX_UINT_MY 4294967295
//#define COEF_DEFAULT1 3
//#define COEF_DEFAULT2 5
/* Multipliers of the hash functions, indexed [generation][function].
   Being `static` in a header, every translation unit that includes this
   file gets its own copy. */
static unsigned int coefs[2][2]; // two generations, two functions
/*----------------------------------------------------------------------------*/
/*
 * Draws one pseudo-random odd number from rand(): an even value is decreased
 * by one, and zero becomes one.
 *
 * The value of rand() is used as is: it never exceeds RAND_MAX, which fits
 * into an int, so no reduction modulo the unsigned range is needed.
 */
static inline unsigned int us_random_odd( void )
{
    unsigned int value = (unsigned int)rand();

    if (value % 2 == 0) {
        value = (value == 0) ? 1 : value - 1;
    }
    return value;
}

/*
 * Fills generation[0] and generation[1] with two different pseudo-random odd
 * coefficients (one per hash function).
 *
 * Declared `static inline`: a plain `inline` definition in a header provides
 * no external definition, so a call that is not inlined could not be linked.
 */
static inline void us_generate_coefs( unsigned int *generation )
{
    generation[0] = us_random_odd();

    /* Redraw the second coefficient until it differs from the first. */
    do {
        generation[1] = us_random_odd();
    } while (generation[1] == generation[0]);
}
/*----------------------------------------------------------------------------*/
/*
 * Initializes the universal system: seeds rand() with the current time and
 * generates random odd coefficients for both generations of hash functions.
 *
 * Asserts that unsigned int is 32 bits wide (UINT_MAX == MAX_UINT_MY).
 *
 * Declared `static inline` because it refers to the file-local `coefs`
 * array, which an inline function with external linkage must not do.
 *
 * NOTE(review): assert(), rand() and srand() are used here, but this header
 * only includes <limits.h>, <stdint.h> and <time.h>; the includer has to
 * provide <assert.h> and <stdlib.h> - confirm.
 */
static inline void us_initialize( void )
{
    int i;

    assert(UINT_MAX == MAX_UINT_MY);
    srand((unsigned int)time(NULL));

    /* Generate the coefficients of both generations. */
    for (i = 0; i < 2; ++i) {
        us_generate_coefs(coefs[i]);
    }
}
/*----------------------------------------------------------------------------*/
/*
 * Generates new coefficients for the given generation.
 *
 * generation: generation flag; the coefficient row used is generation >> 1,
 *             so only values up to 2 are accepted (the same limit that
 *             us_hash() asserts), otherwise the write would land outside
 *             the `coefs` array.
 *
 * Always returns 0.
 *
 * Declared `static inline` because it refers to the file-local `coefs`
 * array, which an inline function with external linkage must not do.
 */
static inline int us_next( uint generation )
{
    assert(generation <= 2);

    // generate new coefficients for the new generation
    us_generate_coefs(coefs[generation >> 1]);
    return 0;
}
/*----------------------------------------------------------------------------*/
/*
 * Hashes `value` with function `c` of the given generation:
 * the value is multiplied by the selected coefficient (wrapping around in
 * unsigned arithmetic) and the top `table_exp` bits of the product are
 * returned.
 *
 * value:      value to hash
 * table_exp:  exponent of the table size, at most 32
 * c:          which of the two functions to use (0 or 1)
 * generation: generation flag, at most 2; the coefficient row used is
 *             generation >> 1
 *
 * Declared `static inline` because it refers to the file-local `coefs`
 * array, which an inline function with external linkage must not do.
 */
static inline uint32_t us_hash( uint32_t value, unsigned int table_exp, uint c,
                                uint generation )
{
    /* multiplication should overflow if larger than MAX_UINT
       this is the same as (coef * value) mod MAX_UINT */
    assert(table_exp <= 32);
    assert(c <= 1);
    assert(generation <= 2);

    /* A table of size 2^0 has a single slot; shifting a 32-bit value by 32
       would be undefined, so answer directly. */
    if (table_exp == 0) {
        return 0;
    }
    return ((coefs[generation >> 1][c] * value) >> (MAX_UINT_EXP - table_exp));
}
/*----------------------------------------------------------------------------*/
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment