Commit 760c2ba4 authored by Marek Vavrusa

Initial implementation of Hopscotch hashing based on RRL code.

parent daa0e304
......@@ -46,6 +46,8 @@ src/common/fdset.c
src/common/fdset.h
src/common/getline.c
src/common/getline.h
src/common/hhash.c
src/common/hhash.h
src/common/hattrie/ahtable.c
src/common/hattrie/ahtable.h
src/common/hattrie/hat-trie.c
......@@ -271,6 +273,7 @@ tests/dnssec_zone_nsec.c
tests/dthreads.c
tests/events.c
tests/fdset.c
tests/hhash.c
tests/hattrie.c
tests/journal.c
tests/rrl.c
......
......@@ -224,6 +224,8 @@ libknots_la_SOURCES = \
common/log.h \
common/mempool.c \
common/mempool.h \
common/hhash.c \
common/hhash.h \
common/hattrie/ahtable.c \
common/hattrie/ahtable.h \
common/hattrie/hat-trie.c \
......
This diff is collapsed.
/* Copyright (C) 2013 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*!
* \file hhash.h
*
* \author Marek Vavrusa <marek.vavrusa@nic.cz>
*
* \brief Hopscotch hashing scheme based hash table.
*
* \addtogroup common_lib
* @{
*/
#ifndef _KNOTD_HHTABLE_H_
#define _KNOTD_HHTABLE_H_
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "common/mempattern.h"
/*! \brief Bitvector type; used for the per-element hop bitmap in hhelem_t. */
typedef unsigned hhbitvec_t;
/*! \brief Element value; an untyped pointer stored alongside each key. */
typedef void* value_t;
/*!
 * \brief Mode flags.
 *
 * Every flag occupies its own bit.
 */
enum {
	HHASH_INSERT  = 0x01, /*!< Insert if not exists. */
	HHASH_FORCE   = 0x02, /*!< Force vacate a hash bucket. */
	HHASH_LEFT    = 0x04, /*!< Split left side. */
	HHASH_RIGHT   = 0x08, /*!< Split right side. */
	HHASH_CONSUME = 0x10  /*!< Consume first byte of the split items. */
};
/*!
 * \brief Element descriptor, contains data and bitmap of adjacent items.
 *
 * One descriptor exists per table slot (see hhash_t::item).
 */
typedef struct hhelem {
hhbitvec_t hop; /*!< Hop bitvector. */
char *d; /*!< Packed record: { value_t val, uint16_t keylen, char[] key } */
} hhelem_t;
/*!
 * \brief Hopscotch hash table.
 *
 * The element descriptors are stored inline in the trailing flexible array
 * member 'item'.
 */
typedef struct hhash {
/* Compatibility with HAT-trie nodes. */
uint8_t flag;
uint8_t c0;
uint8_t c1;
/* Metadata */
uint32_t size; /*!< Number of buckets. */
uint32_t weight; /*!< Table weight (number of inserted items). */
/* Table data storage. */
mm_ctx_t mm; /*!< Memory manager (held by value). */
uint32_t *index; /*!< Order index (optional, see hhash_build_index()). */
hhelem_t item[]; /*!< Table items (flexible array member). */
} hhash_t;
/*!
 * \brief Create hopscotch hash table.
 *
 * \param size Fixed size of the table.
 *
 * \return Created table. NOTE(review): the test suite treats a NULL result
 *         as a creation failure - confirm against the implementation.
 */
hhash_t *hhash_create(uint32_t size);
/*!
 * \brief Create hopscotch hash table (custom memory manager).
 *
 * \param size Fixed size of the table.
 * \param mm   Memory manager context to use for the table.
 *
 * \return Created table (see hhash_create()).
 */
hhash_t *hhash_create_mm(uint32_t size, const mm_ctx_t *mm);
/*!
 * \brief Clear hash table.
 *
 * \param tbl Hash table.
 */
void hhash_clear(hhash_t *tbl);
/*!
 * \brief Free hash table and keys.
 *
 * \param tbl Hash table.
 */
void hhash_free(hhash_t *tbl);
/*!
 * \brief Find key in the hash table and return pointer to its value.
 *
 * \param tbl Hash table.
 * \param key Key.
 * \param len Key length.
 *
 * \retval pointer to value if found
 * \retval NULL if the key does not exist
 */
value_t *hhash_find(hhash_t* tbl, const char* key, uint16_t len);
/*!
 * \brief Insert/replace value for given key.
 *
 * \param tbl Hash table.
 * \param key Key.
 * \param len Key length.
 * \param val Value.
 *
 * \retval KNOT_EOK
 * \retval KNOT_ESPACE
 */
int hhash_insert(hhash_t* tbl, const char* key, uint16_t len, value_t val);
/*!
 * \brief Return pointer to value for given key (insert if not exists).
 *
 * \note This is a more complex function than hhash_insert(), which is
 *       preferred for easier usage.
 *
 * \param tbl  Hash table.
 * \param key  Key.
 * \param len  Key length.
 * \param mode Operation mode flags (HHASH_* values).
 *
 * \retval pointer to new/existing value
 * \retval NULL if the table is full or no memory
 */
value_t *hhash_map(hhash_t* tbl, const char* key, uint16_t len, uint16_t mode);
/*!
 * \brief Remove value from hash table.
 *
 * \param tbl Hash table.
 * \param key Key.
 * \param len Key length.
 *
 * \retval KNOT_EOK
 * \retval KNOT_ENOENT
 */
int hhash_del(hhash_t* tbl, const char* key, uint16_t len);
/*!
 * \brief Evict first element colliding with key.
 *
 * \note This function is very specific, but it is needed in a case where you
 *       want to accept that the table is full, but still forcefully create a
 *       vacant space for a new insertion.
 *
 * \param tbl Hash table.
 * \param key Key.
 * \param len Key length.
 * \param val Output: the removed element's value is stored here.
 *
 * \return KNOT_EOK
 */
int hhash_evict(hhash_t* tbl, const char* key, uint16_t len, value_t *val);
/*
 * The hash table allows building an order index at extra memory cost.
 * This is not required, but useful if the table is small and insertions
 * don't happen very often.
 * Keep in mind to rebuild the index after the insertions/deletes are complete.
 */
/*!
 * \brief Return value from ordered index.
 *
 * \param tbl Hash table.
 * \param i   Position in the order index.
 */
value_t *hhash_indexval(hhash_t* tbl, unsigned i);
/*!
 * \brief Build index for fast ordered lookup.
 *
 * \param tbl Hash table.
 */
void hhash_build_index(hhash_t* tbl);
/*!
 * \brief Find a key that is exact match or lexicographic predecessor.
 *
 * \note NOTE(review): presumably requires the order index to be built first
 *       (the test suite calls hhash_build_index() beforehand) - confirm.
 *
 * \param tbl Hash table.
 * \param key Key.
 * \param len Key length.
 * \param dst Output: receives the pointer to the value of the matched key.
 *
 * \retval 0 if exact match
 * \retval 1 if couldn't find and no predecessor is found
 * \retval -1 if found predecessor
 */
int hhash_find_leq(hhash_t* tbl, const char* key, uint16_t len, value_t **dst);
/*!
 * \brief Hash table iterator.
 *
 * Initialized by hhash_iter_begin(); the caller provides the storage.
 * Note that the struct tag is 'htable_iter' while the typedef is hhash_iter_t.
 */
typedef struct htable_iter {
unsigned flags; /*!< Internal. */
hhash_t* tbl; /*!< Iterated table. */
uint32_t i; /*!< Current direct/indirect index. */
} hhash_iter_t;
/*!
 * \brief Set iterator to the beginning of the table.
 *
 * \param tbl    Iterated table.
 * \param it     Iterator to initialize (caller-provided storage).
 * \param sorted Selects ordered iteration when true, unordered when false.
 *               NOTE(review): the test suite builds the order index with
 *               hhash_build_index() before sorted iteration - confirm that
 *               this is a requirement.
 */
void hhash_iter_begin(hhash_t *tbl, hhash_iter_t *it, bool sorted);
/*! \brief Advance the iterator to the next value. */
void hhash_iter_next(hhash_iter_t *it);
/*! \brief Return true if the iterator is at the end. */
bool hhash_iter_finished(hhash_iter_t *it);
/*!
 * \brief Return current key/keylen.
 *
 * \param it  Iterator.
 * \param len Output: length of the returned key.
 */
const char *hhash_iter_key(hhash_iter_t *it, uint16_t *len);
/*! \brief Return pointer to the current value. */
value_t *hhash_iter_val(hhash_iter_t *it);
#endif /* _KNOTD_HHTABLE_H_ */
/*! @} */
......@@ -4,6 +4,7 @@ check_PROGRAMS = \
tests/journal \
tests/slab \
tests/hattrie \
tests/hhash \
tests/dthreads \
tests/events \
tests/acl \
......
journal
slab
hattrie
hhash
dthreads
events
acl
......
......@@ -36,7 +36,7 @@ static char *randstr() {
int main(int argc, char *argv[])
{
plan(6);
plan(8);
/* Interesting items. */
unsigned count = 10;
......@@ -55,7 +55,7 @@ int main(int argc, char *argv[])
/* Dummy items. */
srand(time(NULL));
unsigned dummy_count = 10000;
unsigned dummy_count = 32768;
char **dummy = xmalloc(sizeof(char*) * dummy_count);
for (unsigned i = 0; i < dummy_count; ++i) {
dummy[i] = randstr();
......@@ -131,6 +131,25 @@ int main(int argc, char *argv[])
int ret = hattrie_find_lpr(t, false_lpr, strlen(false_lpr), &v);
ok(ret != 0 && v == NULL, "hattrie: non-existent prefix lookup");
/* Unsorted iteration */
unsigned counted = 0;
hattrie_iter_t *it = hattrie_iter_begin(t, false);
while (!hattrie_iter_finished(it)) {
++counted;
hattrie_iter_next(it);
}
is_int(hattrie_weight(t), counted, "hattrie: unsorted iteration");
hattrie_iter_free(it);
/* Sorted iteration. */
counted = 0;
it = hattrie_iter_begin(t, true);
while (!hattrie_iter_finished(it)) {
++counted;
hattrie_iter_next(it);
}
is_int(hattrie_weight(t), counted, "hattrie: sorted iteration");
hattrie_iter_free(it);
for (unsigned i = 0; i < dummy_count; ++i) {
free(dummy[i]);
......
/* Copyright (C) 2013 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <string.h>
#include <assert.h>
#include <tap/basic.h>
#include "common/hhash.h"
#include "common/mempattern.h"
#include "common/mempool.h"
#include "libknot/common.h"
/* Test defines. */
#define ELEM_COUNT 65535
/* Random key string generator for tests. */
static const char *alphabet = "0123abcdABCDwxyzWXYZ.-_";
char *test_randstr_mm(struct mm_ctx *mm)
{
unsigned len = (5 + rand() % 251) + 1;
char *s = mm->alloc(mm->ctx, len * sizeof(char));
for (unsigned i = 0; i < len - 1; ++i) {
s[i] = alphabet[rand() % strlen(alphabet)];
}
s[len - 1] = '\0';
return s;
}
/*!
 * \brief Return true if 'cur' comes after (or is equal to) 'prev'.
 *
 * Strings are compared bytewise over their common prefix; when the prefixes
 * are equal, the shorter string sorts first.
 *
 * \param prev Previous key, NULL if there is none yet (always in order).
 * \param cur  Current key.
 *
 * \retval true  if the pair is in non-decreasing lexicographic order
 * \retval false if 'prev' sorts strictly after 'cur'
 */
static bool str_check_sort(const char *prev, const char *cur)
{
	if (prev == NULL) {
		return true;
	}

	/* Keep lengths in size_t, there is no reason to narrow them to int. */
	const size_t prev_len = strlen(prev);
	const size_t cur_len = strlen(cur);
	const size_t common = (prev_len < cur_len) ? prev_len : cur_len;

	const int res = memcmp(prev, cur, common);
	if (res != 0) {
		return res < 0; /* Positive result means broken lexicographic order. */
	}

	/* Common prefix matches (keys may be equal); a longer 'prev' breaks ordering. */
	return prev_len <= cur_len;
}
/*!
 * \brief Test driver for the hopscotch hash table.
 *
 * Runs 11 TAP checks: table creation, single insert/find, bulk fill with
 * random keys, key integrity, ordered and unordered iteration,
 * less-or-equal lookup and removal. All allocations come from one memory
 * pool that is released at the end.
 *
 * \return KNOT_EOK, or KNOT_ERROR if the table could not be created.
 */
int main(int argc, char *argv[])
{
	plan(11);

	/* Create memory pool context. */
	struct mempool *pool = mp_new(64 * 1024);
	mm_ctx_t mm;
	mm.ctx = pool;
	mm.alloc = (mm_alloc_t)mp_alloc;
	mm.free = NULL;

	/* Create hashtable */
	int ret = KNOT_EOK;
	uint16_t len = 0;
	const char *key = "mykey", *cur = NULL, *prev = NULL;
	value_t val = (void*)0xdeadbeef, *rval = NULL;
	hhash_iter_t it;
	hhash_t *tbl = hhash_create_mm(ELEM_COUNT, &mm);
	ok(tbl != NULL, "hhash: create");
	if (tbl == NULL) {
		mp_delete(mm.ctx); /* Do not leak the pool on the early exit. */
		return KNOT_ERROR; /* No point in testing further on. */
	}

	/* Generate random keys. */
	char *keys[ELEM_COUNT];
	for (unsigned i = 0; i < ELEM_COUNT; ++i) {
		keys[i] = test_randstr_mm(&mm);
	}

	/* Insert single element. */
	ret = hhash_insert(tbl, key, strlen(key), val);
	ok(ret == KNOT_EOK, "hhash: insert single element");

	/* Retrieve nonexistent element. */
	cur = "nokey";
	rval = hhash_find(tbl, cur, strlen(cur));
	ok(rval == NULL, "hhash: find non-existent element");

	/* Retrieve single element. */
	rval = hhash_find(tbl, key, strlen(key));
	ok(rval != NULL, "hhash: find existing element");

	/*
	 * Fill the table. The first failed insert marks how many keys made it
	 * in; if none fails, every key was inserted. Starting from ELEM_COUNT
	 * (instead of 0) keeps the integrity check below from being vacuous
	 * when all inserts succeed.
	 */
	unsigned nfilled = ELEM_COUNT;
	for (unsigned i = 0; i < ELEM_COUNT; ++i) {
		ret = hhash_insert(tbl, keys[i], strlen(keys[i]), keys[i]);
		if (ret != KNOT_EOK) {
			nfilled = i;
			ret = KNOT_EOK;
			break;
		}
	}

	/* Check all keys integrity. */
	unsigned nfound = 0;
	for (unsigned i = 0; i < nfilled; ++i) {
		rval = hhash_find(tbl, keys[i], strlen(keys[i]));
		if (!rval || memcmp(*rval, keys[i], strlen(keys[i])) != 0) {
			break; /* Mismatch */
		}
		++nfound;
	}
	is_int(nfilled, nfound, "hhash: found all inserted keys");

	/*
	 * Test keys order index.
	 * NOTE(review): str_check_sort() uses strlen() on the iterated keys and
	 * ignores 'len'; this assumes hhash_iter_key() returns NUL-terminated
	 * keys - confirm against the implementation.
	 */
	hhash_build_index(tbl);
	hhash_iter_begin(tbl, &it, true);
	while (!hhash_iter_finished(&it)) {
		cur = hhash_iter_key(&it, &len);
		if (!str_check_sort(prev, cur)) {
			break;
		}
		prev = cur;
		hhash_iter_next(&it);
	}
	ok(hhash_iter_finished(&it), "hhash: passed order index checks");

	/* Retrieve all keys. */
	nfound = 0;
	hhash_iter_begin(tbl, &it, false);
	while (!hhash_iter_finished(&it)) {
		cur = hhash_iter_key(&it, &len);
		if (hhash_find(tbl, cur, len) == NULL) {
			break;
		} else {
			++nfound;
		}
		hhash_iter_next(&it);
	}
	ok(hhash_iter_finished(&it), "hhash: found all iterated keys");
	is_int(tbl->weight, nfound, "hhash: all iterated keys found");

	/* Test find less or equal. */
	prev = "mykey0"; /* mykey should precede it */
	rval = NULL; /* Drop the stale pointer so the check cannot pass on old data. */
	hhash_find_leq(tbl, prev, strlen(prev), &rval);
	ok(rval && *rval == val, "hhash: find less or equal");

	/* Delete key and retrieve it. */
	ret = hhash_del(tbl, key, strlen(key));
	ok(ret == KNOT_EOK, "hhash: remove key");
	rval = hhash_find(tbl, key, strlen(key));
	ok(rval == NULL, "hhash: find removed element");

	/* Free all memory. */
	mp_delete(mm.ctx);
	return KNOT_EOK;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment