Commit 3ab001b9 authored by Martin Mareš's avatar Martin Mareš

Rewrote fib functions to make them insert/delete/asynchronous-walk safe.

This is implemented in a way similar to lib/slists.h, but it took some
more effort to make rehashing not disturb the readers. We do it by just
taking _highest_ k bits of ipa_hash as our hash value and sorting each
box by whole ipa_hash().

Consult FIB_ITERATE_* macros in nest/route.h.

Implemented fib_check() debugging function and also rewrote the rehashing
algorithm to use better thresholds and not to waste time by rehashing
forth and back.
parent a6f250f5
......@@ -17,21 +17,31 @@ struct proto;
/*
* Generic data structure for storing network prefixes. Also used
* for the master routing table. Currently implemented as a hash
* for the master routing table. Currently implemented as a hash
* table.
*
* Available operations:
* - insertion of new entry
* - deletion of entry
* - searching for entry by network prefix
* - asynchronous retrieval of fib contents
*/
struct fib_node {
ip_addr prefix; /* In host order */
struct fib_node *next; /* Next in hash chain */
struct fib_iterator *readers; /* List of readers of this node */
byte pxlen;
byte flags; /* User-defined */
byte x0, x1; /* User-defined */
struct fib_node *next; /* Next in hash chain */
ip_addr prefix; /* In host order */
};
struct fib_iterator { /* See lib/slists.h for an explanation */
struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */
struct fib_node *node; /* Or NULL if freshly merged */
byte efef; /* 0xff to distinguish between iterator and node */
byte pad[3];
unsigned int hash;
};
struct fib {
......@@ -39,17 +49,23 @@ struct fib {
slab *fib_slab; /* Slab holding all fib nodes */
struct fib_node **hash_table; /* Node hash table */
unsigned int hash_size; /* Number of hash table entries (a power of two) */
unsigned int hash_mask; /* hash_size - 1 */
unsigned int hash_order; /* Binary logarithm of hash_size */
unsigned int hash_shift; /* 16 - hash_log */
unsigned int entries; /* Number of entries */
unsigned int entries_min, entries_max;/* Entry count limits (else start rehashing) */
void (*init)(struct fib_node *); /* Constructor */
};
void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_size, void (*init)(struct fib_node *));
void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_order, void (*init)(struct fib_node *));
void *fib_find(struct fib *, ip_addr *, int); /* Find or return NULL if doesn't exist */
void *fib_get(struct fib *, ip_addr *, int); /* Find or create new if nonexistent */
void fib_delete(struct fib *, void *); /* Remove fib entry */
void fib_free(struct fib *); /* Destroy the fib */
void fib_check(struct fib *); /* Consistency check for debugging */
void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */
struct fib_node *fit_get(struct fib *, struct fib_iterator *);
void fit_put(struct fib_iterator *, struct fib_node *);
#define FIB_WALK(fib, z) do { \
struct fib_node *z, **ff = (fib)->hash_table; \
......@@ -59,6 +75,24 @@ void fib_free(struct fib *); /* Destroy the fib */
#define FIB_WALK_END } while (0)
#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib)
#define FIB_ITERATE_START(fib, it, z) do { \
struct fib_node *z = fit_get(fib, it); \
unsigned int count = (fib)->hash_size; \
unsigned int hpos = (it)->hash; \
for(;;) { \
again: if (!z) { \
if (++hpos >= count) \
break; \
z = (fib)->hash_table[hpos]; \
goto again; \
}
#define FIB_ITERATE_END z = z->next; } } while(0)
#define FIB_ITERATE_PUT(it, z) fit_put(it, z)
/*
* Master Routing Tables. Generally speaking, each of them is a list
* of FIB (one per TOS) with each entry pointing to a list of route entries
......@@ -76,7 +110,7 @@ typedef struct rtable {
} rtable;
typedef struct network {
struct fib_node n; /* FIB flags,x0 reserved for kernel syncer */
struct fib_node n; /* FIB flags reserved for kernel syncer */
struct rte *routes; /* Available routes for this network */
} net;
......
......@@ -13,24 +13,30 @@
#include "nest/bird.h"
#include "nest/route.h"
#define HASH_DEF_SIZE 1024
#define HASH_HI_MARK *16
#define HASH_LO_MARK *0
#define HASH_LO_MIN 128
#define HASH_HI_RESIZE *16
#define HASH_LO_RESIZE *0
#define HASH_DEF_ORDER 10
#define HASH_HI_MARK *4
#define HASH_HI_STEP 2
#define HASH_HI_MAX 16 /* Must be at most 16 */
#define HASH_LO_MARK /5
#define HASH_LO_STEP 2
#define HASH_LO_MIN 10
static void
fib_ht_alloc(struct fib *f)
{
f->hash_mask = f->hash_size - 1;
f->entries_max = f->hash_size HASH_HI_MARK;
f->entries_min = f->hash_size HASH_LO_MARK;
if (f->entries_min < HASH_LO_MIN)
f->hash_size = 1 << f->hash_order;
f->hash_shift = 16 - f->hash_order;
if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP)
f->entries_max = ~0;
else
f->entries_max = f->hash_size HASH_HI_MARK;
if (f->hash_order < HASH_LO_MIN + HASH_LO_STEP)
f->entries_min = 0;
DBG("Allocating FIB: %d entries, %d low, %d high\n", f->hash_size, f->entries_min, f->entries_max);
else
f->entries_min = f->hash_size HASH_LO_MARK;
DBG("Allocating FIB hash of order %d: %d entries, %d low, %d high\n",
f->hash_order, f->hash_size, f->entries_min, f->entries_max);
f->hash_table = mb_alloc(f->fib_pool, f->hash_size * sizeof(struct fib_node *));
bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *));
}
static inline void
......@@ -42,46 +48,64 @@ fib_ht_free(struct fib_node **h)
static inline unsigned
fib_hash(struct fib *f, ip_addr *a)
{
return ipa_hash(*a) & f->hash_mask;
return ipa_hash(*a) >> f->hash_shift;
}
void
fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_size, void (*init)(struct fib_node *))
fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_order, void (*init)(struct fib_node *))
{
if (!hash_size)
hash_size = HASH_DEF_SIZE;
if (!hash_order)
hash_order = HASH_DEF_ORDER;
f->fib_pool = p;
f->fib_slab = sl_new(p, node_size);
f->hash_size = hash_size;
f->hash_order = hash_order;
fib_ht_alloc(f);
bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *));
f->entries = 0;
f->entries_min = 0;
f->init = init;
}
static void
fib_rehash(struct fib *f, unsigned new)
fib_rehash(struct fib *f, int step)
{
unsigned old;
unsigned old, new, oldn, newn, ni, nh;
struct fib_node **n, *e, *x, **t, **m, **h;
old = f->hash_size;
old = f->hash_order;
oldn = f->hash_size;
new = old + step;
m = h = f->hash_table;
DBG("Re-hashing FIB from %d to %d\n", old, new);
f->hash_size = new;
DBG("Re-hashing FIB from order %d to %d\n", old, new);
f->hash_order = new;
fib_ht_alloc(f);
n = f->hash_table;
t = n = f->hash_table;
newn = f->hash_size;
ni = 0;
while (old--)
{
x = *h++;
while (e = x)
{
x = e->next;
t = n + fib_hash(f, &e->prefix);
e->next = *t;
nh = fib_hash(f, &e->prefix);
while (nh > ni)
{
*t = NULL;
ni++;
t = ++n;
}
*t = e;
t = &e->next;
}
}
while (ni < newn)
{
*t = NULL;
ni++;
t = ++n;
}
fib_ht_free(m);
}
......@@ -98,8 +122,9 @@ fib_find(struct fib *f, ip_addr *a, int len)
void *
fib_get(struct fib *f, ip_addr *a, int len)
{
struct fib_node **ee = f->hash_table + fib_hash(f, a);
struct fib_node *e = *ee;
unsigned int h = ipa_hash(*a);
struct fib_node **ee = f->hash_table + (h >> f->hash_shift);
struct fib_node *g, *e = *ee;
while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix)))
e = e->next;
......@@ -112,28 +137,80 @@ fib_get(struct fib *f, ip_addr *a, int len)
e = sl_alloc(f->fib_slab);
e->prefix = *a;
e->pxlen = len;
while ((g = *ee) && ipa_hash(g->prefix) < h)
ee = &g->next;
e->next = *ee;
*ee = e;
e->readers = NULL;
f->init(e);
if (f->entries++ > f->entries_max)
fib_rehash(f, f->hash_size HASH_HI_RESIZE);
fib_rehash(f, HASH_HI_STEP);
return e;
}
static inline void
fib_merge_readers(struct fib_iterator *i, struct fib_node *to)
{
if (to)
{
struct fib_iterator *j = to->readers;
if (!j)
{
/* Fast path */
to->readers = i;
i->prev = (struct fib_iterator *) to;
fixup:
while (i && i->node)
{
i->node = NULL;
i = i->next;
}
return;
}
/* Really merging */
while (j->next)
j = j->next;
j->next = i;
i->prev = j;
goto fixup;
}
else /* No more nodes */
while (i)
{
i->prev = NULL;
i = i->next;
}
}
void
fib_delete(struct fib *f, void *E)
{
struct fib_node *e = E;
struct fib_node **ee = f->hash_table + fib_hash(f, &e->prefix);
unsigned int h = fib_hash(f, &e->prefix);
struct fib_node **ee = f->hash_table + h;
struct fib_iterator *it;
while (*ee)
{
if (*ee == e)
{
*ee = e->next;
if (it = e->readers)
{
struct fib_node *l = e->next;
while (!l)
{
h++;
if (h >= f->hash_size)
break;
else
l = f->hash_table[h];
}
fib_merge_readers(it, l);
}
sl_free(f->fib_slab, e);
if (f->entries-- < f->entries_min)
fib_rehash(f, f->hash_size HASH_LO_RESIZE);
fib_rehash(f, -HASH_LO_STEP);
return;
}
ee = &((*ee)->next);
......@@ -147,3 +224,205 @@ fib_free(struct fib *f)
fib_ht_free(f->hash_table);
rfree(f->fib_slab);
}
void
fit_init(struct fib_iterator *i, struct fib *f)
{
unsigned h;
struct fib_node *n;
i->efef = 0xff;
for(h=0; h<f->hash_size; h++)
if (n = f->hash_table[h])
{
i->prev = (struct fib_iterator *) n;
if (i->next = n->readers)
i->next->prev = i;
n->readers = i;
i->node = n;
return;
}
/* The fib is empty, nothing to do */
i->prev = i->next = NULL;
i->node = NULL;
}
struct fib_node *
fit_get(struct fib *f, struct fib_iterator *i)
{
struct fib_node *n;
struct fib_iterator *j, *k;
if (!i->prev)
{
/* We are at the end */
i->hash = f->hash_size;
return NULL;
}
if (!(n = i->node))
{
/* No node info available, we are a victim of merging. Try harder. */
j = i;
while (j->efef == 0xff)
j = j->prev;
n = (struct fib_node *) j;
}
j = i->prev;
if (k = i->next)
k->prev = j;
j->next = k;
i->hash = fib_hash(f, &n->prefix);
return n;
}
void
fit_put(struct fib_iterator *i, struct fib_node *n)
{
struct fib_iterator *j;
i->node = n;
if (j = n->readers)
j->prev = i;
i->next = j;
n->readers = i;
i->prev = (struct fib_iterator *) n;
}
#ifdef DEBUGGING
void
fib_check(struct fib *f)
{
unsigned int i, ec, lo, nulls;
ec = 0;
for(i=0; i<f->hash_size; i++)
{
struct fib_node *n;
lo = 0;
for(n=f->hash_table[i]; n; n=n->next)
{
struct fib_iterator *j, *j0;
unsigned int h0 = ipa_hash(n->prefix);
if (h0 < lo)
die("fib_check: discord in hash chains");
lo = h0;
if ((h0 >> f->hash_shift) != i)
die("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order);
j0 = (struct fib_iterator *) n;
nulls = 0;
for(j=n->readers; j; j=j->next)
{
if (j->prev != j0)
die("fib_check: iterator->prev mismatch");
j0 = j;
if (!j->node)
nulls++;
else if (nulls)
die("fib_check: iterator nullified");
else if (j->node != n)
die("fib_check: iterator->node mismatch");
}
ec++;
}
}
if (ec != f->entries)
die("fib_check: invalid entry count (%d != %d)", ec, f->entries);
}
#endif
#ifdef TEST
#include "lib/resource.h"
struct fib f;
void dump(char *m)
{
unsigned int i;
debug("%s ... order=%d, size=%d, entries=%d\n", m, f.hash_order, f.hash_size, f.hash_size);
for(i=0; i<f.hash_size; i++)
{
struct fib_node *n;
struct fib_iterator *j;
for(n=f.hash_table[i]; n; n=n->next)
{
debug("%04x %04x %p %I/%2d", i, ipa_hash(n->prefix), n, n->prefix, n->pxlen);
for(j=n->readers; j; j=j->next)
debug(" %p[%p]", j, j->node);
debug("\n");
}
}
fib_check(&f);
debug("-----\n");
}
void init(struct fib_node *n)
{
}
int main(void)
{
struct fib_node *n;
struct fib_iterator i, j;
ip_addr a;
int c;
log_init_debug(NULL);
resource_init();
fib_init(&f, &root_pool, sizeof(struct fib_node), 4, init);
dump("init");
a = ipa_from_u32(0x01020304); n = fib_get(&f, &a, 32);
a = ipa_from_u32(0x02030405); n = fib_get(&f, &a, 32);
a = ipa_from_u32(0x03040506); n = fib_get(&f, &a, 32);
a = ipa_from_u32(0x00000000); n = fib_get(&f, &a, 32);
a = ipa_from_u32(0x00000c01); n = fib_get(&f, &a, 32);
a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32);
dump("fill");
fit_init(&i, &f);
dump("iter init");
fib_rehash(&f, 1);
dump("rehash up");
fib_rehash(&f, -1);
dump("rehash down");
next:
c = 0;
FIB_ITERATE_START(&f, &i, z)
{
if (c)
{
FIB_ITERATE_PUT(&i, z);
dump("iter");
goto next;
}
c = 1;
debug("got %p\n", z);
}
FIB_ITERATE_END;
dump("iter end");
fit_init(&i, &f);
fit_init(&j, &f);
dump("iter init 2");
n = fit_get(&f, &i);
dump("iter step 2");
fit_put(&i, n->next);
dump("iter step 3");
a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32);
fib_delete(&f, n);
dump("iter step 3");
return 0;
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment