Commit 87b60bf7 authored by Martin Mareš's avatar Martin Mareš

Added several tools for fib hashing function analysis. It turned out

we can use very simple function which is monotonic with respect
to re-hashing:

	n ^= n >> 16;
	n ^= n << 10;
	h = (n >> (16 - o)) & ((1 << o) - 1);

where o is table order. Statistical analysis for both backbone routing
table and local OSPF routing tables gives values near theoretical
optimum for uniform distribution (see ips.c for formulae).

The trick is very simple: We always calculate a 16-bit hash value n and
use o most significant bits (this gives us monotonity wrt. rehashing
if we sort the chains by the value of n). The first shift/xor pair
reduces the IP address to a 16-bit one, the second pair makes higher
bits of the 16-bit value uniformly distributed even for tables containing
lots of long prefixes (typical interior routing case with 24-bit or even
longer prefixes).
parent 02933ddb
all: ips
ips: ips.c
gcc ips.c -o ips -lm -O2 -Wall
clean:
rm -f ips
#!/usr/bin/perl
#
# Convert Cisco routing table listing to list of prefixes
#
$loc = ($ARGV[0] eq "-l"); # Print only local prefixes
while (<STDIN>) {
($loc ? /^[OR]\s/ : /^B\s/) || next;
/^[ORB]( E[12])?\s+(\d+\.\d+\.\d+\.\d+)(\s|\/\d+\s)/ || die "Cannot parse $_";
$net = $2;
$len = $3;
if ($len =~ /^\s*$/) {
# Magic rule :)
$len = ($net =~ /\.0$/) ? 24 : 32;
}
$len =~ s/^\///;
$net =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)/;
printf "%02x%02x%02x%02x/%d\n", $1, $2, $3, $4, $len;
}
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <stdlib.h>
int h[65536];
/*
* Probability analysis of hashing function:
*
* Let n be number of items and k number of boxes. For uniform distribution
* we get:
*
* Expected value of "item i is in given box": Xi = 1/k
* Expected number of items in given box: a = EX = E(sum Xi) = sum E(Xi) = n/k
* Expected square value: E(X^2) = E((sum Xi)^2) = E((sum_i Xi^2) + (sum_i,j i<>j XiXj)) =
* = sum_i E(Xi^2) + sum_i,j i<>j E(XiXj) =
* = sum_i E(Xi) [Xi is binary] + sum_i,j i<>j E(XiXj) [those are independent] =
* = n/k + n*(n-1)/k^2
* Variance: var X = E(X^2) - (EX)^2 = n/k + n*(n-1)/k^2 - n^2/k^2 =
* = n/k - n/k^2 = a * (1-1/k)
* Probability of fixed box being zero: Pz = ((k-1)/k)^n = (1-1/k)^n = (1-1/k)^(ak) =
* = ((1-1/k)^k)^a which we can approximate by e^-a.
*/
unsigned int hf(unsigned int n)
{
#if 0
n = (n ^ (n >> 16)) & 0xffff;
n = (n ^ (n << 8)) & 0xffff;
#elif 1
n = (n >> 16) ^ n;
n = (n ^ (n << 10)) & 0xffff;
#elif 0
n = (n >> 16) ^ n;
n *= 259309;
#elif 0
n ^= (n >> 20);
n ^= (n >> 10);
n ^= (n >> 5);
#elif 0
n = (n * 259309) + ((n >> 16) * 123479);
#else
return random();
#endif
return n;
}
int
main(int argc, char **argv)
{
int cnt=0;
int i;
int bits = atol(argv[1]);
int z = 1 << bits;
int max = atol(argv[2]);
while (max--)
{
unsigned int i, e;
if (scanf("%x/%d", &i, &e) != 2)
if (feof(stdin))
break;
else
fprintf(stderr, "BUGGG\n");
// i >>= (32-e);
// i |= (i >> e);
cnt++;
h[(hf(i) >> 1*(16 - bits)) & (z-1)]++;
}
// printf(">>> %d addresses\n", cnt);
#if 0
for(i=0; i<z; i++)
printf("%d\t%d\n", i, h[i]);
#else
{
int min=cnt, max=0, zer=0;
double delta=0;
double avg = (double) cnt / z;
double exdelta = avg*(1-1/z);
double exzer = exp(-avg);
for(i=0; i<z; i++) {
if (h[i] < min) min=h[i];
if (h[i] > max) max=h[i];
delta += (h[i] - avg) * (h[i] - avg);
if (!h[i]) zer++;
}
printf("size=%5d, min=%d, max=%2d, delta=%-7.6g (%-7.6g), avg=%-5.3g, zero=%g%% (%g%%)\n", z, min, max, delta/z, exdelta, avg, zer/(double)z*100, exzer*100);
}
#endif
return 0;
}
#!/bin/sh
make ips
echo "Global tables:"
for a in 4 5 6 7 8 9 10 11 12 13 14 15 ; do
./ips <global $a $((1<<($a+2)))
done
echo "Local tables:"
./ips <local 6 1000
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment