rt-attr.c 29.5 KB
Newer Older
1 2 3
/*
 *	BIRD -- Route Attribute Cache
 *
4
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
5 6 7 8
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/**
 * DOC: Route attribute cache
 *
 * Each route entry carries a set of route attributes. Several of them
 * vary from route to route, but most attributes are usually common
 * for a large number of routes. To conserve memory, we've decided to
 * store only the varying ones directly in the &rte and hold the rest
 * in a special structure called &rta which is shared among all the
 * &rte's with these attributes.
 *
 * Each &rta contains all the static attributes of the route (i.e.,
 * those which are always present) as structure members and a list of
 * dynamic attributes represented by a linked list of &ea_list
 * structures, each of them consisting of an array of &eattr's containing
 * the individual attributes. An attribute can be specified more than once
24
 * in the &ea_list chain and in such case the first occurrence overrides
25 26 27 28 29 30 31 32 33 34 35
 * the others. This semantics is used especially when someone (for example
 * a filter) wishes to alter values of several dynamic attributes, but
 * it wants to preserve the original attribute lists maintained by
 * another module.
 *
 * Each &eattr contains an attribute identifier (split to protocol ID and
 * per-protocol attribute ID), protocol dependent flags, a type code (consisting
 * of several bit fields describing attribute characteristics) and either an
 * embedded 32-bit value or a pointer to a &adata structure holding attribute
 * contents.
 *
36
 * There exist two variants of &rta's -- cached and un-cached ones. Un-cached
37 38 39 40 41 42 43 44 45 46
 * &rta's can have arbitrarily complex structure of &ea_list's and they
 * can be modified by any module in the route processing chain. Cached
 * &rta's have their attribute lists normalized (that means at most one
 * &ea_list is present and its values are sorted in order to speed up
 * searching), they are stored in a hash table to make fast lookup possible
 * and they are provided with a use count to allow sharing.
 *
 * Routing tables always contain only cached &rta's.
 */

47 48 49
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
50
#include "nest/iface.h"
51
#include "nest/cli.h"
52
#include "nest/attrs.h"
53
#include "lib/alloca.h"
Ondřej Zajíček's avatar
Ondřej Zajíček committed
54
#include "lib/hash.h"
55
#include "lib/idm.h"
56
#include "lib/resource.h"
57
#include "lib/string.h"
58

59 60
#include <stddef.h>

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
const char * const rta_src_names[RTS_MAX] = {
  [RTS_DUMMY]		= "",
  [RTS_STATIC]		= "static",
  [RTS_INHERIT]		= "inherit",
  [RTS_DEVICE]		= "device",
  [RTS_STATIC_DEVICE]	= "static-device",
  [RTS_REDIRECT]	= "redirect",
  [RTS_RIP]		= "RIP",
  [RTS_OSPF]		= "OSPF",
  [RTS_OSPF_IA]		= "OSPF-IA",
  [RTS_OSPF_EXT1]	= "OSPF-E1",
  [RTS_OSPF_EXT2]	= "OSPF-E2",
  [RTS_BGP]		= "BGP",
  [RTS_PIPE]		= "pipe",
  [RTS_BABEL]		= "Babel",
  [RTS_RPKI]		= "RPKI",
};

79 80 81 82 83 84 85 86
const char * rta_dest_names[RTD_MAX] = {
  [RTD_NONE]		= "",
  [RTD_UNICAST]		= "unicast",
  [RTD_BLACKHOLE]	= "blackhole",
  [RTD_UNREACHABLE]	= "unreachable",
  [RTD_PROHIBIT]	= "prohibited",
};

87 88
pool *rta_pool;

89 90
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
91 92
static slab *rte_src_slab;

93
static struct idm src_ids;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
94
#define SRC_ID_INIT_SIZE 4
95 96

/* rte source hash */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
97 98 99 100 101 102 103 104 105 106 107

#define RSH_KEY(n)		n->proto, n->private_id
#define RSH_NEXT(n)		n->next
#define RSH_EQ(p1,n1,p2,n2)	p1 == p2 && n1 == n2
#define RSH_FN(p,n)		p->hash_key ^ u32_hash(n)

#define RSH_REHASH		rte_src_rehash
#define RSH_PARAMS		/2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER		6

static HASH(struct rte_src) src_hash;
108

109 110 111 112 113
static void
rte_src_init(void)
{
  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));

114
  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
115

Ondřej Zajíček's avatar
Ondřej Zajíček committed
116
  HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
117 118 119
}


Ondřej Zajíček's avatar
Ondřej Zajíček committed
120
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
121 122 123 124

struct rte_src *
rt_find_source(struct proto *p, u32 id)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
125
  return HASH_FIND(src_hash, RSH, p, id);
126 127 128 129 130
}

struct rte_src *
rt_get_source(struct proto *p, u32 id)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
131
  struct rte_src *src = rt_find_source(p, id);
132

Ondřej Zajíček's avatar
Ondřej Zajíček committed
133 134
  if (src)
    return src;
135 136 137 138

  src = sl_alloc(rte_src_slab);
  src->proto = p;
  src->private_id = id;
139
  src->global_id = idm_alloc(&src_ids);
140
  src->uc = 0;
141

Ondřej Zajíček's avatar
Ondřej Zajíček committed
142
  HASH_INSERT2(src_hash, RSH, rta_pool, src);
143 144 145 146 147 148 149

  return src;
}

void
rt_prune_sources(void)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
150 151 152
  HASH_WALK_FILTER(src_hash, next, src, sp)
  {
    if (src->uc == 0)
153
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
154
      HASH_DO_REMOVE(src_hash, RSH, sp);
155
      idm_free(&src_ids, src->global_id);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
156
      sl_free(rte_src_slab, src);
157
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
158 159
  }
  HASH_WALK_FILTER_END;
160

Ondřej Zajíček's avatar
Ondřej Zajíček committed
161
  HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
162 163 164 165 166 167 168
}


/*
 *	Multipath Next Hop
 */

169
static inline u32
170
nexthop_hash(struct nexthop *x)
171
{
172
  u32 h = 0;
173
  for (; x; x = x->next)
174 175
  {
    h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
176 177

    for (int i = 0; i < x->labels; i++)
178 179
      h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
  }
180 181 182 183 184

  return h;
}

int
185
nexthop__same(struct nexthop *x, struct nexthop *y)
186 187
{
  for (; x && y; x = x->next, y = y->next)
188
  {
189 190
    if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) ||
	(x->flags != y->flags) || (x->weight != y->weight) ||
191
	(x->labels_orig != y->labels_orig) || (x->labels != y->labels))
192
      return 0;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
193 194

    for (int i = 0; i < x->labels; i++)
195 196 197
      if (x->label[i] != y->label[i])
	return 0;
  }
198

Ondřej Zajíček's avatar
Ondřej Zajíček committed
199
  return x == y;
200 201
}

202
static int
203
nexthop_compare_node(struct nexthop *x, struct nexthop *y)
204 205 206 207 208 209 210 211 212
{
  int r;

  if (!x)
    return 1;

  if (!y)
    return -1;

213 214
  /* Should we also compare flags ? */

215 216 217 218 219 220 221 222
  r = ((int) y->weight) - ((int) x->weight);
  if (r)
    return r;

  r = ipa_compare(x->gw, y->gw);
  if (r)
    return r;

223 224 225 226
  r = ((int) y->labels) - ((int) x->labels);
  if (r)
    return r;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
227
  for (int i = 0; i < y->labels; i++)
228 229 230 231 232 233
  {
    r = ((int) y->label[i]) - ((int) x->label[i]);
    if (r)
      return r;
  }

234 235 236
  return ((int) x->iface->index) - ((int) y->iface->index);
}

237 238
static inline struct nexthop *
nexthop_copy_node(const struct nexthop *src, linpool *lp)
239
{
240 241 242
  struct nexthop *n = lp_alloc(lp, nexthop_size(src));

  memcpy(n, src, nexthop_size(src));
243
  n->next = NULL;
244

245 246 247 248
  return n;
}

/**
249
 * nexthop_merge - merge nexthop lists
250 251 252 253 254 255 256
 * @x: list 1
 * @y: list 2
 * @rx: reusability of list @x
 * @ry: reusability of list @y
 * @max: max number of nexthops
 * @lp: linpool for allocating nexthops
 *
257
 * The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
258 259 260 261 262 263 264 265 266 267 268 269
 * eliminating possible duplicates. The input lists must be sorted and the
 * result is sorted too. The number of nexthops in result is limited by @max.
 * New nodes are allocated from linpool @lp.
 *
 * The arguments @rx and @ry specify whether corresponding input lists may be
 * consumed by the function (i.e. their nodes reused in the resulting list), in
 * that case the caller should not access these lists after that. To eliminate
 * issues with deallocation of these lists, the caller should use some form of
 * bulk deallocation (e.g. stack or linpool) to free these nodes when the
 * resulting list is no longer needed. When reusability is not set, the
 * corresponding lists are not modified nor linked from the resulting list.
 */
270 271
struct nexthop *
nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
272
{
273 274
  struct nexthop *root = NULL;
  struct nexthop **n = &root;
275 276 277

  while ((x || y) && max--)
  {
278
    int cmp = nexthop_compare_node(x, y);
279 280
    if (cmp < 0)
    {
281
      *n = rx ? x : nexthop_copy_node(x, lp);
282 283 284 285
      x = x->next;
    }
    else if (cmp > 0)
    {
286
      *n = ry ? y : nexthop_copy_node(y, lp);
287 288 289 290
      y = y->next;
    }
    else
    {
291
      *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
292 293 294 295 296 297 298 299 300 301
      x = x->next;
      y = y->next;
    }
    n = &((*n)->next);
  }
  *n = NULL;

  return root;
}

302
void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
303
nexthop_insert(struct nexthop **n, struct nexthop *x)
304
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
305
  for (; *n; n = &((*n)->next))
306
  {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
307
    int cmp = nexthop_compare_node(*n, x);
308 309 310

    if (cmp < 0)
      continue;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
311 312 313 314
    else if (cmp > 0)
      break;
    else
      return;
315 316
  }

Ondřej Zajíček's avatar
Ondřej Zajíček committed
317 318
  x->next = *n;
  *n = x;
319 320 321
}

int
322
nexthop_is_sorted(struct nexthop *x)
323 324
{
  for (; x && x->next; x = x->next)
325
    if (nexthop_compare_node(x, x->next) >= 0)
326 327 328 329
      return 0;

  return 1;
}
330

331 332 333
static inline slab *
nexthop_slab(struct nexthop *nh)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
334
  return nexthop_slab_[MIN(nh->labels, 3)];
335 336
}

337 338
static struct nexthop *
nexthop_copy(struct nexthop *o)
339
{
340 341
  struct nexthop *first = NULL;
  struct nexthop **last = &first;
342 343 344

  for (; o; o = o->next)
    {
345
      struct nexthop *n = sl_alloc(nexthop_slab(o));
346 347 348
      n->gw = o->gw;
      n->iface = o->iface;
      n->next = NULL;
349
      n->flags = o->flags;
350
      n->weight = o->weight;
351
      n->labels_orig = o->labels_orig;
352 353 354
      n->labels = o->labels;
      for (int i=0; i<o->labels; i++)
	n->label[i] = o->label[i];
355 356 357 358 359 360 361 362 363

      *last = n;
      last = &(n->next);
    }

  return first;
}

static void
364
nexthop_free(struct nexthop *o)
365
{
366
  struct nexthop *n;
367 368 369 370

  while (o)
    {
      n = o->next;
371
      sl_free(nexthop_slab(o), o);
372 373 374 375 376
      o = n;
    }
}


377 378 379 380
/*
 *	Extended Attributes
 */

381 382
static inline eattr *
ea__find(ea_list *e, unsigned id)
383 384 385 386 387 388 389 390 391
{
  eattr *a;
  int l, r, m;

  while (e)
    {
      if (e->flags & EALF_BISECT)
	{
	  l = 0;
392
	  r = e->count - 1;
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
	  while (l <= r)
	    {
	      m = (l+r) / 2;
	      a = &e->attrs[m];
	      if (a->id == id)
		return a;
	      else if (a->id < id)
		l = m+1;
	      else
		r = m-1;
	    }
	}
      else
	for(m=0; m<e->count; m++)
	  if (e->attrs[m].id == id)
	    return &e->attrs[m];
      e = e->next;
    }
  return NULL;
}

414 415 416 417 418 419
/**
 * ea_find - find an extended attribute
 * @e: attribute list to search in
 * @id: attribute ID to search for
 *
 * Given an extended attribute list, ea_find() searches for a first
420
 * occurrence of an attribute with specified ID, returning either a pointer
421 422
 * to its &eattr structure or %NULL if no such attribute exists.
 */
423 424 425 426 427 428 429 430 431 432 433
eattr *
ea_find(ea_list *e, unsigned id)
{
  eattr *a = ea__find(e, id & EA_CODE_MASK);

  if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
      !(id & EA_ALLOW_UNDEF))
    return NULL;
  return a;
}

434 435 436 437 438 439 440 441 442 443
/**
 * ea_walk - walk through extended attributes
 * @s: walk state structure
 * @id: start of attribute ID interval
 * @max: length of attribute ID interval
 *
 * Given an extended attribute list, ea_walk() walks through the list looking
 * for first occurrences of attributes with ID in specified interval from @id to
 * (@id + @max - 1), returning pointers to found &eattr structures, storing its
 * walk state in @s for subsequent calls.
444
 *
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
 * The function ea_walk() is supposed to be called in a loop, with initially
 * zeroed walk state structure @s with filled the initial extended attribute
 * list, returning one found attribute in each call or %NULL when no other
 * attribute exists. The extended attribute list or the arguments should not be
 * modified between calls. The maximum value of @max is 128.
 */
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
  ea_list *e = s->eattrs;
  eattr *a = s->ea;
  eattr *a_max;

  max = id + max;

  if (a)
    goto step;

  for (; e; e = e->next)
  {
    if (e->flags & EALF_BISECT)
    {
      int l, r, m;

      l = 0;
      r = e->count - 1;
      while (l < r)
      {
	m = (l+r) / 2;
	if (e->attrs[m].id < id)
	  l = m + 1;
	else
	  r = m;
      }
      a = e->attrs + l;
    }
    else
      a = e->attrs;

  step:
    a_max = e->attrs + e->count;
    for (; a < a_max; a++)
      if ((a->id >= id) && (a->id < max))
      {
	int n = a->id - id;

	if (BIT32_TEST(s->visited, n))
	  continue;

	BIT32_SET(s->visited, n);

	if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	  continue;

	s->eattrs = e;
	s->ea = a;
	return a;
      }
      else if (e->flags & EALF_BISECT)
	break;
  }

  return NULL;
}

510 511 512 513 514 515 516 517 518 519
/**
 * ea_get_int - fetch an integer attribute
 * @e: attribute list
 * @id: attribute ID
 * @def: default value
 *
 * This function is a shortcut for retrieving a value of an integer attribute
 * by calling ea_find() to find the attribute, extracting its value or returning
 * a provided default if no such attribute is present.
 */
520 521 522 523 524 525 526 527 528
int
ea_get_int(ea_list *e, unsigned id, int def)
{
  eattr *a = ea_find(e, id);
  if (!a)
    return def;
  return a->u.data;
}

529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
static inline void
ea_do_sort(ea_list *e)
{
  unsigned n = e->count;
  eattr *a = e->attrs;
  eattr *b = alloca(n * sizeof(eattr));
  unsigned s, ss;

  /* We need to use a stable sorting algorithm, hence mergesort */
  do
    {
      s = ss = 0;
      while (s < n)
	{
	  eattr *p, *q, *lo, *hi;
	  p = b;
	  ss = s;
	  *p++ = a[s++];
	  while (s < n && p[-1].id <= a[s].id)
	    *p++ = a[s++];
	  if (s < n)
	    {
	      q = p;
	      *p++ = a[s++];
	      while (s < n && p[-1].id <= a[s].id)
		*p++ = a[s++];
	      lo = b;
	      hi = q;
	      s = ss;
	      while (lo < q && hi < p)
		if (lo->id <= hi->id)
		  a[s++] = *lo++;
		else
		  a[s++] = *hi++;
	      while (lo < q)
		a[s++] = *lo++;
	      while (hi < p)
		a[s++] = *hi++;
	    }
	}
    }
  while (ss);
}

573
/**
574 575
 * In place discard duplicates and undefs in sorted ea_list. We use stable sort
 * for this reason.
576
 **/
577 578 579
static inline void
ea_do_prune(ea_list *e)
{
580
  eattr *s, *d, *l, *s0;
581 582
  int i = 0;

583 584 585 586
  s = d = e->attrs;	    /* Beginning of the list. @s is source, @d is destination. */
  l = e->attrs + e->count;  /* End of the list */

  /* Walk from begin to end. */
587 588
  while (s < l)
    {
589
      s0 = s++;
590
      /* Find a consecutive block of the same attribute */
591 592
      while (s < l && s->id == s[-1].id)
	s++;
593 594 595 596 597 598 599 600 601 602 603 604 605 606 607

      /* Now s0 is the most recent version, s[-1] the oldest one */
      /* Drop undefs */
      if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	continue;

      /* Copy the newest version to destination */
      *d = *s0;

      /* Preserve info whether it originated locally */
      d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);

      /* Next destination */
      d++;
      i++;
608
    }
609

610
  e->count = i;
611 612
}

613 614 615 616 617 618 619 620
/**
 * ea_sort - sort an attribute list
 * @e: list to be sorted
 *
 * This function takes a &ea_list chain and sorts the attributes
 * within each of its entries.
 *
 * If an attribute occurs multiple times in a single &ea_list,
621
 * ea_sort() leaves only the first (the only significant) occurrence.
622
 */
623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
void
ea_sort(ea_list *e)
{
  while (e)
    {
      if (!(e->flags & EALF_SORTED))
	{
	  ea_do_sort(e);
	  ea_do_prune(e);
	  e->flags |= EALF_SORTED;
	}
      if (e->count > 5)
	e->flags |= EALF_BISECT;
      e = e->next;
    }
}

640 641 642 643 644 645 646
/**
 * ea_scan - estimate attribute list size
 * @e: attribute list
 *
 * This function calculates an upper bound of the size of
 * a given &ea_list after merging with ea_merge().
 */
647 648 649 650 651 652 653 654 655 656 657 658 659
unsigned
ea_scan(ea_list *e)
{
  unsigned cnt = 0;

  while (e)
    {
      cnt += e->count;
      e = e->next;
    }
  return sizeof(ea_list) + sizeof(eattr)*cnt;
}

660 661 662 663 664 665 666 667 668 669 670 671 672 673
/**
 * ea_merge - merge segments of an attribute list
 * @e: attribute list
 * @t: buffer to store the result to
 *
 * This function takes a possibly multi-segment attribute list
 * and merges all of its segments to one.
 *
 * The primary use of this function is for &ea_list normalization:
 * first call ea_scan() to determine how much memory will the result
 * take, then allocate a buffer (usually using alloca()), merge the
 * segments with ea_merge() and finally sort and prune the result
 * by calling ea_sort().
 */
674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
void
ea_merge(ea_list *e, ea_list *t)
{
  eattr *d = t->attrs;

  t->flags = 0;
  t->count = 0;
  t->next = NULL;
  while (e)
    {
      memcpy(d, e->attrs, sizeof(eattr)*e->count);
      t->count += e->count;
      d += e->count;
      e = e->next;
    }
}

691 692 693 694 695 696 697 698
/**
 * ea_same - compare two &ea_list's
 * @x: attribute list
 * @y: attribute list
 *
 * ea_same() compares two normalized attribute lists @x and @y and returns
 * 1 if they contain the same attributes, 0 otherwise.
 */
699
int
700 701 702 703
ea_same(ea_list *x, ea_list *y)
{
  int c;

704 705 706 707 708 709
  if (!x || !y)
    return x == y;
  ASSERT(!x->next && !y->next);
  if (x->count != y->count)
    return 0;
  for(c=0; c<x->count; c++)
710
    {
711 712 713 714 715 716
      eattr *a = &x->attrs[c];
      eattr *b = &y->attrs[c];

      if (a->id != b->id ||
	  a->flags != b->flags ||
	  a->type != b->type ||
717
	  ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
718
	return 0;
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
    }
  return 1;
}

static inline ea_list *
ea_list_copy(ea_list *o)
{
  ea_list *n;
  unsigned i, len;

  if (!o)
    return NULL;
  ASSERT(!o->next);
  len = sizeof(ea_list) + sizeof(eattr) * o->count;
  n = mb_alloc(rta_pool, len);
  memcpy(n, o, len);
  n->flags |= EALF_CACHED;
  for(i=0; i<o->count; i++)
    {
      eattr *a = &n->attrs[i];
739
      if (!(a->type & EAF_EMBEDDED))
740 741 742 743 744 745 746 747 748 749
	{
	  unsigned size = sizeof(struct adata) + a->u.ptr->length;
	  struct adata *d = mb_alloc(rta_pool, size);
	  memcpy(d, a->u.ptr, size);
	  a->u.ptr = d;
	}
    }
  return n;
}

Martin Mareš's avatar
Martin Mareš committed
750 751 752
static inline void
ea_free(ea_list *o)
{
753 754
  int i;

Martin Mareš's avatar
Martin Mareš committed
755 756 757
  if (o)
    {
      ASSERT(!o->next);
758 759 760 761 762 763
      for(i=0; i<o->count; i++)
	{
	  eattr *a = &o->attrs[i];
	  if (!(a->type & EAF_EMBEDDED))
	    mb_free(a->u.ptr);
	}
Martin Mareš's avatar
Martin Mareš committed
764 765 766 767
      mb_free(o);
    }
}

768 769 770 771 772 773 774 775
static int
get_generic_attr(eattr *a, byte **buf, int buflen UNUSED)
{
  if (a->id == EA_GEN_IGP_METRIC)
    {
      *buf += bsprintf(*buf, "igp_metric");
      return GA_NAME;
    }
776

777 778 779
  return GA_UNKNOWN;
}

780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805
void
ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
{
  byte *bound = buf + bufsize - 32;
  u32 data = a->u.data;
  int i;

  for (i = min; i < max; i++)
    if ((data & (1u << i)) && names[i])
    {
      if (buf > bound)
      {
	strcpy(buf, " ...");
	return;
      }

      buf += bsprintf(buf, " %s", names[i]);
      data &= ~(1u << i);
    }

  if (data)
    bsprintf(buf, " %08x", data);

  return;
}

806
static inline void
Pavel Tvrdík's avatar
Pavel Tvrdík committed
807
opaque_format(struct adata *ad, byte *buf, uint size)
808 809
{
  byte *bound = buf + size - 10;
810
  uint i;
811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832

  for(i = 0; i < ad->length; i++)
    {
      if (buf > bound)
	{
	  strcpy(buf, " ...");
	  return;
	}
      if (i)
	*buf++ = ' ';

      buf += bsprintf(buf, "%02x", ad->data[i]);
    }

  *buf = 0;
  return;
}

static inline void
ea_show_int_set(struct cli *c, struct adata *ad, int way, byte *pos, byte *buf, byte *end)
{
  int i = int_set_format(ad, way, 0, pos, end - pos);
833
  cli_printf(c, -1012, "\t%s", buf);
834 835 836
  while (i)
    {
      i = int_set_format(ad, way, i, buf, end - buf - 1);
837
      cli_printf(c, -1012, "\t\t%s", buf);
838 839 840
    }
}

841 842 843 844
static inline void
ea_show_ec_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
{
  int i = ec_set_format(ad, 0, pos, end - pos);
845
  cli_printf(c, -1012, "\t%s", buf);
846 847 848
  while (i)
    {
      i = ec_set_format(ad, i, buf, end - buf - 1);
849
      cli_printf(c, -1012, "\t\t%s", buf);
850 851 852
    }
}

853 854 855 856 857 858 859 860 861 862 863 864
static inline void
ea_show_lc_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
{
  int i = lc_set_format(ad, 0, pos, end - pos);
  cli_printf(c, -1012, "\t%s", buf);
  while (i)
    {
      i = lc_set_format(ad, i, buf, end - buf - 1);
      cli_printf(c, -1012, "\t\t%s", buf);
    }
}

865
/**
866 867 868
 * ea_show - print an &eattr to CLI
 * @c: destination CLI
 * @e: attribute to be printed
869
 *
870 871
 * This function takes an extended attribute represented by its &eattr
 * structure and prints it to the CLI according to the type information.
872 873 874 875
 *
 * If the protocol defining the attribute provides its own
 * get_attr() hook, it's consulted first.
 */
876
void
877
ea_show(struct cli *c, eattr *e)
878 879 880 881
{
  struct protocol *p;
  int status = GA_UNKNOWN;
  struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
882 883
  byte buf[CLI_MSG_SIZE];
  byte *pos = buf, *end = buf + sizeof(buf);
884

Maria Matejka's avatar
Maria Matejka committed
885 886 887 888 889 890 891 892 893 894 895 896
  if (EA_IS_CUSTOM(e->id))
    {
      const char *name = ea_custom_name(e->id);
      if (name)
        {
	  pos += bsprintf(pos, "%s", name);
	  status = GA_NAME;
	}
      else
	pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
    }
  else if (p = class_to_protocol[EA_PROTO(e->id)])
897
    {
898
      pos += bsprintf(pos, "%s.", p->name);
899
      if (p->get_attr)
900 901
	status = p->get_attr(e, pos, end - pos);
      pos += strlen(pos);
902 903
    }
  else if (EA_PROTO(e->id))
904
    pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
905
  else
906
    status = get_generic_attr(e, &pos, end - pos);
907

908
  if (status < GA_NAME)
909
    pos += bsprintf(pos, "%02x", EA_ID(e->id));
910 911
  if (status < GA_FULL)
    {
912 913
      *pos++ = ':';
      *pos++ = ' ';
914 915 916
      switch (e->type & EAF_TYPE_MASK)
	{
	case EAF_TYPE_INT:
917
	  bsprintf(pos, "%u", e->u.data);
918 919
	  break;
	case EAF_TYPE_OPAQUE:
920
	  opaque_format(ad, pos, end - pos);
921 922
	  break;
	case EAF_TYPE_IP_ADDRESS:
923
	  bsprintf(pos, "%I", *(ip_addr *) ad->data);
924 925
	  break;
	case EAF_TYPE_ROUTER_ID:
926
	  bsprintf(pos, "%R", e->u.data);
927
	  break;
928
	case EAF_TYPE_AS_PATH:
929
	  as_path_format(ad, pos, end - pos);
930
	  break;
931 932 933
	case EAF_TYPE_BITFIELD:
	  bsprintf(pos, "%08x", e->u.data);
	  break;
934
	case EAF_TYPE_INT_SET:
935 936
	  ea_show_int_set(c, ad, 1, pos, buf, end);
	  return;
937 938 939
	case EAF_TYPE_EC_SET:
	  ea_show_ec_set(c, ad, pos, buf, end);
	  return;
940 941 942
	case EAF_TYPE_LC_SET:
	  ea_show_lc_set(c, ad, pos, buf, end);
	  return;
943 944
	case EAF_TYPE_UNDEF:
	default:
945
	  bsprintf(pos, "<type %02x>", e->type);
946 947
	}
    }
948
  cli_printf(c, -1012, "\t%s", buf);
949 950
}

951 952 953 954 955 956 957
/**
 * ea_dump - dump an extended attribute
 * @e: attribute to be dumped
 *
 * ea_dump() dumps contents of the extended attribute given to
 * the debug output.
 */
958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974
void
ea_dump(ea_list *e)
{
  int i;

  if (!e)
    {
      debug("NONE");
      return;
    }
  while (e)
    {
      debug("[%c%c%c]",
	    (e->flags & EALF_SORTED) ? 'S' : 's',
	    (e->flags & EALF_BISECT) ? 'B' : 'b',
	    (e->flags & EALF_CACHED) ? 'C' : 'c');
      for(i=0; i<e->count; i++)
975
	{
976 977 978
	  eattr *a = &e->attrs[i];
	  debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
	  debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
979 980
	  if (a->type & EAF_ORIGINATED)
	    debug("o");
981 982 983 984 985 986 987 988 989
	  if (a->type & EAF_EMBEDDED)
	    debug(":%08x", a->u.data);
	  else
	    {
	      int j, len = a->u.ptr->length;
	      debug("[%d]:", len);
	      for(j=0; j<len; j++)
		debug("%02x", a->u.ptr->data[j]);
	    }
990
	}
991 992
      if (e = e->next)
	debug(" | ");
993 994 995
    }
}

996 997 998 999 1000 1001 1002
/**
 * ea_hash - calculate an &ea_list hash key
 * @e: attribute list
 *
 * ea_hash() takes an extended attribute list and calculated a hopefully
 * uniformly distributed hash value from its contents.
 */
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1003
inline uint
1004 1005
ea_hash(ea_list *e)
{
1006 1007
  const u64 mul = 0x68576150f3d6847;
  u64 h = 0xafcef24eda8b29;
1008 1009 1010 1011 1012 1013 1014
  int i;

  if (e)			/* Assuming chain of length 1 */
    {
      for(i=0; i<e->count; i++)
	{
	  struct eattr *a = &e->attrs[i];
1015
	  h ^= a->id; h *= mul;
1016 1017 1018 1019 1020
	  if (a->type & EAF_EMBEDDED)
	    h ^= a->u.data;
	  else
	    {
	      struct adata *d = a->u.ptr;
1021
	      h ^= mem_hash(d->data, d->length);
1022
	    }
1023
	  h *= mul;
1024 1025
	}
    }
1026
  return (h >> 32) ^ (h & 0xffffffff);
1027 1028
}

1029 1030 1031 1032 1033 1034 1035 1036
/**
 * ea_append - concatenate &ea_list's
 * @to: destination list (can be %NULL)
 * @what: list to be appended (can be %NULL)
 *
 * This function appends the &ea_list @what at the end of
 * &ea_list @to and returns a pointer to the resulting list.
 */
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
ea_list *
ea_append(ea_list *to, ea_list *what)
{
  ea_list *res;

  if (!to)
    return what;
  res = to;
  while (to->next)
    to = to->next;
  to->next = what;
  return res;
}

1051 1052 1053 1054
/*
 *	rta's
 */

Pavel Tvrdík's avatar
Pavel Tvrdík committed
1055 1056 1057 1058
static uint rta_cache_count;
static uint rta_cache_size = 32;
static uint rta_cache_limit;
static uint rta_cache_mask;
1059 1060 1061 1062 1063
static rta **rta_hash_table;

static void
rta_alloc_hash(void)
{
Martin Mareš's avatar
Martin Mareš committed
1064
  rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
1065 1066 1067 1068 1069 1070 1071
  if (rta_cache_size < 32768)
    rta_cache_limit = rta_cache_size * 2;
  else
    rta_cache_limit = ~0;
  rta_cache_mask = rta_cache_size - 1;
}

Pavel Tvrdík's avatar
Pavel Tvrdík committed
1072
static inline uint
1073 1074
rta_hash(rta *a)
{
1075
  u64 h;
1076
  mem_hash_init(&h);
1077
#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
1078 1079 1080 1081
  MIX(src);
  MIX(hostentry);
  MIX(from);
  MIX(igp_metric);
1082 1083 1084
  MIX(source);
  MIX(scope);
  MIX(dest);
1085
#undef MIX
1086

1087
  return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
1088 1089
}

1090 1091 1092
static inline int
rta_same(rta *x, rta *y)
{
1093
  return (x->src == y->src &&
1094 1095 1096
	  x->source == y->source &&
	  x->scope == y->scope &&
	  x->dest == y->dest &&
1097
	  x->igp_metric == y->igp_metric &&
1098
	  ipa_equal(x->from, y->from) &&
1099
	  x->hostentry == y->hostentry &&
1100
	  nexthop_same(&(x->nh), &(y->nh)) &&
1101
	  ea_same(x->eattrs, y->eattrs));
1102 1103
}

1104 1105 1106 1107 1108 1109
static inline slab *
rta_slab(rta *a)
{
  return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
}

1110 1111 1112
static rta *
rta_copy(rta *o)
{
1113
  rta *r = sl_alloc(rta_slab(o));
1114

1115
  memcpy(r, o, rta_size(o));
1116
  r->uc = 1;
1117
  r->nh.next = nexthop_copy(o->nh.next);
1118
  r->eattrs = ea_list_copy(o->eattrs);
1119 1120 1121
  return r;
}

1122 1123 1124
static inline void
rta_insert(rta *r)
{
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1125
  uint h = r->hash_key & rta_cache_mask;
1126 1127 1128 1129 1130 1131 1132 1133 1134 1135
  r->next = rta_hash_table[h];
  if (r->next)
    r->next->pprev = &r->next;
  r->pprev = &rta_hash_table[h];
  rta_hash_table[h] = r;
}

static void
rta_rehash(void)
{
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1136 1137
  uint ohs = rta_cache_size;
  uint h;
1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152
  rta *r, *n;
  rta **oht = rta_hash_table;

  rta_cache_size = 2*rta_cache_size;
  DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
  rta_alloc_hash();
  for(h=0; h<ohs; h++)
    for(r=oht[h]; r; r=n)
      {
	n = r->next;
	rta_insert(r);
      }
  mb_free(oht);
}

1153 1154
/**
 * rta_lookup - look up a &rta in attribute cache
1155
 * @o: a un-cached &rta
1156
 *
1157
 * rta_lookup() gets an un-cached &rta structure and returns its cached
1158 1159 1160 1161 1162 1163 1164 1165
 * counterpart. It starts with examining the attribute cache to see whether
 * there exists a matching entry. If such an entry exists, it's returned and
 * its use count is incremented, else a new entry is created with use count
 * set to 1.
 *
 * The extended attribute lists attached to the &rta are automatically
 * converted to the normalized form.
 */
1166 1167 1168 1169
rta *
rta_lookup(rta *o)
{
  rta *r;
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1170
  uint h;
1171

1172
  ASSERT(!(o->aflags & RTAF_CACHED));
1173
  if (o->eattrs)
1174
    ea_normalize(o->eattrs);
1175

1176 1177 1178
  h = rta_hash(o);
  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
    if (r->hash_key == h && rta_same(r, o))
1179
      return rta_clone(r);
1180

1181
  r = rta_copy(o);
1182
  r->hash_key = h;
1183
  r->aflags = RTAF_CACHED;
1184
  rt_lock_source(r->src);
1185
  rt_lock_hostentry(r->hostentry);
1186 1187 1188 1189 1190
  rta_insert(r);

  if (++rta_cache_count > rta_cache_limit)
    rta_rehash();

1191 1192 1193 1194
  return r;
}

void
1195
rta__free(rta *a)
1196
{
1197 1198
  ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
  rta_cache_count--;
Martin Mareš's avatar
Martin Mareš committed
1199 1200 1201
  *a->pprev = a->next;
  if (a->next)
    a->next->pprev = a->pprev;
1202
  rt_unlock_hostentry(a->hostentry);
1203
  rt_unlock_source(a->src);
1204 1205
  if (a->nh.next)
    nexthop_free(a->nh.next);
Martin Mareš's avatar
Martin Mareš committed
1206
  ea_free(a->eattrs);
1207
  a->aflags = 0;		/* Poison the entry */
1208
  sl_free(rta_slab(a), a);
1209 1210
}

1211 1212 1213
rta *
rta_do_cow(rta *o, linpool *lp)
{
1214 1215
  rta *r = lp_alloc(lp, rta_size(o));
  memcpy(r, o, rta_size(o));
1216 1217 1218 1219 1220 1221
  for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
    {
      *nhn = lp_alloc(lp, nexthop_size(nho));
      memcpy(*nhn, nho, nexthop_size(nho));
      nhn = &((*nhn)->next);
    }
1222 1223 1224 1225 1226
  r->aflags = 0;
  r->uc = 0;
  return r;
}

1227 1228 1229 1230 1231 1232
/**
 * rta_dump - dump route attributes
 * @a: attribute structure to dump
 *
 * This function takes a &rta and dumps its contents to the debug output.
 */
1233
void
1234
rta_dump(rta *a)
1235
{
1236
  static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
Martin Mareš's avatar
Martin Mareš committed
1237
			 "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
Ondřej Filip's avatar
Ondřej Filip committed
1238
			 "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
1239
			 "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
1240 1241
  static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };

1242 1243
  debug("p=%s uc=%d %s %s%s h=%04x",
	a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
1244
	rtd[a->dest], a->hash_key);
1245 1246
  if (!(a->aflags & RTAF_CACHED))
    debug(" !CACHED");
1247
  debug(" <-%I", a->from);
1248 1249 1250 1251
  if (a->dest == RTD_UNICAST)
    for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
      {
	if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
1252 1253 1254
	if (nh->labels) debug(" L %d", nh->label[0]);
	for (int i=1; i<nh->labels; i++)
	  debug("/%d", nh->label[i]);
1255 1256
	debug(" [%s]", nh->iface ? nh->iface->name : "???");
      }
1257
  if (a->eattrs)
1258 1259
    {
      debug(" EA: ");
1260
      ea_dump(a->eattrs);
1261
    }
1262 1263
}

1264 1265 1266 1267 1268 1269
/**
 * rta_dump_all - dump attribute cache
 *
 * This function dumps the whole contents of route attribute cache
 * to the debug output.
 */
1270 1271 1272
void
rta_dump_all(void)
{
1273
  rta *a;
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1274
  uint h;
1275 1276 1277 1278 1279 1280 1281 1282 1283

  debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
  for(h=0; h<rta_cache_size; h++)
    for(a=rta_hash_table[h]; a; a=a->next)
      {
	debug("%p ", a);
	rta_dump(a);
	debug("\n");
      }
1284
  debug("\n");
1285 1286
}

1287
void
1288
rta_show(struct cli *c, rta *a)
1289
{
1290
  cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
1291 1292 1293

  for(ea_list *eal = a->eattrs; eal; eal=eal->next)
    for(int i=0; i<eal->count; i++)
1294
      ea_show(c, &eal->attrs[i]);
1295 1296
}

1297 1298 1299 1300 1301 1302
/**
 * rta_init - initialize route attribute cache
 *
 * This function is called during initialization of the routing
 * table module to set up the internals of the attribute cache.
 */
1303 1304 1305
void
rta_init(void)
{
1306
  rta_pool = rp_new(&root_pool, "Attributes");
1307 1308 1309 1310

  rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
  rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
  rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
1311
  rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1312 1313 1314 1315

  nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
  nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
  nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
1316
  nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1317

1318
  rta_alloc_hash();
1319
  rte_src_init();
1320
}
1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350

/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * rta_clone - clone route attributes
 * @r: a &rta to be cloned
 *
 * rta_clone() takes a cached &rta and returns its identical cached
 * copy. Currently it works by just returning the original &rta with
 * its use count incremented.
 */
static inline rta *rta_clone(rta *r)
{ DUMMY; }

/**
 * rta_free - free route attributes
 * @r: a &rta to be freed
 *
 * If you stop using a &rta (for example when deleting a route which uses
 * it), you need to call rta_free() to notify the attribute cache the
 * attribute is no longer in use and can be freed if you were the last
 * user (which rta_free() tests by inspecting the use count).
 */
static inline void rta_free(rta *r)
{ DUMMY; }

#endif