rt-attr.c 29.9 KB
Newer Older
1 2 3
/*
 *	BIRD -- Route Attribute Cache
 *
4
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
5 6 7 8
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/**
 * DOC: Route attribute cache
 *
 * Each route entry carries a set of route attributes. Several of them
 * vary from route to route, but most attributes are usually common
 * for a large number of routes. To conserve memory, we've decided to
 * store only the varying ones directly in the &rte and hold the rest
 * in a special structure called &rta which is shared among all the
 * &rte's with these attributes.
 *
 * Each &rta contains all the static attributes of the route (i.e.,
 * those which are always present) as structure members and a list of
 * dynamic attributes represented by a linked list of &ea_list
 * structures, each of them consisting of an array of &eattr's containing
 * the individual attributes. An attribute can be specified more than once
24
 * in the &ea_list chain and in such case the first occurrence overrides
25 26 27 28 29 30 31 32 33 34 35
 * the others. This semantics is used especially when someone (for example
 * a filter) wishes to alter values of several dynamic attributes, but
 * it wants to preserve the original attribute lists maintained by
 * another module.
 *
 * Each &eattr contains an attribute identifier (split to protocol ID and
 * per-protocol attribute ID), protocol dependent flags, a type code (consisting
 * of several bit fields describing attribute characteristics) and either an
 * embedded 32-bit value or a pointer to a &adata structure holding attribute
 * contents.
 *
36
 * There exist two variants of &rta's -- cached and un-cached ones. Un-cached
37 38 39 40 41 42 43 44 45 46
 * &rta's can have arbitrarily complex structure of &ea_list's and they
 * can be modified by any module in the route processing chain. Cached
 * &rta's have their attribute lists normalized (that means at most one
 * &ea_list is present and its values are sorted in order to speed up
 * searching), they are stored in a hash table to make fast lookup possible
 * and they are provided with a use count to allow sharing.
 *
 * Routing tables always contain only cached &rta's.
 */

47 48 49
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
50
#include "nest/iface.h"
51
#include "nest/cli.h"
52
#include "nest/attrs.h"
53
#include "lib/alloca.h"
Ondřej Zajíček's avatar
Ondřej Zajíček committed
54
#include "lib/hash.h"
55
#include "lib/idm.h"
56
#include "lib/resource.h"
57
#include "lib/string.h"
58

59 60
#include <stddef.h>

61 62
const adata null_adata;		/* adata of length 0 */

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
const char * const rta_src_names[RTS_MAX] = {
  [RTS_DUMMY]		= "",
  [RTS_STATIC]		= "static",
  [RTS_INHERIT]		= "inherit",
  [RTS_DEVICE]		= "device",
  [RTS_STATIC_DEVICE]	= "static-device",
  [RTS_REDIRECT]	= "redirect",
  [RTS_RIP]		= "RIP",
  [RTS_OSPF]		= "OSPF",
  [RTS_OSPF_IA]		= "OSPF-IA",
  [RTS_OSPF_EXT1]	= "OSPF-E1",
  [RTS_OSPF_EXT2]	= "OSPF-E2",
  [RTS_BGP]		= "BGP",
  [RTS_PIPE]		= "pipe",
  [RTS_BABEL]		= "Babel",
  [RTS_RPKI]		= "RPKI",
};

81 82 83 84 85 86 87 88
const char * rta_dest_names[RTD_MAX] = {
  [RTD_NONE]		= "",
  [RTD_UNICAST]		= "unicast",
  [RTD_BLACKHOLE]	= "blackhole",
  [RTD_UNREACHABLE]	= "unreachable",
  [RTD_PROHIBIT]	= "prohibited",
};

89 90
pool *rta_pool;

91 92
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
93 94
static slab *rte_src_slab;

95
static struct idm src_ids;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
96
#define SRC_ID_INIT_SIZE 4
97 98

/* rte source hash */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
99 100 101 102 103 104 105 106 107 108 109

#define RSH_KEY(n)		n->proto, n->private_id
#define RSH_NEXT(n)		n->next
#define RSH_EQ(p1,n1,p2,n2)	p1 == p2 && n1 == n2
#define RSH_FN(p,n)		p->hash_key ^ u32_hash(n)

#define RSH_REHASH		rte_src_rehash
#define RSH_PARAMS		/2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER		6

static HASH(struct rte_src) src_hash;
110

111 112 113 114 115
static void
rte_src_init(void)
{
  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));

116
  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
117

Ondřej Zajíček's avatar
Ondřej Zajíček committed
118
  HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
119 120 121
}


Ondřej Zajíček's avatar
Ondřej Zajíček committed
122
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
123 124 125 126

struct rte_src *
rt_find_source(struct proto *p, u32 id)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
127
  return HASH_FIND(src_hash, RSH, p, id);
128 129 130 131 132
}

struct rte_src *
rt_get_source(struct proto *p, u32 id)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
133
  struct rte_src *src = rt_find_source(p, id);
134

Ondřej Zajíček's avatar
Ondřej Zajíček committed
135 136
  if (src)
    return src;
137 138 139 140

  src = sl_alloc(rte_src_slab);
  src->proto = p;
  src->private_id = id;
141
  src->global_id = idm_alloc(&src_ids);
142
  src->uc = 0;
143

Ondřej Zajíček's avatar
Ondřej Zajíček committed
144
  HASH_INSERT2(src_hash, RSH, rta_pool, src);
145 146 147 148 149 150 151

  return src;
}

void
rt_prune_sources(void)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
152 153 154
  HASH_WALK_FILTER(src_hash, next, src, sp)
  {
    if (src->uc == 0)
155
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
156
      HASH_DO_REMOVE(src_hash, RSH, sp);
157
      idm_free(&src_ids, src->global_id);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
158
      sl_free(rte_src_slab, src);
159
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
160 161
  }
  HASH_WALK_FILTER_END;
162

Ondřej Zajíček's avatar
Ondřej Zajíček committed
163
  HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
164 165 166 167 168 169 170
}


/*
 *	Multipath Next Hop
 */

171
static inline u32
172
nexthop_hash(struct nexthop *x)
173
{
174
  u32 h = 0;
175
  for (; x; x = x->next)
176 177
  {
    h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
178 179

    for (int i = 0; i < x->labels; i++)
180 181
      h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
  }
182 183 184 185 186

  return h;
}

int
187
nexthop__same(struct nexthop *x, struct nexthop *y)
188 189
{
  for (; x && y; x = x->next, y = y->next)
190
  {
191 192
    if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) ||
	(x->flags != y->flags) || (x->weight != y->weight) ||
193
	(x->labels_orig != y->labels_orig) || (x->labels != y->labels))
194
      return 0;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
195 196

    for (int i = 0; i < x->labels; i++)
197 198 199
      if (x->label[i] != y->label[i])
	return 0;
  }
200

Ondřej Zajíček's avatar
Ondřej Zajíček committed
201
  return x == y;
202 203
}

204
static int
205
nexthop_compare_node(const struct nexthop *x, const  struct nexthop *y)
206 207 208 209 210 211 212 213 214
{
  int r;

  if (!x)
    return 1;

  if (!y)
    return -1;

215 216
  /* Should we also compare flags ? */

217 218 219 220 221 222 223 224
  r = ((int) y->weight) - ((int) x->weight);
  if (r)
    return r;

  r = ipa_compare(x->gw, y->gw);
  if (r)
    return r;

225 226 227 228
  r = ((int) y->labels) - ((int) x->labels);
  if (r)
    return r;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
229
  for (int i = 0; i < y->labels; i++)
230 231 232 233 234 235
  {
    r = ((int) y->label[i]) - ((int) x->label[i]);
    if (r)
      return r;
  }

236 237 238
  return ((int) x->iface->index) - ((int) y->iface->index);
}

239 240
static inline struct nexthop *
nexthop_copy_node(const struct nexthop *src, linpool *lp)
241
{
242 243 244
  struct nexthop *n = lp_alloc(lp, nexthop_size(src));

  memcpy(n, src, nexthop_size(src));
245
  n->next = NULL;
246

247 248 249 250
  return n;
}

/**
251
 * nexthop_merge - merge nexthop lists
252 253 254 255 256 257 258
 * @x: list 1
 * @y: list 2
 * @rx: reusability of list @x
 * @ry: reusability of list @y
 * @max: max number of nexthops
 * @lp: linpool for allocating nexthops
 *
259
 * The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
260 261 262 263 264 265 266 267 268 269 270 271
 * eliminating possible duplicates. The input lists must be sorted and the
 * result is sorted too. The number of nexthops in result is limited by @max.
 * New nodes are allocated from linpool @lp.
 *
 * The arguments @rx and @ry specify whether corresponding input lists may be
 * consumed by the function (i.e. their nodes reused in the resulting list), in
 * that case the caller should not access these lists after that. To eliminate
 * issues with deallocation of these lists, the caller should use some form of
 * bulk deallocation (e.g. stack or linpool) to free these nodes when the
 * resulting list is no longer needed. When reusability is not set, the
 * corresponding lists are not modified nor linked from the resulting list.
 */
272 273
struct nexthop *
nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
274
{
275 276
  struct nexthop *root = NULL;
  struct nexthop **n = &root;
277 278 279

  while ((x || y) && max--)
  {
280
    int cmp = nexthop_compare_node(x, y);
281 282
    if (cmp < 0)
    {
283
      *n = rx ? x : nexthop_copy_node(x, lp);
284 285 286 287
      x = x->next;
    }
    else if (cmp > 0)
    {
288
      *n = ry ? y : nexthop_copy_node(y, lp);
289 290 291 292
      y = y->next;
    }
    else
    {
293
      *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
294 295 296 297 298 299 300 301 302 303
      x = x->next;
      y = y->next;
    }
    n = &((*n)->next);
  }
  *n = NULL;

  return root;
}

304
void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
305
nexthop_insert(struct nexthop **n, struct nexthop *x)
306
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
307
  for (; *n; n = &((*n)->next))
308
  {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
309
    int cmp = nexthop_compare_node(*n, x);
310 311 312

    if (cmp < 0)
      continue;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
313 314 315 316
    else if (cmp > 0)
      break;
    else
      return;
317 318
  }

Ondřej Zajíček's avatar
Ondřej Zajíček committed
319 320
  x->next = *n;
  *n = x;
321 322
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
struct nexthop *
nexthop_sort(struct nexthop *x)
{
  struct nexthop *s = NULL;

  /* Simple insert-sort */
  while (x)
  {
    struct nexthop *n = x;
    x = n->next;
    n->next = NULL;

    nexthop_insert(&s, n);
  }

  return s;
}

341
int
342
nexthop_is_sorted(struct nexthop *x)
343 344
{
  for (; x && x->next; x = x->next)
345
    if (nexthop_compare_node(x, x->next) >= 0)
346 347 348 349
      return 0;

  return 1;
}
350

351 352 353
static inline slab *
nexthop_slab(struct nexthop *nh)
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
354
  return nexthop_slab_[MIN(nh->labels, 3)];
355 356
}

357 358
static struct nexthop *
nexthop_copy(struct nexthop *o)
359
{
360 361
  struct nexthop *first = NULL;
  struct nexthop **last = &first;
362 363 364

  for (; o; o = o->next)
    {
365
      struct nexthop *n = sl_alloc(nexthop_slab(o));
366 367 368
      n->gw = o->gw;
      n->iface = o->iface;
      n->next = NULL;
369
      n->flags = o->flags;
370
      n->weight = o->weight;
371
      n->labels_orig = o->labels_orig;
372 373 374
      n->labels = o->labels;
      for (int i=0; i<o->labels; i++)
	n->label[i] = o->label[i];
375 376 377 378 379 380 381 382 383

      *last = n;
      last = &(n->next);
    }

  return first;
}

static void
384
nexthop_free(struct nexthop *o)
385
{
386
  struct nexthop *n;
387 388 389 390

  while (o)
    {
      n = o->next;
391
      sl_free(nexthop_slab(o), o);
392 393 394 395 396
      o = n;
    }
}


397 398 399 400
/*
 *	Extended Attributes
 */

401 402
static inline eattr *
ea__find(ea_list *e, unsigned id)
403 404 405 406 407 408 409 410 411
{
  eattr *a;
  int l, r, m;

  while (e)
    {
      if (e->flags & EALF_BISECT)
	{
	  l = 0;
412
	  r = e->count - 1;
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
	  while (l <= r)
	    {
	      m = (l+r) / 2;
	      a = &e->attrs[m];
	      if (a->id == id)
		return a;
	      else if (a->id < id)
		l = m+1;
	      else
		r = m-1;
	    }
	}
      else
	for(m=0; m<e->count; m++)
	  if (e->attrs[m].id == id)
	    return &e->attrs[m];
      e = e->next;
    }
  return NULL;
}

434 435 436 437 438 439
/**
 * ea_find - find an extended attribute
 * @e: attribute list to search in
 * @id: attribute ID to search for
 *
 * Given an extended attribute list, ea_find() searches for a first
440
 * occurrence of an attribute with specified ID, returning either a pointer
441 442
 * to its &eattr structure or %NULL if no such attribute exists.
 */
443 444 445 446 447 448 449 450 451 452 453
eattr *
ea_find(ea_list *e, unsigned id)
{
  eattr *a = ea__find(e, id & EA_CODE_MASK);

  if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
      !(id & EA_ALLOW_UNDEF))
    return NULL;
  return a;
}

454 455 456 457 458 459 460 461 462 463
/**
 * ea_walk - walk through extended attributes
 * @s: walk state structure
 * @id: start of attribute ID interval
 * @max: length of attribute ID interval
 *
 * Given an extended attribute list, ea_walk() walks through the list looking
 * for first occurrences of attributes with ID in specified interval from @id to
 * (@id + @max - 1), returning pointers to found &eattr structures, storing its
 * walk state in @s for subsequent calls.
464
 *
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
 * The function ea_walk() is supposed to be called in a loop, with initially
 * zeroed walk state structure @s with filled the initial extended attribute
 * list, returning one found attribute in each call or %NULL when no other
 * attribute exists. The extended attribute list or the arguments should not be
 * modified between calls. The maximum value of @max is 128.
 */
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
  ea_list *e = s->eattrs;
  eattr *a = s->ea;
  eattr *a_max;

  max = id + max;

  if (a)
    goto step;

  for (; e; e = e->next)
  {
    if (e->flags & EALF_BISECT)
    {
      int l, r, m;

      l = 0;
      r = e->count - 1;
      while (l < r)
      {
	m = (l+r) / 2;
	if (e->attrs[m].id < id)
	  l = m + 1;
	else
	  r = m;
      }
      a = e->attrs + l;
    }
    else
      a = e->attrs;

  step:
    a_max = e->attrs + e->count;
    for (; a < a_max; a++)
      if ((a->id >= id) && (a->id < max))
      {
	int n = a->id - id;

	if (BIT32_TEST(s->visited, n))
	  continue;

	BIT32_SET(s->visited, n);

	if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	  continue;

	s->eattrs = e;
	s->ea = a;
	return a;
      }
      else if (e->flags & EALF_BISECT)
	break;
  }

  return NULL;
}

530 531 532 533 534 535 536 537 538 539
/**
 * ea_get_int - fetch an integer attribute
 * @e: attribute list
 * @id: attribute ID
 * @def: default value
 *
 * This function is a shortcut for retrieving a value of an integer attribute
 * by calling ea_find() to find the attribute, extracting its value or returning
 * a provided default if no such attribute is present.
 */
540 541 542 543 544 545 546 547 548
int
ea_get_int(ea_list *e, unsigned id, int def)
{
  eattr *a = ea_find(e, id);
  if (!a)
    return def;
  return a->u.data;
}

549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
static inline void
ea_do_sort(ea_list *e)
{
  unsigned n = e->count;
  eattr *a = e->attrs;
  eattr *b = alloca(n * sizeof(eattr));
  unsigned s, ss;

  /* We need to use a stable sorting algorithm, hence mergesort */
  do
    {
      s = ss = 0;
      while (s < n)
	{
	  eattr *p, *q, *lo, *hi;
	  p = b;
	  ss = s;
	  *p++ = a[s++];
	  while (s < n && p[-1].id <= a[s].id)
	    *p++ = a[s++];
	  if (s < n)
	    {
	      q = p;
	      *p++ = a[s++];
	      while (s < n && p[-1].id <= a[s].id)
		*p++ = a[s++];
	      lo = b;
	      hi = q;
	      s = ss;
	      while (lo < q && hi < p)
		if (lo->id <= hi->id)
		  a[s++] = *lo++;
		else
		  a[s++] = *hi++;
	      while (lo < q)
		a[s++] = *lo++;
	      while (hi < p)
		a[s++] = *hi++;
	    }
	}
    }
  while (ss);
}

593
/**
594 595
 * In place discard duplicates and undefs in sorted ea_list. We use stable sort
 * for this reason.
596
 **/
597 598 599
static inline void
ea_do_prune(ea_list *e)
{
600
  eattr *s, *d, *l, *s0;
601 602
  int i = 0;

603 604 605 606
  s = d = e->attrs;	    /* Beginning of the list. @s is source, @d is destination. */
  l = e->attrs + e->count;  /* End of the list */

  /* Walk from begin to end. */
607 608
  while (s < l)
    {
609
      s0 = s++;
610
      /* Find a consecutive block of the same attribute */
611 612
      while (s < l && s->id == s[-1].id)
	s++;
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627

      /* Now s0 is the most recent version, s[-1] the oldest one */
      /* Drop undefs */
      if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	continue;

      /* Copy the newest version to destination */
      *d = *s0;

      /* Preserve info whether it originated locally */
      d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);

      /* Next destination */
      d++;
      i++;
628
    }
629

630
  e->count = i;
631 632
}

633 634 635 636 637 638 639 640
/**
 * ea_sort - sort an attribute list
 * @e: list to be sorted
 *
 * This function takes a &ea_list chain and sorts the attributes
 * within each of its entries.
 *
 * If an attribute occurs multiple times in a single &ea_list,
641
 * ea_sort() leaves only the first (the only significant) occurrence.
642
 */
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
void
ea_sort(ea_list *e)
{
  while (e)
    {
      if (!(e->flags & EALF_SORTED))
	{
	  ea_do_sort(e);
	  ea_do_prune(e);
	  e->flags |= EALF_SORTED;
	}
      if (e->count > 5)
	e->flags |= EALF_BISECT;
      e = e->next;
    }
}

660 661 662 663 664 665 666
/**
 * ea_scan - estimate attribute list size
 * @e: attribute list
 *
 * This function calculates an upper bound of the size of
 * a given &ea_list after merging with ea_merge().
 */
667 668 669 670 671 672 673 674 675 676 677 678 679
unsigned
ea_scan(ea_list *e)
{
  unsigned cnt = 0;

  while (e)
    {
      cnt += e->count;
      e = e->next;
    }
  return sizeof(ea_list) + sizeof(eattr)*cnt;
}

680 681 682 683 684 685 686 687 688 689 690 691 692 693
/**
 * ea_merge - merge segments of an attribute list
 * @e: attribute list
 * @t: buffer to store the result to
 *
 * This function takes a possibly multi-segment attribute list
 * and merges all of its segments to one.
 *
 * The primary use of this function is for &ea_list normalization:
 * first call ea_scan() to determine how much memory will the result
 * take, then allocate a buffer (usually using alloca()), merge the
 * segments with ea_merge() and finally sort and prune the result
 * by calling ea_sort().
 */
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
void
ea_merge(ea_list *e, ea_list *t)
{
  eattr *d = t->attrs;

  t->flags = 0;
  t->count = 0;
  t->next = NULL;
  while (e)
    {
      memcpy(d, e->attrs, sizeof(eattr)*e->count);
      t->count += e->count;
      d += e->count;
      e = e->next;
    }
}

711 712 713 714 715 716 717 718
/**
 * ea_same - compare two &ea_list's
 * @x: attribute list
 * @y: attribute list
 *
 * ea_same() compares two normalized attribute lists @x and @y and returns
 * 1 if they contain the same attributes, 0 otherwise.
 */
719
int
720 721 722 723
ea_same(ea_list *x, ea_list *y)
{
  int c;

724 725 726 727 728 729
  if (!x || !y)
    return x == y;
  ASSERT(!x->next && !y->next);
  if (x->count != y->count)
    return 0;
  for(c=0; c<x->count; c++)
730
    {
731 732 733 734 735 736
      eattr *a = &x->attrs[c];
      eattr *b = &y->attrs[c];

      if (a->id != b->id ||
	  a->flags != b->flags ||
	  a->type != b->type ||
737
	  ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
738
	return 0;
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
    }
  return 1;
}

static inline ea_list *
ea_list_copy(ea_list *o)
{
  ea_list *n;
  unsigned i, len;

  if (!o)
    return NULL;
  ASSERT(!o->next);
  len = sizeof(ea_list) + sizeof(eattr) * o->count;
  n = mb_alloc(rta_pool, len);
  memcpy(n, o, len);
  n->flags |= EALF_CACHED;
  for(i=0; i<o->count; i++)
    {
      eattr *a = &n->attrs[i];
759
      if (!(a->type & EAF_EMBEDDED))
760 761 762 763 764 765 766 767 768 769
	{
	  unsigned size = sizeof(struct adata) + a->u.ptr->length;
	  struct adata *d = mb_alloc(rta_pool, size);
	  memcpy(d, a->u.ptr, size);
	  a->u.ptr = d;
	}
    }
  return n;
}

Martin Mareš's avatar
Martin Mareš committed
770 771 772
static inline void
ea_free(ea_list *o)
{
773 774
  int i;

Martin Mareš's avatar
Martin Mareš committed
775 776 777
  if (o)
    {
      ASSERT(!o->next);
778 779 780 781
      for(i=0; i<o->count; i++)
	{
	  eattr *a = &o->attrs[i];
	  if (!(a->type & EAF_EMBEDDED))
782
	    mb_free((void *) a->u.ptr);
783
	}
Martin Mareš's avatar
Martin Mareš committed
784 785 786 787
      mb_free(o);
    }
}

788 789 790 791 792 793 794 795
static int
get_generic_attr(eattr *a, byte **buf, int buflen UNUSED)
{
  if (a->id == EA_GEN_IGP_METRIC)
    {
      *buf += bsprintf(*buf, "igp_metric");
      return GA_NAME;
    }
796

797 798 799
  return GA_UNKNOWN;
}

800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
void
ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
{
  byte *bound = buf + bufsize - 32;
  u32 data = a->u.data;
  int i;

  for (i = min; i < max; i++)
    if ((data & (1u << i)) && names[i])
    {
      if (buf > bound)
      {
	strcpy(buf, " ...");
	return;
      }

      buf += bsprintf(buf, " %s", names[i]);
      data &= ~(1u << i);
    }

  if (data)
    bsprintf(buf, " %08x", data);

  return;
}

826
static inline void
827
opaque_format(const struct adata *ad, byte *buf, uint size)
828 829
{
  byte *bound = buf + size - 10;
830
  uint i;
831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849

  for(i = 0; i < ad->length; i++)
    {
      if (buf > bound)
	{
	  strcpy(buf, " ...");
	  return;
	}
      if (i)
	*buf++ = ' ';

      buf += bsprintf(buf, "%02x", ad->data[i]);
    }

  *buf = 0;
  return;
}

static inline void
850
ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end)
851 852
{
  int i = int_set_format(ad, way, 0, pos, end - pos);
853
  cli_printf(c, -1012, "\t%s", buf);
854 855 856
  while (i)
    {
      i = int_set_format(ad, way, i, buf, end - buf - 1);
857
      cli_printf(c, -1012, "\t\t%s", buf);
858 859 860
    }
}

861
static inline void
862
ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
863 864
{
  int i = ec_set_format(ad, 0, pos, end - pos);
865
  cli_printf(c, -1012, "\t%s", buf);
866 867 868
  while (i)
    {
      i = ec_set_format(ad, i, buf, end - buf - 1);
869
      cli_printf(c, -1012, "\t\t%s", buf);
870 871 872
    }
}

873
static inline void
874
ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
875 876 877 878 879 880 881 882 883 884
{
  int i = lc_set_format(ad, 0, pos, end - pos);
  cli_printf(c, -1012, "\t%s", buf);
  while (i)
    {
      i = lc_set_format(ad, i, buf, end - buf - 1);
      cli_printf(c, -1012, "\t\t%s", buf);
    }
}

885
/**
886 887 888
 * ea_show - print an &eattr to CLI
 * @c: destination CLI
 * @e: attribute to be printed
889
 *
890 891
 * This function takes an extended attribute represented by its &eattr
 * structure and prints it to the CLI according to the type information.
892 893 894 895
 *
 * If the protocol defining the attribute provides its own
 * get_attr() hook, it's consulted first.
 */
896
void
897
ea_show(struct cli *c, eattr *e)
898 899 900
{
  struct protocol *p;
  int status = GA_UNKNOWN;
901
  const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
902 903
  byte buf[CLI_MSG_SIZE];
  byte *pos = buf, *end = buf + sizeof(buf);
904

Maria Matejka's avatar
Maria Matejka committed
905 906 907 908 909 910 911 912 913 914 915 916
  if (EA_IS_CUSTOM(e->id))
    {
      const char *name = ea_custom_name(e->id);
      if (name)
        {
	  pos += bsprintf(pos, "%s", name);
	  status = GA_NAME;
	}
      else
	pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
    }
  else if (p = class_to_protocol[EA_PROTO(e->id)])
917
    {
918
      pos += bsprintf(pos, "%s.", p->name);
919
      if (p->get_attr)
920 921
	status = p->get_attr(e, pos, end - pos);
      pos += strlen(pos);
922 923
    }
  else if (EA_PROTO(e->id))
924
    pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
925
  else
926
    status = get_generic_attr(e, &pos, end - pos);
927

928
  if (status < GA_NAME)
929
    pos += bsprintf(pos, "%02x", EA_ID(e->id));
930 931
  if (status < GA_FULL)
    {
932 933
      *pos++ = ':';
      *pos++ = ' ';
934 935 936
      switch (e->type & EAF_TYPE_MASK)
	{
	case EAF_TYPE_INT:
937
	  bsprintf(pos, "%u", e->u.data);
938 939
	  break;
	case EAF_TYPE_OPAQUE:
940
	  opaque_format(ad, pos, end - pos);
941 942
	  break;
	case EAF_TYPE_IP_ADDRESS:
943
	  bsprintf(pos, "%I", *(ip_addr *) ad->data);
944 945
	  break;
	case EAF_TYPE_ROUTER_ID:
946
	  bsprintf(pos, "%R", e->u.data);
947
	  break;
948
	case EAF_TYPE_AS_PATH:
949
	  as_path_format(ad, pos, end - pos);
950
	  break;
951 952 953
	case EAF_TYPE_BITFIELD:
	  bsprintf(pos, "%08x", e->u.data);
	  break;
954
	case EAF_TYPE_INT_SET:
955 956
	  ea_show_int_set(c, ad, 1, pos, buf, end);
	  return;
957 958 959
	case EAF_TYPE_EC_SET:
	  ea_show_ec_set(c, ad, pos, buf, end);
	  return;
960 961 962
	case EAF_TYPE_LC_SET:
	  ea_show_lc_set(c, ad, pos, buf, end);
	  return;
963 964
	case EAF_TYPE_UNDEF:
	default:
965
	  bsprintf(pos, "<type %02x>", e->type);
966 967
	}
    }
968
  cli_printf(c, -1012, "\t%s", buf);
969 970
}

971 972 973 974 975 976 977
/**
 * ea_dump - dump an extended attribute
 * @e: attribute to be dumped
 *
 * ea_dump() dumps contents of the extended attribute given to
 * the debug output.
 */
978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994
void
ea_dump(ea_list *e)
{
  int i;

  if (!e)
    {
      debug("NONE");
      return;
    }
  while (e)
    {
      debug("[%c%c%c]",
	    (e->flags & EALF_SORTED) ? 'S' : 's',
	    (e->flags & EALF_BISECT) ? 'B' : 'b',
	    (e->flags & EALF_CACHED) ? 'C' : 'c');
      for(i=0; i<e->count; i++)
995
	{
996 997 998
	  eattr *a = &e->attrs[i];
	  debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
	  debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
999 1000
	  if (a->type & EAF_ORIGINATED)
	    debug("o");
1001 1002 1003 1004 1005 1006 1007 1008 1009
	  if (a->type & EAF_EMBEDDED)
	    debug(":%08x", a->u.data);
	  else
	    {
	      int j, len = a->u.ptr->length;
	      debug("[%d]:", len);
	      for(j=0; j<len; j++)
		debug("%02x", a->u.ptr->data[j]);
	    }
1010
	}
1011 1012
      if (e = e->next)
	debug(" | ");
1013 1014 1015
    }
}

1016 1017 1018 1019 1020 1021 1022
/**
 * ea_hash - calculate an &ea_list hash key
 * @e: attribute list
 *
 * ea_hash() takes an extended attribute list and calculated a hopefully
 * uniformly distributed hash value from its contents.
 */
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1023
inline uint
1024 1025
ea_hash(ea_list *e)
{
1026 1027
  const u64 mul = 0x68576150f3d6847;
  u64 h = 0xafcef24eda8b29;
1028 1029 1030 1031 1032 1033 1034
  int i;

  if (e)			/* Assuming chain of length 1 */
    {
      for(i=0; i<e->count; i++)
	{
	  struct eattr *a = &e->attrs[i];
1035
	  h ^= a->id; h *= mul;
1036 1037 1038 1039
	  if (a->type & EAF_EMBEDDED)
	    h ^= a->u.data;
	  else
	    {
1040
	      const struct adata *d = a->u.ptr;
1041
	      h ^= mem_hash(d->data, d->length);
1042
	    }
1043
	  h *= mul;
1044 1045
	}
    }
1046
  return (h >> 32) ^ (h & 0xffffffff);
1047 1048
}

1049 1050 1051 1052 1053 1054 1055 1056
/**
 * ea_append - concatenate &ea_list's
 * @to: destination list (can be %NULL)
 * @what: list to be appended (can be %NULL)
 *
 * This function appends the &ea_list @what at the end of
 * &ea_list @to and returns a pointer to the resulting list.
 */
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
ea_list *
ea_append(ea_list *to, ea_list *what)
{
  ea_list *res;

  if (!to)
    return what;
  res = to;
  while (to->next)
    to = to->next;
  to->next = what;
  return res;
}

1071 1072 1073 1074
/*
 *	rta's
 */

Pavel Tvrdík's avatar
Pavel Tvrdík committed
1075 1076 1077 1078
static uint rta_cache_count;
static uint rta_cache_size = 32;
static uint rta_cache_limit;
static uint rta_cache_mask;
1079 1080 1081 1082 1083
static rta **rta_hash_table;

static void
rta_alloc_hash(void)
{
Martin Mareš's avatar
Martin Mareš committed
1084
  rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
1085 1086 1087 1088 1089 1090 1091
  if (rta_cache_size < 32768)
    rta_cache_limit = rta_cache_size * 2;
  else
    rta_cache_limit = ~0;
  rta_cache_mask = rta_cache_size - 1;
}

Pavel Tvrdík's avatar
Pavel Tvrdík committed
1092
static inline uint
1093 1094
rta_hash(rta *a)
{
1095
  u64 h;
1096
  mem_hash_init(&h);
1097
#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
1098 1099 1100 1101
  MIX(src);
  MIX(hostentry);
  MIX(from);
  MIX(igp_metric);
1102 1103 1104
  MIX(source);
  MIX(scope);
  MIX(dest);
1105
#undef MIX
1106

1107
  return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
1108 1109
}

1110 1111 1112
static inline int
rta_same(rta *x, rta *y)
{
1113
  return (x->src == y->src &&
1114 1115 1116
	  x->source == y->source &&
	  x->scope == y->scope &&
	  x->dest == y->dest &&
1117
	  x->igp_metric == y->igp_metric &&
1118
	  ipa_equal(x->from, y->from) &&
1119
	  x->hostentry == y->hostentry &&
1120
	  nexthop_same(&(x->nh), &(y->nh)) &&
1121
	  ea_same(x->eattrs, y->eattrs));
1122 1123
}

1124 1125 1126 1127 1128 1129
static inline slab *
rta_slab(rta *a)
{
  return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
}

1130 1131 1132
static rta *
rta_copy(rta *o)
{
1133
  rta *r = sl_alloc(rta_slab(o));
1134

1135
  memcpy(r, o, rta_size(o));
1136
  r->uc = 1;
1137
  r->nh.next = nexthop_copy(o->nh.next);
1138
  r->eattrs = ea_list_copy(o->eattrs);
1139 1140 1141
  return r;
}

1142 1143 1144
static inline void
rta_insert(rta *r)
{
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1145
  uint h = r->hash_key & rta_cache_mask;
1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
  r->next = rta_hash_table[h];
  if (r->next)
    r->next->pprev = &r->next;
  r->pprev = &rta_hash_table[h];
  rta_hash_table[h] = r;
}

static void
rta_rehash(void)
{
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1156 1157
  uint ohs = rta_cache_size;
  uint h;
1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
  rta *r, *n;
  rta **oht = rta_hash_table;

  rta_cache_size = 2*rta_cache_size;
  DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
  rta_alloc_hash();
  for(h=0; h<ohs; h++)
    for(r=oht[h]; r; r=n)
      {
	n = r->next;
	rta_insert(r);
      }
  mb_free(oht);
}

1173 1174
/**
 * rta_lookup - look up a &rta in attribute cache
1175
 * @o: a un-cached &rta
1176
 *
1177
 * rta_lookup() gets an un-cached &rta structure and returns its cached
1178 1179 1180 1181 1182 1183 1184 1185
 * counterpart. It starts with examining the attribute cache to see whether
 * there exists a matching entry. If such an entry exists, it's returned and
 * its use count is incremented, else a new entry is created with use count
 * set to 1.
 *
 * The extended attribute lists attached to the &rta are automatically
 * converted to the normalized form.
 */
1186 1187 1188 1189
rta *
rta_lookup(rta *o)
{
  rta *r;
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1190
  uint h;
1191

1192
  ASSERT(!(o->aflags & RTAF_CACHED));
1193
  if (o->eattrs)
1194
    ea_normalize(o->eattrs);
1195

1196 1197 1198
  h = rta_hash(o);
  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
    if (r->hash_key == h && rta_same(r, o))
1199
      return rta_clone(r);
1200

1201
  r = rta_copy(o);
1202
  r->hash_key = h;
1203
  r->aflags = RTAF_CACHED;
1204
  rt_lock_source(r->src);
1205
  rt_lock_hostentry(r->hostentry);
1206 1207 1208 1209 1210
  rta_insert(r);

  if (++rta_cache_count > rta_cache_limit)
    rta_rehash();

1211 1212 1213 1214
  return r;
}

void
1215
rta__free(rta *a)
1216
{
1217 1218
  ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
  rta_cache_count--;
Martin Mareš's avatar
Martin Mareš committed
1219 1220 1221
  *a->pprev = a->next;
  if (a->next)
    a->next->pprev = a->pprev;
1222
  rt_unlock_hostentry(a->hostentry);
1223
  rt_unlock_source(a->src);
1224 1225
  if (a->nh.next)
    nexthop_free(a->nh.next);
Martin Mareš's avatar
Martin Mareš committed
1226
  ea_free(a->eattrs);
1227
  a->aflags = 0;		/* Poison the entry */
1228
  sl_free(rta_slab(a), a);
1229 1230
}

1231 1232 1233
rta *
rta_do_cow(rta *o, linpool *lp)
{
1234 1235
  rta *r = lp_alloc(lp, rta_size(o));
  memcpy(r, o, rta_size(o));
1236 1237 1238 1239 1240 1241
  for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
    {
      *nhn = lp_alloc(lp, nexthop_size(nho));
      memcpy(*nhn, nho, nexthop_size(nho));
      nhn = &((*nhn)->next);
    }
1242 1243 1244 1245 1246
  r->aflags = 0;
  r->uc = 0;
  return r;
}

1247 1248 1249 1250 1251 1252
/**
 * rta_dump - dump route attributes
 * @a: attribute structure to dump
 *
 * This function takes a &rta and dumps its contents to the debug output.
 */
1253
void
1254
rta_dump(rta *a)
1255
{
1256
  static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
Martin Mareš's avatar
Martin Mareš committed
1257
			 "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
Ondřej Filip's avatar
Ondřej Filip committed
1258
			 "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
1259
			 "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
1260 1261
  static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };

1262 1263
  debug("p=%s uc=%d %s %s%s h=%04x",
	a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
1264
	rtd[a->dest], a->hash_key);
1265 1266
  if (!(a->aflags & RTAF_CACHED))
    debug(" !CACHED");
1267
  debug(" <-%I", a->from);
1268 1269 1270 1271
  if (a->dest == RTD_UNICAST)
    for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
      {
	if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
1272 1273 1274
	if (nh->labels) debug(" L %d", nh->label[0]);
	for (int i=1; i<nh->labels; i++)
	  debug("/%d", nh->label[i]);
1275 1276
	debug(" [%s]", nh->iface ? nh->iface->name : "???");
      }
1277
  if (a->eattrs)
1278 1279
    {
      debug(" EA: ");
1280
      ea_dump(a->eattrs);
1281
    }
1282 1283
}

1284 1285 1286 1287 1288 1289
/**
 * rta_dump_all - dump attribute cache
 *
 * This function dumps the whole contents of route attribute cache
 * to the debug output.
 */
1290 1291 1292
void
rta_dump_all(void)
{
1293
  rta *a;
Pavel Tvrdík's avatar
Pavel Tvrdík committed
1294
  uint h;
1295 1296 1297 1298 1299 1300 1301 1302 1303

  debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
  for(h=0; h<rta_cache_size; h++)
    for(a=rta_hash_table[h]; a; a=a->next)
      {
	debug("%p ", a);
	rta_dump(a);
	debug("\n");
      }
1304
  debug("\n");
1305 1306
}

1307
void
1308
rta_show(struct cli *c, rta *a)
1309
{
1310
  cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
1311 1312 1313

  for(ea_list *eal = a->eattrs; eal; eal=eal->next)
    for(int i=0; i<eal->count; i++)
1314
      ea_show(c, &eal->attrs[i]);
1315 1316
}

1317 1318 1319 1320 1321 1322
/**
 * rta_init - initialize route attribute cache
 *
 * This function is called during initialization of the routing
 * table module to set up the internals of the attribute cache.
 */
1323 1324 1325
void
rta_init(void)
{
1326
  rta_pool = rp_new(&root_pool, "Attributes");
1327 1328 1329 1330

  rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
  rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
  rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
1331
  rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1332 1333 1334 1335

  nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
  nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
  nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
1336
  nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1337

1338
  rta_alloc_hash();
1339
  rte_src_init();
1340
}
1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370

/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * rta_clone - clone route attributes
 * @r: a &rta to be cloned
 *
 * rta_clone() takes a cached &rta and returns its identical cached
 * copy. Currently it works by just returning the original &rta with
 * its use count incremented.
 */
static inline rta *rta_clone(rta *r)
{ DUMMY; }

/**
 * rta_free - free route attributes
 * @r: a &rta to be freed
 *
 * If you stop using a &rta (for example when deleting a route which uses
 * it), you need to call rta_free() to notify the attribute cache the
 * attribute is no longer in use and can be freed if you were the last
 * user (which rta_free() tests by inspecting the use count).
 */
static inline void rta_free(rta *r)
{ DUMMY; }

#endif