rt-table.c 64.1 KB
Newer Older
1
/*
Martin Mareš's avatar
Martin Mareš committed
2
 *	BIRD -- Routing Tables
3
 *
4
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
5 6 7 8
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9 10 11 12 13 14 15
/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
16
 * There are multiple routing tables (a primary one together with any
Martin Mareš's avatar
Martin Mareš committed
17 18
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
Martin Mareš's avatar
Martin Mareš committed
19
 * destination networks. For each network (represented by structure &net),
20 21
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
Martin Mareš's avatar
Martin Mareš committed
22 23 24 25 26 27 28 29 30
 * for routing), the order of the other ones is undetermined.
 *
 * The &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes which are expected to be shared by multiple
 * routes in order to conserve memory.
 */

31
#undef LOCAL_DEBUG
32

33 34
#include "nest/bird.h"
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/iface.h"
37
#include "lib/resource.h"
38
#include "lib/event.h"
39
#include "lib/string.h"
40
#include "conf/conf.h"
41
#include "filter/filter.h"
42
#include "lib/hash.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
Martin Mareš's avatar
Martin Mareš committed
45

46 47
pool *rt_table_pool;

48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

51
static list routing_tables;
52

53 54 55 56
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
57
static inline void rt_prune_table(rtable *tab);
58

59

60
/* Like fib_route(), but skips empty net entries */
61
static inline void *
62
net_route_ip4(rtable *t, net_addr_ip4 *n)
63
{
64
  net *r;
65

66
  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
67 68 69 70 71 72 73 74 75
  {
    n->pxlen--;
    ip4_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
76
net_route_ip6(rtable *t, net_addr_ip6 *n)
77 78 79
{
  net *r;

80
  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
81 82 83 84 85 86 87 88
  {
    n->pxlen--;
    ip6_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
static inline void *
net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
{
  struct fib_node *fn;

  while (1)
  {
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the smallest matching src prefix. */
    for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(n, a) &&
	  net_in_net_src_ip6_sadr(n, a) &&
	  (a->src_pxlen >= best_pxlen))
      {
	best = fib_node_to_user(&t->fib, fn);
	best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;

    if (!n->dst_pxlen)
      break;

    n->dst_pxlen--;
    ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
  }

  return NULL;
}

128 129
void *
net_route(rtable *tab, const net_addr *n)
130
{
131
  ASSERT(tab->addr_type == n->type);
132

133 134 135 136 137 138 139 140
  net_addr *n0 = alloca(n->length);
  net_copy(n0, n);

  switch (n->type)
  {
  case NET_IP4:
  case NET_VPN4:
  case NET_ROA4:
141
    return net_route_ip4(tab, (net_addr_ip4 *) n0);
142 143 144 145

  case NET_IP6:
  case NET_VPN6:
  case NET_ROA6:
146
    return net_route_ip6(tab, (net_addr_ip6 *) n0);
147

148 149 150
  case NET_IP6_SADR:
    return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);

151 152 153 154 155 156 157 158 159 160
  default:
    return NULL;
  }
}


static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
161
  struct fib_node *fn;
162 163
  int anything = 0;

164 165 166 167
  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
168
      net_addr_roa4 *roa = (void *) fn->addr;
169
      net *r = fib_node_to_user(&tab->fib, fn);
170 171

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

189 190
static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
191 192 193
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
194 195
  int anything = 0;

196 197 198 199
  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
200
      net_addr_roa6 *roa = (void *) fn->addr;
201
      net *r = fib_node_to_user(&tab->fib, fn);
202 203

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
 * must have type NET_IP4 or NET_IP6, respectively.
 */
int
237 238
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
239 240 241 242
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
243
  else
244
    return ROA_UNKNOWN;	/* Should not happen */
245
}
246

Martin Mareš's avatar
Martin Mareš committed
247 248 249
/**
 * rte_find - find a route
 * @net: network node
250
 * @src: route source
Martin Mareš's avatar
Martin Mareš committed
251 252
 *
 * The rte_find() function returns a route for destination @net
253
 * which is from route source @src.
Martin Mareš's avatar
Martin Mareš committed
254
 */
255
rte *
256
rte_find(net *net, struct rte_src *src)
257 258 259
{
  rte *e = net->routes;

260
  while (e && e->attrs->src != src)
261 262 263 264
    e = e->next;
  return e;
}

Martin Mareš's avatar
Martin Mareš committed
265 266
/**
 * rte_get_temp - get a temporary &rte
267
 * @a: attributes to assign to the new route (a &rta; in case it's
268
 * un-cached, rte_update() will create a cached copy automatically)
Martin Mareš's avatar
Martin Mareš committed
269 270 271 272 273
 *
 * Create a temporary &rte and bind it with the attributes @a.
 * Also set route preference to the default preference set for
 * the protocol.
 */
274 275 276 277 278 279
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
280
  e->flags = 0;
281
  e->pref = 0;
282 283 284
  return e;
}

285 286 287 288 289 290 291 292 293 294 295
rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in that case they are returned unchanged.  Second, &rte is
 * private copy, but &rta is cached, in that case &rta is duplicated using
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, cached &rta loses one reference, while private
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
 * nexthops, ...) with it. To work properly, original shared &rta should have
 * another reference during the life of created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

321
  r = rte_cow(r);
322
  rta *a = rta_do_cow(r->attrs, lp);
323 324 325
  rta_free(r->attrs);
  r->attrs = a;
  return r;
326 327
}

328 329 330
static int				/* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
331 332
  int (*better)(rte *, rte *);

333
  if (!rte_is_valid(old))
334
    return 1;
335 336 337
  if (!rte_is_valid(new))
    return 0;

338 339 340 341
  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
342
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
343 344 345 346 347 348
    {
      /*
       *  If the user has configured protocol preferences, so that two different protocols
       *  have the same preference, try to break the tie by comparing addresses. Not too
       *  useful, but keeps the ordering of routes unambiguous.
       */
349
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
350
    }
351
  if (better = new->attrs->src->proto->rte_better)
352 353
    return better(new, old);
  return 0;
354 355
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

376 377 378
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
379
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
380 381 382
}

static inline void
Pavel Tvrdík's avatar
Pavel Tvrdík committed
383
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
384 385
{
  if (p->debug & flag)
386
    rte_trace(p, e, '>', msg);
387 388 389
}

static inline void
Pavel Tvrdík's avatar
Pavel Tvrdík committed
390
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
391 392
{
  if (p->debug & flag)
393
    rte_trace(p, e, '<', msg);
394 395
}

396
static rte *
397
export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
398
{
399 400 401
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
402 403
  rte *rt;
  int v;
404

405 406
  rt = rt0;
  *rt_free = NULL;
407

408
  rte_make_tmp_attrs(&rt, pool);
409

410
  v = p->import_control ? p->import_control(p, &rt, pool) : 0;
411 412 413 414
  if (v < 0)
    {
      if (silent)
	goto reject;
415

416
      stats->exp_updates_rejected++;
417 418
      if (v == RIC_REJECT)
	rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
419 420 421
      goto reject;
    }
  if (v > 0)
422
    {
423 424 425
      if (!silent)
	rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
426
    }
427

428
  v = filter && ((filter == FILTER_REJECT) ||
429 430
		 (f_run(filter, &rt, pool,
			(silent ? FF_SILENT : 0)) > F_ACCEPT));
431 432 433 434 435 436 437 438
  if (v)
    {
      if (silent)
	goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
439
    }
440

441 442 443 444 445 446 447 448 449 450 451 452
 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

453
static inline rte *
454
export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
455
{
456
  return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
457 458
}

459
static void
460
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
461
{
462 463
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
464

465

466
  /*
467 468
   * First, apply export limit.
   *
469 470
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
471
   * limit is breached and whether the update is new or not. Therefore
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */
492

493 494
  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
495
    {
496
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
497
	channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
498 499 500

      if (l->state == PLS_BLOCKED)
	{
501
	  stats->exp_routes++;	/* see note above */
502 503
	  stats->exp_updates_rejected++;
	  rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
504
	  new = NULL;
505 506 507

	  if (!old)
	    return;
508 509 510
	}
    }

511

512
  if (new)
513
    stats->exp_updates_accepted++;
514
  else
515
    stats->exp_withdraws_accepted++;
516

517 518
  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
519
  if (new)
520
    stats->exp_routes++;
521
  if (old && !refeed)
522
    stats->exp_routes--;
523

524 525 526 527 528 529
  if (p->debug & D_ROUTES)
    {
      if (new && old)
	rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
	rte_trace_out(D_ROUTES, p, new, "added");
530
      else if (old)
531 532
	rte_trace_out(D_ROUTES, p, old, "removed");
    }
533
  p->rt_notify(p, c, net, new, old);
534 535 536
}

static void
537
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
538
{
539
  struct proto *p = c->proto;
540

541 542
  rte *new = new0;
  rte *old = old0;
543 544 545 546
  rte *new_free = NULL;
  rte *old_free = NULL;

  if (new)
547
    c->stats.exp_updates_received++;
548
  else
549
    c->stats.exp_withdraws_received++;
550 551

  /*
552 553 554 555
   * This is a tricky part - we don't know whether route 'old' was exported to
   * protocol 'p' or was filtered by the export filter. We try to run the export
   * filter to know this to have a correct value in 'old' argument of rte_update
   * (and proper filter value).
556
   *
557 558 559 560
   * This is broken because 'configure soft' may change filters but keep routes.
   * Refeed cycle is expected to be called after change of the filters and with
   * old == new, therefore we do not even try to run the filter on an old route.
   * This may lead to 'spurious withdraws' but ensure that there are no 'missing
561 562
   * withdraws'.
   *
563 564 565 566 567 568
   * This is not completely safe as there is a window between reconfiguration
   * and the end of refeed - if a newly filtered route disappears during this
   * period, proper withdraw is not sent (because old would be also filtered)
   * and the route is not refeeded (because it disappeared before that).
   * Therefore, we also do not try to run the filter on old routes that are
   * older than the last filter change.
569 570 571
   */

  if (new)
572
    new = export_filter(c, new, &new_free, 0);
573

574
  if (old && !(refeed || (old->lastmod <= c->last_tx_filter_change)))
575
    old = export_filter(c, old, &old_free, 1);
576 577

  if (!new && !old)
578 579 580 581 582 583 584 585 586 587 588 589 590 591
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
592
      p->rt_notify(p, c, net, NULL, old0);
593 594
#endif

595
    return;
596
  }
597

598
  do_rt_notify(c, net, new, old, refeed);
599 600 601 602 603 604 605 606 607

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}

static void
608
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
609
{
610
  // struct proto *p = c->proto;
611

612
  rte *r;
613 614 615 616 617
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;

618 619 620 621 622 623 624
  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */
625 626

  if (new_changed)
627
    c->stats.exp_updates_received++;
628
  else
629
    c->stats.exp_withdraws_received++;
630 631

  /* First, find the new_best route - first accepted by filters */
632
  for (r=net->routes; rte_is_valid(r); r=r->next)
633
    {
634
      if (new_best = export_filter(c, r, &new_free, 0))
635 636 637 638 639 640 641
	break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
	old_meet = 1;
    }

642
  /*
643
   * Second, handle the feed case. That means we do not care for
644
   * old_best. It is NULL for feed, and the new_best for refeed.
645 646 647 648 649 650
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)	/* refeed */
651 652
	old_best = new_best ? new_best :
	  (rte_is_valid(net->routes) ? net->routes : NULL);
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
      else
	old_best = NULL;

      if (!new_best && !old_best)
	return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
681 682 683
   *
   * There is also a hack to ensure consistency in case of changed filters.
   * It does not find the proper old_best, just selects a non-NULL route.
684 685
   */

686 687 688 689 690 691 692
  /* Hack for changed filters */
  if (old_changed && (old_changed->lastmod <= c->last_tx_filter_change))
    {
      old_best = old_changed;
      goto found;
    }

693 694
  /* First case */
  if (old_meet)
695
    if (old_best = export_filter(c, old_changed, &old_free, 1))
696 697 698 699 700
      goto found;

  /* Second case */
  if (!new_best)
    return;
701

702
  /* Third case, we use r instead of new_best, because export_filter() could change it */
703 704 705 706 707 708 709 710
  if (r != new_changed)
    {
      if (new_free)
	rte_free(new_free);
      return;
    }

  /* Fourth case */
711
  for (r=r->next; rte_is_valid(r); r=r->next)
712
    {
713
      if (old_best = export_filter(c, r, &old_free, 1))
714 715 716
	goto found;

      if (r == before_old)
717
	if (old_best = export_filter(c, old_changed, &old_free, 1))
718 719 720 721 722 723
	  goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
724
  do_rt_notify(c, net, new_best, old_best, (feed == 2));
725 726 727 728 729 730

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
731 732
}

733

734 735
static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
736
{
737
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
738 739 740
}

rte *
741
rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
742
{
743
  // struct proto *p = c->proto;
744
  struct nexthop *nhs = NULL;
745 746 747 748 749 750 751 752
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

753
  best = export_filter_(c, best0, rt_free, pool, silent);
754 755 756 757 758 759 760 761 762

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

763
    rt = export_filter_(c, rt0, &tmp, pool, 1);
764 765 766 767 768

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
769
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
770 771 772 773 774 775 776

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
777
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
778 779 780

    if (nhs->next)
    {
781
      best = rte_cow_rta(best, pool);
782
      nexthop_link(best->attrs, nhs);
783 784 785 786 787 788 789 790 791 792 793
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}


static void
794
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
795 796
		 rte *new_best, rte*old_best, int refeed)
{
797
  // struct proto *p = c->proto;
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
814
      export_filter(c, new_changed, &new_changed_free, 1) : NULL;
815 816

    old_changed = rte_mergable(old_best, old_changed) ?
817
      export_filter(c, old_changed, &old_changed_free, 1) : NULL;
818 819 820 821 822 823

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
824
    c->stats.exp_updates_received++;
825
  else
826
    c->stats.exp_withdraws_received++;
827 828 829

  /* Prepare new merged route */
  if (new_best)
830
    new_best = rt_export_merged(c, net, &new_best_free, rte_update_pool, 0);
831 832 833 834

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
835
    old_best = export_filter(c, old_best, &old_best_free, 1);
836 837

  if (new_best || old_best)
838
    do_rt_notify(c, net, new_best, old_best, refeed);
839 840 841 842 843 844 845 846 847 848 849 850 851

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}


852 853 854
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
855
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
856 857
 * @net: network in question
 * @new: the new route to be announced
858
 * @old: the previous route for the same network
859 860 861
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 * @before_old: The previous route before @old for the same network.
862
 *		If @before_old is NULL @old was the first.
863 864
 *
 * This function gets a routing table update and announces it
Ondřej Zajíček's avatar
Ondřej Zajíček committed
865 866
 * to all protocols that acccepts given type of route announcement
 * and are connected to the same table by their announcement hooks.
867
 *
868
 * Route announcement of type %RA_OPTIMAL si generated when optimal
Ondřej Zajíček's avatar
Ondřej Zajíček committed
869 870
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
871
 *
872
 * Route announcement of type %RA_ANY si generated when any route (in
Ondřej Zajíček's avatar
Ondřej Zajíček committed
873 874 875 876 877 878 879 880 881 882
 * routing table @tab) changes In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.).  Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
883
 */
884
static void
885 886
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best, rte *before_old)
887
{
888 889 890
  if (!rte_is_valid(new))
    new = NULL;

891 892 893
  if (!rte_is_valid(old))
    old = before_old = NULL;

894 895 896 897 898
  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;
899 900 901

  if (!old && !new)
    return;
902

903 904
  if ((type == RA_OPTIMAL) && tab->hostcache)
    rt_notify_hostcache(tab, net);
905

906 907
  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
908
    {
909 910 911 912
      if (c->export_state == ES_DOWN)
	continue;

      if (c->ra_mode == type)
913
	if (type == RA_ACCEPTED)
914
	  rt_notify_accepted(c, net, new, old, before_old, 0);
915
	else if (type == RA_MERGED)
916
	  rt_notify_merged(c, net, new, old, new_best, old_best, 0);
917
	else
918
	  rt_notify_basic(c, net, new, old, 0);
919
    }
920 921
}

922 923 924 925 926 927
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

928 929 930 931 932 933
  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }
934

Ondřej Zajíček's avatar
Ondřej Zajíček committed
935 936 937
  /* FIXME: better handling different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
938
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
939 940 941 942 943
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }
944

945 946 947 948 949 950 951
  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
	n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

952
  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
953 954 955 956 957
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }
958

959 960 961
  return 1;
}

Martin Mareš's avatar
Martin Mareš committed
962 963 964 965 966 967
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
968
void
969
rte_free(rte *e)
970
{
971
  if (rta_is_cached(e->attrs))
972 973 974 975 976 977
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
978 979 980 981 982
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

983 984 985 986 987 988 989 990
static int
rte_same(rte *x, rte *y)
{
  return
    x->attrs == y->attrs &&
    x->flags == y->flags &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
991
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
992 993
}

994 995
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

996
static void
997
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
998
{
999 1000 1001
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
1002
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
1003
  rte *before_old = NULL;
1004 1005
  rte *old_best = net->routes;
  rte *old = NULL;
1006
  rte **k;
1007 1008 1009 1010

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
1011
      if (old->attrs->src == src)
1012
	{
1013 1014 1015 1016 1017 1018 1019 1020 1021
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
1022
	  if (old->sender->proto != p)
1023 1024 1025
	    {
	      if (new)
		{
1026 1027
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
		      net->n.addr, table->name);
1028 1029 1030 1031 1032
		  rte_free_quick(new);
		}
	      return;
	    }

1033
	  if (new && rte_same(old, new))
1034 1035
	    {
	      /* No changes, ignore the new route */
1036

1037
	      if (!rte_is_filtered(new))
1038 1039 1040 1041 1042
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, p, new, "ignored");
		}

1043 1044 1045
	      rte_free_quick(new);
	      return;
	    }
1046 1047 1048 1049
	  *k = old->next;
	  break;
	}
      k = &old->next;
1050
      before_old = old;
1051 1052
    }

1053 1054 1055
  if (!old)
    before_old = NULL;

1056 1057
  if (!old && !new)
    {
1058
      stats->imp_withdraws_ignored++;
1059 1060 1061
      return;
    }

1062 1063 1064
  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

1065 1066
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
1067
    {
1068
      u32 all_routes = stats->imp_routes + stats->filt_routes;
1069 1070

      if (all_routes >= l->limit)
1071
	channel_notify_limit(c, l, PLD_RX, all_routes);
1072 1073 1074

      if (l->state == PLS_BLOCKED)
	{
1075 1076 1077
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

1078 1079 1080 1081 1082
	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
1083 1084
    }

1085 1086
  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
1087 1088
    {
      if (stats->imp_routes >= l->limit)
1089
	channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

1103
	  if (c->in_keep_filtered)
1104 1105 1106 1107 1108
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
1109
	     c->in_keep_filtered changed in the recent past. */
1110 1111 1112 1113 1114 1115 1116 1117

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }
1118 1119

  if (new_ok)
1120
    stats->imp_updates_accepted++;
1121
  else if (old_ok)
1122
    stats->imp_withdraws_accepted++;
1123 1124
  else
    stats->imp_withdraws_ignored++;
1125

1126
 skip_stats1:
1127 1128

  if (new)
1129
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1130
  if (old)
1131
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1132

1133
  if (table->config->sorted)
1134
    {
1135 1136 1137 1138 1139 1140 1141
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;
1142

1143 1144 1145
	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;
1146

1147 1148 1149
	  new->next = *k;
	  *k = new;
	}
1150
    }
1151
  else
1152
    {
1153 1154 1155
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

1156
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1157 1158 1159
	goto do_recalculate;

      if (new && rte_better(new, old_best))
1160
	{
1161 1162 1163
	  /* The first case - the new route is cleary optimal,
	     we link it at the first position */

1164 1165 1166
	  new->next = net->routes;
	  net->routes = new;
	}
1167
      else if (old == old_best)
1168
	{
1169 1170 1171 1172 1173 1174 1175 1176 1177
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
1178
	    {
1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195
	      new->next = net->routes;
	      net->routes = new;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
1196 1197
	    }
	}
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route after the old best route. */

	  ASSERT(net->routes != NULL);
	  new->next = net->routes->next;
	  net->routes->next = new;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1210
    }
1211

1212
  if (new)
1213
    new->lastmod = current_time();
1214 1215

  /* Log the route change */
1216
  if (p->debug & D_ROUTES)
1217
    {
1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228
      if (new_ok)
	rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(p, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(p, old, '>', "removed [replaced]");
	  else
	    rte_trace(p, old, '>', "removed [sole]");
	}
1229 1230
    }

1231
  /* Propagate the route change */
1232
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1233
  if (net->routes != old_best)
1234
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1235
  if (table->config->sorted)
1236 1237
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1238 1239 1240

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1241
      (table->gc_time + table->config->gc_min_time <= current_time()))
1242
    rt_schedule_prune(table);
1243

1244 1245 1246 1247 1248
  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

1249
  if (old)
1250
    rte_free_quick(old);
1251 1252
}

1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

Martin Mareš's avatar
Martin Mareš committed
1288 1289 1290
/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
1291
 * @c: channel doing the update
Martin Mareš's avatar
Martin Mareš committed
1292 1293
 * @net: network node
 * @p: protocol submitting the update
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1294
 * @src: protocol originating the update
Martin Mareš's avatar
Martin Mareš committed
1295 1296 1297 1298
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
1299
 * sequence is to build route attributes first (either un-cached with @aflags set
Martin Mareš's avatar
Martin Mareš committed
1300 1301 1302 1303 1304
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1305 1306 1307 1308 1309 1310
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender;
 *
1311 1312 1313 1314 1315 1316 1317 1318 1319
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1320
 * automatically find any old route defined by the protocol @src
Martin Mareš's avatar
Martin Mareš committed
1321 1322
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
1323
 * the change (if any) to all routing protocols by calling rte_announce().
1324 1325 1326 1327
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
Martin Mareš's avatar
Martin Mareš committed
1328
 */
1329 1330

void
1331
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1332
{
1333 1334 1335
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
1336
  rte *dummy = NULL;
1337
  net *nn;
1338

1339 1340
  ASSERT(c->channel_state == CS_UP);

1341 1342 1343
  rte_update_lock();
  if (new)
    {
1344 1345 1346
      nn = net_get(c->table, n);

      new->net = nn;
1347 1348 1349 1350
      new->sender = c;

      if (!new->pref)
	new->pref = c->preference;
1351

1352
      stats->imp_updates_received++;
1353 1354 1355
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, p, new, "invalid");
1356
	  stats->imp_updates_invalid++;
1357 1358
	  goto drop;
	}
1359

1360
      if (filter == FILTER_REJECT)
1361
	{
1362
	  stats->imp_updates_filtered++;
1363
	  rte_trace_in(D_FILTERS, p, new, "filtered out");
1364

1365
	  if (! c->in_keep_filtered)
1366 1367 1368
	    goto drop;

	  /* new is a private copy, i could modify it */
1369
	  new->flags |= REF_FILTERED;
1370
	}
1371
      else
1372
	{
1373
	  rte_make_tmp_attrs(&new, rte_update_pool);
1374
	  if (filter && (filter != FILTER_REJECT))
1375
	    {
1376 1377
	      ea_list *oldea