/*
 *	BIRD -- UNIX Kernel Synchronization
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

9 10 11 12 13 14 15 16 17 18
/**
 * DOC: Kernel synchronization
 *
 * This system dependent module implements the Kernel and Device protocol,
 * that is synchronization of interface lists and routing tables with the
 * OS kernel.
 *
 * The whole kernel synchronization is a bit messy and touches some internals
 * of the routing table engine, because routing table maintenance is a typical
 * example of the proverbial compatibility between different Unices and we want
 * to keep the overhead of our KRT business as low as possible and avoid maintaining
 * a local routing table copy.
 *
 * The kernel syncer can work in three different modes (according to system config header):
 * Either with a single routing table and single KRT protocol [traditional UNIX]
 * or with many routing tables and separate KRT protocols for all of them
 * or with many routing tables, but every scan including all tables, so we start
 * separate KRT protocols which cooperate with each other  [Linux 2.2].
 * In this case, we keep only a single scan timer.
 *
 * We use FIB node flags in the routing table to keep track of route
 * synchronization status. We also attach temporary &rte's to the routing table,
 * but it cannot do any harm to the rest of BIRD since table synchronization is
 * an atomic process.
 *
 * When starting up, we cheat by looking if there is another
 * KRT instance to be initialized later and performing table scan
 * only once for all the instances.
 */
38 39 40 41 42

/*
 *  If you are brave enough, continue now.  You cannot say you haven't been warned.
 */

43
#undef LOCAL_DEBUG
44 45 46 47 48

#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/route.h"
#include "nest/protocol.h"
49
#include "filter/filter.h"
50
#include "lib/timer.h"
51
#include "conf/conf.h"
Martin Mareš's avatar
Martin Mareš committed
52
#include "lib/string.h"
53 54 55 56

#include "unix.h"
#include "krt.h"

57 58 59 60
/*
 *	Global resources
 */

61
pool *krt_pool;
62
static linpool *krt_filter_lp;
63

64 65 66
void
krt_io_init(void)
{
67
  krt_pool = rp_new(&root_pool, "Kernel Syncer");
68
  krt_filter_lp = lp_new(krt_pool, 4080);
69
  kif_sys_io_init();
70 71 72 73 74 75
}

/*
 *	Interfaces
 */

76
/* Device protocol config; cleared in kif_preconfig(), set in kif_init_config() */
static struct kif_config *kif_cf;
/* Running Device protocol instance; set in kif_start(), NULL after kif_shutdown() */
static struct kif_proto *kif_proto;
/* Timer whose hook is kif_scan() */
static timer *kif_scan_timer;
/* Value of `now' recorded by the most recent kif_scan() */
static bird_clock_t kif_last_shot;

/*
 * kif_scan - timer hook performing one interface scan
 * @t: the scan timer; its data pointer is the Device protocol instance
 *
 * Records the time of the scan and hands over to the sysdep scanner.
 */
static void
kif_scan(timer *t)
{
  struct kif_proto *p = t->data;

  kif_last_shot = now;
  KRT_TRACE(p, D_EVENTS, "Scanning interfaces");
  kif_do_scan(p);
}

static void
kif_force_scan(void)
{
  if (kif_proto && kif_last_shot + 2 < now)
    {
      kif_scan(kif_scan_timer);
      tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time);
    }
}

101 102 103 104 105 106 107
/*
 * kif_request_scan - ask for an interface scan soon
 *
 * When a Device protocol is running and its scan timer expires in the
 * future, the timer is restarted with a delay of 1.
 */
void
kif_request_scan(void)
{
  if (!kif_proto)
    return;

  if (kif_scan_timer->expires > now)
    tm_start(kif_scan_timer, 1);
}

108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
/* Returns 1 iff @a has scope above SCOPE_LINK while @b does not. */
static inline int
prefer_scope(struct ifa *a, struct ifa *b)
{
  if (a->scope <= SCOPE_LINK)
    return 0;

  return b->scope <= SCOPE_LINK;
}

/* Returns 1 iff the IP address of @a compares lower than that of @b. */
static inline int
prefer_addr(struct ifa *a, struct ifa *b)
{
  int cmp = ipa_compare(a->ip, b->ip);

  return cmp < 0;
}

/*
 * find_preferred_ifa - pick the preferred address of an interface
 * @i: interface whose address list is searched
 * @prefix: required network prefix
 * @mask: netmask applied to each address before comparing with @prefix
 *
 * Only non-secondary addresses matching @prefix under @mask are
 * considered. Among them, an address preferred by prefer_scope() wins;
 * prefer_addr() is used only as a tie-breaker when neither candidate is
 * preferred by scope. (Previously prefer_addr() could override the scope
 * preference, which made the result depend on the order of the list.)
 *
 * Returns the chosen address or NULL when no address matches.
 */
static inline struct ifa *
find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask)
{
  struct ifa *a, *b = NULL;

  WALK_LIST(a, i->addrs)
    {
      if (a->flags & IA_SECONDARY)
        continue;
      if (!ipa_equal(ipa_and(a->ip, mask), prefix))
        continue;

      if (!b ||
          prefer_scope(a, b) ||
          (!prefer_scope(b, a) && prefer_addr(a, b)))
        b = a;
    }

  return b;
}

/*
 * kif_choose_primary - choose the primary address of an interface
 * @i: interface in question
 *
 * Walks the configured list of primary items of the running Device
 * protocol. For every item without a pattern, or whose pattern matches
 * the interface name, the preferred address inside the item's prefix is
 * looked up and the first hit is returned. When nothing matches, the
 * preferred address for IPA_NONE/IPA_NONE is returned instead.
 *
 * NOTE(review): dereferences kif_proto unconditionally -- callers
 * presumably run only while the Device protocol is up; confirm.
 */
struct ifa *
kif_choose_primary(struct iface *i)
{
  struct kif_config *cf = (struct kif_config *) (kif_proto->p.cf);
  struct kif_primary_item *it;
  struct ifa *a;

  WALK_LIST(it, cf->primary)
    {
      if (it->pattern && !patmatch(it->pattern, i->name))
        continue;

      a = find_preferred_ifa(i, it->prefix, ipa_mkmask(it->pxlen));
      if (a)
        return a;
    }

  return find_preferred_ifa(i, IPA_NONE, IPA_NONE);
}


150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
/* Protocol init hook: allocates a kif_proto for @c and runs the sysdep init. */
static struct proto *
kif_init(struct proto_config *c)
{
  struct kif_proto *kp;

  kp = proto_new(c, sizeof(struct kif_proto));
  kif_sys_init(kp);

  return &kp->p;
}

/*
 * kif_start - protocol start hook of the Device protocol
 *
 * Registers the instance as the running one, runs the sysdep start,
 * creates the scan timer, performs one scan immediately and then starts
 * the timer with the configured scan time. Always returns PS_UP.
 */
static int
kif_start(struct proto *P)
{
  /* The local must be called `p': it is handed to the KIF_CF macro below */
  struct kif_proto *p = (struct kif_proto *) P;

  kif_proto = p;
  kif_sys_start(p);

  /* Start periodic interface scanning */
  kif_scan_timer = tm_new(P->pool);
  kif_scan_timer->data = p;
  kif_scan_timer->hook = kif_scan;
  kif_scan_timer->recurrent = KIF_CF->scan_time;

  kif_scan(kif_scan_timer);
  tm_start(kif_scan_timer, KIF_CF->scan_time);

  return PS_UP;
}

/*
 * kif_shutdown - protocol shutdown hook of the Device protocol
 *
 * Stops the scan timer, runs the sysdep shutdown and forgets the running
 * instance. Always returns PS_DOWN.
 */
static int
kif_shutdown(struct proto *P)
{
  struct kif_proto *kp = (struct kif_proto *) P;

  tm_stop(kif_scan_timer);
  kif_sys_shutdown(kp);

  kif_proto = NULL;
  return PS_DOWN;
}

190 191 192 193 194 195
static int
kif_reconfigure(struct proto *p, struct proto_config *new)
{
  struct kif_config *o = (struct kif_config *) p->cf;
  struct kif_config *n = (struct kif_config *) new;

196
  if (!kif_sys_reconfigure((struct kif_proto *) p, n, o))
197
    return 0;
198

199 200 201 202 203 204 205
  if (o->scan_time != n->scan_time)
    {
      tm_stop(kif_scan_timer);
      kif_scan_timer->recurrent = n->scan_time;
      kif_scan(kif_scan_timer);
      tm_start(kif_scan_timer, n->scan_time);
    }
206 207 208 209 210 211 212 213 214 215 216 217

  if (!EMPTY_LIST(o->primary) || !EMPTY_LIST(n->primary))
    {
      /* This is hack, we have to update a configuration
       * to the new value just now, because it is used
       * for recalculation of primary addresses.
       */
      p->cf = new;

      ifa_recalc_all_primary_addresses();
    }

218 219 220
  return 1;
}

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242

/* Preconfig hook: forgets any previous Device config, then runs the sysdep hook. */
static void
kif_preconfig(struct protocol *P UNUSED, struct config *c)
{
  kif_cf = NULL;

  kif_sys_preconfig(c);
}

/*
 * kif_init_config - create the configuration of the Device protocol
 * @class: passed through to proto_config_new()
 *
 * Reports a configuration error when a Device protocol config already
 * exists. The new config gets a scan time of 60 and an empty list of
 * primary items before the sysdep defaults are applied.
 */
struct proto_config *
kif_init_config(int class)
{
  struct proto_config *c;

  if (kif_cf)
    cf_error("Kernel device protocol already defined");

  c = proto_config_new(&proto_unix_iface, sizeof(struct kif_config), class);
  kif_cf = (struct kif_config *) c;

  kif_cf->scan_time = 60;
  init_list(&kif_cf->primary);
  kif_sys_init_config(kif_cf);

  return (struct proto_config *) kif_cf;
}

243 244 245 246 247 248 249
static void
kif_copy_config(struct proto_config *dest, struct proto_config *src)
{
  struct kif_config *d = (struct kif_config *) dest;
  struct kif_config *s = (struct kif_config *) src;

  /* Shallow copy of everything (just scan_time currently) */
250
  proto_copy_rest(dest, src, sizeof(struct kif_config));
251 252 253 254 255

  /* Copy primary addr list */
  cfg_copy_list(&d->primary, &s->primary, sizeof(struct kif_primary_item));

  /* Fix sysdep parts */
256
  kif_sys_copy_config(d, s);
257 258 259
}


260 261
struct protocol proto_unix_iface = {
  name:		"Device",
262
  template:	"device%d",
263
  preference:	DEF_PREF_DIRECT,
264
  preconfig:	kif_preconfig,
265 266 267
  init:		kif_init,
  start:	kif_start,
  shutdown:	kif_shutdown,
268
  reconfigure:	kif_reconfigure,
269
  copy_config:	kif_copy_config
270
};
271

272 273 274 275
/*
 *	Tracing of routes
 */

276 277
/* Logs "<proto>: <prefix>/<len>: <msg>" for route @e when D_PACKETS debugging is on. */
static inline void
krt_trace_in(struct krt_proto *p, rte *e, char *msg)
{
  if (!(p->p.debug & D_PACKETS))
    return;

  log(L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg);
}

static inline void
284
krt_trace_in_rl(struct rate_limit *rl, struct krt_proto *p, rte *e, char *msg)
285 286
{
  if (p->p.debug & D_PACKETS)
287
    log_rl(rl, L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg);
288 289
}

290 291 292 293 294 295
/*
 *	Inherited Routes
 */

#ifdef KRT_ALLOW_LEARN

296 297
/* Rate limiters passed to krt_trace_in_rl() for the individual "[alien] ..." trace messages */
static struct rate_limit rl_alien_seen, rl_alien_updated, rl_alien_created, rl_alien_ignored;

298 299 300 301 302 303 304 305
/*
 * krt_same_key() specifies what (aside from the net) is the key in
 * kernel routing tables. It should be OS-dependent, this is for
 * Linux. It is important for asynchronous alien updates, because a
 * positive update is implicitly a negative one for any old route with
 * the same key.
 */

306 307 308
/* Two routes for the same net share a key iff their kernel metrics are equal. */
static inline int
krt_same_key(rte *a, rte *b)
{
  int same_metric = (a->u.krt.metric == b->u.krt.metric);

  return same_metric;
}

/*
 * Returns 1 when @a and @b point to the same attribute structure and
 * carry the same kernel protocol value, 0 otherwise.
 */
static inline int
krt_uptodate(rte *a, rte *b)
{
  return (a->attrs == b->attrs) && (a->u.krt.proto == b->u.krt.proto);
}

/*
 * krt_learn_announce_update - announce a learnt route to the main table
 * @p: kernel protocol instance
 * @e: route stored in the table of inherited routes
 *
 * Builds a temporary route with cloned attributes for the same prefix in
 * the protocol's table, copies the kernel-specific data and passes it to
 * rte_update().
 */
static void
krt_learn_announce_update(struct krt_proto *p, rte *e)
{
  net *src_net = e->net;
  rta *attrs = rta_clone(e->attrs);
  rte *tmp = rte_get_temp(attrs);
  net *dst_net = net_get(p->p.table, src_net->n.prefix, src_net->n.pxlen);

  tmp->net = dst_net;
  tmp->pflags = 0;
  tmp->pref = p->p.preference;
  tmp->u.krt = e->u.krt;

  rte_update(p->p.table, dst_net, &p->p, &p->p, tmp);
}

/*
 * krt_learn_announce_delete - withdraw a learnt route from the main table
 * @p: kernel protocol instance
 * @n: net from the table of inherited routes
 *
 * Looks up the same prefix in the protocol's table and, if it exists
 * there, calls rte_update() with a NULL route.
 */
static void
krt_learn_announce_delete(struct krt_proto *p, net *n)
{
  net *target = net_find(p->p.table, n->n.prefix, n->n.pxlen);

  if (!target)
    return;

  rte_update(p->p.table, target, &p->p, &p->p, NULL);
}

346
/* Called when alien route is discovered during scan */
347 348 349 350
static void
krt_learn_scan(struct krt_proto *p, rte *e)
{
  net *n0 = e->net;
351
  net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen);
352 353
  rte *m, **mm;

354
  e->attrs = rta_lookup(e->attrs);
355 356 357 358 359 360 361 362

  for(mm=&n->routes; m = *mm; mm=&m->next)
    if (krt_same_key(m, e))
      break;
  if (m)
    {
      if (krt_uptodate(m, e))
	{
363
	  krt_trace_in_rl(&rl_alien_seen, p, e, "[alien] seen");
364 365 366 367 368
	  rte_free(e);
	  m->u.krt.seen = 1;
	}
      else
	{
369
	  krt_trace_in_rl(&rl_alien_updated, p, e, "[alien] updated");
370 371 372 373 374 375
	  *mm = m->next;
	  rte_free(m);
	  m = NULL;
	}
    }
  else
376
    krt_trace_in_rl(&rl_alien_created, p, e, "[alien] created");
377 378 379 380 381 382 383 384 385 386 387 388 389 390
  if (!m)
    {
      e->next = n->routes;
      n->routes = e;
      e->u.krt.seen = 1;
    }
}

static void
krt_learn_prune(struct krt_proto *p)
{
  struct fib *fib = &p->krt_table.fib;
  struct fib_iterator fit;

391
  KRT_TRACE(p, D_EVENTS, "Pruning inherited routes");
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450

  FIB_ITERATE_INIT(&fit, fib);
again:
  FIB_ITERATE_START(fib, &fit, f)
    {
      net *n = (net *) f;
      rte *e, **ee, *best, **pbest, *old_best;

      old_best = n->routes;
      best = NULL;
      pbest = NULL;
      ee = &n->routes;
      while (e = *ee)
	{
	  if (!e->u.krt.seen)
	    {
	      *ee = e->next;
	      rte_free(e);
	      continue;
	    }
	  if (!best || best->u.krt.metric > e->u.krt.metric)
	    {
	      best = e;
	      pbest = ee;
	    }
	  e->u.krt.seen = 0;
	  ee = &e->next;
	}
      if (!n->routes)
	{
	  DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
	  if (old_best)
	    {
	      krt_learn_announce_delete(p, n);
	      n->n.flags &= ~KRF_INSTALLED;
	    }
	  FIB_ITERATE_PUT(&fit, f);
	  fib_delete(fib, f);
	  goto again;
	}
      *pbest = best->next;
      best->next = n->routes;
      n->routes = best;
      if (best != old_best || !(n->n.flags & KRF_INSTALLED))
	{
	  DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
	  krt_learn_announce_update(p, best);
	  n->n.flags |= KRF_INSTALLED;
	}
      else
	DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
    }
  FIB_ITERATE_END(f);
}

/*
 * krt_learn_async - process an asynchronous notification about an alien route
 * @p: kernel protocol instance
 * @e: the route from the notification
 * @new: non-zero for an addition/update, zero for a removal
 *
 * Updates the list of routes kept for the prefix in the table of inherited
 * routes, moves the route with the lowest metric to the head of the list
 * and, when the head changed, announces the new best route (or the
 * withdrawal when no route is left).
 */
static void
krt_learn_async(struct krt_proto *p, rte *e, int new)
{
  net *n0 = e->net;
  net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen);
  rte *cur, **link, *best, **best_link, *old_best;

  e->attrs = rta_lookup(e->attrs);

  old_best = n->routes;
  for (link = &n->routes; (cur = *link) != NULL; link = &cur->next)
    if (krt_same_key(cur, e))
      break;

  if (!new)
    {
      if (!cur)
        {
          krt_trace_in(p, e, "[alien async] delete failed");
          rte_free(e);
          return;
        }

      krt_trace_in(p, e, "[alien async] removed");
      *link = cur->next;
      rte_free(e);
      rte_free(cur);
    }
  else
    {
      if (cur && krt_uptodate(cur, e))
        {
          krt_trace_in(p, e, "[alien async] same");
          rte_free(e);
          return;
        }

      if (cur)
        {
          krt_trace_in(p, e, "[alien async] updated");
          *link = cur->next;
          rte_free(cur);
        }
      else
        krt_trace_in(p, e, "[alien async] created");

      e->next = n->routes;
      n->routes = e;
    }

  /* Find the route with the lowest metric and move it to the head */
  best = n->routes;
  best_link = &n->routes;
  for (link = &n->routes; (cur = *link) != NULL; link = &cur->next)
    if (cur->u.krt.metric < best->u.krt.metric)
      {
        best = cur;
        best_link = link;
      }
  if (best)
    {
      *best_link = best->next;
      best->next = n->routes;
      n->routes = best;
    }

  /* old_best may have been freed above; it is only compared, never dereferenced */
  if (best == old_best)
    return;

  DBG("krt_learn_async: distributing change\n");
  if (best)
    {
      krt_learn_announce_update(p, best);
      n->n.flags |= KRF_INSTALLED;
    }
  else
    {
      n->routes = NULL;
      krt_learn_announce_delete(p, n);
      n->n.flags &= ~KRF_INSTALLED;
    }
}

static void
krt_learn_init(struct krt_proto *p)
{
  if (KRT_CF->learn)
528
    rt_setup(p->p.pool, &p->krt_table, "Inherited", NULL);
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
}

/* Dump hook: prints the table of inherited routes when learning is configured. */
static void
krt_dump(struct proto *P)
{
  /* KRT_CF is used with the local named `p' */
  struct krt_proto *p = (struct krt_proto *) P;

  if (KRT_CF->learn)
    {
      debug("KRT: Table of inheritable routes\n");
      rt_dump(&p->krt_table);
    }
}

/* dump_attrs hook: prints kernel metric, protocol and type of route @e. */
static void
krt_dump_attrs(rte *e)
{
  debug(" [m=%d,p=%d,t=%d]",
        e->u.krt.metric,
        e->u.krt.proto,
        e->u.krt.type);
}

#endif

550 551 552 553
/*
 *	Routes
 */

554 555 556 557 558 559
#ifdef CONFIG_ALL_TABLES_AT_ONCE
/* Scan timer shared by all kernel protocol instances (created by the first one in krt_start()) */
static timer *krt_scan_timer;
/* Number of started kernel protocol instances */
static int krt_instance_count;
/* List of started instances, linked through their instance_node */
static list krt_instance_list;
#endif

560 561 562
static void
krt_flush_routes(struct krt_proto *p)
{
563
  struct rtable *t = p->p.table;
564

565
  KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
566 567 568 569 570 571 572
  FIB_WALK(&t->fib, f)
    {
      net *n = (net *) f;
      rte *e = n->routes;
      if (e)
	{
	  rta *a = e->attrs;
573 574 575
	  if ((n->n.flags & KRF_INSTALLED) &&
	      a->source != RTS_DEVICE && a->source != RTS_INHERIT)
	    {
576
	      /* FIXME: this does not work if gw is changed in export filter */
577
	      krt_do_notify(p, e->net, NULL, e, NULL);
578 579
	      n->n.flags &= ~KRF_INSTALLED;
	    }
580 581 582 583 584 585
	}
    }
  FIB_WALK_END;
}

static int
586
krt_same_dest(rte *k, rte *e)
587 588 589 590 591 592 593 594 595 596 597
{
  rta *ka = k->attrs, *ea = e->attrs;

  if (ka->dest != ea->dest)
    return 0;
  switch (ka->dest)
    {
    case RTD_ROUTER:
      return ipa_equal(ka->gw, ea->gw);
    case RTD_DEVICE:
      return !strcmp(ka->iface->name, ea->iface->name);
598 599
    case RTD_MULTIPATH:
      return mpnh_same(ka->nexthops, ea->nexthops);
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
    default:
      return 1;
    }
}

/*
 *  This gets called back when the low-level scanning code discovers a route.
 *  We expect that the route is a temporary rte and its attributes are uncached.
 */

/*
 * krt_got_route - process a route found by a table scan
 * @p: kernel protocol instance
 * @e: the route found (temporary rte)
 *
 * Decides a verdict for the net of @e and stores it in the net's flags.
 * For KRF_UPDATE and KRF_DELETE the route is temporarily linked to the
 * head of the net's route list (picked up later by krt_prune()); in all
 * other cases @e is freed.
 */
void
krt_got_route(struct krt_proto *p, rte *e)
{
  /* Trace messages indexed by verdict */
  static char *verdict_msg[] = { "?", "seen", "will be updated", "will be removed", "ignored" };
  net *net = e->net;
  rte *old;
  rta *a;
  int verdict;

#ifdef KRT_ALLOW_LEARN
  switch (e->u.krt.src)
    {
    case KRT_SRC_KERNEL:
      verdict = KRF_IGNORE;
      goto sentenced;

    case KRT_SRC_REDIRECT:
      verdict = KRF_DELETE;
      goto sentenced;

    case  KRT_SRC_ALIEN:
      if (!KRT_CF->learn)
        {
          krt_trace_in_rl(&rl_alien_ignored, p, e, "[alien] ignored");
          rte_free(e);
        }
      else
        krt_learn_scan(p, e);
      return;
    }
#endif
  /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */

  if (net->n.flags & KRF_VERDICT_MASK)
    {
      /* Route to this destination was already seen. Strange, but it happens... */
      krt_trace_in(p, e, "already seen");
      rte_free(e);
      return;
    }

  old = net->routes;
  if (!(net->n.flags & KRF_INSTALLED) || !old)
    verdict = KRF_DELETE;
  /* There may be changes in route attributes, we ignore that.
     Also, this does not work well if gw is changed in export filter */
  else if ((net->n.flags & KRF_SYNC_ERROR) || ! krt_same_dest(e, old))
    verdict = KRF_UPDATE;
  else
    verdict = KRF_SEEN;

 sentenced:
  krt_trace_in(p, e, verdict_msg[verdict]);
  net->n.flags = (net->n.flags & ~KRF_VERDICT_MASK) | verdict;

  if (verdict != KRF_UPDATE && verdict != KRF_DELETE)
    {
      rte_free(e);
      return;
    }

  /* Get a cached copy of attributes and temporarily link the route */
  a = e->attrs;
  a->source = RTS_DUMMY;
  e->attrs = rta_lookup(a);
  e->next = net->routes;
  net->routes = e;
}

678 679 680
/*
 * krt_export_rte - run the export filter on a route
 * @p: kernel protocol instance
 * @new: route to export; may be replaced by the filter
 * @tmpa: receives the temporary attributes when the filter is run
 *
 * Returns 0 for a missing route or a rejecting filter, 1 for an accepting
 * filter, and otherwise whether the result of f_run() is <= F_ACCEPT.
 */
static inline int
krt_export_rte(struct krt_proto *p, rte **new, ea_list **tmpa)
{
  struct filter *filter = p->p.main_ahook->out_filter;
  struct proto *src;

  if (! *new || filter == FILTER_REJECT)
    return 0;

  if (filter == FILTER_ACCEPT)
    return 1;

  src = (*new)->attrs->proto;
  if (src->make_tmp_attrs)
    *tmpa = src->make_tmp_attrs(*new, krt_filter_lp);
  else
    *tmpa = NULL;

  return f_run(filter, new, tmpa, krt_filter_lp, FF_FORCE_TMPATTR) <= F_ACCEPT;
}

697 698 699
static void
krt_prune(struct krt_proto *p)
{
700
  struct rtable *t = p->p.table;
701

702
  KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
703 704 705
  FIB_WALK(&t->fib, f)
    {
      net *n = (net *) f;
706
      int verdict = f->flags & KRF_VERDICT_MASK;
707 708
      rte *new, *new0, *old;
      ea_list *tmpa = NULL;
709

710
      if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
711
	{
712
	  /* Get a dummy route from krt_got_route() */
713 714 715 716 717
	  old = n->routes;
	  n->routes = old->next;
	}
      else
	old = NULL;
718 719 720 721 722 723 724 725 726 727 728

      new = new0 = n->routes;
      if (verdict == KRF_CREATE || verdict == KRF_UPDATE)
	{
	  /* We have to run export filter to get proper 'new' route */
	  if (! krt_export_rte(p, &new, &tmpa))
	    {
	      /* Route rejected, should not happen (KRF_INSTALLED) but to be sure .. */
	      verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE; 
	    }
	}
729 730 731 732

      switch (verdict)
	{
	case KRF_CREATE:
733
	  if (new && (f->flags & KRF_INSTALLED))
734
	    {
735
	      krt_trace_in(p, new, "reinstalling");
736
	      krt_do_notify(p, n, new, NULL, tmpa);
737 738 739
	    }
	  break;
	case KRF_SEEN:
740
	case KRF_IGNORE:
741 742 743
	  /* Nothing happens */
	  break;
	case KRF_UPDATE:
744
	  krt_trace_in(p, new, "updating");
745
	  krt_do_notify(p, n, new, old, tmpa);
746 747
	  break;
	case KRF_DELETE:
748
	  krt_trace_in(p, old, "deleting");
749
	  krt_do_notify(p, n, NULL, old, NULL);
750 751 752 753
	  break;
	default:
	  bug("krt_prune: invalid route status");
	}
754

755 756
      if (old)
	rte_free(old);
757 758 759
      if (new != new0)
	rte_free(new);
      lp_flush(krt_filter_lp);
760
      f->flags &= ~KRF_VERDICT_MASK;
761 762
    }
  FIB_WALK_END;
763 764 765 766 767

#ifdef KRT_ALLOW_LEARN
  if (KRT_CF->learn)
    krt_learn_prune(p);
#endif
768
  p->initialized = 1;
769 770
}

771
void
772
krt_got_route_async(struct krt_proto *p, rte *e, int new)
773 774 775
{
  net *net = e->net;

776
  switch (e->u.krt.src)
777 778
    {
    case KRT_SRC_BIRD:
779
      ASSERT(0);			/* Should be filtered by the back end */
780

781
    case KRT_SRC_REDIRECT:
782 783 784
      if (new)
	{
	  krt_trace_in(p, e, "[redirect] deleting");
785
	  krt_do_notify(p, net, NULL, e, NULL);
786 787
	}
      /* If !new, it is probably echo of our deletion */
788
      break;
789

790
#ifdef KRT_ALLOW_LEARN
791
    case KRT_SRC_ALIEN:
792
      if (KRT_CF->learn)
793
	{
794 795
	  krt_learn_async(p, e, new);
	  return;
796
	}
797
#endif
798
    }
799
  rte_free(e);
800 801
}

802 803 804 805 806
/*
 *	Periodic scanning
 */

static void
807
krt_scan(timer *t UNUSED)
808
{
809
  struct krt_proto *p;
810

811
  kif_force_scan();
812 813 814
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  {
    void *q;
815 816 817 818
    /* We need some node to decide whether to print the debug messages or not */
    p = SKIP_BACK(struct krt_proto, instance_node, HEAD(krt_instance_list));
    if (p->instance_node.next)
      KRT_TRACE(p, D_EVENTS, "Scanning routing table");
819
    krt_do_scan(NULL);
820 821 822 823 824 825 826 827
    WALK_LIST(q, krt_instance_list)
      {
	p = SKIP_BACK(struct krt_proto, instance_node, q);
	krt_prune(p);
      }
  }
#else
  p = t->data;
828
  KRT_TRACE(p, D_EVENTS, "Scanning routing table");
829
  krt_do_scan(p);
830
  krt_prune(p);
831
#endif
832 833
}

834

835 836 837
/*
 *	Updates
 */
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867

/*
 * krt_make_tmp_attrs - build temporary attributes of a kernel route
 * @rt: route whose kernel data are exported
 * @pool: linpool to allocate the list from
 *
 * Returns a sorted two-element list holding EA_KRT_SOURCE (the kernel
 * protocol value) and EA_KRT_METRIC as temporary integer attributes.
 */
static struct ea_list *
krt_make_tmp_attrs(rte *rt, struct linpool *pool)
{
  struct ea_list *l = lp_alloc(pool, sizeof(struct ea_list) + 2 * sizeof(eattr));
  eattr *source = &l->attrs[0];
  eattr *metric = &l->attrs[1];

  l->next = NULL;
  l->flags = EALF_SORTED;
  l->count = 2;

  source->id = EA_KRT_SOURCE;
  source->flags = 0;
  source->type = EAF_TYPE_INT | EAF_TEMP;
  source->u.data = rt->u.krt.proto;

  metric->id = EA_KRT_METRIC;
  metric->flags = 0;
  metric->type = EAF_TYPE_INT | EAF_TEMP;
  metric->u.data = rt->u.krt.metric;

  return l;
}

/* Copies EA_KRT_METRIC (0 when absent) from @attrs back into the route. */
static void
krt_store_tmp_attrs(rte *rt, struct ea_list *attrs)
{
  /* EA_KRT_SOURCE is read-only */
  int metric = ea_get_int(attrs, EA_KRT_METRIC, 0);

  rt->u.krt.metric = metric;
}

868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
/*
 * krt_import_control - import_control hook of the Kernel protocol
 *
 * Returns -1 for routes originated by this very protocol, for device
 * routes other than RTS_STATIC_DEVICE unless the devroutes option is set,
 * and for routes krt_capable() refuses; 0 otherwise.
 */
static int
krt_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
{
  /* KRT_CF is used with the local named `p' */
  struct krt_proto *p = (struct krt_proto *) P;
  rte *e = *new;
  rta *a = e->attrs;

  if (a->proto == P)
    return -1;

  if (!KRT_CF->devroutes)
    {
      if (a->dest == RTD_DEVICE && a->source != RTS_STATIC_DEVICE)
        return -1;
    }

  return krt_capable(e) ? 0 : -1;
}
887 888

static void
889
krt_notify(struct proto *P, struct rtable *table UNUSED, net *net,
890
	   rte *new, rte *old, struct ea_list *eattrs)
891 892 893
{
  struct krt_proto *p = (struct krt_proto *) P;

894
  if (shutting_down)
895
    return;
896 897 898 899 900 901
  if (!(net->n.flags & KRF_INSTALLED))
    old = NULL;
  if (new)
    net->n.flags |= KRF_INSTALLED;
  else
    net->n.flags &= ~KRF_INSTALLED;
902
  if (p->initialized)		/* Before first scan we don't touch the routes */
903
    krt_do_notify(p, net, new, old, eattrs);
904 905
}

906 907 908 909 910 911 912 913
/* rte_same hook: routes are the same iff kernel protocol value and metric agree. */
static int
krt_rte_same(rte *a, rte *b)
{
  /* src is always KRT_SRC_ALIEN and type is irrelevant */
  if (a->u.krt.proto != b->u.krt.proto)
    return 0;

  return a->u.krt.metric == b->u.krt.metric;
}


914 915 916 917
/*
 *	Protocol glue
 */

918
/* Most recently created Kernel protocol config; reset in krt_preconfig(), set in krt_init_config() */
struct krt_config *krt_cf;
919

920 921
static struct proto *
krt_init(struct proto_config *c)
922
{
923
  struct krt_proto *p = proto_new(c, sizeof(struct krt_proto));
924

925 926 927 928 929 930
  p->p.accept_ra_types = RA_OPTIMAL;
  p->p.make_tmp_attrs = krt_make_tmp_attrs;
  p->p.store_tmp_attrs = krt_store_tmp_attrs;
  p->p.import_control = krt_import_control;
  p->p.rt_notify = krt_notify;
  p->p.rte_same = krt_rte_same;
931

932 933
  krt_sys_init(p);
  return &p->p;
934 935 936 937 938 939 940 941 942 943 944
}

/*
 * Creates a scan timer in the instance's krt_pool, recurring with the
 * configured scan time, starts it with delay 0 and returns it.
 */
static timer *
krt_start_timer(struct krt_proto *p)
{
  /* KRT_CF is used with the parameter named `p' */
  timer *scan = tm_new(p->krt_pool);

  scan->data = p;
  scan->hook = krt_scan;
  scan->recurrent = KRT_CF->scan_time;

  tm_start(scan, 0);
  return scan;
}

949 950 951 952
static int
krt_start(struct proto *P)
{
  struct krt_proto *p = (struct krt_proto *) P;
953 954 955 956 957 958 959 960 961 962 963 964
  int first = 1;

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (!krt_instance_count++)
    init_list(&krt_instance_list);
  else
    first = 0;
  p->krt_pool = krt_pool;
  add_tail(&krt_instance_list, &p->instance_node);
#else
  p->krt_pool = P->pool;
#endif
965

966 967 968 969
#ifdef KRT_ALLOW_LEARN
  krt_learn_init(p);
#endif

970
  krt_sys_start(p, first);
971

972
  /* Start periodic routing table scanning */
973 974 975
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (first)
    krt_scan_timer = krt_start_timer(p);
976
  else
977
    tm_start(krt_scan_timer, 0);
978 979 980 981
  p->scan_timer = krt_scan_timer;
#else
  p->scan_timer = krt_start_timer(p);
#endif
982 983 984 985

  return PS_UP;
}

986
static int
987 988 989
krt_shutdown(struct proto *P)
{
  struct krt_proto *p = (struct krt_proto *) P;
990
  int last = 1;
991

992 993 994 995 996 997 998
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  rem_node(&p->instance_node);
  if (--krt_instance_count)
    last = 0;
  else
#endif
    tm_stop(p->scan_timer);
999

1000 1001
  /* FIXME we should flush routes even when persist during reconfiguration */
  if (p->initialized && !KRT_CF->persist)
1002 1003
    krt_flush_routes(p);

1004
  krt_sys_shutdown(p, last);
1005 1006 1007 1008 1009

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (last)
    rfree(krt_scan_timer);
#endif
1010 1011 1012 1013

  return PS_DOWN;
}

1014 1015
static int
krt_reconfigure(struct proto *p, struct proto_config *new)
1016
{
1017 1018
  struct krt_config *o = (struct krt_config *) p->cf;
  struct krt_config *n = (struct krt_config *) new;
1019

1020 1021
  if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
    return 0;
1022

1023 1024
  /* persist needn't be the same */
  return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
1025 1026
}

1027
static void
1028
krt_preconfig(struct protocol *P UNUSED, struct config *c)
1029
{
1030 1031
  krt_cf = NULL;
  krt_sys_preconfig(c);
1032
}
1033

1034 1035
static void
krt_postconfig(struct proto_config *C)
1036
{
1037
  struct krt_config *c = (struct krt_config *) C;
1038

1039 1040 1041 1042
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (krt_cf->scan_time != c->scan_time)
    cf_error("All kernel syncers must use the same table scan interval");
#endif
1043

1044 1045 1046 1047
  if (C->table->krt_attached)
    cf_error("Kernel syncer (%s) already attached to table %s", C->table->krt_attached->name, C->table->name);
  C->table->krt_attached = C;
  krt_sys_postconfig(c);
1048 1049
}

1050 1051
struct proto_config *
krt_init_config(int class)
1052
{
1053 1054 1055 1056 1057 1058 1059
#ifndef CONFIG_MULTIPLE_TABLES
  if (krt_cf)
    cf_error("Kernel protocol already defined");
#endif

  krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, sizeof(struct krt_config), class);
  krt_cf->scan_time = 60;
1060

1061 1062
  krt_sys_init_config(krt_cf);
  return (struct proto_config *) krt_cf;
1063 1064
}

1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
/*
 * krt_copy_config - copy_config hook of the Kernel protocol
 * @dest: config being filled
 * @src: config being copied
 */
static void
krt_copy_config(struct proto_config *dest, struct proto_config *src)
{
  struct krt_config *to = (struct krt_config *) dest;
  struct krt_config *from = (struct krt_config *) src;

  /* Shallow copy of everything */
  proto_copy_rest(dest, src, sizeof(struct krt_config));

  /* Fix sysdep parts */
  krt_sys_copy_config(to, from);
}
1077 1078 1079 1080 1081 1082

/*
 * krt_get_attr - get_attr hook of the Kernel protocol
 * @a: attribute to describe
 * @buf: buffer receiving the attribute name
 *
 * Writes the name of a known kernel attribute to @buf and returns
 * GA_NAME; returns GA_UNKNOWN (leaving @buf alone) for any other id.
 */
static int
krt_get_attr(eattr * a, byte * buf, int buflen UNUSED)
{
  const char *name;

  switch (a->id)
  {
  case EA_KRT_SOURCE:
    name = "source";
    break;

  case EA_KRT_METRIC:
    name = "metric";
    break;

  case EA_KRT_PREFSRC:
    name = "prefsrc";
    break;

  case EA_KRT_REALM:
    name = "realm";
    break;

  default:
    return GA_UNKNOWN;
  }

  bsprintf(buf, "%s", name);
  return GA_NAME;
}


1105 1106
struct protocol proto_unix_kernel = {
  name:		"Kernel",
1107
  template:	"kernel%d",
1108
  attr_class:	EAP_KRT,
1109
  preference:	DEF_PREF_INHERITED,
1110 1111
  preconfig:	krt_preconfig,
  postconfig:	krt_postconfig,
1112 1113 1114
  init:		krt_init,
  start:	krt_start,
  shutdown:	krt_shutdown,
1115
  reconfigure:	krt_reconfigure,
1116
  copy_config:	krt_copy_config,
1117
  get_attr:	krt_get_attr,
1118 1119 1120 1121
#ifdef KRT_ALLOW_LEARN
  dump:		krt_dump,
  dump_attrs:	krt_dump_attrs,
#endif
1122
};