Commit 7de45ba4 authored by Martin Mareš's avatar Martin Mareš

Kernel route syncer supports multiple tables.

The changes are just too extensive for lazy me to list them
here, but see the comment at the top of sysdep/unix/krt.c.
The code got a bit more ifdeffy than I'd like, though.

Also fixed a bunch of FIXME's and added a couple of others. :)
parent 9d885689
......@@ -6,8 +6,6 @@ Core
- config: executable config files
- do we really need preconfig?
- counters (according to SNMP MIB?)
- better memory allocators
- default preferences of protocols: prefer BGP over OSPF/RIP external routes?
......@@ -26,6 +24,10 @@ Core
- OSPF: refuse running on non-multicast devices
- config: when parsing prefix, check zero bits
- krt: rescan interfaces when route addition fails?
Cleanup
~~~~~~~
- right usage of DBG vs. debug
......
......@@ -32,6 +32,7 @@ protocol kernel {
# async off; # Netlink: Disable asynchronous events
# import none; # Default is import all
export all; # Default is export none
# kernel table 5; # Kernel table to synchronize with (default: main)
}
protocol device {
......
......@@ -109,6 +109,7 @@ struct rtable_config {
node n;
char *name;
struct rtable *table;
struct proto_config *krt_attached; /* Kernel syncer attached to this table */
};
typedef struct rtable {
......
......@@ -104,6 +104,7 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list *
struct proto *p = a->proto;
rte *new0 = new;
rte *old0 = old;
if (new)
{
int ok = p->import_control ? p->import_control(p, &new, &tmpa, rte_update_pool) : 0;
......
......@@ -4,6 +4,8 @@ Available configuration variables:
CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel
CONFIG_ALL_MULTICAST All devices support multicasting (i.e., ignore IFF_MULTICAST)
CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us
CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables
CONFIG_ALL_TABLES_AT_ONCE Kernel scanner wants to process all tables at once
CONFIG_UNIX_IFACE Use Unix interface scanner
CONFIG_UNIX_SET Use Unix route setting
......
......@@ -9,7 +9,8 @@
#define CONFIG_AUTO_ROUTES
#define CONFIG_ALL_MULTICAST
#define CONFIG_SELF_CONSCIOUS
#define CONFIG_MULTIPLE_TABLES 256
#define CONFIG_MULTIPLE_TABLES
#define CONFIG_ALL_TABLES_AT_ONCE
/*
Link: sysdep/linux/netlink
......
......@@ -174,17 +174,27 @@ krt_scan_fire(struct krt_proto *p)
}
void
krt_scan_preconfig(struct krt_config *c)
krt_scan_construct(struct krt_config *c)
{
}
void
krt_scan_start(struct krt_proto *x)
krt_scan_preconfig(struct config *c)
{
}
/* Post-configuration hook of this scanner; it has nothing to verify, so the body is intentionally empty. */
void
krt_scan_postconfig(struct krt_config *c)
{
}
/*
 * Start the route scanner for instance x. The only work done here is
 * resetting the instance's scan.temp_ifs list; the `first' argument is
 * accepted but not used by this implementation.
 */
void
krt_scan_start(struct krt_proto *x, int first)
{
init_list(&x->scan.temp_ifs);
}
void
krt_scan_shutdown(struct krt_proto *x)
krt_scan_shutdown(struct krt_proto *x, int last)
{
}
......@@ -19,7 +19,7 @@ struct krt_if_params {
struct krt_if_status {
};
static inline void krt_if_preconfig(struct kif_config *c) { };
static inline void krt_if_construct(struct kif_config *c) { };
static inline void krt_if_shutdown(struct kif_proto *p) { };
static inline void krt_if_io_init(void) { };
......
......@@ -17,6 +17,7 @@
/* Scanner parameters; reached through the `scan' member of struct krt_config. */
struct krt_scan_params {
int async; /* Allow asynchronous events */
int table_id; /* Kernel table ID we sync with */
};
struct krt_scan_status {
......
......@@ -19,8 +19,8 @@ struct krt_set_params {
struct krt_set_status {
};
static inline void krt_set_preconfig(struct krt_config *c) { };
static inline void krt_set_start(struct krt_proto *p) { };
static inline void krt_set_shutdown(struct krt_proto *p) { };
static inline void krt_set_construct(struct krt_config *c) { };
static inline void krt_set_start(struct krt_proto *p, int first) { };
static inline void krt_set_shutdown(struct krt_proto *p, int last) { };
#endif
......@@ -10,7 +10,7 @@ CF_HDR
CF_DECLS
CF_KEYWORDS(ASYNC)
CF_KEYWORDS(ASYNC, KERNEL, TABLE)
CF_GRAMMAR
......@@ -18,6 +18,11 @@ CF_ADDTO(kern_proto, kern_proto nl_item ';')
nl_item:
ASYNC bool { THIS_KRT->scan.async = $2; }
| KERNEL TABLE expr {
/* Only table numbers 1..254 are accepted; anything else is a config error. */
if ($3 <= 0 || $3 >= 255)
cf_error("Kernel routing table number out of range");
THIS_KRT->scan.table_id = $3;
}
;
CF_CODE
......
......@@ -24,6 +24,7 @@
#include "lib/unix.h"
#include "lib/krt.h"
#include "lib/socket.h"
#include "conf/conf.h"
#include <asm/types.h>
#include <linux/netlink.h>
......@@ -403,6 +404,8 @@ krt_if_scan(struct kif_proto *p)
* Routes
*/
static struct krt_proto *nl_table_map[256];
int
krt_capable(rte *e)
{
......@@ -431,7 +434,7 @@ krt_capable(rte *e)
}
static void
nl_send_route(rte *e, int new)
nl_send_route(struct krt_proto *p, rte *e, int new)
{
net *net = e->net;
rta *a = e->attrs;
......@@ -453,10 +456,10 @@ nl_send_route(rte *e, int new)
r.r.rtm_family = AF_INET;
r.r.rtm_dst_len = net->n.pxlen;
r.r.rtm_tos = 0; /* FIXME: Non-zero TOS? */
r.r.rtm_table = RT_TABLE_MAIN; /* FIXME: Other tables? */
r.r.rtm_tos = 0;
r.r.rtm_table = KRT_CF->scan.table_id;
r.r.rtm_protocol = RTPROT_BIRD;
r.r.rtm_scope = RT_SCOPE_UNIVERSE; /* FIXME: Other scopes? */
r.r.rtm_scope = RT_SCOPE_UNIVERSE;
nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
switch (a->dest)
{
......@@ -489,19 +492,22 @@ krt_set_notify(struct krt_proto *p, net *n, rte *new, rte *old)
{
if (old && new)
{
/* FIXME: Priorities and TOS should be identical as well, but we don't use them yet. */
nl_send_route(new, 1);
/*
* We should check whether priority and TOS are identical as well,
* but we don't use these and default value is always equal to default value. :-)
*/
nl_send_route(p, new, 1);
}
else
{
if (old)
{
if (!old->attrs->iface || (old->attrs->iface->flags & IF_UP))
nl_send_route(old, 0);
nl_send_route(p, old, 0);
/* else the kernel has already flushed it */
}
if (new)
nl_send_route(new, 1);
nl_send_route(p, new, 1);
}
}
......@@ -524,8 +530,9 @@ krt_temp_iface(struct krt_proto *p, unsigned index)
}
static void
nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
nl_parse_route(struct nlmsghdr *h, int scan)
{
struct krt_proto *p;
struct rtmsg *i;
struct rtattr *a[RTA_CACHEINFO+1];
int new = h->nlmsg_type == RTM_NEWROUTE;
......@@ -549,11 +556,16 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
return;
}
if (i->rtm_table != RT_TABLE_MAIN) /* FIXME: What about other tables? */
return;
if (i->rtm_tos != 0) /* FIXME: What about TOS? */
p = nl_table_map[i->rtm_table]; /* Do we know this table? */
if (!p)
return;
if (i->rtm_tos != 0) /* We don't support TOS */
{
DBG("KRT: Ignoring route with TOS %02x\n", i->rtm_tos);
return;
}
if (scan && !new)
{
DBG("KRT: Ignoring route deletion\n");
......@@ -572,7 +584,7 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
else
oif = ~0;
DBG("Got %I/%d, type=%d, oif=%d\n", dst, i->rtm_dst_len, i->rtm_type, oif);
DBG("Got %I/%d, type=%d, oif=%d, table=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, p->p.name);
switch (i->rtm_protocol)
{
......@@ -647,7 +659,7 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
return;
}
if (i->rtm_scope != RT_SCOPE_UNIVERSE) /* FIXME: Other scopes? */
if (i->rtm_scope != RT_SCOPE_UNIVERSE)
{
DBG("KRT: Ignoring route with scope=%d\n", i->rtm_scope);
return;
......@@ -669,14 +681,14 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
}
void
krt_scan_fire(struct krt_proto *p)
krt_scan_fire(struct krt_proto *p) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
{
struct nlmsghdr *h;
nl_request_dump(RTM_GETROUTE);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(p, h, 1);
nl_parse_route(h, 1);
else
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
}
......@@ -689,14 +701,14 @@ static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
static void
nl_async_msg(struct krt_proto *p, struct nlmsghdr *h)
nl_async_msg(struct nlmsghdr *h)
{
switch (h->nlmsg_type)
{
case RTM_NEWROUTE:
case RTM_DELROUTE:
DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
nl_parse_route(p, h, 0);
nl_parse_route(h, 0);
break;
case RTM_NEWLINK:
case RTM_DELLINK:
......@@ -716,7 +728,6 @@ nl_async_msg(struct krt_proto *p, struct nlmsghdr *h)
static int
nl_async_hook(sock *sk, int size)
{
struct krt_proto *p = sk->data;
struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
struct sockaddr_nl sa;
struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
......@@ -746,7 +757,7 @@ nl_async_hook(sock *sk, int size)
}
while (NLMSG_OK(h, len))
{
nl_async_msg(p, h);
nl_async_msg(h);
h = NLMSG_NEXT(h, len);
}
if (len)
......@@ -755,7 +766,7 @@ nl_async_hook(sock *sk, int size)
}
static void
nl_open_async(struct krt_proto *p)
nl_open_async(void)
{
sock *sk;
struct sockaddr_nl sa;
......@@ -766,7 +777,7 @@ nl_open_async(struct krt_proto *p)
fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (fd < 0)
{
log(L_ERR "Unable to open secondary rtnetlink socket: %m");
log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
return;
}
......@@ -775,13 +786,12 @@ nl_open_async(struct krt_proto *p)
sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
{
log(L_ERR "Unable to bind secondary rtnetlink socket: %m");
log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
return;
}
sk = nl_async_sk = sk_new(p->p.pool);
sk = nl_async_sk = sk_new(krt_pool);
sk->type = SK_MAGIC;
sk->data = p;
sk->rx_hook = nl_async_hook;
sk->fd = fd;
if (sk_open(sk))
......@@ -795,24 +805,47 @@ nl_open_async(struct krt_proto *p)
* Interface to the UNIX krt module
*/
static u8 nl_cf_table[256 / 8];
void
krt_scan_preconfig(struct krt_config *x)
krt_scan_preconfig(struct config *c)
{
bzero(&nl_cf_table, sizeof(nl_cf_table));
}
/*
 * Post-configuration check for one kernel syncer: make sure no other
 * syncer in this configuration has already claimed the same kernel table.
 * nl_cf_table is used as a bitmap with one bit per table ID; a set bit
 * means the table is taken, which is reported via cf_error().
 */
void
krt_scan_postconfig(struct krt_config *x)
{
  int id = x->scan.table_id;
  int idx = id / 8;		/* Byte of the bitmap holding this table's bit */
  int mask = 1 << (id % 8);	/* Bit within that byte */

  if (nl_cf_table[idx] & mask)
    cf_error("Multiple kernel syncers defined for table #%d", id);
  nl_cf_table[idx] |= mask;
}
/*
 * Fill in the scanner defaults of a kernel protocol configuration:
 * asynchronous events enabled and the main kernel table (RT_TABLE_MAIN)
 * selected as the table to synchronize with.
 */
void
krt_scan_construct(struct krt_config *x)
{
x->scan.async = 1;
x->scan.table_id = RT_TABLE_MAIN;
/* FIXME: Use larger defaults for scanning times? */
}
void
krt_scan_start(struct krt_proto *p)
krt_scan_start(struct krt_proto *p, int first)
{
init_list(&p->scan.temp_ifs);
nl_open();
if (KRT_CF->scan.async) /* FIXME: Async is for debugging only. Get rid of it some day. */
nl_open_async(p);
nl_table_map[KRT_CF->scan.table_id] = p;
if (first)
{
nl_open();
if (KRT_CF->scan.async) /* FIXME: Async is for debugging only. Get rid of it some day. */
nl_open_async();
}
}
void
krt_scan_shutdown(struct krt_proto *p)
krt_scan_shutdown(struct krt_proto *p, int last)
{
}
......
......@@ -205,7 +205,7 @@ krt_if_scan(struct kif_proto *p)
}
void
krt_if_preconfig(struct kif_config *c)
krt_if_construct(struct kif_config *c)
{
}
......
......@@ -92,18 +92,18 @@ krt_set_notify(struct krt_proto *p, net *net, rte *new, rte *old)
}
void
krt_set_start(struct krt_proto *x)
krt_set_start(struct krt_proto *x, int first)
{
if (if_scan_sock < 0)
bug("krt set: missing socket");
}
void
krt_set_preconfig(struct krt_config *c)
krt_set_construct(struct krt_config *c)
{
}
void
krt_set_shutdown(struct krt_proto *x)
krt_set_shutdown(struct krt_proto *x, int last)
{
}
......@@ -24,14 +24,16 @@ CF_GRAMMAR
CF_ADDTO(proto, kern_proto '}')
kern_proto_start: proto_start KERNEL {
#ifndef CONFIG_MULTIPLE_TABLES
if (cf_krt)
cf_error("Kernel protocol already defined");
#endif
cf_krt = this_proto = proto_config_new(&proto_unix_kernel, sizeof(struct krt_config));
this_proto->preference = 0;
THIS_KRT->scan_time = 60;
THIS_KRT->learn = THIS_KRT->persist = 0;
krt_scan_preconfig(THIS_KRT);
krt_set_preconfig(THIS_KRT);
krt_scan_construct(THIS_KRT);
krt_set_construct(THIS_KRT);
}
;
......@@ -64,7 +66,7 @@ kif_proto_start: proto_start DEVICE {
cf_kif = this_proto = proto_config_new(&proto_unix_iface, sizeof(struct kif_config));
this_proto->preference = DEF_PREF_DIRECT;
THIS_KIF->scan_time = 60;
krt_if_preconfig(THIS_KIF);
krt_if_construct(THIS_KIF);
}
;
......
......@@ -13,19 +13,47 @@
#include "nest/route.h"
#include "nest/protocol.h"
#include "lib/timer.h"
#include "conf/conf.h"
#include "unix.h"
#include "krt.h"
/*
* The whole kernel synchronization is a bit messy and touches some internals
* of the routing table engine, because routing table maintenance is a typical
* example of the proverbial compatibility between different Unices and we want
* to keep the overhead of our krt business as low as possible and avoid maintaining
* a local routing table copy.
*
* The kernel syncer can work in three different modes (according to system config header):
* o Single routing table, single krt protocol. [traditional Unix]
* o Many routing tables, separate krt protocols for all of them.
* o Many routing tables, but every scan includes all tables, so we start
* separate krt protocols which cooperate with each other. [Linux 2.2]
* In this case, we keep only a single scan timer.
*
* The hacky bits:
* o We use FIB node flags to keep track of route synchronization status.
* o When starting up, we cheat by looking if there is another kernel
* krt instance to be initialized later and performing table scan
* only once for all the instances.
* o We attach temporary rte's to routing tables.
*
* If you are brave enough, continue now. You cannot say you haven't been warned.
*/
static int krt_uptodate(rte *k, rte *e);
/*
* Global resources
*/
pool *krt_pool;
/*
 * Set up the global kernel syncer resources: create the shared krt_pool
 * (named "Kernel Syncer") under root_pool, then call the interface
 * scanner's I/O initialization hook.
 */
void
krt_io_init(void)
{
krt_pool = rp_new(&root_pool, "Kernel Syncer");
krt_if_io_init();
}
......@@ -96,6 +124,12 @@ kif_shutdown(struct proto *P)
if_start_update(); /* Remove all interfaces */
if_end_update();
/*
* FIXME: Is it really a good idea? It causes routes to be flushed,
* but at the same time it avoids sending of these deletions to the kernel,
* because krt thinks the kernel itself has already removed the route
* when downing the interface. Sad.
*/
return PS_DOWN;
}
......@@ -182,8 +216,6 @@ krt_learn_scan(struct krt_proto *p, rte *e)
}
}
/* FIXME: Add dump function */
static void
krt_learn_prune(struct krt_proto *p)
{
......@@ -353,6 +385,12 @@ krt_dump_attrs(rte *e)
* Routes
*/
#ifdef CONFIG_ALL_TABLES_AT_ONCE
static timer *krt_scan_timer;
static int krt_instance_count;
static list krt_instance_list;
#endif
static void
krt_flush_routes(struct krt_proto *p)
{
......@@ -373,8 +411,6 @@ krt_flush_routes(struct krt_proto *p)
FIB_WALK_END;
}
/* FIXME: Synchronization of multiple routing tables? */
static int
krt_uptodate(rte *k, rte *e)
{
......@@ -469,7 +505,7 @@ krt_prune(struct krt_proto *p)
struct rtable *t = p->p.table;
struct fib_node *f;
DBG("Pruning routes...\n");
DBG("Pruning routes in table %s...\n", t->name);
FIB_WALK(&t->fib, f)
{
net *n = (net *) f;
......@@ -556,17 +592,29 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new)
* Periodic scanning
*/
static timer *krt_scan_timer;
static void
krt_scan(timer *t)
{
struct krt_proto *p = t->data;
struct krt_proto *p;
kif_force_scan();
DBG("KRT: It's route scan time...\n");
#ifdef CONFIG_ALL_TABLES_AT_ONCE
{
void *q;
DBG("KRT: It's route scan time...\n");
krt_scan_fire(NULL);
WALK_LIST(q, krt_instance_list)
{
p = SKIP_BACK(struct krt_proto, instance_node, q);
krt_prune(p);
}
}
#else
p = t->data;
DBG("KRT: It's route scan time for %s...\n", p->p.name);
krt_scan_fire(p);
krt_prune(p);
#endif
}
/*
......@@ -595,41 +643,106 @@ krt_notify(struct proto *P, net *net, rte *new, rte *old, struct ea_list *tmpa)
struct proto_config *cf_krt;
/*
 * Protocol preconfig hook: forwards the configuration being built to the
 * scanner's own preconfig routine. The protocol argument P is not used.
 */
static void
krt_preconfig(struct protocol *P, struct config *c)
{
krt_scan_preconfig(c);
}
/*
 * Protocol postconfig hook for a kernel syncer instance.
 *
 * With CONFIG_ALL_TABLES_AT_ONCE the instance's scan interval must match
 * the one of the syncer recorded in cf_krt. In every mode the syncer then
 * claims its routing table (at most one syncer per table is allowed) and
 * lets the scanner run its own post-configuration checks.
 */
static void
krt_postconfig(struct proto_config *C)
{
  struct krt_config *kc = (struct krt_config *) C;

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (((struct krt_config *) cf_krt)->scan_time != kc->scan_time)
    cf_error("All kernel syncers must use the same table scan interval");
#endif

  if (C->table->krt_attached)
    cf_error("Kernel syncer (%s) already attached to table %s", C->table->krt_attached->name, C->table->name);
  C->table->krt_attached = C;

  krt_scan_postconfig(kc);
}
/*
 * Allocate a timer from p->krt_pool, point it at krt_scan() with p as its
 * data, set its recurrence to the configured scan_time and start it with
 * that same interval. Returns the new timer.
 */
static timer *
krt_start_timer(struct krt_proto *p)
{
  timer *scan_tm = tm_new(p->krt_pool);

  scan_tm->hook = krt_scan;
  scan_tm->data = p;
  scan_tm->recurrent = KRT_CF->scan_time;
  tm_start(scan_tm, KRT_CF->scan_time);

  return scan_tm;
}
static int
krt_start(struct proto *P)
{
struct krt_proto *p = (struct krt_proto *) P;
int first = 1;
#ifdef CONFIG_ALL_TABLES_AT_ONCE
if (!krt_instance_count++)
init_list(&krt_instance_list);
else
first = 0;
p->krt_pool = krt_pool;
add_tail(&krt_instance_list, &p->instance_node);
#else
p->krt_pool = P->pool;
#endif
#ifdef KRT_ALLOW_LEARN
krt_learn_init(p);
#endif
krt_scan_start(p);
krt_set_start(p);
krt_scan_start(p, first);
krt_set_start(p, first);
/* Start periodic routing table scanning */
krt_scan_timer = tm_new(P->pool);
krt_scan_timer->hook = krt_scan;
krt_scan_timer->data = p;
krt_scan_timer->recurrent = KRT_CF->scan_time;
krt_scan(krt_scan_timer);
tm_start(krt_scan_timer, KRT_CF->scan_time);
#ifdef CONFIG_ALL_TABLES_AT_ONCE
if (first)
krt_scan_timer = krt_start_timer(p);
p->scan_timer = krt_scan_timer;
/* If this is the last instance to be initialized, kick the timer */
if (!P->proto->startup_counter)
krt_scan(p->scan_timer);
#else
p->scan_timer = krt_start_timer(p);
krt_scan(p->scan_timer);
#endif
return PS_UP;
}
int
static int
krt_shutdown(struct proto *P)
{
struct krt_proto *p = (struct krt_proto *) P;
int last = 1;
tm_stop(krt_scan_timer);
#ifdef CONFIG_ALL_TABLES_AT_ONCE
rem_node(&p->instance_node);
if (--krt_instance_count)
last = 0;
else
#endif
tm_stop(p->scan_timer);
if (!KRT_CF->persist)
krt_flush_routes(p);
krt_set_shutdown(p);
krt_scan_shutdown(p);
krt_set_shutdown(p, last);
krt_scan_shutdown(p, last);
#ifdef CONFIG_ALL_TABLES_AT_ONCE
if (last)
rfree(krt_scan_timer);
#endif
return PS_DOWN;
}
......@@ -646,6 +759,8 @@ krt_init(struct proto_config *c)
struct protocol proto_unix_kernel = {
name: "Kernel",
priority: 80,
preconfig: krt_preconfig,
postconfig: krt_postconfig,
init: krt_init,
start: krt_start,
shutdown: krt_shutdown,
......
......@@ -9,6 +9,7 @@
#ifndef _BIRD_KRT_H_
#define _BIRD_KRT_H_
struct config;
struct krt_config;
struct krt_proto;
struct kif_config;
......@@ -55,10 +56,16 @@ struct krt_proto {
struct krt_if_status iface;
#ifdef KRT_ALLOW_LEARN
struct rtable krt_table; /* Internal table of inherited routes */
#endif
pool *krt_pool; /* Pool used for common krt data */
timer *scan_timer;
#ifdef CONFIG_ALL_TABLES_AT_ONCE
node instance_node; /* Node in krt instance list */
#endif
};
extern struct proto_config *cf_krt;
extern pool *krt_pool;
#define KRT_CF ((struct krt_config *)p->p.cf)
......@@ -90,24 +97,26 @@ extern struct proto_config *cf_kif;
/* krt-scan.c */
void krt_scan_preconfig(struct krt_config *);
void krt_scan_start(struct krt_proto *);
void krt_scan_shutdown(struct krt_proto *);
void krt_scan_preconfig(struct config *);
void krt_scan_postconfig(struct krt_config *);
void krt_scan_construct(struct krt_config *);
void krt_scan_start(struct krt_proto *, int);
void krt_scan_shutdown(struct krt_proto *, int);
void krt_scan_fire(struct krt_proto *);
/* krt-se