Commit cfe34a31 authored by Ondřej Zajíček

Implements hostcache and recursive next hops.

Hostcache is a structure for monitoring changes in a routing table that
is used for routes with dynamic/recursive next hops. This is needed for
proper iBGP next hop handling.
parent 824de84d
......@@ -577,10 +577,8 @@ proto_fell_down(struct proto *p)
bzero(&p->stats, sizeof(struct proto_stats));
rt_unlock_table(p->table);
#ifdef CONFIG_PIPE
if (proto_is_pipe(p))
rt_unlock_table(pipe_get_peer_table(p));
#endif
if (p->proto->cleanup)
p->proto->cleanup(p);
proto_rethink_goal(p);
}
......
......@@ -48,6 +48,7 @@ struct protocol {
void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */
int (*start)(struct proto *); /* Start the instance */
int (*shutdown)(struct proto *); /* Stop the instance */
void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */
void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */
void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */
int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */
......
......@@ -129,14 +129,19 @@ typedef struct rtable {
list hooks; /* List of announcement hooks */
int pipe_busy; /* Pipe loop detection */
int use_count; /* Number of protocols using this table */
struct hostcache *hostcache;
struct rtable_config *config; /* Configuration of this table */
struct config *deleted; /* Table doesn't exist in current configuration,
* delete as soon as use_count becomes 0 and remove
* obstacle from this routing table.
*/
struct event *gc_event; /* Garbage collector event */
struct event *rt_event; /* Routing table event */
int gc_counter; /* Number of operations since last GC */
bird_clock_t gc_time; /* Time of last GC */
byte gc_scheduled; /* GC is scheduled */
byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
} rtable;
typedef struct network {
......@@ -144,6 +149,23 @@ typedef struct network {
struct rte *routes; /* Available routes for this network */
} net;
/*
 * Hostcache: per the commit description, a structure for monitoring changes
 * in a routing table on behalf of routes with dynamic/recursive next hops.
 * A routing table refers to its hostcache through rtable->hostcache.
 */
struct hostcache {
struct fib htable;	/* FIB of host entries -- presumably indexed through hostentry->fn; TODO confirm */
list hostentries;	/* List of host entries -- presumably linked through hostentry->ln; TODO confirm */
byte update_hostcache;	/* Flag byte -- presumably marks that the hostcache needs an update; TODO confirm */
};
/*
 * Hostentry: a single entry of a hostcache. It stores the chosen outgoing
 * interface, next hop and destination type, and is referenced from
 * rta->hostentry for routes with recursive next hops.
 */
struct hostentry {
struct fib_node fn;	/* FIB node header -- presumably places the entry in hostcache->htable; TODO confirm */
node ln;		/* List node -- presumably places the entry in hostcache->hostentries; TODO confirm */
unsigned uc; /* Use count, adjusted by rt_lock_hostentry()/rt_unlock_hostentry() */
struct iface *iface; /* Chosen outgoing interface */
ip_addr gw; /* Chosen next hop */
byte dest; /* Chosen route destination type (RTD_...) */
byte pxlen; /* Pxlen from net that matches route */
struct rtable *tab;	/* Associated routing table -- NOTE(review): whether this is the lookup table or the dependent table is not visible here; confirm */
};
typedef struct rte {
struct rte *next;
net *net; /* Network this RTE belongs to */
......@@ -207,7 +229,6 @@ void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_baby(struct proto *p);
void rt_feed_baby_abort(struct proto *p);
void rt_prune(rtable *tab);
void rt_prune_all(void);
struct rtable_config *rt_new_table(struct symbol *s);
......@@ -248,6 +269,7 @@ typedef struct rta {
u16 hash_key; /* Hash over important fields */
ip_addr gw; /* Next hop */
ip_addr from; /* Advertising router */
struct hostentry *hostentry; /* Hostentry for recursive next-hops */
struct iface *iface; /* Outgoing interface */
struct ea_list *eattrs; /* Extended Attribute chain */
} rta;
......@@ -357,6 +379,25 @@ static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
void rta_dump(rta *);
void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *);
void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw);
/*
* rta_set_recursive_next_hop() acquires hostentry from hostcache and
* fills rta->hostentry field. New hostentry has zero use
* count. Cached rta locks its hostentry (increases its use count),
* uncached rta does not lock it. Hostentry with zero use count is
* removed asynchronously during host cache update, therefore it is
* safe to hold such hostentry temporarily. There is no need to hold
* a lock for hostentry->dep table, because that table contains routes
* responsible for that hostentry, and therefore is non-empty if given
* hostentry has non-zero use count. The protocol responsible for routes
* with recursive next hops should also hold a lock for the table governing
* those routes (argument tab to rta_set_recursive_next_hop()).
*/
/* Lock a hostentry by incrementing its use count; a NULL pointer is ignored. */
static inline void
rt_lock_hostentry(struct hostentry *he)
{
  if (!he)
    return;

  he->uc += 1;
}
/* Unlock a hostentry by decrementing its use count; a NULL pointer is ignored. */
static inline void
rt_unlock_hostentry(struct hostentry *he)
{
  if (!he)
    return;

  he->uc -= 1;
}
extern struct protocol *attr_class_to_protocol[EAP_MAX];
......
......@@ -671,6 +671,7 @@ rta_lookup(rta *o)
r = rta_copy(o);
r->hash_key = h;
r->aflags = RTAF_CACHED;
rt_lock_hostentry(r->hostentry);
rta_insert(r);
if (++rta_cache_count > rta_cache_limit)
......@@ -688,6 +689,7 @@ rta__free(rta *a)
if (a->next)
a->next->pprev = a->pprev;
a->aflags = 0; /* Poison the entry */
rt_unlock_hostentry(a->hostentry);
ea_free(a->eattrs);
sl_free(rta_slab, a);
}
......
This diff is collapsed.
......@@ -1265,15 +1265,13 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin
ea_list *ea;
struct adata *ad;
bzero(a, sizeof(rta));
a->proto = &bgp->p;
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
a->cast = RTC_UNICAST;
a->dest = RTD_ROUTER;
a->flags = 0;
a->aflags = 0;
/* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */
a->from = bgp->cf->remote_ip;
a->eattrs = NULL;
/* Parse the attributes */
bzero(seen, sizeof(seen));
......
......@@ -786,6 +786,8 @@ bgp_start(struct proto *P)
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
rt_lock_table(p->igp_table);
p->event = ev_new(p->p.pool);
p->event->hook = bgp_decision;
p->event->data = p;
......@@ -837,6 +839,19 @@ bgp_shutdown(struct proto *P)
return p->p.proto_state;
}
/*
 * cleanup hook of the BGP protocol (registered in proto_bgp).
 * Releases the lock on the IGP table that bgp_start() took.
 */
static void
bgp_cleanup(struct proto *P)
{
  rt_unlock_table(((struct bgp_proto *) P)->igp_table);
}
/*
 * Pick the routing table used for recursive next hop lookups:
 * the table of the configured igp_table if one is set, otherwise
 * the table referenced by the embedded protocol config (cf->c.table).
 */
static rtable *
get_igp_table(struct bgp_config *cf)
{
  if (cf->igp_table)
    return cf->igp_table->table;

  return cf->c.table->table;
}
static struct proto *
bgp_init(struct proto_config *C)
{
......@@ -854,6 +869,7 @@ bgp_init(struct proto_config *C)
p->local_as = c->local_as;
p->remote_as = c->remote_as;
p->is_internal = (c->local_as == c->remote_as);
p->igp_table = get_igp_table(c);
return P;
}
......@@ -1065,7 +1081,8 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
|| (old->password && new->password && !strcmp(old->password, new->password)));
|| (old->password && new->password && !strcmp(old->password, new->password)))
&& (get_igp_table(old) == get_igp_table(new));
/* We should update our copy of configuration ptr as old configuration will be freed */
if (same)
......@@ -1081,6 +1098,7 @@ struct protocol proto_bgp = {
init: bgp_init,
start: bgp_start,
shutdown: bgp_shutdown,
cleanup: bgp_cleanup,
reconfigure: bgp_reconfigure,
get_status: bgp_get_status,
get_attr: bgp_get_attr,
......
......@@ -47,6 +47,7 @@ struct bgp_config {
unsigned error_delay_time_max;
unsigned disable_after_error; /* Disable the protocol when error is detected */
char *password; /* Password used for MD5 authentication */
struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
};
#define MLL_SELF 1
......@@ -92,6 +93,7 @@ struct bgp_proto {
struct neighbor *neigh; /* Neighbor entry corresponding to next_hop */
ip_addr local_addr; /* Address of the local end of the link to next_hop */
ip_addr source_addr; /* Address used as advertised next hop, usually local_addr */
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
......
......@@ -24,7 +24,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4,
CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR,
DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES,
BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST)
BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE)
CF_GRAMMAR
......@@ -89,6 +89,7 @@ bgp_proto:
| bgp_proto ROUTE LIMIT expr ';' { BGP_CFG->route_limit = $4; }
| bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; }
| bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
;
CF_ADDTO(dynamic_attr, BGP_ORIGIN
......
......@@ -802,26 +802,26 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
} while (0)
/*
 * NOTE(review): this hunk is rendered without +/- markers, so lines of the
 * old bgp_get_nexthop() and of bgp_set_next_hop() (which appears to replace
 * it -- the caller in bgp_do_rx_update() is renamed the same way) are
 * interleaved below. Do not read the body as one coherent function.
 *
 * As far as the bgp_set_next_hop() lines show: the BA_NEXT_HOP attribute is
 * looked up in a->eattrs and its address taken as the next hop. When the
 * session is not internal, the next hop is resolved with neigh_find(),
 * falling back to p->neigh; 0 is returned if that neighbor has SCOPE_HOST,
 * otherwise the rta gets dest RTD_ROUTER, the neighbor's address and
 * interface, and a NULL hostentry. In the other case the next hop is passed
 * to rta_set_recursive_next_hop() with p->p.table as its dep argument and
 * p->igp_table as its tab argument. Returns 1 on success.
 */
static inline int
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
bgp_set_next_hop(struct bgp_proto *p, rta *a)
{
neighbor *neigh;
ip_addr nexthop;
struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
ASSERT(nh);
nexthop = *(ip_addr *) nh->u.ptr->data;
neigh = neigh_find(&bgp->p, &nexthop, 0);
if (neigh)
{
if (neigh->scope == SCOPE_HOST)
ip_addr nexthop = *(ip_addr *) nh->u.ptr->data;
if (!p->is_internal) /* FIXME better option
*/
{
DBG("BGP: Loop!\n");
neighbor *ng = neigh_find(&p->p, &nexthop, 0) ? : p->neigh;
if (ng->scope == SCOPE_HOST)
return 0;
}
a->dest = RTD_ROUTER;
a->gw = ng->addr;
a->iface = ng->iface;
a->hostentry = NULL;
}
else
neigh = bgp->neigh;
a->gw = neigh->addr;
a->iface = neigh->iface;
rta_set_recursive_next_hop(p->p.table, a, p->igp_table, &nexthop);
return 1;
}
......@@ -853,7 +853,7 @@ bgp_do_rx_update(struct bgp_conn *conn,
return;
a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
if (a0 && nlri_len && bgp_get_nexthop(p, a0))
if (a0 && nlri_len && bgp_set_next_hop(p, a0))
{
a = rta_lookup(a0);
while (nlri_len)
......
......@@ -63,6 +63,7 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e
a.aflags = 0;
a.eattrs = attrs;
a.hostentry = NULL;
e = rte_get_temp(&a);
e->net = nn;
e->pflags = 0;
......@@ -120,7 +121,7 @@ pipe_start(struct proto *P)
/* Clean up the secondary stats */
bzero(&p->peer_stats, sizeof(struct proto_stats));
/* Lock the peer table, unlock is handled in proto_fell_down() */
/* Lock the peer table, unlock is handled in pipe_cleanup() */
rt_lock_table(p->peer);
/* Connect the protocol also to the peer routing table. */
......@@ -129,6 +130,13 @@ pipe_start(struct proto *P)
return PS_UP;
}
/*
 * cleanup hook of the pipe protocol (registered in proto_pipe).
 * Releases the lock on the peer table that pipe_start() took.
 */
static void
pipe_cleanup(struct proto *P)
{
  rt_unlock_table(((struct pipe_proto *) P)->peer);
}
static struct proto *
pipe_init(struct proto_config *C)
{
......@@ -185,6 +193,7 @@ struct protocol proto_pipe = {
postconfig: pipe_postconfig,
init: pipe_init,
start: pipe_start,
cleanup: pipe_cleanup,
reconfigure: pipe_reconfigure,
get_status: pipe_get_status,
};
......@@ -244,7 +244,6 @@ krt_set_start(struct krt_proto *x, int first UNUSED)
static void
krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan)
{
rta a;
rte *e;
net *net;
sockaddr dst, gate, mask;
......@@ -329,17 +328,12 @@ krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan)
net = net_get(p->p.table, idst, pxlen);
bzero(&a, sizeof(a));
a.proto = &p->p;
a.source = RTS_INHERIT;
a.scope = SCOPE_UNIVERSE;
a.cast = RTC_UNICAST;
a.flags = a.aflags = 0;
a.from = IPA_NONE;
a.gw = IPA_NONE;
a.iface = NULL;
a.eattrs = NULL;
rta a = {
.proto = &p->p,
.source = RTS_INHERIT,
.scope = SCOPE_UNIVERSE,
.cast = RTC_UNICAST
};
/* reject/blackhole routes also have RTF_GATEWAY set,
we will check them first. */
......
......@@ -48,7 +48,6 @@ krt_parse_entry(byte *ent, struct krt_proto *p)
int masklen;
net *net;
byte *iface = ent;
rta a;
rte *e;
if (sscanf(ent, "%*s\t%x\t%x\t%x\t%*d\t%*d\t%*d\t%x\t", &dest0, &gw0, &flags, &mask0) != 4)
......@@ -88,14 +87,12 @@ krt_parse_entry(byte *ent, struct krt_proto *p)
net = net_get(p->p.table, dest, masklen);
a.proto = &p->p;
a.source = RTS_INHERIT;
a.scope = SCOPE_UNIVERSE;
a.cast = RTC_UNICAST;
a.flags = a.aflags = 0;
a.from = IPA_NONE;
a.iface = NULL;
a.eattrs = NULL;
rta a = {
.proto = &p->p,
.source = RTS_INHERIT,
.scope = SCOPE_UNIVERSE,
.cast = RTC_UNICAST
};
if (flags & RTF_GATEWAY)
{
......
......@@ -570,7 +570,6 @@ nl_parse_route(struct nlmsghdr *h, int scan)
struct rtattr *a[RTA_CACHEINFO+1];
int new = h->nlmsg_type == RTM_NEWROUTE;
ip_addr dst;
rta ra;
rte *e;
net *net;
u32 oif;
......@@ -655,15 +654,13 @@ nl_parse_route(struct nlmsghdr *h, int scan)
}
net = net_get(p->p.table, dst, i->rtm_dst_len);
ra.proto = &p->p;
ra.source = RTS_INHERIT;
ra.scope = SCOPE_UNIVERSE;
ra.cast = RTC_UNICAST;
ra.flags = ra.aflags = 0;
ra.from = IPA_NONE;
ra.gw = IPA_NONE;
ra.iface = NULL;
ra.eattrs = NULL;
rta ra = {
.proto = &p->p,
.source = RTS_INHERIT,
.scope = SCOPE_UNIVERSE,
.cast = RTC_UNICAST
};
switch (i->rtm_type)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment