Commit 7e95c05d authored by Ondřej Zajíček

Core multipath support.

parent 01427d3f
......@@ -51,7 +51,7 @@ CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIREC
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE)
CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST)
CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT)
CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH)
%type <i32> idval
%type <f> imexport
......
......@@ -170,7 +170,7 @@ struct hostentry {
struct hostentry *next; /* Next in hash chain */
unsigned hash_key; /* Hash key */
unsigned uc; /* Use count */
struct iface *iface; /* Chosen outgoing interface */
struct rta *src; /* Source rta entry */
ip_addr gw; /* Chosen next hop */
byte dest; /* Chosen route destination type (RTD_...) */
u32 igp_metric; /* Chosen route IGP metric */
......@@ -266,6 +266,14 @@ void rt_show(struct rt_show_data *);
* construction of BGP route attribute lists.
*/
/* Multipath next-hop: one element of a singly linked list of next hops */
struct mpnh {
ip_addr gw; /* Next hop */
struct iface *iface; /* Outgoing interface */
struct mpnh *next; /* Following next hop in the list, NULL terminates the list */
unsigned char weight; /* Next-hop weight. NOTE(review): rt_show_rte() prints weight + 1, so this is presumably stored as (configured weight - 1) -- confirm */
};
typedef struct rta {
struct rta *next, **pprev; /* Hash chain */
struct proto *proto; /* Protocol instance that originally created the route */
......@@ -282,6 +290,7 @@ typedef struct rta {
ip_addr from; /* Advertising router */
struct hostentry *hostentry; /* Hostentry for recursive next-hops */
struct iface *iface; /* Outgoing interface */
struct mpnh *nexthops; /* Next-hops for multipath routes */
struct ea_list *eattrs; /* Extended Attribute chain */
} rta;
......@@ -309,7 +318,8 @@ typedef struct rta {
#define RTD_BLACKHOLE 2 /* Silently drop packets */
#define RTD_UNREACHABLE 3 /* Reject as unreachable */
#define RTD_PROHIBIT 4 /* Administratively prohibited */
#define RTD_NONE 5 /* Invalid RTD */
#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */
#define RTD_NONE 6 /* Invalid RTD */
#define RTAF_CACHED 1 /* This is a cached rta */
......@@ -387,6 +397,10 @@ void ea_format(eattr *e, byte *buf);
#define EA_FORMAT_BUF_SIZE 256
ea_list *ea_append(ea_list *to, ea_list *what);
int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */

/*
 * Compare two next-hop lists. Lists referenced by the same pointer
 * (which includes two NULL lists) are equal without being walked;
 * everything else is delegated to mpnh__same().
 */
static inline int mpnh_same(struct mpnh *x, struct mpnh *y)
{
  if (x == y)
    return 1;

  return mpnh__same(x, y) != 0;
}
void rta_init(void);
rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
......@@ -403,12 +417,14 @@ void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, i
* count. Cached rta locks its hostentry (increases its use count),
* uncached rta does not lock it. Hostentry with zero use count is
* removed asynchronously during host cache update, therefore it is
* safe to hold such hostentry temporarily. There is no need to hold
* a lock for hostentry->dep table, because that table contains routes
* responsible for that hostentry, and therefore is non-empty if given
* hostentry has non-zero use count. The protocol responsible for routes
* with recursive next hops should also hold a lock for a table governing
* those routes (argument tab to rta_set_recursive_next_hop()).
* safe to hold such hostentry temporarily. Hostentry holds a lock for
* a 'source' rta, mainly to share multipath nexthops. There is no
* need to hold a lock for hostentry->dep table, because that table
* contains routes responsible for that hostentry, and therefore is
* non-empty if given hostentry has non-zero use count. The protocol
* responsible for routes with recursive next hops should also hold a
* lock for a table governing those routes (argument tab to
* rta_set_recursive_next_hop()).
*/
static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; }
......
......@@ -57,9 +57,65 @@
pool *rta_pool;
static slab *rta_slab;
static slab *mpnh_slab;
struct protocol *attr_class_to_protocol[EAP_MAX];
/*
 * Hash a list of multipath next hops by XOR-ing together the hashes of
 * the gateway addresses of all its entries. An empty list hashes to 0.
 * Only the gateway contributes; iface and weight are not hashed.
 */
static inline unsigned int
mpnh_hash(struct mpnh *x)
{
  unsigned int acc = 0;
  struct mpnh *nh = x;

  while (nh)
  {
    acc ^= ipa_hash(nh->gw);
    nh = nh->next;
  }

  return acc;
}
/*
 * Element-wise comparison of two next-hop lists. Returns 1 when both
 * lists have the same length and every pair of corresponding entries
 * agrees in gateway, interface and weight; returns 0 otherwise.
 */
int
mpnh__same(struct mpnh *x, struct mpnh *y)
{
  while (x && y)
  {
    if (!ipa_equal(x->gw, y->gw))
      return 0;
    if (x->iface != y->iface)
      return 0;
    if (x->weight != y->weight)
      return 0;

    x = x->next;
    y = y->next;
  }

  /* At least one list ran out; they match only if both did */
  return !x && !y;
}
/*
 * Duplicate a next-hop list. Every entry of @o is copied, in order, into
 * a node freshly allocated from mpnh_slab. Returns the head of the new
 * list, or NULL when @o is empty.
 */
static struct mpnh *
mpnh_copy(struct mpnh *o)
{
  struct mpnh *head = NULL;
  struct mpnh **tail = &head;   /* Where the next copied node gets linked in */

  while (o)
  {
    struct mpnh *node = sl_alloc(mpnh_slab);

    node->gw = o->gw;
    node->iface = o->iface;
    node->weight = o->weight;
    node->next = NULL;

    *tail = node;
    tail = &node->next;

    o = o->next;
  }

  return head;
}
/*
 * Release every node of a next-hop list back to mpnh_slab.
 * Passing an empty (NULL) list does nothing.
 */
static void
mpnh_free(struct mpnh *o)
{
  struct mpnh *following;

  for (; o; o = following)
  {
    /* Fetch the successor first, the node is gone after sl_free() */
    following = o->next;
    sl_free(mpnh_slab, o);
  }
}
/*
* Extended Attributes
*/
......@@ -587,7 +643,8 @@ rta_alloc_hash(void)
static inline unsigned int
rta_hash(rta *a)
{
return (a->proto->hash_key ^ ipa_hash(a->gw) ^ ea_hash(a->eattrs)) & 0xffff;
return (a->proto->hash_key ^ ipa_hash(a->gw) ^
mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff;
}
static inline int
......@@ -604,6 +661,7 @@ rta_same(rta *x, rta *y)
ipa_equal(x->from, y->from) &&
x->iface == y->iface &&
x->hostentry == y->hostentry &&
mpnh_same(x->nexthops, y->nexthops) &&
ea_same(x->eattrs, y->eattrs));
}
......@@ -614,6 +672,7 @@ rta_copy(rta *o)
memcpy(r, o, sizeof(rta));
r->uc = 1;
r->nexthops = mpnh_copy(o->nexthops);
r->eattrs = ea_list_copy(o->eattrs);
return r;
}
......@@ -707,6 +766,7 @@ rta__free(rta *a)
a->next->pprev = a->pprev;
a->aflags = 0; /* Poison the entry */
rt_unlock_hostentry(a->hostentry);
mpnh_free(a->nexthops);
ea_free(a->eattrs);
sl_free(rta_slab, a);
}
......@@ -798,6 +858,7 @@ rta_init(void)
{
rta_pool = rp_new(&root_pool, "Attributes");
rta_slab = sl_new(rta_pool, sizeof(rta));
mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh));
rta_alloc_hash();
}
......
......@@ -962,29 +962,31 @@ rt_preconfig(struct config *c)
* triggered by rt_schedule_nhu().
*/
/*
 * Report whether the hostentry's resolved state differs from the given
 * values. Returns 1 if any of interface, gateway, destination type or
 * IGP metric is different, 0 if all four match.
 */
static inline int
hostentry_diff(struct hostentry *he, struct iface *iface, ip_addr gw,
	       byte dest, u32 igp_metric)
{
  if (he->iface != iface)
    return 1;
  if (!ipa_equal(he->gw, gw))
    return 1;
  if (he->dest != dest)
    return 1;

  return he->igp_metric != igp_metric;
}
static inline int
rta_next_hop_outdated(rta *a)
{
struct hostentry *he = a->hostentry;
return he && hostentry_diff(he, a->iface, a->gw, a->dest, a->igp_metric);
if (!he)
return 0;
if (!he->src)
return a->dest != RTD_UNREACHABLE;
return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
(a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
!mpnh_same(a->nexthops, he->src->nexthops);
}
static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
a->hostentry = he;
a->iface = he->iface;
a->iface = he->src ? he->src->iface : NULL;
a->gw = he->gw;
a->dest = he->dest;
a->igp_metric = he->igp_metric;
a->nexthops = he->src ? he->src->nexthops : NULL;
}
static inline rte *
......@@ -1388,6 +1390,7 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
he->tab = dep;
he->hash_key = k;
he->uc = 0;
he->src = NULL;
add_tail(&hc->hostentries, &he->ln);
hc_insert(hc, he);
......@@ -1402,6 +1405,8 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
rta_free(he->src);
rem_node(&he->ln);
hc_remove(hc, he);
sl_free(hc->slab, he);
......@@ -1436,6 +1441,8 @@ rt_free_hostcache(rtable *tab)
WALK_LIST(n, hc->hostentries)
{
struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
rta_free(he->src);
if (he->uc)
log(L_ERR "Hostcache is not empty in table %s", tab->name);
}
......@@ -1488,7 +1495,7 @@ rt_get_igp_metric(rte *rt)
return rt->u.rip.metric;
/* Device routes */
if (a->dest != RTD_ROUTER)
if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
return 0;
return IGP_METRIC_UNKNOWN;
......@@ -1497,12 +1504,15 @@ rt_get_igp_metric(rte *rt)
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
struct iface *old_iface = he->iface;
ip_addr old_gw = he->gw;
byte old_dest = he->dest;
u32 old_metric = he->igp_metric;
rta *old_src = he->src;
int pxlen = 0;
/* Reset the hostentry */
he->src = NULL;
he->gw = IPA_NONE;
he->dest = RTD_UNREACHABLE;
he->igp_metric = 0;
net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
if (n)
{
......@@ -1513,53 +1523,41 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
{
/* Recursive route should not depend on another recursive route */
log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
he->addr, n->n.prefix, n->n.pxlen);
he->iface = NULL;
he->gw = IPA_NONE;
he->dest = RTD_UNREACHABLE;
he->addr, n->n.prefix, pxlen);
goto done;
}
else if (a->dest == RTD_DEVICE)
if (a->dest == RTD_DEVICE)
{
if (if_local_addr(he->addr, a->iface))
{
/* The host address is a local address, this is not valid */
log(L_WARN "Next hop address %I is a local address of iface %s",
he->addr, a->iface->name);
he->iface = NULL;
he->gw = IPA_NONE;
he->dest = RTD_UNREACHABLE;
goto done;
}
else
{
/* The host is directly reachable, use link as a gateway */
he->iface = a->iface;
he->gw = he->link;
he->dest = RTD_ROUTER;
}
/* The host is directly reachable, use link as a gateway */
he->gw = he->link;
he->dest = RTD_ROUTER;
}
else
{
/* The host is reachable through some route entry */
he->iface = a->iface;
he->gw = a->gw;
he->dest = a->dest;
}
he->igp_metric = he->iface ? rt_get_igp_metric(n->routes) : 0;
}
else
{
/* The host is unreachable */
he->iface = NULL;
he->gw = IPA_NONE;
he->dest = RTD_UNREACHABLE;
he->igp_metric = 0;
he->src = rta_clone(a);
he->igp_metric = rt_get_igp_metric(n->routes);
}
done:
/* Add a prefix range to the trie */
trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);
return hostentry_diff(he, old_iface, old_gw, old_dest, old_metric);
rta_free(old_src);
return old_src != he->src;
}
static void
......@@ -1630,6 +1628,7 @@ rt_format_via(rte *e, byte *via)
case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
default: bsprintf(via, "???");
}
}
......@@ -1641,6 +1640,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
rta *a = e->attrs;
int primary = (e->net->routes == e);
struct mpnh *nh;
rt_format_via(e, via);
tm_format_datetime(tm, &config->tf_route, e->lastmod);
......@@ -1663,6 +1663,8 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
bsprintf(info, " (%d)", e->pref);
cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->proto->name,
tm, from, primary ? " *" : "", info);
for (nh = a->nexthops; nh; nh = nh->next)
cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
if (d->verbose)
rta_show(c, a, tmpa);
}
......
......@@ -1015,6 +1015,13 @@ bgp_get_neighbor(rte *r)
return ((struct bgp_proto *) r->attrs->proto)->remote_as;
}
/*
 * A route counts as resolvable when its destination type is
 * RTD_ROUTER, RTD_DEVICE or RTD_MULTIPATH. Returns 1 in that case
 * and 0 for every other destination type.
 */
static inline int
rte_resolvable(rte *rt)
{
  switch (rt->attrs->dest)
  {
  case RTD_ROUTER:
  case RTD_DEVICE:
  case RTD_MULTIPATH:
    return 1;

  default:
    return 0;
  }
}
int
bgp_rte_better(rte *new, rte *old)
{
......@@ -1024,9 +1031,8 @@ bgp_rte_better(rte *new, rte *old)
u32 n, o;
/* RFC 4271 9.1.2.1. Route resolvability test */
/* non-NULL iface means it is either RTD_ROUTER or RTD_DEVICE route */
n = new->attrs->iface != NULL;
o = old->attrs->iface != NULL;
n = rte_resolvable(new);
o = rte_resolvable(old);
if (n > o)
return 1;
if (n < o)
......@@ -1502,7 +1508,7 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
buf += bsprintf(buf, " (%d", e->pref);
if (e->attrs->hostentry)
{
if (!e->attrs->iface)
if (!rte_resolvable(e))
buf += bsprintf(buf, "/-");
else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
buf += bsprintf(buf, "/?");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment