Commit 9aed29e6 authored by Ondřej Zajíček's avatar Ondřej Zajíček

BGP: Enhanced route refresh (RFC 7313) support

Also hook feed_done is renamed to feed_end.
parent a5a5a41e
......@@ -1804,13 +1804,17 @@ using the following configuration parameters:
other means. Default: 0 (no local AS number allowed).
<tag>enable route refresh <m/switch/</tag>
When BGP speaker changes its import filter, it has to re-examine all
routes received from its neighbor against the new filter. As these
routes might not be available, there is a BGP protocol extension Route
Refresh (specified in RFC 2918) that allows BGP speaker to request
re-advertisement of all routes from its neighbor. This option specifies
whether BIRD advertises this capability and accepts such requests. Even
when disabled, BIRD can send route refresh requests. Default: on.
After the initial route exchange, the BGP protocol uses incremental updates
to keep BGP speakers synchronized. Sometimes (e.g., if a BGP speaker
changes its import filter, or if there is a suspicion of inconsistency) it
is necessary to do a new complete route exchange. The BGP protocol extension
Route Refresh (RFC 2918) allows a BGP speaker to request re-advertisement
of all routes from its neighbor. The BGP protocol extension Enhanced Route
Refresh (RFC 7313) specifies an explicit begin and end for such exchanges,
therefore the receiver can remove stale routes that were not advertised
during the exchange. This option specifies whether BIRD advertises these
capabilities and supports the related procedures. Note that even when
disabled, BIRD can send route refresh requests. Default: on.
<tag>graceful restart <m/switch/|aware</tag>
When a BGP speaker restarts or crashes, neighbors will discard all
......
......@@ -942,8 +942,8 @@ proto_feed_more(void *P)
p->export_state = ES_READY;
proto_log_state_change(p);
if (p->feed_done)
p->feed_done(p);
if (p->feed_end)
p->feed_end(p);
}
else
{
......@@ -976,6 +976,9 @@ proto_schedule_feed(struct proto *p, int initial)
p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
ev_schedule(p->attn);
if (p->feed_begin)
p->feed_begin(p, initial);
}
/*
......
......@@ -179,7 +179,8 @@ struct proto {
* reload_routes Request protocol to reload all its routes to the core
* (using rte_update()). Returns: 0=reload cannot be done,
* 1= reload is scheduled and will happen (asynchronously).
* feed_done Notify protocol about finish of route feeding.
* feed_begin Notify protocol about beginning of route feeding.
* feed_end Notify protocol about finish of route feeding.
*/
void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
......@@ -190,7 +191,8 @@ struct proto {
void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
int (*reload_routes)(struct proto *);
void (*feed_done)(struct proto *);
void (*feed_begin)(struct proto *, int initial);
void (*feed_end)(struct proto *);
/*
* Routing entry hooks (called only for routes belonging to this protocol):
......
......@@ -377,6 +377,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
p->feed_state = BFS_NONE;
p->load_state = BFS_NONE;
bgp_init_bucket_table(p);
bgp_init_prefix_table(p, 8);
......@@ -394,6 +396,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
bgp_graceful_restart_done(p);
/* GR capability implies that neighbor will send End-of-RIB */
if (conn->peer_gr_aware)
p->load_state = BFS_LOADING;
/* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
}
......@@ -504,6 +512,47 @@ bgp_graceful_restart_timeout(timer *t)
bgp_stop(p, 0);
}
/**
 * bgp_refresh_begin - start incoming enhanced route refresh sequence
 * @p: BGP instance
 *
 * Called when the neighbor opens an enhanced route refresh sequence with a
 * BoRR packet. Unless an initial load is still in progress (End-of-RIB not yet
 * received), the load state is switched to %BFS_REFRESHING and a routing table
 * refresh cycle is started. A BoRR arriving during the initial load is logged
 * and ignored. Graceful restart uses the routing table refresh cycle as well,
 * but RFC 7313 and the load states ensure the two sequences do not overlap.
 */
void
bgp_refresh_begin(struct bgp_proto *p)
{
  if (p->load_state != BFS_LOADING)
  {
    /* Regular case: mark the refresh as running and open the refresh cycle */
    p->load_state = BFS_REFRESHING;
    rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
    return;
  }

  /* BoRR during the initial load is refused */
  log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name);
}
/**
 * bgp_refresh_end - finish incoming enhanced route refresh sequence
 * @p: BGP instance
 *
 * Called when the neighbor closes an enhanced route refresh sequence with an
 * EoRR packet. If a refresh is in progress (load state %BFS_REFRESHING), the
 * load state is reset to %BFS_NONE and the routing table refresh cycle is
 * ended; routes not received during the sequence are removed by the nest.
 * An EoRR without a preceding BoRR is logged and ignored.
 */
void
bgp_refresh_end(struct bgp_proto *p)
{
  if (p->load_state == BFS_REFRESHING)
  {
    /* Matching BoRR was seen: close the refresh cycle */
    p->load_state = BFS_NONE;
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
    return;
  }

  /* No refresh sequence is open, so there is nothing to finish */
  log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name);
}
static void
bgp_send_open(struct bgp_conn *conn)
{
......@@ -514,6 +563,7 @@ bgp_send_open(struct bgp_conn *conn)
conn->peer_refresh_support = 0;
conn->peer_as4_support = 0;
conn->peer_add_path = 0;
conn->peer_enhanced_refresh_support = 0;
conn->peer_gr_aware = 0;
conn->peer_gr_able = 0;
conn->peer_gr_time = 0;
......@@ -959,16 +1009,56 @@ bgp_reload_routes(struct proto *P)
}
static void
bgp_feed_done(struct proto *P)
bgp_feed_begin(struct proto *P, int initial)
{
struct bgp_proto *p = (struct bgp_proto *) P;
if (!p->conn || !p->cf->gr_mode || p->p.refeeding)
/* This should not happen */
if (!p->conn)
return;
p->send_end_mark = 1;
if (initial && p->cf->gr_mode)
p->feed_state = BFS_LOADING;
/* It is refeed and both sides support enhanced route refresh */
if (!initial && p->cf->enable_refresh &&
p->conn->peer_enhanced_refresh_support)
{
/* BoRR must not be sent before End-of-RIB */
if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
return;
p->feed_state = BFS_REFRESHING;
bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
}
}
/*
 * feed_end hook: called when a route feed to this protocol has finished.
 * Moves the TX feed state from "active" to "done" so that the TX hook emits
 * the matching End-of-RIB or EoRR once no more updates are pending.
 */
static void
bgp_feed_end(struct proto *P)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

  /* This should not happen */
  if (!p->conn)
    return;

  switch (p->feed_state)
  {
  case BFS_NONE:
    /* Non-demarcated feed ended, nothing to do */
    return;

  case BFS_LOADING:
    /* Initial feed done, End-of-RIB is to be sent */
    p->feed_state = BFS_LOADED;
    break;

  case BFS_REFRESHING:
    /* Refresh done, EoRR is to be sent */
    p->feed_state = BFS_REFRESHED;
    break;

  default:
    /* Already BFS_LOADED / BFS_REFRESHED: state is kept as it is */
    break;
  }

  /* Kick TX hook */
  bgp_schedule_packet(p->conn, PKT_UPDATE);
}
static void
bgp_start_locked(struct object_lock *lock)
{
......@@ -1150,7 +1240,8 @@ bgp_init(struct proto_config *C)
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_done = bgp_feed_done;
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
P->rte_better = bgp_rte_better;
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
......@@ -1426,8 +1517,9 @@ bgp_show_proto_info(struct proto *P)
else if (P->proto_state == PS_UP)
{
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
cli_msg(-1006, " Neighbor caps: %s%s%s%s%s",
cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s",
c->peer_refresh_support ? " refresh" : "",
c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
c->peer_as4_support ? " AS4" : "",
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
......
......@@ -103,6 +103,7 @@ struct bgp_conn {
u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */
u8 peer_gr_aware;
u8 peer_gr_able;
u16 peer_gr_time;
......@@ -127,6 +128,8 @@ struct bgp_proto {
int rs_client; /* Whether neighbor is RS client of me */
u8 gr_ready; /* Neighbor could do graceful restart */
u8 gr_active; /* Neighbor is doing graceful restart */
u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
......@@ -144,7 +147,6 @@ struct bgp_proto {
slab *prefix_slab; /* Slab holding prefix nodes */
list bucket_queue; /* Queue of buckets to send */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
......@@ -196,6 +198,8 @@ void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_handle_graceful_restart(struct bgp_proto *p);
void bgp_graceful_restart_done(struct bgp_proto *p);
void bgp_refresh_begin(struct bgp_proto *p);
void bgp_refresh_end(struct bgp_proto *p);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);
......@@ -263,7 +267,8 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define PKT_UPDATE 0x02
#define PKT_NOTIFICATION 0x03
#define PKT_KEEPALIVE 0x04
#define PKT_ROUTE_REFRESH 0x05
#define PKT_ROUTE_REFRESH 0x05 /* [RFC2918] */
#define PKT_BEGIN_REFRESH 0x1e /* Dummy type for BoRR packet [RFC7313] */
#define PKT_SCHEDULE_CLOSE 0x1f /* Used internally to schedule socket close */
/* Attributes */
......@@ -309,10 +314,10 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
*
* Used in PS_START for fine-grained specification of starting state.
*
* When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP protocol
* has done what is necessary to start itself (like acquiring the lock), it goes to BSS_CONNECT.
* When some connection attempt failed because of option or capability error, it goes to
* BSS_CONNECT_NOCAP.
* When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
* protocol has done what is necessary to start itself (like acquiring the lock),
* it goes to BSS_CONNECT. When some connection attempt failed because of
* option or capability error, it goes to BSS_CONNECT_NOCAP.
*/
#define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */
......@@ -320,6 +325,33 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BSS_CONNECT 2 /* Ordinary BGP connecting */
#define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */
/* BGP feed states (TX)
 *
 * RFC 4724 specifies that an initial feed should end with an End-of-RIB mark.
 *
 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and
 * EoRR packets.
 *
 * These states (stored in p->feed_state) are used to keep track of these
 * requirements. When such a feed is started, BFS_LOADING / BFS_REFRESHING is
 * set. When it ends, BFS_LOADED / BFS_REFRESHED is set to schedule the
 * End-of-RIB or EoRR packet. When the packet is sent, the state returns to
 * BFS_NONE.
 *
 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
 * demarcation) is active, BFS_NONE is set.
 *
 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX,
 * stored in p->load_state) with corresponding semantics (-, expecting
 * End-of-RIB, expecting EoRR).
 */
#define BFS_NONE 0 /* No feed or original non-demarcated feed */
#define BFS_LOADING 1 /* Initial feed active, End-of-RIB planned */
#define BFS_LOADED 2 /* Loading done, End-of-RIB marker scheduled */
#define BFS_REFRESHING 3 /* Route refresh (introduced by BoRR) active */
#define BFS_REFRESHED 4 /* Refresh done, EoRR packet scheduled */
/* Error classes */
#define BE_NONE 0
......
......@@ -22,6 +22,12 @@
#include "bgp.h"
/* ROUTE-REFRESH message subtypes (RFC 7313 redefined the reserved field as the subtype) */
#define BGP_RR_REQUEST 0 /* Original route refresh request [RFC2918] */
#define BGP_RR_BEGIN 1 /* Beginning of route refresh, BoRR [RFC7313] */
#define BGP_RR_END 2 /* Ending of route refresh, EoRR [RFC7313] */
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
......@@ -209,6 +215,15 @@ bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
return buf;
}
/*
 * Write the enhanced route refresh capability (code 70, RFC 7313) into the
 * capability buffer at @buf. The capability carries no data, so exactly two
 * bytes (code and zero length) are written. Returns the position just past
 * the written capability.
 */
static byte *
bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
{
  buf[0] = 70;		/* Capability 70: Support for enhanced route refresh */
  buf[1] = 0;		/* Capability data length */

  return buf + 2;
}
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
......@@ -256,6 +271,9 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
if (p->cf->add_path)
cap = bgp_put_cap_add_path(p, cap);
if (p->cf->enable_refresh)
cap = bgp_put_cap_err(p, cap);
cap_len = cap - buf - 12;
if (cap_len > 0)
{
......@@ -389,7 +407,7 @@ static byte *
bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
BGP_TRACE(D_PACKETS, "Sending End-of-RIB");
BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
put_u32(buf, 0);
return buf+4;
......@@ -568,7 +586,7 @@ static byte *
bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
BGP_TRACE(D_PACKETS, "Sending End-of-RIB");
BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
put_u16(buf+0, 0);
put_u16(buf+2, 6); /* length 4-9 */
......@@ -586,19 +604,49 @@ bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
#endif
static byte *
static inline byte *
bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
/* Original route refresh request, RFC 2918 */
*buf++ = 0;
*buf++ = BGP_AF;
*buf++ = 0; /* RFU */
*buf++ = 1; /* and SAFI 1 */
*buf++ = BGP_RR_REQUEST;
*buf++ = 1; /* SAFI */
return buf;
}
/*
 * Build the 4-byte body of a ROUTE-REFRESH message with subtype BoRR
 * (demarcation of beginning of route refresh, RFC 7313) at @buf.
 * Returns the position just past the written body.
 */
static inline byte *
bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* NOTE(review): presumably needed by BGP_TRACE */

  BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");

  buf[0] = 0;			/* Address family, first byte */
  buf[1] = BGP_AF;		/* Address family, second byte */
  buf[2] = BGP_RR_BEGIN;	/* Message subtype: BoRR */
  buf[3] = 1;			/* SAFI */

  return buf + 4;
}
/*
 * Build the 4-byte body of a ROUTE-REFRESH message with subtype EoRR
 * (demarcation of ending of route refresh, RFC 7313) at @buf.
 * Returns the position just past the written body.
 */
static inline byte *
bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* NOTE(review): presumably needed by BGP_TRACE */

  BGP_TRACE(D_PACKETS, "Sending END-OF-RR");

  buf[0] = 0;			/* Address family, first byte */
  buf[1] = BGP_AF;		/* Address family, second byte */
  buf[2] = BGP_RR_END;		/* Message subtype: EoRR */
  buf[3] = 1;			/* SAFI */

  return buf + 4;
}
static void
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
{
......@@ -666,24 +714,44 @@ bgp_fire_tx(struct bgp_conn *conn)
type = PKT_ROUTE_REFRESH;
end = bgp_create_route_refresh(conn, pkt);
}
else if (s & (1 << PKT_BEGIN_REFRESH))
{
s &= ~(1 << PKT_BEGIN_REFRESH);
type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
end = bgp_create_begin_refresh(conn, pkt);
}
else if (s & (1 << PKT_UPDATE))
{
end = bgp_create_update(conn, pkt);
type = PKT_UPDATE;
end = bgp_create_update(conn, pkt);
if (!end)
{
/* No update to send, perhaps we need to send End-of-RIB or EoRR */
conn->packets_to_send = 0;
if (!p->send_end_mark)
if (p->feed_state == BFS_LOADED)
{
type = PKT_UPDATE;
end = bgp_create_end_mark(conn, pkt);
}
else if (p->feed_state == BFS_REFRESHED)
{
type = PKT_ROUTE_REFRESH;
end = bgp_create_end_refresh(conn, pkt);
}
else /* Really nothing to send */
return 0;
p->send_end_mark = 0;
end = bgp_create_end_mark(conn, pkt);
p->feed_state = BFS_NONE;
}
}
else
return 0;
conn->packets_to_send = s;
bgp_create_header(buf, end - buf, type);
return sk_send(sk, end - buf);
......@@ -780,7 +848,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
conn->peer_add_path = opt[2+i+3];
if (conn->peer_add_path > ADD_PATH_FULL)
goto err;
break;
case 70: /* Enhanced route refresh capability, RFC 7313 */
if (cl != 0)
goto err;
conn->peer_enhanced_refresh_support = 1;
break;
/* We can safely ignore all other capabilities */
......@@ -945,7 +1018,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
static inline void
bgp_rx_end_mark(struct bgp_proto *p)
{
BGP_TRACE(D_PACKETS, "Got End-of-RIB");
BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
if (p->load_state == BFS_LOADING)
p->load_state = BFS_NONE;
if (p->p.gr_recovery)
proto_graceful_restart_unlock(&p->p);
......@@ -1353,7 +1429,9 @@ static struct {
{ 6, 5, "Connection rejected" },
{ 6, 6, "Other configuration change" },
{ 6, 7, "Connection collision resolution" },
{ 6, 8, "Out of Resources" }
{ 6, 8, "Out of Resources" },
{ 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
{ 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
};
/**
......@@ -1484,22 +1562,47 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
{
struct bgp_proto *p = conn->bgp;
BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
if (conn->state != BS_ESTABLISHED)
{ bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
if (!p->cf->enable_refresh)
{ bgp_error(conn, 1, 3, pkt+18, 1); return; }
if (len != (BGP_HEADER_LENGTH + 4))
if (len < (BGP_HEADER_LENGTH + 4))
{ bgp_error(conn, 1, 2, pkt+16, 2); return; }
if (len > (BGP_HEADER_LENGTH + 4))
{ bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
/* FIXME - we ignore AFI/SAFI values, as we support
just one value and even an error code for an invalid
request is not defined */
/* RFC 7313 redefined reserved field as RR message subtype */
uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
switch (subtype)
{
case BGP_RR_REQUEST:
BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
proto_request_feeding(&p->p);
break;
case BGP_RR_BEGIN:
BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
bgp_refresh_begin(p);
break;
case BGP_RR_END:
BGP_TRACE(D_PACKETS, "Got END-OF-RR");
bgp_refresh_end(p);
break;
default:
log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
p->p.name, subtype);
break;
}
}
......
......@@ -1023,7 +1023,7 @@ krt_reload_routes(struct proto *P)
}
static void
krt_feed_done(struct proto *P)
krt_feed_end(struct proto *P)
{
struct krt_proto *p = (struct krt_proto *) P;
......@@ -1056,7 +1056,7 @@ krt_init(struct proto_config *c)
p->p.rt_notify = krt_rt_notify;
p->p.if_notify = krt_if_notify;
p->p.reload_routes = krt_reload_routes;
p->p.feed_done = krt_feed_done;
p->p.feed_end = krt_feed_end;
p->p.make_tmp_attrs = krt_make_tmp_attrs;
p->p.store_tmp_attrs = krt_store_tmp_attrs;
p->p.rte_same = krt_rte_same;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment