Commit 0c791f87 authored by Ondřej Zajíček's avatar Ondřej Zajíček

BGP graceful restart support.

Also significant core protocol state changes needed for that,
global graceful restart recovery state and kernel proto support
for recovery.
parent 4e398e34
......@@ -98,6 +98,7 @@ config_alloc(byte *name)
c->load_time = now;
c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600};
c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0};
c->gr_wait = DEFAULT_GR_WAIT;
return c;
}
......
......@@ -38,6 +38,7 @@ struct config {
struct timeformat tf_proto; /* Time format for 'show protocol' */
struct timeformat tf_log; /* Time format for the logfile */
struct timeformat tf_base; /* Time format for other purposes */
u32 gr_wait; /* Graceful restart wait timeout */
int cli_debug; /* Tracing of CLI connections and commands */
char *err_msg; /* Parser error message */
......
......@@ -32,6 +32,7 @@ Reply codes of BIRD command-line interface
0021 Undo requested
0022 Undo scheduled
0023 Evaluation of expression
0024 Graceful restart status report
1000 BIRD version
1001 Interface list
......
......@@ -36,6 +36,8 @@ typedef struct list { /* In fact two overlayed nodes */
#define NODE_NEXT(n) ((void *)((NODE (n))->next))
#define NODE_VALID(n) ((NODE (n))->next)
#define WALK_LIST(n,list) for(n=HEAD(list); NODE_VALID(n); n=NODE_NEXT(n))
#define WALK_LIST2(n,nn,list,pos) \
for(nn=(list).head; NODE_VALID(nn) && (n=SKIP_BACK(typeof(*n),pos,nn)); nn=nn->next)
#define WALK_LIST_DELSAFE(n,nxt,list) \
for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt)
/* WALK_LIST_FIRST supposes that called code removes each processed node */
......
......@@ -7,6 +7,7 @@
*/
#include "nest/bird.h"
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/cli.h"
#include "conf/conf.h"
......@@ -32,6 +33,8 @@ cmd_show_status(void)
tm_format_datetime(tim, &config->tf_base, config->load_time);
cli_msg(-1011, "Last reconfiguration on %s", tim);
graceful_restart_show_status();
if (shutting_down)
cli_msg(13, "Shutdown in progress");
else if (configuring)
......
......@@ -49,6 +49,7 @@ CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFA
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_KEYWORDS(GRACEFUL, RESTART, WAIT)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
......@@ -110,6 +111,11 @@ listen_opt:
;
CF_ADDTO(conf, gr_opts)
gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ;
/* Creation of routing tables */
tab_sorted:
......
This diff is collapsed.
......@@ -148,10 +148,13 @@ struct proto {
byte disabled; /* Manually disabled */
byte proto_state; /* Protocol state machine (PS_*, see below) */
byte core_state; /* Core state machine (FS_*, see below) */
byte core_goal; /* State we want to reach (FS_*, see below) */
byte export_state; /* Route export state (ES_*, see below) */
byte reconfiguring; /* We're shutting down due to reconfiguration */
byte refeeding; /* We are refeeding (valid only if core_state == FS_FEEDING) */
byte refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */
byte flushing; /* Protocol is flushed in current flush loop round */
byte gr_recovery; /* Protocol should participate in graceful restart recovery */
byte gr_lock; /* Graceful restart mechanism should wait for this proto */
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
byte down_code; /* Reason for shutdown (PDC_* codes) */
u32 hash_key; /* Random key used for hashing of neighbors */
......@@ -175,6 +178,7 @@ struct proto {
* reload_routes Request protocol to reload all its routes to the core
* (using rte_update()). Returns: 0=reload cannot be done,
* 1= reload is scheduled and will happen (asynchronously).
* feed_done Notify protocol about finish of route feeding.
*/
void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
......@@ -185,6 +189,7 @@ struct proto {
void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
int (*reload_routes)(struct proto *);
void (*feed_done)(struct proto *);
/*
* Routing entry hooks (called only for routes belonging to this protocol):
......@@ -242,6 +247,13 @@ static inline void
proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size)
{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); }
void graceful_restart_recovery(void);
void graceful_restart_init(void);
void graceful_restart_show_status(void);
void proto_graceful_restart_lock(struct proto *p);
void proto_graceful_restart_unlock(struct proto *p);
#define DEFAULT_GR_WAIT 240
void proto_show_limit(struct proto_limit *l, const char *dsc);
void proto_show_basic_info(struct proto *p);
......@@ -343,10 +355,17 @@ void proto_notify_state(struct proto *p, unsigned state);
* as a result of received ROUTE-REFRESH request).
*/
#define FS_HUNGRY 0
#define FS_FEEDING 1
#define FS_HAPPY 2
#define FS_FLUSHING 3
#define FS_HUNGRY 0
#define FS_FEEDING 1 /* obsolete */
#define FS_HAPPY 2
#define FS_FLUSHING 3
#define ES_DOWN 0
#define ES_FEEDING 1
#define ES_READY 2
/*
* Debugging flags
......
......@@ -148,6 +148,10 @@ typedef struct rtable {
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
} rtable;
#define RPS_NONE 0
#define RPS_SCHEDULED 1
#define RPS_RUNNING 2
typedef struct network {
struct fib_node n; /* FIB flags reserved for kernel syncer */
struct rte *routes; /* Available routes for this network */
......@@ -222,6 +226,8 @@ typedef struct rte {
#define REF_COW 1 /* Copy this rte on write */
#define REF_FILTERED 2 /* Route is rejected by import filter */
#define REF_STALE 4 /* Route is stale in a refresh cycle */
#define REF_DISCARD 8 /* Route is scheduled for discard */
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
......@@ -257,6 +263,8 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
......@@ -268,6 +276,15 @@ void rt_feed_baby_abort(struct proto *p);
int rt_prune_loop(void);
struct rtable_config *rt_new_table(struct symbol *s);
static inline void
rt_mark_for_prune(rtable *tab)
{
if (tab->prune_state == RPS_RUNNING)
fit_get(&tab->fib, &tab->prune_fit);
tab->prune_state = RPS_SCHEDULED;
}
struct rt_show_data {
ip_addr prefix;
unsigned pxlen;
......
......@@ -55,8 +55,10 @@ static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline int rt_prune_table(rtable *tab);
static inline void rt_schedule_gc(rtable *tab);
static inline void rt_schedule_prune(rtable *tab);
static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
......@@ -570,7 +572,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
struct announce_hook *a;
WALK_LIST(a, tab->hooks)
{
ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING);
ASSERT(a->proto->export_state != ES_DOWN);
if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
......@@ -1108,6 +1110,46 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter
return v > 0;
}
void
rt_refresh_begin(rtable *t, struct announce_hook *ah)
{
net *n;
rte *e;
FIB_WALK(&t->fib, fn)
{
n = (net *) fn;
for (e = n->routes; e; e = e->next)
if (e->sender == ah)
e->flags |= REF_STALE;
}
FIB_WALK_END;
}
void
rt_refresh_end(rtable *t, struct announce_hook *ah)
{
int prune = 0;
net *n;
rte *e;
FIB_WALK(&t->fib, fn)
{
n = (net *) fn;
for (e = n->routes; e; e = e->next)
if ((e->sender == ah) && (e->flags & REF_STALE))
{
e->flags |= REF_DISCARD;
prune = 1;
}
}
FIB_WALK_END;
if (prune)
rt_schedule_prune(t);
}
/**
* rte_dump - dump a route
* @e: &rte to be dumped
......@@ -1169,6 +1211,13 @@ rt_dump_all(void)
rt_dump(t);
}
static inline void
rt_schedule_prune(rtable *tab)
{
rt_mark_for_prune(tab);
ev_schedule(tab->rt_event);
}
static inline void
rt_schedule_gc(rtable *tab)
{
......@@ -1199,6 +1248,7 @@ rt_schedule_nhu(rtable *tab)
tab->nhu_state |= 1;
}
static void
rt_prune_nets(rtable *tab)
{
......@@ -1242,6 +1292,14 @@ rt_event(void *ptr)
if (tab->nhu_state)
rt_next_hop_update(tab);
if (tab->prune_state)
if (!rt_prune_table(tab))
{
/* Table prune unfinished */
ev_schedule(tab->rt_event);
return;
}
if (tab->gc_scheduled)
{
rt_prune_nets(tab);
......@@ -1283,8 +1341,8 @@ rt_init(void)
}
static inline int
rt_prune_step(rtable *tab, int step, int *max_feed)
static int
rt_prune_step(rtable *tab, int step, int *limit)
{
static struct rate_limit rl_flush;
struct fib_iterator *fit = &tab->prune_fit;
......@@ -1294,13 +1352,13 @@ rt_prune_step(rtable *tab, int step, int *max_feed)
fib_check(&tab->fib);
#endif
if (tab->prune_state == 0)
if (tab->prune_state == RPS_NONE)
return 1;
if (tab->prune_state == 1)
if (tab->prune_state == RPS_SCHEDULED)
{
FIB_ITERATE_INIT(fit, &tab->fib);
tab->prune_state = 2;
tab->prune_state = RPS_RUNNING;
}
again:
......@@ -1312,9 +1370,10 @@ again:
rescan:
for (e=n->routes; e; e=e->next)
if (e->sender->proto->flushing ||
(e->flags & REF_DISCARD) ||
(step && e->attrs->src->proto->flushing))
{
if (*max_feed <= 0)
if (*limit <= 0)
{
FIB_ITERATE_PUT(fit, fn);
return 0;
......@@ -1325,7 +1384,7 @@ again:
n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
rte_discard(tab, e);
(*max_feed)--;
(*limit)--;
goto rescan;
}
......@@ -1342,10 +1401,17 @@ again:
fib_check(&tab->fib);
#endif
tab->prune_state = 0;
tab->prune_state = RPS_NONE;
return 1;
}
static inline int
rt_prune_table(rtable *tab)
{
int limit = 512;
return rt_prune_step(tab, 0, &limit);
}
/**
* rt_prune_loop - prune routing tables
*
......@@ -1364,19 +1430,19 @@ int
rt_prune_loop(void)
{
static int step = 0;
int max_feed = 512;
int limit = 512;
rtable *t;
again:
WALK_LIST(t, routing_tables)
if (! rt_prune_step(t, step, &max_feed))
if (! rt_prune_step(t, step, &limit))
return 0;
if (step == 0)
{
/* Prepare for the second step */
WALK_LIST(t, routing_tables)
t->prune_state = 1;
t->prune_state = RPS_SCHEDULED;
step = 1;
goto again;
......@@ -1721,7 +1787,7 @@ again:
(p->accept_ra_types == RA_ACCEPTED))
if (rte_is_valid(e))
{
if (p->core_state != FS_FEEDING)
if (p->export_state != ES_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
do_feed_baby(p, p->accept_ra_types, h, n, e);
max_feed--;
......@@ -1730,7 +1796,7 @@ again:
if (p->accept_ra_types == RA_ANY)
for(e = n->routes; rte_is_valid(e); e = e->next)
{
if (p->core_state != FS_FEEDING)
if (p->export_state != ES_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
do_feed_baby(p, RA_ANY, h, n, e);
max_feed--;
......@@ -2223,9 +2289,7 @@ rt_show_cont(struct cli *c)
cli_printf(c, 8004, "Stopped due to reconfiguration");
goto done;
}
if (d->export_protocol &&
d->export_protocol->core_state != FS_HAPPY &&
d->export_protocol->core_state != FS_FEEDING)
if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
{
cli_printf(c, 8005, "Protocol is down");
goto done;
......
......@@ -319,6 +319,7 @@ bgp_decision(void *vp)
DBG("BGP: Decision start\n");
if ((p->p.proto_state == PS_START)
&& (p->outgoing_conn.state == BS_IDLE)
&& (p->incoming_conn.state != BS_OPENCONFIRM)
&& (!p->cf->passive))
bgp_active(p);
......@@ -363,7 +364,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
/* For multi-hop BGP sessions */
if (ipa_zero(p->source_addr))
p->source_addr = conn->sk->saddr;
p->source_addr = conn->sk->saddr;
p->conn = conn;
p->last_error_class = 0;
......@@ -371,6 +372,20 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
bgp_init_bucket_table(p);
bgp_init_prefix_table(p, 8);
int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
if (p->p.gr_recovery && !peer_gr_ready)
proto_graceful_restart_unlock(&p->p);
if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
p->p.gr_wait = 1;
if (p->gr_active)
tm_stop(p->gr_timer);
if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
bgp_graceful_restart_done(p);
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
}
......@@ -416,16 +431,56 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
bgp_conn_leave_established_state(p);
}
void
bgp_handle_graceful_restart(struct bgp_proto *p)
{
ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
p->gr_active ? " - already pending" : "");
proto_notify_state(&p->p, PS_START);
if (p->gr_active)
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
p->gr_active = 1;
bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
}
void
bgp_graceful_restart_done(struct bgp_proto *p)
{
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
p->gr_active = 0;
tm_stop(p->gr_timer);
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
}
static void
bgp_graceful_restart_timeout(timer *t)
{
struct bgp_proto *p = t->data;
BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
bgp_stop(p, 0);
}
static void
bgp_send_open(struct bgp_conn *conn)
{
conn->start_state = conn->bgp->start_state;
// Default values, possibly changed by receiving capabilities.
conn->advertised_as = 0;
conn->peer_refresh_support = 0;
conn->peer_as4_support = 0;
conn->peer_add_path = 0;
conn->advertised_as = 0;
conn->peer_gr_aware = 0;
conn->peer_gr_able = 0;
conn->peer_gr_time = 0;
conn->peer_gr_flags = 0;
conn->peer_gr_aflags = 0;
DBG("BGP: Sending open\n");
conn->sk->rx_hook = bgp_rx;
......@@ -484,6 +539,9 @@ bgp_sock_err(sock *sk, int err)
else
BGP_TRACE(D_EVENTS, "Connection closed");
if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
bgp_handle_graceful_restart(p);
bgp_conn_enter_idle_state(conn);
}
......@@ -649,6 +707,14 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED)
int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
{
bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
bgp_handle_graceful_restart(p);
bgp_conn_enter_idle_state(p->conn);
acc = 1;
}
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
sk->dport, acc ? "accepted" : "rejected");
......@@ -817,6 +883,17 @@ bgp_reload_routes(struct proto *P)
return 1;
}
static void
bgp_feed_done(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
if (!p->conn || !p->cf->gr_mode)
return;
p->send_end_mark = 1;
bgp_schedule_packet(p->conn, PKT_UPDATE);
}
static void
bgp_start_locked(struct object_lock *lock)
{
......@@ -867,6 +944,8 @@ bgp_start(struct proto *P)
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
p->bfd_req = NULL;
p->gr_ready = 0;
p->gr_active = 0;
rt_lock_table(p->igp_table);
......@@ -878,6 +957,10 @@ bgp_start(struct proto *P)
p->startup_timer->hook = bgp_startup_timeout;
p->startup_timer->data = p;
p->gr_timer = tm_new(p->p.pool);
p->gr_timer->hook = bgp_graceful_restart_timeout;
p->gr_timer->data = p;
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
......@@ -885,6 +968,9 @@ bgp_start(struct proto *P)
p->remote_id = 0;
p->source_addr = p->cf->source_addr;
if (P->gr_recovery)
proto_graceful_restart_lock(P);
/*
* Before attempting to create the connection, we need to lock the
* port, so that are sure we're the only instance attempting to talk
......@@ -985,6 +1071,7 @@ bgp_init(struct proto_config *C)
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_done = bgp_feed_done;
P->rte_better = bgp_rte_better;
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
......@@ -1164,7 +1251,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down", "Graceful restart"};
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
static const char *
......@@ -1225,25 +1312,32 @@ bgp_show_proto_info(struct proto *P)
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
if (p->gr_active)
cli_msg(-1006, " Neighbor graceful restart active");
if (P->proto_state == PS_START)
{
struct bgp_conn *oc = &p->outgoing_conn;
if ((p->start_state < BSS_CONNECT) &&
(p->startup_timer->expires))
cli_msg(-1006, " Error wait: %d/%d",
cli_msg(-1006, " Error wait: %d/%d",
p->startup_timer->expires - now, p->startup_delay);
if ((oc->state == BS_ACTIVE) &&
(oc->connect_retry_timer->expires))
cli_msg(-1006, " Start delay: %d/%d",
cli_msg(-1006, " Start delay: %d/%d",
oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
if (p->gr_active && p->gr_timer->expires)
cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
}
else if (P->proto_state == PS_UP)
{
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
cli_msg(-1006, " Neighbor caps: %s%s%s%s",
cli_msg(-1006, " Neighbor caps: %s%s%s%s%s",
c->peer_refresh_support ? " refresh" : "",
c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
c->peer_as4_support ? " AS4" : "",
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
(c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
......
......@@ -48,6 +48,8 @@ struct bgp_config {
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
int add_path; /* Use ADD-PATH extension [draft] */
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
unsigned gr_time; /* Graceful restart timeout */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
......@@ -73,6 +75,15 @@ struct bgp_config {
#define ADD_PATH_TX 2
#define ADD_PATH_FULL 3
#define BGP_GR_ABLE 1
#define BGP_GR_AWARE 2
/* For peer_gr_flags */
#define BGP_GRF_RESTART 0x80
/* For peer_gr_aflags */
#define BGP_GRF_FORWARDING 0x80
struct bgp_conn {
struct bgp_proto *bgp;
......@@ -90,6 +101,11 @@ struct bgp_conn {
u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
u8 peer_gr_aware;
u8 peer_gr_able;
u16 peer_gr_time;
u8 peer_gr_flags;
u8 peer_gr_aflags;
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
......@@ -107,6 +123,8 @@ struct bgp_proto {
u32 rr_cluster_id; /* Route reflector cluster ID */
int rr_client; /* Whether neighbor is RR client of me */
int rs_client; /* Whether neighbor is RS client of me */
u8 gr_ready; /* Neighbor could do graceful restart */
u8 gr_active; /* Neighbor is doing graceful restart */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
......@@ -117,12 +135,14 @@ struct bgp_proto {
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
unsigned int hash_size, hash_count, hash_limit;
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
list bucket_queue; /* Queue of buckets to send */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
......@@ -172,6 +192,8 @@ void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
void bgp_conn_enter_established_state(struct bgp_conn *conn);
void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_handle_graceful_restart(struct bgp_proto *p);
void bgp_graceful_restart_done(struct bgp_proto *p);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);
......@@ -313,6 +335,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET 4
#define BEM_BFD_DOWN 5
#define BEM_GRACEFUL_RESTART 6
/* Automatic shutdown error codes */
......
......@@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX)
SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE)
CF_GRAMMAR
......@@ -50,6 +50,8 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->advertise_ipv4 = 1;
BGP_CFG->interpret_communities = 1;
BGP_CFG->default_local_pref = 100;
BGP_CFG->gr_mode = BGP_GR_AWARE;
BGP_CFG->gr_time = 120;
}
;
......@@ -115,6 +117,9 @@ bgp_proto:
| bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
| bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
......
......@@ -122,7 +122,7 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
#ifdef IPV6
static byte *
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
*buf++ = 4; /* Capability data length */
......@@ -136,7 +136,7 @@ bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
#else
static byte *
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
*buf++ = 4; /* Capability data length */
......@@ -149,7 +149,7 @@ bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
#endif
static byte *
bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 2; /* Capability 2: Support for route refresh */