bgp.h 14.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/*
 *	BIRD -- The Border Gateway Protocol
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

#ifndef _BIRD_BGP_H_
#define _BIRD_BGP_H_

12
#include <stdint.h>
Martin Mareš's avatar
Martin Mareš committed
13
#include "nest/route.h"
Ondřej Zajíček's avatar
Ondřej Zajíček committed
14
#include "nest/bfd.h"
Ondřej Zajíček's avatar
Ondřej Zajíček committed
15
#include "lib/hash.h"
Martin Mareš's avatar
Martin Mareš committed
16

17
struct linpool;
18
struct eattr;
19

20 21
struct bgp_config {
  struct proto_config c;
22
  u32 local_as, remote_as;
23
  ip_addr remote_ip;
24 25
  ip_addr source_addr;			/* Source address to use */
  struct iface *iface;			/* Interface for link-local addresses */
26
  u16 remote_port; 			/* Neighbor destination port */
27
  int multihop;				/* Number of hops if multihop */
28
  int ttl_security;			/* Enable TTL security [RFC5082] */
29
  int next_hop_self;			/* Always set next hop to local IP address */
30
  int next_hop_keep;			/* Do not touch next hop attribute */
31
  int missing_lladdr;			/* What we will do when we don' know link-local addr, see MLL_* */
32
  int gw_mode;				/* How we compute route gateway from next_hop attr, see GW_* */
33
  int compare_path_lengths;		/* Use path lengths when selecting best route */
34
  int med_metric;			/* Compare MULTI_EXIT_DISC even between routes from differen ASes */
35
  int igp_metric;			/* Use IGP metrics when selecting best route */
36
  int prefer_older;			/* Prefer older routes according to RFC 5004 */
37
  int deterministic_med;		/* Use more complicated algo to have strict RFC 4271 MED comparison */
38 39
  u32 default_local_pref;		/* Default value for LOCAL_PREF attribute */
  u32 default_med;			/* Default value for MULTI_EXIT_DISC attribute */
40
  int capabilities;			/* Enable capability handshake [RFC3392] */
41
  int enable_refresh;			/* Enable local support for route refresh [RFC2918] */
42
  int enable_as4;			/* Enable local support for 4B AS numbers [RFC4893] */
43 44
  u32 rr_cluster_id;			/* Route reflector cluster ID, if different from local ID */
  int rr_client;			/* Whether neighbor is RR client of me */
45
  int rs_client;			/* Whether neighbor is RS client of me */
46
  int advertise_ipv4;			/* Whether we should add IPv4 capability advertisement to OPEN message */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
47
  int passive;				/* Do not initiate outgoing connection */
48
  int interpret_communities;		/* Hardwired handling of well-known communities */
49
  int secondary;			/* Accept also non-best routes (i.e. RA_ACCEPTED) */
50
  int add_path;				/* Use ADD-PATH extension [draft] */
51
  int allow_local_as;			/* Allow that number of local ASNs in incoming AS_PATHs */
52 53
  int gr_mode;				/* Graceful restart mode (BGP_GR_*) */
  unsigned gr_time;			/* Graceful restart timeout */
54 55
  unsigned connect_delay_time;		/* Minimum delay between connect attempts */
  unsigned connect_retry_time;		/* Timeout for connect attempts */
56 57
  unsigned hold_time, initial_hold_time;
  unsigned keepalive_time;
58 59 60 61
  unsigned error_amnesia_time;		/* Errors are forgotten after */
  unsigned error_delay_time_min;	/* Time to wait after an error is detected */
  unsigned error_delay_time_max;
  unsigned disable_after_error;		/* Disable the protocol when error is detected */
62

63
  char *password;			/* Password used for MD5 authentication */
64
  struct rtable_config *igp_table;	/* Table used for recursive next hop lookups */
65
  int check_link;			/* Use iface link state for liveness detection */
66
  int bfd;				/* Use BFD for liveness detection */
67 68
};

69 70 71 72
/* Values for bgp_config.missing_lladdr */
#define MLL_SELF 1
#define MLL_DROP 2
#define MLL_IGNORE 3

/* Values for bgp_config.gw_mode */
#define GW_DIRECT 1
#define GW_RECURSIVE 2

/*
 * ADD-PATH directions; ADD_PATH_FULL equals ADD_PATH_RX | ADD_PATH_TX.
 * NOTE(review): presumably the values of bgp_config.add_path -- confirm.
 */
#define ADD_PATH_RX 1
#define ADD_PATH_TX 2
#define ADD_PATH_FULL 3

/* Values for bgp_config.gr_mode */
#define BGP_GR_ABLE 1
#define BGP_GR_AWARE 2

/* For peer_gr_flags */
#define BGP_GRF_RESTART 0x80

/* For peer_gr_aflags */
#define BGP_GRF_FORWARDING 0x80

89

90 91 92
struct bgp_conn {
  struct bgp_proto *bgp;
  struct birdsock *sk;
93
  unsigned int state;			/* State of connection state machine */
94 95 96
  struct timer *connect_retry_timer;
  struct timer *hold_timer;
  struct timer *keepalive_timer;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
97
  struct event *tx_ev;
98
  int packets_to_send;			/* Bitmap of packet types to be sent */
99 100
  int notify_code, notify_subcode, notify_size;
  byte *notify_data;
101
  u32 advertised_as;			/* Temporary value for AS number received */
102
  int start_state;			/* protocol start_state snapshot when connection established */
103 104 105
  u8 peer_refresh_support;		/* Peer supports route refresh [RFC2918] */
  u8 peer_as4_support;			/* Peer supports 4B AS numbers [RFC4893] */
  u8 peer_add_path;			/* Peer supports ADD-PATH [draft] */
106 107 108 109 110
  u8 peer_gr_aware;
  u8 peer_gr_able;
  u16 peer_gr_time;
  u8 peer_gr_flags;
  u8 peer_gr_aflags;
111
  unsigned hold_time, keepalive_time;	/* Times calculated from my and neighbor's requirements */
112 113 114 115
};

struct bgp_proto {
  struct proto p;
116
  struct bgp_config *cf;		/* Shortcut to BGP configuration */
117
  u32 local_as, remote_as;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
118
  int start_state;			/* Substates that partitions BS_START */
119 120 121 122
  u8 is_internal;			/* Internal BGP connection (local_as == remote_as) */
  u8 as4_session;			/* Session uses 4B AS numbers in AS_PATH (both sides support it) */
  u8 add_path_rx;			/* Session expects receive of ADD-PATH extended NLRI */
  u8 add_path_tx;			/* Session expects transmit of ADD-PATH extended NLRI */
123 124
  u32 local_id;				/* BGP identifier of this router */
  u32 remote_id;			/* BGP identifier of the neighbor */
125 126
  u32 rr_cluster_id;			/* Route reflector cluster ID */
  int rr_client;			/* Whether neighbor is RR client of me */
127
  int rs_client;			/* Whether neighbor is RS client of me */
128 129
  u8 gr_ready;				/* Neighbor could do graceful restart */
  u8 gr_active;				/* Neighbor is doing graceful restart */
130 131
  struct bgp_conn *conn;		/* Connection we have established */
  struct bgp_conn outgoing_conn;	/* Outgoing connection we're working with */
132 133
  struct bgp_conn incoming_conn;	/* Incoming connection we have neither accepted nor rejected yet */
  struct object_lock *lock;		/* Lock for neighbor connection */
134
  struct neighbor *neigh;		/* Neighbor entry corresponding to remote ip, NULL if multihop */
135
  struct bfd_request *bfd_req;		/* BFD request, if BFD is used */
136
  ip_addr source_addr;			/* Local address used as an advertised next hop */
137
  rtable *igp_table;			/* Table used for recursive next hop lookups */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
138
  struct event *event;			/* Event for respawning and shutting process */
139
  struct timer *startup_timer;		/* Timer used to delay protocol startup due to previous errors (startup_delay) */
140
  struct timer *gr_timer;		/* Timer waiting for reestablishment after graceful restart */
Martin Mareš's avatar
Martin Mareš committed
141
  struct bgp_bucket **bucket_hash;	/* Hash table of attribute buckets */
142
  unsigned int hash_size, hash_count, hash_limit;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
143
  HASH(struct bgp_prefix) prefix_hash;	/* Prefixes to be sent */
144
  slab *prefix_slab;			/* Slab holding prefix nodes */
Martin Mareš's avatar
Martin Mareš committed
145 146
  list bucket_queue;			/* Queue of buckets to send */
  struct bgp_bucket *withdraw_bucket;	/* Withdrawn routes */
147
  unsigned send_end_mark;		/* End-of-RIB mark scheduled for transmit */
148
  unsigned startup_delay;		/* Time to delay protocol startup by due to errors */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
149 150 151 152
  bird_clock_t last_proto_error;	/* Time of last error that leads to protocol stop */
  u8 last_error_class; 			/* Error class of last error */
  u32 last_error_code;			/* Error code of last error. BGP protocol errors
					   are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
Martin Mareš's avatar
Martin Mareš committed
153 154 155
#ifdef IPV6
  byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
  unsigned mp_reach_len, mp_unreach_len;
156
  ip_addr local_link;			/* Link-level version of source_addr */
Martin Mareš's avatar
Martin Mareš committed
157
#endif
Martin Mareš's avatar
Martin Mareš committed
158 159 160
};

struct bgp_prefix {
161 162 163 164 165 166
  struct {
    ip_addr prefix;
    int pxlen;
  } n;
  u32 path_id;
  struct bgp_prefix *next;
Martin Mareš's avatar
Martin Mareš committed
167 168 169 170 171
  node bucket_node;			/* Node in per-bucket list */
};

struct bgp_bucket {
  node send_node;			/* Node in send queue */
172
  struct bgp_bucket *hash_next, *hash_prev;	/* Node in bucket hash table */
Martin Mareš's avatar
Martin Mareš committed
173 174 175
  unsigned hash;			/* Hash over extended attributes */
  list prefixes;			/* Prefixes in this buckets */
  ea_list eattrs[0];			/* Per-bucket extended attributes */
176 177
};

178 179 180 181 182 183
/* Fixed protocol numbers and buffer sizes; the TX buffer size is defined as
   the maximum packet length. */
#define BGP_PORT		179
#define BGP_VERSION		4
#define BGP_HEADER_LENGTH	19
#define BGP_MAX_PACKET_LENGTH	4096
#define BGP_RX_BUFFER_SIZE	4096
#define BGP_TX_BUFFER_SIZE	BGP_MAX_PACKET_LENGTH
184

185 186
extern struct linpool *bgp_linpool;

187

188
void bgp_start_timer(struct timer *t, int value);
189
void bgp_check_config(struct bgp_config *c);
190
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
191
void bgp_close_conn(struct bgp_conn *c);
192
void bgp_update_startup_delay(struct bgp_proto *p);
193
void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
194 195 196
void bgp_conn_enter_established_state(struct bgp_conn *conn);
void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
197 198
void bgp_handle_graceful_restart(struct bgp_proto *p);
void bgp_graceful_restart_done(struct bgp_proto *p);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
199
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
200 201
void bgp_stop(struct bgp_proto *p, unsigned subcode);

202 203 204
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);

Ondřej Zajíček's avatar
Ondřej Zajíček committed
205

206

Martin Mareš's avatar
Martin Mareš committed
207 208 209 210 211 212 213 214
/*
 * Trace logging helpers.
 *
 * Both macros expect a pointer named `p` to be in scope at the expansion
 * site, with p->p.debug and p->p.name accessible (as in struct bgp_proto).
 * A message is emitted when any bit of `flags` is set in p->p.debug, or
 * unconditionally when LOCAL_DEBUG is defined. BGP_TRACE_RL additionally
 * passes `rl` through to log_rl().
 *
 * `flags` is parenthesized in the expansion so that compound arguments such
 * as (A | B) are masked as a whole; `msg` must be a string literal because
 * it is concatenated with the "%s: " prefix.
 */
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
#else
#define BGP_FORCE_DEBUG 0
#endif

#define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)

#define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)


219 220
/* attrs.c */

221 222 223 224 225 226 227 228 229 230 231
/*
 * Hack: BA_NEXT_HOP has attribute type EAF_TYPE_IP_ADDRESS, but in IPv6
 * builds its payload holds two addresses -- a global address and a
 * link-local address.
 *
 * NEXT_HOP_LENGTH is the payload size for the current build.
 * set_next_hop() stores addr into the first address slot of the payload at b;
 * in IPv6 builds it also sets the second slot to IPA_NONE.
 */
#ifdef IPV6
#define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
static inline void
set_next_hop(byte *b, ip_addr addr)
{
  ip_addr *slot = (ip_addr *) b;

  slot[0] = addr;
  slot[1] = IPA_NONE;
}
#else
#define NEXT_HOP_LENGTH sizeof(ip_addr)
static inline void
set_next_hop(byte *b, ip_addr addr)
{
  ip_addr *slot = (ip_addr *) b;

  slot[0] = addr;
}
#endif

232 233
void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
234
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory);
235
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
Martin Mareš's avatar
Martin Mareš committed
236
int bgp_rte_better(struct rte *, struct rte *);
237
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
238
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
Martin Mareš's avatar
Martin Mareš committed
239
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
240
void bgp_init_bucket_table(struct bgp_proto *);
241
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
242 243 244
void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
245
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
246

247 248 249
/*
 * Attach attribute attr to the list *to with an ip_addr-sized payload
 * obtained from bgp_attach_attr_wa(), and store the address a in it.
 */
static inline void
bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
{
  ip_addr *slot = (ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr));

  *slot = a;
}

250 251
/* packets.c */

252
void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
253
void bgp_schedule_packet(struct bgp_conn *conn, int type);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
254
void bgp_kick_tx(void *vconn);
255 256
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, int size);
257
const char * bgp_error_dsc(unsigned code, unsigned subcode);
258
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
259

260 261 262 263 264 265
/* Packet types */

#define PKT_OPEN		0x01
#define PKT_UPDATE		0x02
#define PKT_NOTIFICATION	0x03
#define PKT_KEEPALIVE		0x04
#define PKT_ROUTE_REFRESH	0x05
#define PKT_SCHEDULE_CLOSE	0x1f	/* Used internally to schedule socket close */
268 269 270 271 272 273 274 275 276 277 278 279

/* Attributes */

/* Attribute flag bits */
#define BAF_OPTIONAL		0x80
#define BAF_TRANSITIVE		0x40
#define BAF_PARTIAL		0x20
#define BAF_EXT_LEN		0x10

/*
 * Attribute type codes.
 * NOTE(review): the two-letter tags presumably give the attribute category
 * (WM = well-known mandatory, WD = well-known discretionary, OT = optional
 * transitive, ON = optional non-transitive) -- confirm.
 */
#define BA_ORIGIN		0x01	/* [RFC1771] */		/* WM */
#define BA_AS_PATH		0x02				/* WM */
#define BA_NEXT_HOP		0x03				/* WM */
#define BA_MULTI_EXIT_DISC	0x04				/* ON */
#define BA_LOCAL_PREF		0x05				/* WD */
#define BA_ATOMIC_AGGR		0x06				/* WD */
#define BA_AGGREGATOR		0x07				/* OT */
#define BA_COMMUNITY		0x08	/* [RFC1997] */		/* OT */
#define BA_ORIGINATOR_ID	0x09	/* [RFC1966] */		/* ON */
#define BA_CLUSTER_LIST		0x0a				/* ON */
/* We don't support these: */
#define BA_DPA			0x0b	/* ??? */
#define BA_ADVERTISER		0x0c	/* [RFC1863] */
#define BA_RCID_PATH		0x0d
#define BA_MP_REACH_NLRI	0x0e	/* [RFC2283] */
#define BA_MP_UNREACH_NLRI	0x0f
#define BA_EXT_COMMUNITY	0x10	/* [RFC4360] */
#define BA_AS4_PATH		0x11	/* [RFC4893] */
#define BA_AS4_AGGREGATOR	0x12
295

Ondřej Zajíček's avatar
Ondřej Zajíček committed
296
/* BGP connection states (values of bgp_conn.state) */

#define BS_IDLE			0
#define BS_CONNECT		1	/* Attempting to connect */
#define BS_ACTIVE		2	/* Waiting for connection retry & listening */
#define BS_OPENSENT		3
#define BS_OPENCONFIRM		4
#define BS_ESTABLISHED		5
#define BS_CLOSE		6	/* Used during transition to BS_IDLE */

#define BS_MAX			7	/* One past the highest state value */

Ondřej Zajíček's avatar
Ondřej Zajíček committed
308 309 310 311 312 313 314 315 316 317 318
/* BGP start states
 *
 * Used in PS_START for fine-grained specification of the starting state.
 *
 * When the BGP protocol is started by the core, it goes to BSS_PREPARE. When
 * the BGP protocol has done what is necessary to start itself (like acquiring
 * the lock), it goes to BSS_CONNECT. When some connection attempt fails
 * because of an option or capability error, it goes to BSS_CONNECT_NOCAP.
 */

#define BSS_PREPARE		0	/* Used before ordinary BGP started, i. e. waiting for lock */
#define BSS_DELAY		1	/* Startup delay due to previous errors */
#define BSS_CONNECT		2	/* Ordinary BGP connecting */
#define BSS_CONNECT_NOCAP	3	/* Legacy BGP connecting (without capabilities) */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336

/* Error classes */

#define BE_NONE			0
#define BE_MISC			1	/* Miscellaneous error */
#define BE_SOCKET		2	/* Socket error */
#define BE_BGP_RX		3	/* BGP protocol error notification received */
#define BE_BGP_TX		4	/* BGP protocol error notification sent */
#define BE_AUTO_DOWN		5	/* Automatic shutdown */
#define BE_MAN_DOWN		6	/* Manual shutdown */

/* Misc error codes */

#define BEM_NEIGHBOR_LOST	1
#define BEM_INVALID_NEXT_HOP	2
#define BEM_INVALID_MD5		3	/* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET		4
#define BEM_LINK_DOWN		5
#define BEM_BFD_DOWN		6
#define BEM_GRACEFUL_RESTART	7
Ondřej Zajíček's avatar
Ondřej Zajíček committed
342

343 344 345
/* Automatic shutdown error codes
   NOTE(review): presumably used together with error class BE_AUTO_DOWN -- confirm. */

#define BEA_ROUTE_LIMIT_EXCEEDED 1
346

347 348 349 350 351 352
/* Well-known communities
   NOTE(review): values appear to be those assigned by RFC 1997 -- confirm. */

#define BGP_COMM_NO_EXPORT		0xffffff01	/* Don't export outside local AS / confed. */
#define BGP_COMM_NO_ADVERTISE		0xffffff02	/* Don't export at all */
#define BGP_COMM_NO_EXPORT_SUBCONFED	0xffffff03	/* NO_EXPORT even in local confederation */

353 354 355 356 357 358
/* Origins
   NOTE(review): presumably the values carried in the BA_ORIGIN attribute -- confirm. */

#define ORIGIN_IGP		0
#define ORIGIN_EGP		1
#define ORIGIN_INCOMPLETE	2

Martin Mareš's avatar
Martin Mareš committed
359 360
/* Address families */

#define BGP_AF_IPV4		1
#define BGP_AF_IPV6		2

/* BGP_AF is the address family of the current build: IPv6 when IPV6 is
   defined, IPv4 otherwise. */
#ifdef IPV6
#define BGP_AF BGP_AF_IPV6
#else
#define BGP_AF BGP_AF_IPV4
#endif

370
#endif