bgp.c 25.2 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 *	BIRD -- The Border Gateway Protocol
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/**
 * DOC: Border Gateway Protocol
 *
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
 * connection and most of the interface with BIRD core, |packets.c| handling
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
 * manipulation with BGP attribute lists.
 *
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
 * architecture which is able to keep all the information needed by BGP in the
 * primary routing table, therefore no complex data structures like a central
 * BGP table are needed. This increases memory footprint of a BGP router with
 * many connections, but not too much and, which is more important, it makes
 * BGP much easier to implement.
 *
Martin Mareš's avatar
Martin Mareš committed
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
Martin Mareš's avatar
Martin Mareš committed
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
 * structure to which are attached individual connections represented by &bgp_conn
 * (usually, there exists only one connection, but during BGP session setup, there
 * can be more of them). The connections are handled according to the BGP state machine
 * defined in the RFC with all the timers and all the parameters configurable.
 *
 * In incoming direction, we listen on the connection's socket and each time we receive
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
 * passes complete packets to bgp_rx_packet() which distributes the packet according
 * to its type.
 *
 * In outgoing direction, we gather all the routing updates and sort them to buckets
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
 * of &rta's and a &fib which helps us to find if we already have another route for
 * the same destination queued for sending, so that we can replace it with the new one
 * immediately instead of sending both updates). There also exists a special bucket holding
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
 * attributes. If we have any packet to send (due to either new routes or the connection
Martin Mareš's avatar
Martin Mareš committed
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
Martin Mareš's avatar
Martin Mareš committed
43 44 45 46 47 48 49 50 51 52 53 54 55
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
 * type if we have more data of the same type to send.
 *
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
 * @bgp_attr_table array describing all important characteristics of all known attributes.
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
 */

56
#undef LOCAL_DEBUG
57 58 59 60 61

#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
62
#include "nest/locks.h"
63
#include "conf/conf.h"
64
#include "lib/socket.h"
65
#include "lib/resource.h"
Martin Mareš's avatar
Martin Mareš committed
66
#include "lib/string.h"
67 68 69

#include "bgp.h"

70
struct linpool *bgp_linpool;		/* Global temporary pool */
71 72 73
static sock *bgp_listen_sk;		/* Global listening socket */
static int bgp_counter;			/* Number of protocol instances using the listening socket */

Ondřej Zajíček's avatar
Ondřej Zajíček committed
74
static void bgp_close(struct bgp_proto *p, int apply_md5);
75
static void bgp_connect(struct bgp_proto *p);
76
static void bgp_active(struct bgp_proto *p);
77
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
78

79

Ondřej Zajíček's avatar
Ondřej Zajíček committed
80 81 82 83 84 85 86 87 88 89 90 91 92
/**
 * bgp_open - open a BGP instance
 * @p: BGP instance
 *
 * This function allocates and configures shared BGP resources.
 * Should be called as the last step during initialization
 * (when lock is acquired and neighbor is ready).
 * When error, state changed to PS_DOWN, -1 is returned and caller
 * should return immediately.
 */
static int
bgp_open(struct bgp_proto *p)
{
93
  struct config *cfg = p->cf->c.global;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
94 95 96
  bgp_counter++;

  if (!bgp_listen_sk)
97
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117

  if (!bgp_linpool)
    bgp_linpool = lp_new(&root_pool, 4080);

  if (p->cf->password)
    {
      int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password);
      if (rv < 0)
	{
	  bgp_close(p, 0);
	  p->p.disabled = 1;
	  bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_MD5);
	  proto_notify_state(&p->p, PS_DOWN);
	  return -1;
	}
    }

  return 0;
}

118 119 120 121 122
/*
 * Select the start state according to the configured capabilities setting
 * and, unless the instance is passive, begin the active connection procedure.
 */
static void
bgp_startup(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "Started");

  if (p->cf->capabilities)
    p->start_state = BSS_CONNECT;
  else
    p->start_state = BSS_CONNECT_NOCAP;

  if (p->cf->passive)
    return;

  bgp_active(p);
}

/* Startup timer hook: the timer's data pointer carries the BGP instance to start. */
static void
bgp_startup_timeout(timer *t)
{
  struct bgp_proto *proto = t->data;

  bgp_startup(proto);
}


/* Start the instance immediately, or arm the startup timer when a startup delay is set. */
static void
bgp_initiate(struct bgp_proto *p)
{
  if (!p->startup_delay)
    {
      bgp_startup(p);
      return;
    }

  BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
  bgp_start_timer(p->startup_timer, p->startup_delay);
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
147 148 149 150 151 152 153 154 155
/**
 * bgp_close - close a BGP instance
 * @p: BGP instance
 * @apply_md5: 0 to disable unsetting MD5 auth
 *
 * This function frees and deconfigures shared BGP resources.
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
 * from failed bgp_open().
 */
156
static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
157
bgp_close(struct bgp_proto *p, int apply_md5)
158 159 160
{
  ASSERT(bgp_counter);
  bgp_counter--;
161

Ondřej Zajíček's avatar
Ondřej Zajíček committed
162
  if (p->cf->password && apply_md5)
163 164
    sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL);

165 166 167 168
  if (!bgp_counter)
    {
      rfree(bgp_listen_sk);
      bgp_listen_sk = NULL;
169 170
      rfree(bgp_linpool);
      bgp_linpool = NULL;
171 172 173
    }
}

Martin Mareš's avatar
Martin Mareš committed
174 175 176 177 178 179 180 181 182
/**
 * bgp_start_timer - start a BGP timer
 * @t: timer
 * @value: time to fire (0 to disable the timer)
 *
 * This functions calls tm_start() on @t with time @value and the
 * amount of randomization suggested by the BGP standard. Please use
 * it for all BGP timers.
 */
183
void
184 185
bgp_start_timer(timer *t, int value)
{
186
  if (value)
187 188 189 190 191
    {
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
      t->randomize = value / 4;
      tm_start(t, value - t->randomize);
    }
192 193 194 195
  else
    tm_stop(t);
}

Martin Mareš's avatar
Martin Mareš committed
196 197 198 199 200 201 202
/**
 * bgp_close_conn - close a BGP connection
 * @conn: connection to close
 *
 * This function takes a connection described by the &bgp_conn structure,
 * closes its socket and frees all resources associated with it.
 */
203 204 205 206 207 208 209 210 211 212 213 214 215
void
bgp_close_conn(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;

  DBG("BGP: Closing connection\n");
  conn->packets_to_send = 0;
  rfree(conn->connect_retry_timer);
  conn->connect_retry_timer = NULL;
  rfree(conn->keepalive_timer);
  conn->keepalive_timer = NULL;
  rfree(conn->hold_timer);
  conn->hold_timer = NULL;
216
  rfree(conn->sk);
217
  conn->sk = NULL;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
  rfree(conn->tx_ev);
  conn->tx_ev = NULL;
}


/**
 * bgp_update_startup_delay - update a startup delay
 * @p: BGP instance
 *
 * This function updates a startup delay that is used to postpone next BGP connect.
 * It also handles disable_after_error and might stop BGP instance when error
 * happened and disable_after_error is on.
 *
 * It should be called when BGP protocol error happened.
 */
void
234
bgp_update_startup_delay(struct bgp_proto *p)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
235 236 237
{
  struct bgp_config *cf = p->cf;

238
  DBG("BGP: Updating startup delay\n");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
239

240 241 242
  if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time))
    p->startup_delay = 0;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
243 244 245 246 247 248 249
  p->last_proto_error = now;

  if (cf->disable_after_error)
    {
      p->startup_delay = 0;
      p->p.disabled = 1;
      return;
250
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
251 252 253 254

  if (!p->startup_delay)
    p->startup_delay = cf->error_delay_time_min;
  else
255
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
256 257
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
258
static void
259
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
260
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
261
  switch (conn->state)
262 263
    {
    case BS_IDLE:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
264 265
    case BS_CLOSE:
      return;
266 267
    case BS_CONNECT:
    case BS_ACTIVE:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
268 269
      bgp_conn_enter_idle_state(conn);
      return;
270 271 272
    case BS_OPENSENT:
    case BS_OPENCONFIRM:
    case BS_ESTABLISHED:
273
      bgp_error(conn, 6, subcode, NULL, 0);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
274
      return;
275
    default:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
276
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
277 278 279
    }
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
280 281 282 283 284 285
static void
bgp_down(struct bgp_proto *p)
{
  if (p->start_state > BSS_PREPARE)
    bgp_close(p, 1);

286
  BGP_TRACE(D_EVENTS, "Down");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
287 288 289 290 291 292 293 294 295 296
  proto_notify_state(&p->p, PS_DOWN);
}

static void
bgp_decision(void *vp)
{
  struct bgp_proto *p = vp;

  DBG("BGP: Decision start\n");
  if ((p->p.proto_state == PS_START)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
297 298
      && (p->outgoing_conn.state == BS_IDLE)
      && (!p->cf->passive))
299
    bgp_active(p);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
300 301 302 303 304 305 306

  if ((p->p.proto_state == PS_STOP)
      && (p->outgoing_conn.state == BS_IDLE)
      && (p->incoming_conn.state == BS_IDLE))
    bgp_down(p);
}

307 308
void
bgp_stop(struct bgp_proto *p, unsigned subcode)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
309 310
{
  proto_notify_state(&p->p, PS_STOP);
311 312
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
  ev_schedule(p->event);
}

/*
 * Mark @conn as the established connection of its instance, clear the
 * stored last error, initialize attribute handling and report PS_UP.
 */
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;

  BGP_TRACE(D_EVENTS, "BGP session established");
  DBG("BGP: UP!!!\n");

  p->conn = conn;

  /* A session is up, so the previously stored error is no longer current */
  p->last_error_code = 0;
  p->last_error_class = 0;

  bgp_attr_init(p);
  conn->state = BS_ESTABLISHED;
  proto_notify_state(&p->p, PS_UP);
}

/*
 * Forget the established connection of the instance and, if the instance
 * is still in PS_UP, start stopping it.
 */
static void
bgp_conn_leave_established_state(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "BGP session closed");
  p->conn = NULL;

  if (p->p.proto_state != PS_UP)
    return;

  bgp_stop(p, 0);
}

/*
 * Switch @conn to BS_CLOSE: stop its hold and keepalive timers and detach
 * the receive hook from its socket. If the connection was established,
 * the instance leaves the established state as well.
 */
void
bgp_conn_enter_close_state(struct bgp_conn *conn)
{
  struct bgp_proto *proto = conn->bgp;
  int was_established = (conn->state == BS_ESTABLISHED);

  conn->state = BS_CLOSE;
  tm_stop(conn->hold_timer);
  tm_stop(conn->keepalive_timer);
  conn->sk->rx_hook = NULL;

  if (was_established)
    bgp_conn_leave_established_state(proto);
}

/*
 * Release all resources of @conn, switch it to BS_IDLE and schedule the
 * instance event. If the connection was established, the instance leaves
 * the established state as well.
 */
void
bgp_conn_enter_idle_state(struct bgp_conn *conn)
{
  struct bgp_proto *proto = conn->bgp;
  int was_established = (conn->state == BS_ESTABLISHED);

  bgp_close_conn(conn);
  conn->state = BS_IDLE;
  ev_schedule(proto->event);

  if (was_established)
    bgp_conn_leave_established_state(proto);
}

371 372 373
static void
bgp_send_open(struct bgp_conn *conn)
{
374 375 376 377
  conn->start_state = conn->bgp->start_state;
  conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP);
  conn->peer_as4_support = 0;	// Default value, possibly changed by receiving capability.

378 379
  DBG("BGP: Sending open\n");
  conn->sk->rx_hook = bgp_rx;
380
  conn->sk->tx_hook = bgp_tx;
381
  tm_stop(conn->connect_retry_timer);
382
  bgp_schedule_packet(conn, PKT_OPEN);
383
  conn->state = BS_OPENSENT;
384
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
385 386
}

387 388
/* Socket hook installed by bgp_connect(); continues by sending an Open message. */
static void
bgp_connected(sock *sk)
{
  struct bgp_conn *conn = sk->data;
  struct bgp_proto *p = conn->bgp;	/* presumably consumed by the BGP_TRACE macro */

  BGP_TRACE(D_EVENTS, "Connected");

  bgp_send_open(conn);
}

/*
 * Connect-retry timer hook. While the instance is starting, the connection
 * is torn down and a fresh connect attempt is made; otherwise the
 * connection just becomes idle.
 */
static void
bgp_connect_timeout(timer *t)
{
  struct bgp_conn *conn = t->data;
  struct bgp_proto *p = conn->bgp;

  DBG("BGP: connect_timeout\n");

  if (p->p.proto_state != PS_START)
    {
      bgp_conn_enter_idle_state(conn);
      return;
    }

  bgp_close_conn(conn);
  bgp_connect(p);
}

static void
414
bgp_sock_err(sock *sk, int err)
415 416
{
  struct bgp_conn *conn = sk->data;
Martin Mareš's avatar
Martin Mareš committed
417
  struct bgp_proto *p = conn->bgp;
418

Ondřej Zajíček's avatar
Ondřej Zajíček committed
419 420
  bgp_store_error(p, conn, BE_SOCKET, err);

421 422 423 424
  if (err)
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
  else
    BGP_TRACE(D_EVENTS, "Connection closed");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
425 426

  bgp_conn_enter_idle_state(conn);
427 428
}

429 430 431 432 433
/* Hold timer hook: report error code 4 with subcode 0 on the connection stored in the timer. */
static void
bgp_hold_timeout(timer *t)
{
  DBG("BGP: Hold timeout, closing connection\n");
  bgp_error(t->data, 4, 0, NULL, 0);
}

/* Keepalive timer hook: schedule a Keepalive packet on the connection stored in the timer. */
static void
bgp_keepalive_timeout(timer *t)
{
  DBG("BGP: Keepalive timer\n");
  bgp_schedule_packet(t->data, PKT_KEEPALIVE);
}

447
static void
448
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
449 450 451
{
  timer *t;

452
  conn->sk = NULL;
453
  conn->bgp = p;
454
  conn->packets_to_send = 0;
455 456 457

  t = conn->connect_retry_timer = tm_new(p->p.pool);
  t->hook = bgp_connect_timeout;
458 459
  t->data = conn;
  t = conn->hold_timer = tm_new(p->p.pool);
460
  t->hook = bgp_hold_timeout;
461 462
  t->data = conn;
  t = conn->keepalive_timer = tm_new(p->p.pool);
463
  t->hook = bgp_keepalive_timeout;
464
  t->data = conn;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
465 466 467
  conn->tx_ev = ev_new(p->p.pool);
  conn->tx_ev->hook = bgp_kick_tx;
  conn->tx_ev->data = conn;
468 469
}

470 471 472 473 474 475 476 477
/* Bind socket @s and connection @conn to each other and install the BGP error hook on @s. */
static void
bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s)
{
  conn->sk = s;
  s->data = conn;
  s->err_hook = bgp_sock_err;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
478
static void
479
bgp_active(struct bgp_proto *p)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
480
{
481
  int delay = MAX(1, p->cf->start_delay_time);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
482 483 484 485 486 487 488 489
  struct bgp_conn *conn = &p->outgoing_conn;

  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
  bgp_setup_conn(p, conn);
  conn->state = BS_ACTIVE;
  bgp_start_timer(conn->connect_retry_timer, delay);
}

490 491 492 493 494 495 496
/*
 * Enforce the configured route limit. Returns 0 when no limit is set or the
 * number of imported routes does not exceed it; otherwise the instance is
 * stopped and -1 is returned.
 */
int
bgp_apply_limits(struct bgp_proto *p)
{
  if (!p->cf->route_limit || (p->p.stats.imp_routes <= p->cf->route_limit))
    return 0;

  log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
  bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
  bgp_update_startup_delay(p);
  bgp_stop(p, 1); // Errcode 6, 1 - max number of prefixes reached
  return -1;
}


Martin Mareš's avatar
Martin Mareš committed
506 507 508 509 510 511 512 513
/**
 * bgp_connect - initiate an outgoing connection
 * @p: BGP instance
 *
 * The bgp_connect() function creates a new &bgp_conn and initiates
 * a TCP connection to the peer. The rest of connection setup is governed
 * by the BGP state machine as described in the standard.
 */
514 515 516 517
static void
bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
{
  sock *s;
518
  struct bgp_conn *conn = &p->outgoing_conn;
519 520 521 522

  DBG("BGP: Connecting\n");
  s = sk_new(p->p.pool);
  s->type = SK_TCP_ACTIVE;
523
  s->saddr = p->source_addr;
524 525
  s->daddr = p->cf->remote_ip;
  s->dport = BGP_PORT;
526 527 528 529 530 531
  s->ttl = p->cf->multihop ? : 1;
  s->rbsize = BGP_RX_BUFFER_SIZE;
  s->tbsize = BGP_TX_BUFFER_SIZE;
  s->tos = IP_PREC_INTERNET_CONTROL;
  s->password = p->cf->password;
  s->tx_hook = bgp_connected;
Martin Mareš's avatar
Martin Mareš committed
532
  BGP_TRACE(D_EVENTS, "Connecting to %I from local address %I", s->daddr, s->saddr);
533
  bgp_setup_conn(p, conn);
534 535 536 537
  bgp_setup_sk(p, conn, s);
  conn->state = BS_CONNECT;
  if (sk_open(s))
    {
538
      bgp_sock_err(s, 0);
539 540 541 542 543 544
      return;
    }
  DBG("BGP: Waiting for connect success\n");
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
}

Martin Mareš's avatar
Martin Mareš committed
545 546 547 548 549 550 551 552 553 554 555 556
/**
 * bgp_incoming_connection - handle an incoming connection
 * @sk: TCP socket
 * @dummy: unused
 *
 * This function serves as a socket hook for accepting of new BGP
 * connections. It searches a BGP instance corresponding to the peer
 * which has connected and if such an instance exists, it creates a
 * &bgp_conn structure, attaches it to the instance and either sends
 * an Open message or (if there already is an active connection) it
 * closes the new connection by sending a Notification message.
 */
557
static int
Martin Mareš's avatar
Martin Mareš committed
558
bgp_incoming_connection(sock *sk, int dummy UNUSED)
559
{
560
  struct proto_config *pc;
561

562
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
563 564 565 566 567 568
  WALK_LIST(pc, config->protos)
    if (pc->protocol == &proto_bgp && pc->proto)
      {
	struct bgp_proto *p = (struct bgp_proto *) pc->proto;
	if (ipa_equal(p->cf->remote_ip, sk->daddr))
	  {
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
	    /* We are in proper state and there is no other incoming connection */
	    int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
	      (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);

	    BGP_TRACE(D_EVENTS, "Incoming connection from %I (port %d) %s",
		      sk->daddr, sk->dport, acc ? "accepted" : "rejected");

	    if (!acc)
	      goto err;

	    bgp_setup_conn(p, &p->incoming_conn);
	    bgp_setup_sk(p, &p->incoming_conn, sk);
	    sk_set_ttl(sk, p->cf->multihop ? : 1);
	    bgp_send_open(&p->incoming_conn);
	    return 0;
584 585
	  }
      }
586 587 588

  log(L_WARN "BGP: Unexpected connect from unknown address %I (port %d)", sk->daddr, sk->dport);
 err:
589 590 591 592
  rfree(sk);
  return 0;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
593
static sock *
594
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
595
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
596 597 598
  sock *s = sk_new(&root_pool);
  DBG("BGP: Creating incoming socket\n");
  s->type = SK_TCP_PASSIVE;
599 600 601
  s->saddr = addr;
  s->sport = port ? port : BGP_PORT;
  s->flags = flags;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
602 603 604 605 606
  s->tos = IP_PREC_INTERNET_CONTROL;
  s->rbsize = BGP_RX_BUFFER_SIZE;
  s->tbsize = BGP_TX_BUFFER_SIZE;
  s->rx_hook = bgp_incoming_connection;
  if (sk_open(s))
607
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
608 609 610
      log(L_ERR "Unable to open incoming BGP socket");
      rfree(s);
      return NULL;
611
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
612 613
  else
    return s;
614 615 616 617 618 619
}

/*
 * Called once the neighbor has an interface: derive the local and source
 * addresses (and, with IPv6, the link-local address), open the shared BGP
 * resources and initiate the instance startup.
 */
static void
bgp_start_neighbor(struct bgp_proto *p)
{
  p->local_addr = p->neigh->iface->addr->ip;

  /* A configured source address takes precedence over the interface address */
  if (ipa_nonzero(p->cf->source_addr))
    p->source_addr = p->cf->source_addr;
  else
    p->source_addr = p->local_addr;

  DBG("BGP: local=%I remote=%I\n", p->source_addr, p->next_hop);

#ifdef IPV6
  {
    struct ifa *a;

    /* Use the first link-scope address of the neighbor's interface */
    p->local_link = IPA_NONE;
    WALK_LIST(a, p->neigh->iface->addrs)
      {
	if (a->scope != SCOPE_LINK)
	  continue;

	p->local_link = a->ip;
	break;
      }

    if (! ipa_nonzero(p->local_link))
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);

    DBG("BGP: Selected link-level address %I\n", p->local_link);
  }
#endif

  /* bgp_open() has already brought the instance down on failure */
  if (bgp_open(p) < 0)
    return;

  bgp_initiate(p);
}

/*
 * Neighbor notification hook. A neighbor gaining an interface starts an
 * instance that is still preparing; a neighbor losing its interface stops
 * an instance that is starting or up.
 */
static void
bgp_neigh_notify(neighbor *n)
{
  struct bgp_proto *p = (struct bgp_proto *) n->proto;

  if (n->iface)
    {
      if ((p->p.proto_state != PS_START) || (p->start_state != BSS_PREPARE))
	return;

      BGP_TRACE(D_EVENTS, "Neighbor found");
      bgp_start_neighbor(p);
      return;
    }

  if ((p->p.proto_state != PS_START) && (p->p.proto_state != PS_UP))
    return;

  BGP_TRACE(D_EVENTS, "Neighbor lost");
  bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
  bgp_stop(p, 0);
}

static void
bgp_start_locked(struct object_lock *lock)
{
  struct bgp_proto *p = lock->data;
  struct bgp_config *cf = p->cf;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
678 679 680 681 682 683
  if (p->p.proto_state != PS_START)
    {
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
    return;
    }

684
  DBG("BGP: Got lock\n");
685
  p->local_id = cf->c.global->router_id;
686 687
  p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
  p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
688 689 690 691 692 693 694

  if (cf->rr_client)
    {
      p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id;
      p->rr_client = cf->rr_client;
    }

695 696
  p->rs_client = cf->rs_client;

697 698 699
  if (!p->neigh)
    {
      log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
700
      /* As we do not start yet, we can just disable protocol */
701
      p->p.disabled = 1;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
702
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
703
      proto_notify_state(&p->p, PS_DOWN);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
704
      return;
705
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
706 707
  
  if (p->neigh->iface)
708 709
    bgp_start_neighbor(p);
  else
Martin Mareš's avatar
Martin Mareš committed
710
    BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop);
711 712
}

713 714 715
static int
bgp_start(struct proto *P)
{
716 717 718
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct object_lock *lock;

719
  DBG("BGP: Startup.\n");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
720
  p->start_state = BSS_PREPARE;
721 722
  p->outgoing_conn.state = BS_IDLE;
  p->incoming_conn.state = BS_IDLE;
723
  p->neigh = NULL;
724

Ondřej Zajíček's avatar
Ondřej Zajíček committed
725 726 727
  p->event = ev_new(p->p.pool);
  p->event->hook = bgp_decision;
  p->event->data = p;
728

729 730 731 732
  p->startup_timer = tm_new(p->p.pool);
  p->startup_timer->hook = bgp_startup_timeout;
  p->startup_timer->data = p;

733 734 735 736 737 738 739 740 741 742 743 744 745 746
  /*
   *  Before attempting to create the connection, we need to lock the
   *  port, so that are sure we're the only instance attempting to talk
   *  with that neighbor.
   */

  lock = p->lock = olock_new(P->pool);
  lock->addr = p->cf->remote_ip;
  lock->type = OBJLOCK_TCP;
  lock->port = BGP_PORT;
  lock->iface = NULL;
  lock->hook = bgp_start_locked;
  lock->data = p;
  olock_acquire(lock);
747

748
  return PS_START;
749 750 751 752 753
}

static int
bgp_shutdown(struct proto *P)
{
754
  struct bgp_proto *p = (struct bgp_proto *) P;
755
  unsigned subcode;
756

Martin Mareš's avatar
Martin Mareš committed
757
  BGP_TRACE(D_EVENTS, "Shutdown requested");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
758
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
759 760 761 762 763 764 765 766 767 768 769

  if (P->reconfiguring)
    {
      if (P->cf_new)
	subcode = 6; // Errcode 6, 6 - other configuration change
      else
	subcode = 3; // Errcode 6, 3 - peer de-configured
    }
  else
    subcode = 2; // Errcode 6, 2 - administrative shutdown

Ondřej Zajíček's avatar
Ondřej Zajíček committed
770
  p->startup_delay = 0;
771
  bgp_stop(p, subcode);
772

Ondřej Zajíček's avatar
Ondřej Zajíček committed
773
  return p->p.proto_state;
774 775
}

776 777 778 779 780 781 782
static struct proto *
bgp_init(struct proto_config *C)
{
  struct bgp_config *c = (struct bgp_config *) C;
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
  struct bgp_proto *p = (struct bgp_proto *) P;

783
  P->accept_ra_types = RA_OPTIMAL;
784 785 786 787 788 789 790 791 792 793 794
  P->rt_notify = bgp_rt_notify;
  P->rte_better = bgp_rte_better;
  P->import_control = bgp_import_control;
  P->neigh_notify = bgp_neigh_notify;
  p->cf = c;
  p->local_as = c->local_as;
  p->remote_as = c->remote_as;
  p->is_internal = (c->local_as == c->remote_as);
  return P;
}

Martin Mareš's avatar
Martin Mareš committed
795 796 797 798
/**
 * bgp_error - report a protocol error
 * @c: connection
 * @code: error code (according to the RFC)
799
 * @subcode: error sub-code
Martin Mareš's avatar
Martin Mareš committed
800 801 802 803
 * @data: data to be passed in the Notification message
 * @len: length of the data
 *
 * bgp_error() sends a notification packet to tell the other side that a protocol
804
 * error has occurred (including the data considered erroneous if possible) and
Martin Mareš's avatar
Martin Mareš committed
805 806
 * closes the connection.
 */
807
void
808
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
809
{
810 811
  struct bgp_proto *p = c->bgp;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
812
  if (c->state == BS_CLOSE)
813
    return;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
814

815 816
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
817 818
  bgp_conn_enter_close_state(c);

819 820
  c->notify_code = code;
  c->notify_subcode = subcode;
821 822
  c->notify_data = data;
  c->notify_size = (len > 0) ? len : 0;
823
  bgp_schedule_packet(c, PKT_NOTIFICATION);
824 825 826 827 828 829

  if (code != 6)
    {
      bgp_update_startup_delay(p);
      bgp_stop(p, 0);
    }
830 831
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
/**
 * bgp_store_error - store last error for status report
 * @p: BGP instance
 * @c: connection
 * @class: error class (BE_xxx constants)
 * @code: error code (class specific)
 *
 * bgp_store_error() decides whether the given error is interesting
 * enough and, if so, stores it to the last_error variables of @p.
 */
void
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
{
  /* During PS_STOP, we ignore any errors, as we want to report
   * the error that caused transition to PS_STOP
   */
  if (p->p.proto_state == PS_STOP)
    return;

  /* During PS_UP, we ignore errors on secondary connection */
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
    return;

  p->last_error_code = code;
  p->last_error_class = class;
}

859 860 861 862 863
void
bgp_check(struct bgp_config *c)
{
  if (!c->local_as)
    cf_error("Local AS number must be set");
864

865 866
  if (!c->remote_as)
    cf_error("Neighbor must be configured");
867

868
  if (!bgp_as4_support && c->enable_as4)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
869
    cf_error("AS4 support disabled globally");
870 871

  if (!bgp_as4_support && (c->local_as > 0xFFFF))
872
    cf_error("Local AS number out of range");
873 874 875 876

  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
    cf_error("Neighbor AS number out of range (AS4 not available)");

877 878
  if ((c->local_as != c->remote_as) && (c->rr_client))
    cf_error("Only internal neighbor can be RR client");
879

880 881
  if ((c->local_as == c->remote_as) && (c->rs_client))
    cf_error("Only external neighbor can be RS client");
882 883
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
884
/*
 * Read-only string tables used by bgp_get_status(). They are indexed by the
 * connection state, the stored error class, and (for two of the error
 * classes) the stored error code, respectively.
 */
static const char * const bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
static const char * const bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
static const char * const bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed" };
static const char * const bgp_auto_errors[] = { "", "Route limit exceeded"};
Ondřej Zajíček's avatar
Ondřej Zajíček committed
888 889


890
static void
891 892 893 894
bgp_get_status(struct proto *P, byte *buf)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
  const byte *err1 = bgp_err_classes[p->last_error_class];
  const byte *err2 = "";
  byte errbuf[32];

  switch (p->last_error_class)
    {
    case BE_MISC:
      err2 = bgp_misc_errors[p->last_error_code];
      break;
    case BE_SOCKET:
      err2 = (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
      break;
    case BE_BGP_RX:
    case BE_BGP_TX:
      err2 = bgp_error_dsc(errbuf, p->last_error_code >> 16, p->last_error_code & 0xFF);
      break;
911 912 913
    case BE_AUTO_DOWN:
      err2 = bgp_auto_errors[p->last_error_code];
      break;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
914 915
    }

916
  if (P->proto_state == PS_DOWN)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
917
    bsprintf(buf, "%s%s", err1, err2);
918
  else
Ondřej Zajíček's avatar
Ondřej Zajíček committed
919 920 921
    bsprintf(buf, "%-14s%s%s",
	     bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)],
	     err1, err2);
922 923
}

924 925 926 927 928 929 930
static int
bgp_reconfigure(struct proto *P, struct proto_config *C)
{
  struct bgp_config *new = (struct bgp_config *) C;
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct bgp_config *old = p->cf;

931 932
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
		     ((byte *) new) + sizeof(struct proto_config),
933 934 935 936
		     // password item is last and must be checked separately
		     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
    && ((!old->password && !new->password)
	|| (old->password && new->password && !strcmp(old->password, new->password)));
937 938 939 940 941 942

  /* We should update our copy of configuration ptr as old configuration will be freed */
  if (same)
    p->cf = new;

  return same;
943 944
}

945 946 947
struct protocol proto_bgp = {
  name:			"BGP",
  template:		"bgp%d",
948
  attr_class:		EAP_BGP,
949 950 951
  init:			bgp_init,
  start:		bgp_start,
  shutdown:		bgp_shutdown,
952
  get_status:		bgp_get_status,
953
  get_attr:		bgp_get_attr,
954
  reconfigure:		bgp_reconfigure,
955 956
  get_route_info:	bgp_get_route_info,
};