bgp.c 28.7 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 *	BIRD -- The Border Gateway Protocol
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/**
 * DOC: Border Gateway Protocol
 *
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
 * connection and most of the interface with BIRD core, |packets.c| handling
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
 * manipulation with BGP attribute lists.
 *
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
 * architecture which is able to keep all the information needed by BGP in the
 * primary routing table, therefore no complex data structures like a central
 * BGP table are needed. This increases memory footprint of a BGP router with
 * many connections, but not too much and, which is more important, it makes
 * BGP much easier to implement.
 *
Martin Mareš's avatar
Martin Mareš committed
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
Martin Mareš's avatar
Martin Mareš committed
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
 * structure to which are attached individual connections represented by &bgp_conn
 * (usually, there exists only one connection, but during BGP session setup, there
 * can be more of them). The connections are handled according to the BGP state machine
 * defined in the RFC with all the timers and all the parameters configurable.
 *
 * In incoming direction, we listen on the connection's socket and each time we receive
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
 * passes complete packets to bgp_rx_packet() which distributes the packet according
 * to its type.
 *
 * In outgoing direction, we gather all the routing updates and sort them to buckets
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
 * of &rta's and a &fib which helps us to find if we already have another route for
 * the same destination queued for sending, so that we can replace it with the new one
 * immediately instead of sending both updates). There also exists a special bucket holding
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
 * attributes. If we have any packet to send (due to either new routes or the connection
Martin Mareš's avatar
Martin Mareš committed
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
Martin Mareš's avatar
Martin Mareš committed
43 44 45 46 47 48 49 50 51 52 53 54 55
 * bgp_schedule_packet() which sets the corresponding bit in the @packets_to_send
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
 * type if we have more data of the same type to send.
 *
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
 * @bgp_attr_table array describing all important characteristics of all known attributes.
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
 */

56
#undef LOCAL_DEBUG
57 58 59 60 61

#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
62
#include "nest/locks.h"
63
#include "nest/cli.h"
64
#include "conf/conf.h"
65
#include "lib/socket.h"
66
#include "lib/resource.h"
Martin Mareš's avatar
Martin Mareš committed
67
#include "lib/string.h"
68 69 70

#include "bgp.h"

71
struct linpool *bgp_linpool;		/* Global temporary pool */
72 73 74
static sock *bgp_listen_sk;		/* Global listening socket */
static int bgp_counter;			/* Number of protocol instances using the listening socket */

Ondřej Zajíček's avatar
Ondřej Zajíček committed
75
static void bgp_close(struct bgp_proto *p, int apply_md5);
76
static void bgp_connect(struct bgp_proto *p);
77
static void bgp_active(struct bgp_proto *p);
78
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
79

80

Ondřej Zajíček's avatar
Ondřej Zajíček committed
81 82 83 84 85 86 87 88 89 90 91 92 93
/**
 * bgp_open - open a BGP instance
 * @p: BGP instance
 *
 * This function allocates and configures shared BGP resources.
 * Should be called as the last step during initialization
 * (when lock is acquired and neighbor is ready).
 * When error, state changed to PS_DOWN, -1 is returned and caller
 * should return immediately.
 */
static int
bgp_open(struct bgp_proto *p)
{
94
  struct config *cfg = p->cf->c.global;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
95 96 97
  bgp_counter++;

  if (!bgp_listen_sk)
98
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118

  if (!bgp_linpool)
    bgp_linpool = lp_new(&root_pool, 4080);

  if (p->cf->password)
    {
      int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password);
      if (rv < 0)
	{
	  bgp_close(p, 0);
	  p->p.disabled = 1;
	  bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_MD5);
	  proto_notify_state(&p->p, PS_DOWN);
	  return -1;
	}
    }

  return 0;
}

119 120 121 122 123
/*
 * Pick the start state according to whether capabilities are enabled
 * and, unless the instance is configured as passive, begin the outgoing
 * connection attempt.
 */
static void
bgp_startup(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "Started");

  if (p->cf->capabilities)
    p->start_state = BSS_CONNECT;
  else
    p->start_state = BSS_CONNECT_NOCAP;

  if (p->cf->passive)
    return;

  bgp_active(p);
}

/* Hook of the startup timer; the timer's data pointer is the BGP instance. */
static void
bgp_startup_timeout(timer *t)
{
  struct bgp_proto *p = t->data;

  bgp_startup(p);
}


/*
 * Start the instance right away, or arm the startup timer when a
 * startup delay is pending.
 */
static void
bgp_initiate(struct bgp_proto *p)
{
  if (!p->startup_delay)
    {
      bgp_startup(p);
      return;
    }

  BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
  bgp_start_timer(p->startup_timer, p->startup_delay);
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
148 149 150 151 152 153 154 155 156
/**
 * bgp_close - close a BGP instance
 * @p: BGP instance
 * @apply_md5: 0 to disable unsetting MD5 auth
 *
 * This function frees and deconfigures shared BGP resources.
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
 * from failed bgp_open().
 */
157
static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
158
bgp_close(struct bgp_proto *p, int apply_md5)
159 160 161
{
  ASSERT(bgp_counter);
  bgp_counter--;
162

Ondřej Zajíček's avatar
Ondřej Zajíček committed
163
  if (p->cf->password && apply_md5)
164 165
    sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL);

166 167 168 169
  if (!bgp_counter)
    {
      rfree(bgp_listen_sk);
      bgp_listen_sk = NULL;
170 171
      rfree(bgp_linpool);
      bgp_linpool = NULL;
172 173 174
    }
}

Martin Mareš's avatar
Martin Mareš committed
175 176 177 178 179 180 181 182 183
/**
 * bgp_start_timer - start a BGP timer
 * @t: timer
 * @value: time to fire (0 to disable the timer)
 *
 * This functions calls tm_start() on @t with time @value and the
 * amount of randomization suggested by the BGP standard. Please use
 * it for all BGP timers.
 */
184
void
185 186
bgp_start_timer(timer *t, int value)
{
187
  if (value)
188 189 190 191 192
    {
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
      t->randomize = value / 4;
      tm_start(t, value - t->randomize);
    }
193 194 195 196
  else
    tm_stop(t);
}

Martin Mareš's avatar
Martin Mareš committed
197 198 199 200 201 202 203
/**
 * bgp_close_conn - close a BGP connection
 * @conn: connection to close
 *
 * This function takes a connection described by the &bgp_conn structure,
 * closes its socket and frees all resources associated with it.
 */
204 205 206
void
bgp_close_conn(struct bgp_conn *conn)
{
207
  // struct bgp_proto *p = conn->bgp;
208 209 210 211 212 213 214 215 216

  DBG("BGP: Closing connection\n");
  conn->packets_to_send = 0;
  rfree(conn->connect_retry_timer);
  conn->connect_retry_timer = NULL;
  rfree(conn->keepalive_timer);
  conn->keepalive_timer = NULL;
  rfree(conn->hold_timer);
  conn->hold_timer = NULL;
217
  rfree(conn->sk);
218
  conn->sk = NULL;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
  rfree(conn->tx_ev);
  conn->tx_ev = NULL;
}


/**
 * bgp_update_startup_delay - update a startup delay
 * @p: BGP instance
 *
 * This function updates a startup delay that is used to postpone next BGP connect.
 * It also handles disable_after_error and might stop BGP instance when error
 * happened and disable_after_error is on.
 *
 * It should be called when BGP protocol error happened.
 */
void
235
bgp_update_startup_delay(struct bgp_proto *p)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
236 237 238
{
  struct bgp_config *cf = p->cf;

239
  DBG("BGP: Updating startup delay\n");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
240

241
  if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
242 243
    p->startup_delay = 0;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
244 245 246 247 248 249 250
  p->last_proto_error = now;

  if (cf->disable_after_error)
    {
      p->startup_delay = 0;
      p->p.disabled = 1;
      return;
251
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
252 253 254 255

  if (!p->startup_delay)
    p->startup_delay = cf->error_delay_time_min;
  else
256
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
257 258
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
259
static void
260
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
261
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
262
  switch (conn->state)
263 264
    {
    case BS_IDLE:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
265 266
    case BS_CLOSE:
      return;
267 268
    case BS_CONNECT:
    case BS_ACTIVE:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
269 270
      bgp_conn_enter_idle_state(conn);
      return;
271 272 273
    case BS_OPENSENT:
    case BS_OPENCONFIRM:
    case BS_ESTABLISHED:
274
      bgp_error(conn, 6, subcode, NULL, 0);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
275
      return;
276
    default:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
277
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
278 279 280
    }
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
281 282 283 284 285 286
static void
bgp_down(struct bgp_proto *p)
{
  if (p->start_state > BSS_PREPARE)
    bgp_close(p, 1);

287
  BGP_TRACE(D_EVENTS, "Down");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
288 289 290 291 292 293 294 295 296 297
  proto_notify_state(&p->p, PS_DOWN);
}

static void
bgp_decision(void *vp)
{
  struct bgp_proto *p = vp;

  DBG("BGP: Decision start\n");
  if ((p->p.proto_state == PS_START)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
298 299
      && (p->outgoing_conn.state == BS_IDLE)
      && (!p->cf->passive))
300
    bgp_active(p);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
301 302 303 304 305 306 307

  if ((p->p.proto_state == PS_STOP)
      && (p->outgoing_conn.state == BS_IDLE)
      && (p->incoming_conn.state == BS_IDLE))
    bgp_down(p);
}

308 309
void
bgp_stop(struct bgp_proto *p, unsigned subcode)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
310 311
{
  proto_notify_state(&p->p, PS_STOP);
312 313
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
314 315 316
  ev_schedule(p->event);
}

317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
/*
 * Change the state of a connection. The transition is written to the MRT
 * dump first when dumping of state changes (MD_STATES) is enabled for the
 * instance.
 */
static inline void
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
{
  int dump_states = conn->bgp->p.mrtdump & MD_STATES;

  if (dump_states)
    mrt_dump_bgp_state_change(conn, conn->state, new_state);

  conn->state = new_state;
}

/* Switch a connection to the OpenConfirm state. */
void
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
{
  /* Only the state changes here; really, most of the work is done in bgp_rx_open(). */
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
333 334 335 336 337 338 339 340 341 342 343 344
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
 
  BGP_TRACE(D_EVENTS, "BGP session established");
  DBG("BGP: UP!!!\n");

  p->conn = conn;
  p->last_error_class = 0;
  p->last_error_code = 0;
  bgp_attr_init(conn->bgp);
345
  bgp_conn_set_state(conn, BS_ESTABLISHED);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
346 347 348 349 350 351 352 353 354 355
  proto_notify_state(&p->p, PS_UP);
}

/*
 * Called when the established connection goes away: the instance loses
 * its primary connection and, if it is still up, is stopped.
 */
static void
bgp_conn_leave_established_state(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "BGP session closed");
  p->conn = NULL;

  if (p->p.proto_state != PS_UP)
    return;

  bgp_stop(p, 0);
}

void
bgp_conn_enter_close_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
  int os = conn->state;

365
  bgp_conn_set_state(conn, BS_CLOSE);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
  tm_stop(conn->hold_timer);
  tm_stop(conn->keepalive_timer);
  conn->sk->rx_hook = NULL;

  if (os == BS_ESTABLISHED)
    bgp_conn_leave_established_state(p);
}

void
bgp_conn_enter_idle_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
  int os = conn->state;

  bgp_close_conn(conn);
381
  bgp_conn_set_state(conn, BS_IDLE);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
382 383 384 385 386 387
  ev_schedule(p->event);

  if (os == BS_ESTABLISHED)
    bgp_conn_leave_established_state(p);
}

388 389 390
static void
bgp_send_open(struct bgp_conn *conn)
{
391 392 393
  conn->start_state = conn->bgp->start_state;
  conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP);
  conn->peer_as4_support = 0;	// Default value, possibly changed by receiving capability.
394
  conn->advertised_as = 0;
395

396 397
  DBG("BGP: Sending open\n");
  conn->sk->rx_hook = bgp_rx;
398
  conn->sk->tx_hook = bgp_tx;
399
  tm_stop(conn->connect_retry_timer);
400
  bgp_schedule_packet(conn, PKT_OPEN);
401
  bgp_conn_set_state(conn, BS_OPENSENT);
402
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
403 404
}

405 406
/*
 * Socket hook installed by bgp_connect() as tx_hook of the outgoing
 * socket; it logs the event and sends an Open.
 */
static void
bgp_connected(sock *sk)
{
  struct bgp_conn *conn = sk->data;
  struct bgp_proto *p = conn->bgp;	/* not used directly; presumably referenced by BGP_TRACE() */

  BGP_TRACE(D_EVENTS, "Connected");

  bgp_send_open(conn);
}

/*
 * Hook of the connect retry timer. While the protocol is starting, the
 * current attempt is dropped and a new connection is initiated; in any
 * other protocol state the connection just becomes idle.
 */
static void
bgp_connect_timeout(timer *t)
{
  struct bgp_conn *conn = t->data;
  struct bgp_proto *p = conn->bgp;

  DBG("BGP: connect_timeout\n");

  if (p->p.proto_state != PS_START)
    {
      bgp_conn_enter_idle_state(conn);
      return;
    }

  bgp_close_conn(conn);
  bgp_connect(p);
}

static void
432
bgp_sock_err(sock *sk, int err)
433 434
{
  struct bgp_conn *conn = sk->data;
Martin Mareš's avatar
Martin Mareš committed
435
  struct bgp_proto *p = conn->bgp;
436

Ondřej Zajíček's avatar
Ondřej Zajíček committed
437 438
  bgp_store_error(p, conn, BE_SOCKET, err);

439 440 441 442
  if (err)
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
  else
    BGP_TRACE(D_EVENTS, "Connection closed");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
443 444

  bgp_conn_enter_idle_state(conn);
445 446
}

447 448 449 450 451
/*
 * Hook of the hold timer. An expired hold timer is reported to the peer
 * as error code 4, unless there still is unprocessed input on the socket.
 */
static void
bgp_hold_timeout(timer *t)
{
  struct bgp_conn *conn = t->data;

  DBG("BGP: Hold timeout\n");

  /* If there is something in input queue, we are probably congested
     and perhaps just not processed BGP packets in time. Give it
     another 10 seconds instead of dropping the session. */
  if (sk_rx_ready(conn->sk) > 0)
    {
      bgp_start_timer(conn->hold_timer, 10);
      return;
    }

  bgp_error(conn, 4, 0, NULL, 0);
}

/* Hook of the keepalive timer: schedule a Keepalive packet on the connection. */
static void
bgp_keepalive_timeout(timer *t)
{
  struct bgp_conn *conn = t->data;

  DBG("BGP: Keepalive timer\n");

  bgp_schedule_packet(conn, PKT_KEEPALIVE);
}

472
static void
473
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
474 475 476
{
  timer *t;

477
  conn->sk = NULL;
478
  conn->bgp = p;
479
  conn->packets_to_send = 0;
480 481 482

  t = conn->connect_retry_timer = tm_new(p->p.pool);
  t->hook = bgp_connect_timeout;
483 484
  t->data = conn;
  t = conn->hold_timer = tm_new(p->p.pool);
485
  t->hook = bgp_hold_timeout;
486 487
  t->data = conn;
  t = conn->keepalive_timer = tm_new(p->p.pool);
488
  t->hook = bgp_keepalive_timeout;
489
  t->data = conn;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
490 491 492
  conn->tx_ev = ev_new(p->p.pool);
  conn->tx_ev->hook = bgp_kick_tx;
  conn->tx_ev->data = conn;
493 494
}

495
static void
496
bgp_setup_sk(struct bgp_conn *conn, sock *s)
497 498 499 500 501 502
{
  s->data = conn;
  s->err_hook = bgp_sock_err;
  conn->sk = s;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
503
static void
504
bgp_active(struct bgp_proto *p)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
505
{
506
  int delay = MAX(1, p->cf->start_delay_time);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
507 508 509 510
  struct bgp_conn *conn = &p->outgoing_conn;

  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
  bgp_setup_conn(p, conn);
511
  bgp_conn_set_state(conn, BS_ACTIVE);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
512 513 514
  bgp_start_timer(conn->connect_retry_timer, delay);
}

515 516 517 518 519 520 521
/*
 * Enforce the configured route limit. Returns 0 when no limit is set or
 * the number of imported routes does not exceed it; otherwise the
 * instance is stopped and -1 is returned.
 */
int
bgp_apply_limits(struct bgp_proto *p)
{
  if (!p->cf->route_limit || (p->p.stats.imp_routes <= p->cf->route_limit))
    return 0;

  log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
  bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
  bgp_update_startup_delay(p);
  bgp_stop(p, 1);	/* Errcode 6, 1 - max number of prefixes reached */

  return -1;
}


Martin Mareš's avatar
Martin Mareš committed
531 532 533 534 535 536 537 538
/**
 * bgp_connect - initiate an outgoing connection
 * @p: BGP instance
 *
 * The bgp_connect() function creates a new &bgp_conn and initiates
 * a TCP connection to the peer. The rest of connection setup is governed
 * by the BGP state machine as described in the standard.
 */
539 540 541 542
static void
bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
{
  sock *s;
543
  struct bgp_conn *conn = &p->outgoing_conn;
544 545 546 547

  DBG("BGP: Connecting\n");
  s = sk_new(p->p.pool);
  s->type = SK_TCP_ACTIVE;
548
  s->saddr = p->source_addr;
549 550
  s->daddr = p->cf->remote_ip;
  s->dport = BGP_PORT;
551 552 553 554 555 556
  s->ttl = p->cf->multihop ? : 1;
  s->rbsize = BGP_RX_BUFFER_SIZE;
  s->tbsize = BGP_TX_BUFFER_SIZE;
  s->tos = IP_PREC_INTERNET_CONTROL;
  s->password = p->cf->password;
  s->tx_hook = bgp_connected;
Martin Mareš's avatar
Martin Mareš committed
557
  BGP_TRACE(D_EVENTS, "Connecting to %I from local address %I", s->daddr, s->saddr);
558
  bgp_setup_conn(p, conn);
559
  bgp_setup_sk(conn, s);
560
  bgp_conn_set_state(conn, BS_CONNECT);
561 562
  if (sk_open(s))
    {
563
      bgp_sock_err(s, 0);
564 565 566 567 568 569
      return;
    }
  DBG("BGP: Waiting for connect success\n");
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
}

Martin Mareš's avatar
Martin Mareš committed
570 571 572 573 574 575 576 577 578 579 580 581
/**
 * bgp_incoming_connection - handle an incoming connection
 * @sk: TCP socket
 * @dummy: unused
 *
 * This function serves as a socket hook for accepting of new BGP
 * connections. It searches a BGP instance corresponding to the peer
 * which has connected and if such an instance exists, it creates a
 * &bgp_conn structure, attaches it to the instance and either sends
 * an Open message or (if there already is an active connection) it
 * closes the new connection by sending a Notification message.
 */
582
static int
Martin Mareš's avatar
Martin Mareš committed
583
bgp_incoming_connection(sock *sk, int dummy UNUSED)
584
{
585
  struct proto_config *pc;
586

587
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
588 589 590 591 592 593
  WALK_LIST(pc, config->protos)
    if (pc->protocol == &proto_bgp && pc->proto)
      {
	struct bgp_proto *p = (struct bgp_proto *) pc->proto;
	if (ipa_equal(p->cf->remote_ip, sk->daddr))
	  {
594 595 596 597 598 599 600 601 602 603 604
	    /* We are in proper state and there is no other incoming connection */
	    int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
	      (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);

	    BGP_TRACE(D_EVENTS, "Incoming connection from %I (port %d) %s",
		      sk->daddr, sk->dport, acc ? "accepted" : "rejected");

	    if (!acc)
	      goto err;

	    bgp_setup_conn(p, &p->incoming_conn);
605
	    bgp_setup_sk(&p->incoming_conn, sk);
606 607 608
	    sk_set_ttl(sk, p->cf->multihop ? : 1);
	    bgp_send_open(&p->incoming_conn);
	    return 0;
609 610
	  }
      }
611 612 613

  log(L_WARN "BGP: Unexpected connect from unknown address %I (port %d)", sk->daddr, sk->dport);
 err:
614 615 616 617
  rfree(sk);
  return 0;
}

618
static void
619
bgp_listen_sock_err(sock *sk UNUSED, int err)
620 621 622 623 624 625 626
{
  if (err == ECONNABORTED)
    log(L_WARN "BGP: Incoming connection aborted");
  else
    log(L_ERR "BGP: Error on incoming socket: %M", err);
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
627
static sock *
628
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
629
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
630 631 632
  sock *s = sk_new(&root_pool);
  DBG("BGP: Creating incoming socket\n");
  s->type = SK_TCP_PASSIVE;
633 634 635
  s->saddr = addr;
  s->sport = port ? port : BGP_PORT;
  s->flags = flags;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
636 637 638 639
  s->tos = IP_PREC_INTERNET_CONTROL;
  s->rbsize = BGP_RX_BUFFER_SIZE;
  s->tbsize = BGP_TX_BUFFER_SIZE;
  s->rx_hook = bgp_incoming_connection;
640
  s->err_hook = bgp_listen_sock_err;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
641
  if (sk_open(s))
642
    {
643
      log(L_ERR "BGP: Unable to open incoming socket");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
644 645
      rfree(s);
      return NULL;
646
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
647 648
  else
    return s;
649 650 651 652 653 654
}

/*
 * Called once the neighbor is reachable through an interface: determine
 * the local and source addresses (and the link-local address when built
 * with IPV6), open the shared BGP resources and initiate the session.
 */
static void
bgp_start_neighbor(struct bgp_proto *p)
{
  p->local_addr = p->neigh->iface->addr->ip;

  /* An explicitly configured source address takes precedence */
  if (ipa_nonzero(p->cf->source_addr))
    p->source_addr = p->cf->source_addr;
  else
    p->source_addr = p->local_addr;

  DBG("BGP: local=%I remote=%I\n", p->source_addr, p->next_hop);
#ifdef IPV6
  {
    struct ifa *a;

    /* Use the first link-scope address of the neighbor's interface */
    p->local_link = IPA_NONE;
    WALK_LIST(a, p->neigh->iface->addrs)
      if (a->scope == SCOPE_LINK)
        {
          p->local_link = a->ip;
          break;
        }

    if (! ipa_nonzero(p->local_link))
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);

    DBG("BGP: Selected link-level address %I\n", p->local_link);
  }
#endif

  /* On failure bgp_open() has already brought the protocol down */
  if (bgp_open(p) < 0)
    return;

  bgp_initiate(p);
}

/*
 * Neighbor notification hook. When the neighbor gains an interface while
 * the instance is still preparing to start, the instance is started; when
 * the neighbor loses its interface, a starting or running instance is
 * stopped.
 */
static void
bgp_neigh_notify(neighbor *n)
{
  struct bgp_proto *p = (struct bgp_proto *) n->proto;

  if (n->iface)
    {
      if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE))
        {
          BGP_TRACE(D_EVENTS, "Neighbor found");
          bgp_start_neighbor(p);
        }
      return;
    }

  if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP))
    {
      BGP_TRACE(D_EVENTS, "Neighbor lost");
      bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
      bgp_stop(p, 0);
    }
}

707 708 709 710 711 712 713 714 715 716 717
/*
 * Route reload hook. Schedules a Route Refresh packet and returns 1 when
 * there is a primary connection whose peer supports route refresh;
 * returns 0 otherwise.
 */
static int
bgp_reload_routes(struct proto *P)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

  if (p->conn && p->conn->peer_refresh_support)
    {
      bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
      return 1;
    }

  return 0;
}

718 719 720 721 722 723
static void
bgp_start_locked(struct object_lock *lock)
{
  struct bgp_proto *p = lock->data;
  struct bgp_config *cf = p->cf;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
724 725 726 727 728 729
  if (p->p.proto_state != PS_START)
    {
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
    return;
    }

730
  DBG("BGP: Got lock\n");
731
  p->local_id = proto_get_router_id(&cf->c);
732 733
  p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
  p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
734 735 736 737 738 739 740

  if (cf->rr_client)
    {
      p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id;
      p->rr_client = cf->rr_client;
    }

741 742
  p->rs_client = cf->rs_client;

743 744 745
  if (!p->neigh)
    {
      log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
746
      /* As we do not start yet, we can just disable protocol */
747
      p->p.disabled = 1;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
748
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
749
      proto_notify_state(&p->p, PS_DOWN);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
750
      return;
751
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
752 753
  
  if (p->neigh->iface)
754 755
    bgp_start_neighbor(p);
  else
Martin Mareš's avatar
Martin Mareš committed
756
    BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop);
757 758
}

759 760 761
static int
bgp_start(struct proto *P)
{
762 763 764
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct object_lock *lock;

765
  DBG("BGP: Startup.\n");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
766
  p->start_state = BSS_PREPARE;
767 768
  p->outgoing_conn.state = BS_IDLE;
  p->incoming_conn.state = BS_IDLE;
769
  p->neigh = NULL;
770

Ondřej Zajíček's avatar
Ondřej Zajíček committed
771 772 773
  p->event = ev_new(p->p.pool);
  p->event->hook = bgp_decision;
  p->event->data = p;
774

775 776 777 778
  p->startup_timer = tm_new(p->p.pool);
  p->startup_timer->hook = bgp_startup_timeout;
  p->startup_timer->data = p;

779 780 781 782 783 784 785 786 787 788 789 790 791 792
  /*
   *  Before attempting to create the connection, we need to lock the
   *  port, so that are sure we're the only instance attempting to talk
   *  with that neighbor.
   */

  lock = p->lock = olock_new(P->pool);
  lock->addr = p->cf->remote_ip;
  lock->type = OBJLOCK_TCP;
  lock->port = BGP_PORT;
  lock->iface = NULL;
  lock->hook = bgp_start_locked;
  lock->data = p;
  olock_acquire(lock);
793

794
  return PS_START;
795 796 797 798 799
}

static int
bgp_shutdown(struct proto *P)
{
800
  struct bgp_proto *p = (struct bgp_proto *) P;
801
  unsigned subcode;
802

Martin Mareš's avatar
Martin Mareš committed
803
  BGP_TRACE(D_EVENTS, "Shutdown requested");
Ondřej Zajíček's avatar
Ondřej Zajíček committed
804
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
805 806 807 808 809 810 811 812 813 814 815

  if (P->reconfiguring)
    {
      if (P->cf_new)
	subcode = 6; // Errcode 6, 6 - other configuration change
      else
	subcode = 3; // Errcode 6, 3 - peer de-configured
    }
  else
    subcode = 2; // Errcode 6, 2 - administrative shutdown

Ondřej Zajíček's avatar
Ondřej Zajíček committed
816
  p->startup_delay = 0;
817
  bgp_stop(p, subcode);
818

Ondřej Zajíček's avatar
Ondřej Zajíček committed
819
  return p->p.proto_state;
820 821
}

822 823 824 825 826 827 828
static struct proto *
bgp_init(struct proto_config *C)
{
  struct bgp_config *c = (struct bgp_config *) C;
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
  struct bgp_proto *p = (struct bgp_proto *) P;

829
  P->accept_ra_types = RA_OPTIMAL;
830 831 832 833
  P->rt_notify = bgp_rt_notify;
  P->rte_better = bgp_rte_better;
  P->import_control = bgp_import_control;
  P->neigh_notify = bgp_neigh_notify;
834
  P->reload_routes = bgp_reload_routes;
835 836 837 838 839 840 841
  p->cf = c;
  p->local_as = c->local_as;
  p->remote_as = c->remote_as;
  p->is_internal = (c->local_as == c->remote_as);
  return P;
}

Martin Mareš's avatar
Martin Mareš committed
842 843 844 845
/**
 * bgp_error - report a protocol error
 * @c: connection
 * @code: error code (according to the RFC)
846
 * @subcode: error sub-code
Martin Mareš's avatar
Martin Mareš committed
847 848 849 850
 * @data: data to be passed in the Notification message
 * @len: length of the data
 *
 * bgp_error() sends a notification packet to tell the other side that a protocol
851
 * error has occurred (including the data considered erroneous if possible) and
Martin Mareš's avatar
Martin Mareš committed
852 853
 * closes the connection.
 */
854
void
855
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
856
{
857 858
  struct bgp_proto *p = c->bgp;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
859
  if (c->state == BS_CLOSE)
860
    return;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
861

862 863
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
864 865
  bgp_conn_enter_close_state(c);

866 867
  c->notify_code = code;
  c->notify_subcode = subcode;
868 869
  c->notify_data = data;
  c->notify_size = (len > 0) ? len : 0;
870
  bgp_schedule_packet(c, PKT_NOTIFICATION);
871 872 873 874 875 876

  if (code != 6)
    {
      bgp_update_startup_delay(p);
      bgp_stop(p, 0);
    }
877 878
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
/**
 * bgp_store_error - store last error for status report
 * @p: BGP instance
 * @c: connection (may be NULL)
 * @class: error class (BE_xxx constants)
 * @code: error code (class specific)
 *
 * bgp_store_error() decides whether given error is interesting enough
 * and stores that error to the last_error variables of @p.
 */
void
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
{
  int up = (p->p.proto_state == PS_UP);
  int stopping = (p->p.proto_state == PS_STOP);

  /* During PS_UP, we ignore errors on secondary connection */
  if (up && c && (c != p->conn))
    return;

  /* During PS_STOP, we ignore any errors, as we want to report
   * the error that caused transition to PS_STOP
   */
  if (stopping)
    return;

  p->last_error_class = class;
  p->last_error_code = code;
}

906 907 908 909 910
void
bgp_check(struct bgp_config *c)
{
  if (!c->local_as)
    cf_error("Local AS number must be set");
911

912 913
  if (!c->remote_as)
    cf_error("Neighbor must be configured");
914 915 916 917

  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
    cf_error("Neighbor AS number out of range (AS4 not available)");

918 919
  if ((c->local_as != c->remote_as) && (c->rr_client))
    cf_error("Only internal neighbor can be RR client");
920

921 922
  if ((c->local_as == c->remote_as) && (c->rs_client))
    cf_error("Only external neighbor can be RS client");
923 924 925 926

  /* Different default based on rs_client */
  if (c->missing_lladdr == 0)
    c->missing_lladdr = c->rs_client ? MLL_DROP : MLL_SELF;
927 928
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
929
/*
 * Message tables used when reporting protocol status. They hold string
 * literals only, so both the strings and the tables are read-only.
 */

/* Names of connection states, indexed by the connection state value */
static const char * const bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };

/* Prefixes of error messages, indexed by the error class (last_error_class) */
static const char * const bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};

/* Messages for errors of class BE_MISC, indexed by the error code */
static const char * const bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed" };

/* Messages for errors of class BE_AUTO_DOWN, indexed by the error code */
static const char * const bgp_auto_errors[] = { "", "Route limit exceeded"};
Ondřej Zajíček's avatar
Ondřej Zajíček committed
933

934 935
static const char *
bgp_last_errmsg(struct bgp_proto *p)
936
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
937 938 939
  switch (p->last_error_class)
    {
    case BE_MISC:
940
      return bgp_misc_errors[p->last_error_code];
Ondřej Zajíček's avatar
Ondřej Zajíček committed
941
    case BE_SOCKET:
942
      return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
943 944
    case BE_BGP_RX:
    case BE_BGP_TX:
945
      return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
946
    case BE_AUTO_DOWN:
947 948 949
      return bgp_auto_errors[p->last_error_code];
    default:
      return "";
Ondřej Zajíček's avatar
Ondřej Zajíček committed
950
    }
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
}

/*
 * bgp_state_dsc - name of the current BGP state of @p
 *
 * The reported state is the greater of the states of the incoming and
 * the outgoing connection. An idle instance that is configured as
 * passive and whose start_state has reached at least BSS_CONNECT is
 * reported as "Passive" instead of "Idle".
 */
static const char *
bgp_state_dsc(struct bgp_proto *p)
{
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
  int passive_wait = (state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive;

  return passive_wait ? "Passive" : bgp_state_names[state];
}

/*
 * bgp_get_status - write a one-line status of the protocol to @buf
 *
 * When the protocol is down, the line consists of the last error only
 * (class prefix followed by the message). Otherwise, the BGP state name,
 * left-aligned and padded to 14 characters, precedes the error text.
 */
static void
bgp_get_status(struct proto *P, byte *buf)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

  const char *err1 = bgp_err_classes[p->last_error_class];
  const char *err2 = bgp_last_errmsg(p);

  if (P->proto_state == PS_DOWN)
    {
      bsprintf(buf, "%s%s", err1, err2);
    }
  else
    {
      bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
    }
}

/*
 * bgp_show_proto_info - print BGP-specific details of a protocol
 *
 * Emits CLI messages with code -1006. Nothing is printed when the
 * protocol is down. Otherwise the BGP state is shown, followed by:
 *   in PS_START: the remaining error wait and start delay, each only
 *                when its timer has a non-zero expiration time,
 *   in PS_UP:    session type, neighbor identification, addresses,
 *                neighbor capabilities, the route limit (if configured)
 *                and the hold and keepalive timers.
 * The last error is appended unless its class is BE_NONE or BE_MAN_DOWN.
 */
static void
bgp_show_proto_info(struct proto *P)
{
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct bgp_conn *c = p->conn;

  if (P->proto_state == PS_DOWN)
    return;

  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));

  if (P->proto_state == PS_START)
    {
      struct bgp_conn *oc = &p->outgoing_conn;

      if ((p->start_state < BSS_CONNECT) &&
          (p->startup_timer->expires))
        cli_msg(-1006, "    Error wait:       %d/%d",
                p->startup_timer->expires - now, p->startup_delay);

      if ((oc->state == BS_ACTIVE) &&
          (oc->connect_retry_timer->expires))
        cli_msg(-1006, "    Start delay:      %d/%d",
                oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
    }
  else if (P->proto_state == PS_UP)
    {
      cli_msg(-1006, "    Session:          %s%s%s%s",
              p->is_internal ? "internal" : "external",
              p->rr_client ? " route-reflector" : "",
              p->rs_client ? " route-server" : "",
              p->as4_session ? " AS4" : "");
      cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
      cli_msg(-1006, "    Neighbor address: %I", p->cf->remote_ip);
      cli_msg(-1006, "    Nexthop address:  %I", p->next_hop);
      cli_msg(-1006, "    Source address:   %I", p->source_addr);
      cli_msg(-1006, "    Neighbor caps:   %s%s",
              c->peer_refresh_support ? " refresh" : "",
              c->peer_as4_support ? " AS4" : "");
      if (p->cf->route_limit)
        cli_msg(-1006, "    Route limit:      %d/%d",
                p->p.stats.imp_routes, p->cf->route_limit);

      /*
       * A zero expiration time is treated as "timer not running", the
       * same convention the PS_START branch above relies on. Report 0
       * remaining in that case instead of the meaningless negative
       * value of (0 - now).
       */
      cli_msg(-1006, "    Hold timer:       %d/%d",
              c->hold_timer->expires ? c->hold_timer->expires - now : 0,
              c->hold_time);
      cli_msg(-1006, "    Keepalive timer:  %d/%d",
              c->keepalive_timer->expires ? c->keepalive_timer->expires - now : 0,
              c->keepalive_time);
    }

  if ((p->last_error_class != BE_NONE) &&
      (p->last_error_class != BE_MAN_DOWN))
    {
      const char *err1 = bgp_err_classes[p->last_error_class];
      const char *err2 = bgp_last_errmsg(p);
      cli_msg(-1006, "    Last error:       %s%s", err1, err2);
    }
}

1038 1039 1040 1041 1042 1043 1044
static int
bgp_reconfigure(struct proto *P, struct proto_config *C)
{
  struct bgp_config *new = (struct bgp_config *) C;
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct bgp_config *old = p->cf;

1045 1046
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
		     ((byte *) new) + sizeof(struct proto_config),
1047 1048 1049 1050
		     // password item is last and must be checked separately
		     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
    && ((!old->password && !new->password)
	|| (old->password && new->password && !strcmp(old->password, new->password)));
1051 1052 1053 1054 1055 1056

  /* We should update our copy of configuration ptr as old configuration will be freed */
  if (same)
    p->cf = new;

  return same;
1057 1058
}

1059 1060 1061
struct protocol proto_bgp = {
  name:			"BGP",
  template:		"bgp%d",
1062
  attr_class:		EAP_BGP,
1063 1064 1065
  init:			bgp_init,
  start:		bgp_start,
  shutdown:		bgp_shutdown,
1066
  reconfigure:		bgp_reconfigure,
1067
  get_status:		bgp_get_status,
1068
  get_attr:		bgp_get_attr,
1069
  get_route_info:	bgp_get_route_info,
1070
  show_proto_info:	bgp_show_proto_info
1071
};