/*
 *	BIRD -- The Border Gateway Protocol
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9
#undef LOCAL_DEBUG
10 11 12 13 14

#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
15
#include "nest/locks.h"
16
#include "conf/conf.h"
17
#include "lib/socket.h"
18
#include "lib/resource.h"
19 20 21

#include "bgp.h"

22
struct linpool *bgp_linpool;		/* Global temporary pool */
23 24 25
static sock *bgp_listen_sk;		/* Global listening socket */
static int bgp_counter;			/* Number of protocol instances using the listening socket */
static list bgp_list;			/* List of active BGP instances */
26
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established" };
27 28

static void bgp_connect(struct bgp_proto *p);
29
static void bgp_initiate(struct bgp_proto *p);
30

31
void
32 33 34 35 36 37 38 39 40
bgp_close(struct bgp_proto *p)
{
  rem_node(&p->bgp_node);
  ASSERT(bgp_counter);
  bgp_counter--;
  if (!bgp_counter)
    {
      rfree(bgp_listen_sk);
      bgp_listen_sk = NULL;
41 42
      rfree(bgp_linpool);
      bgp_linpool = NULL;
43 44 45
    }
}

46
void
47 48
bgp_start_timer(timer *t, int value)
{
49
  if (value)
50 51 52 53 54
    {
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
      t->randomize = value / 4;
      tm_start(t, value - t->randomize);
    }
55 56 57 58 59 60 61 62
  else
    tm_stop(t);
}

void
bgp_close_conn(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
63
  struct bgp_config *cf = p->cf;
64 65 66 67 68 69 70 71 72 73 74 75

  DBG("BGP: Closing connection\n");
  conn->packets_to_send = 0;
  rfree(conn->connect_retry_timer);
  conn->connect_retry_timer = NULL;
  rfree(conn->keepalive_timer);
  conn->keepalive_timer = NULL;
  rfree(conn->hold_timer);
  conn->hold_timer = NULL;
  sk_close(conn->sk);
  conn->sk = NULL;
  conn->state = BS_IDLE;
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
  if (conn->error_flag > 1)
    {
      if (cf->disable_after_error)
	p->p.disabled = 1;
      if (p->last_connect && (bird_clock_t)(p->last_connect + cf->error_amnesia_time) < now)
	p->startup_delay = 0;
      if (!p->startup_delay)
	p->startup_delay = cf->error_delay_time_min;
      else
	{
	  p->startup_delay *= 2;
	  if (p->startup_delay > cf->error_delay_time_max)
	    p->startup_delay = cf->error_delay_time_max;
	}
    }
91 92 93 94 95 96
  if (conn->primary)
    {
      bgp_close(p);
      p->conn = NULL;
      proto_notify_state(&p->p, PS_DOWN);
    }
97 98
  else if (conn->error_flag > 1)
    bgp_initiate(p);
99 100
}

101 102 103 104 105 106 107 108 109 110 111 112 113 114
static int
bgp_graceful_close_conn(struct bgp_conn *c)
{
  switch (c->state)
    {
    case BS_IDLE:
      return 0;
    case BS_CONNECT:
    case BS_ACTIVE:
      bgp_close_conn(c);
      return 1;
    case BS_OPENSENT:
    case BS_OPENCONFIRM:
    case BS_ESTABLISHED:
115
      bgp_error(c, 6, 0, NULL, 0);
116 117 118 119 120 121
      return 1;
    default:
      bug("bgp_graceful_close_conn: Unknown state %d", c->state);
    }
}

122 123 124 125 126
static void
bgp_send_open(struct bgp_conn *conn)
{
  DBG("BGP: Sending open\n");
  conn->sk->rx_hook = bgp_rx;
127
  conn->sk->tx_hook = bgp_tx;
128
  tm_stop(conn->connect_retry_timer);
129
  bgp_schedule_packet(conn, PKT_OPEN);
130
  conn->state = BS_OPENSENT;
131
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
132 133
}

134 135
/*
 * bgp_connected - socket hook installed by bgp_connect() as tx_hook
 *
 * Traces the event and proceeds with sending the OPEN message.
 */
static void
bgp_connected(sock *sk)
{
  struct bgp_conn *c = sk->data;
  /* Not referenced directly below; presumably BGP_TRACE relies on a local named p */
  struct bgp_proto *p = c->bgp;

  BGP_TRACE(D_EVENTS, "Connected");
  bgp_send_open(c);
}

/*
 * bgp_connect_timeout - hook of the connect retry timer
 *
 * Closes whatever is left of the connection the timer belongs to and
 * starts a fresh outgoing connection attempt.
 */
static void
bgp_connect_timeout(timer *t)
{
  struct bgp_conn *c = t->data;
  struct bgp_proto *p = c->bgp;

  DBG("BGP: connect_timeout\n");
  bgp_close_conn(c);
  bgp_connect(p);
}

static void
156
bgp_sock_err(sock *sk, int err)
157 158
{
  struct bgp_conn *conn = sk->data;
Martin Mareš's avatar
Martin Mareš committed
159
  struct bgp_proto *p = conn->bgp;
160

Martin Mareš's avatar
Martin Mareš committed
161
  BGP_TRACE(D_EVENTS, "Connection closed (socket error %d)", err);
162 163 164 165
  switch (conn->state)
    {
    case BS_CONNECT:
    case BS_OPENSENT:
166 167
      sk_close(conn->sk);
      conn->sk = NULL;
168
      conn->state = BS_ACTIVE;
Martin Mareš's avatar
Martin Mareš committed
169
      bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
170 171 172
      break;
    case BS_OPENCONFIRM:
    case BS_ESTABLISHED:
173
      bgp_close_conn(conn);
174
      break;
175
    default:
176
      bug("bgp_sock_err called in invalid state %d", conn->state);
177 178 179
    }
}

180 181 182 183 184
/*
 * bgp_hold_timeout - hook of the hold timer
 *
 * Signals error code 4 with subcode 0 and no data on the connection
 * the timer belongs to.
 */
static void
bgp_hold_timeout(timer *t)
{
  struct bgp_conn *c = t->data;

  DBG("BGP: Hold timeout, closing connection\n");
  bgp_error(c, 4, 0, NULL, 0);
}

/*
 * bgp_keepalive_timeout - hook of the keepalive timer
 *
 * Schedules a KEEPALIVE packet on the connection the timer belongs to.
 */
static void
bgp_keepalive_timeout(timer *t)
{
  struct bgp_conn *c = t->data;

  DBG("BGP: Keepalive timer\n");
  bgp_schedule_packet(c, PKT_KEEPALIVE);
}

198
static void
199
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
200 201 202
{
  timer *t;

203
  conn->sk = NULL;
204
  conn->bgp = p;
205
  conn->packets_to_send = 0;
206 207
  conn->error_flag = 0;
  conn->primary = 0;
208 209 210

  t = conn->connect_retry_timer = tm_new(p->p.pool);
  t->hook = bgp_connect_timeout;
211 212
  t->data = conn;
  t = conn->hold_timer = tm_new(p->p.pool);
213
  t->hook = bgp_hold_timeout;
214 215
  t->data = conn;
  t = conn->keepalive_timer = tm_new(p->p.pool);
216
  t->hook = bgp_keepalive_timeout;
217
  t->data = conn;
218 219
}

220 221 222 223 224 225 226 227 228 229 230 231
/*
 * bgp_setup_sk - set the parameters common to all BGP sockets
 *
 * Attaches socket @s to connection @conn (in both directions) and
 * fills in the TTL, buffer sizes, error hook and TOS. The TTL is the
 * configured multihop value, or 1 when that value is zero.
 */
static void
bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s)
{
  struct bgp_config *cf = p->cf;

  s->data = conn;
  s->ttl = cf->multihop ? cf->multihop : 1;
  s->rbsize = BGP_RX_BUFFER_SIZE;
  s->tbsize = BGP_TX_BUFFER_SIZE;
  s->err_hook = bgp_sock_err;
  s->tos = IP_PREC_INTERNET_CONTROL;
  conn->sk = s;
}

232 233 234 235
static void
bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
{
  sock *s;
236
  struct bgp_conn *conn = &p->outgoing_conn;
237 238

  DBG("BGP: Connecting\n");
239
  p->last_connect = now;
240 241
  s = sk_new(p->p.pool);
  s->type = SK_TCP_ACTIVE;
242 243 244 245
  if (ipa_nonzero(p->cf->source_addr))
    s->saddr = p->cf->source_addr;
  else
    s->saddr = p->local_addr;
246 247
  s->daddr = p->cf->remote_ip;
  s->dport = BGP_PORT;
Martin Mareš's avatar
Martin Mareš committed
248
  BGP_TRACE(D_EVENTS, "Connecting to %I from local address %I", s->daddr, s->saddr);
249
  bgp_setup_conn(p, conn);
250
  bgp_setup_sk(p, conn, s);
251
  s->tx_hook = bgp_connected;
252 253 254
  conn->state = BS_CONNECT;
  if (sk_open(s))
    {
255
      bgp_sock_err(s, 0);
256 257 258 259 260 261
      return;
    }
  DBG("BGP: Waiting for connect success\n");
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
}

262 263 264 265 266 267 268 269 270 271
static void
bgp_initiate(struct bgp_proto *p)
{
  unsigned delay;

  delay = p->cf->start_delay_time;
  if (p->startup_delay > delay)
    delay = p->startup_delay;
  if (delay)
    {
Martin Mareš's avatar
Martin Mareš committed
272
      BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
273 274 275 276 277 278 279
      bgp_setup_conn(p, &p->outgoing_conn);
      bgp_start_timer(p->outgoing_conn.connect_retry_timer, delay);
    }
  else
    bgp_connect(p);
}

280 281
/*
 * bgp_incoming_connection - receive hook of the listening socket
 *
 * Looks for the active BGP instance whose configured remote address
 * equals the source address of the new connection @sk. If there is one
 * and it has no incoming connection yet, the socket is attached to its
 * incoming connection and an OPEN message is sent.
 *
 * A connection from a configured neighbor which already has an incoming
 * connection is traced and dropped; it is not reported as unauthorized,
 * because the peer is known. Connections from addresses not matching any
 * instance are logged with L_AUTH and dropped.
 *
 * @dummy is not used. The function always returns 0.
 */
static int
bgp_incoming_connection(sock *sk, int dummy)
{
  node *n;

  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
  WALK_LIST(n, bgp_list)
    {
      struct bgp_proto *p = SKIP_BACK(struct bgp_proto, bgp_node, n);
      if (ipa_equal(p->cf->remote_ip, sk->daddr))
	{
	  BGP_TRACE(D_EVENTS, "Incoming connection from %I port %d", sk->daddr, sk->dport);
	  if (p->incoming_conn.sk)
	    {
	      /* Known neighbor, so this is a duplicate, not an unauthorized connect */
	      DBG("BGP: But one incoming connection already exists, how is that possible?\n");
	      BGP_TRACE(D_EVENTS, "Incoming connection rejected, another one already exists");
	      rfree(sk);
	      return 0;
	    }
	  bgp_setup_conn(p, &p->incoming_conn);
	  bgp_setup_sk(p, &p->incoming_conn, sk);
	  bgp_send_open(&p->incoming_conn);
	  return 0;
	}
    }

  log(L_AUTH "BGP: Unauthorized connect from %I port %d", sk->daddr, sk->dport);
  rfree(sk);
  return 0;
}

static void
bgp_start_neighbor(struct bgp_proto *p)
{
  p->local_addr = p->neigh->iface->addr->ip;
  DBG("BGP: local=%I remote=%I\n", p->local_addr, p->next_hop);
313 314 315 316 317 318 319 320 321 322 323
  if (!bgp_counter++)
    init_list(&bgp_list);
  if (!bgp_listen_sk)
    {
      sock *s = sk_new(&root_pool);
      DBG("BGP: Creating incoming socket\n");
      s->type = SK_TCP_PASSIVE;
      s->sport = BGP_PORT;
      s->tos = IP_PREC_INTERNET_CONTROL;
      s->ttl = 1;
      s->rbsize = BGP_RX_BUFFER_SIZE;
Martin Mareš's avatar
Martin Mareš committed
324
      s->tbsize = BGP_TX_BUFFER_SIZE;
325 326 327 328 329 330 331 332 333
      s->rx_hook = bgp_incoming_connection;
      if (sk_open(s))
	{
	  log(L_ERR "Unable to open incoming BGP socket");
	  rfree(s);
	}
      else
	bgp_listen_sk = s;
    }
334 335
  if (!bgp_linpool)
    bgp_linpool = lp_new(&root_pool, 4080);
336
  add_tail(&bgp_list, &p->bgp_node);
337
  bgp_initiate(p);
338 339 340 341 342 343 344 345 346
}

/*
 * bgp_neigh_notify - neighbor notification hook
 *
 * A neighbor with an interface attached has just been found, so the
 * instance is started. A neighbor without an interface has been lost:
 * both connections are closed gracefully and the protocol is reported
 * as down.
 */
static void
bgp_neigh_notify(neighbor *n)
{
  struct bgp_proto *p = (struct bgp_proto *) n->proto;

  if (!n->iface)
    {
      BGP_TRACE(D_EVENTS, "Neighbor lost");
      /* Send cease packets, but don't wait for them to be delivered */
      bgp_graceful_close_conn(&p->outgoing_conn);
      bgp_graceful_close_conn(&p->incoming_conn);
      proto_notify_state(&p->p, PS_DOWN);
      return;
    }

  BGP_TRACE(D_EVENTS, "Neighbor found");
  bgp_start_neighbor(p);
}

static void
bgp_start_locked(struct object_lock *lock)
{
  struct bgp_proto *p = lock->data;
  struct bgp_config *cf = p->cf;

  DBG("BGP: Got lock\n");
367
  p->local_id = cf->c.global->router_id;
368 369 370 371 372 373 374 375 376 377 378
  p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
  p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
  if (!p->neigh)
    {
      log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
      p->p.disabled = 1;
      proto_notify_state(&p->p, PS_DOWN);
    }
  else if (p->neigh->iface)
    bgp_start_neighbor(p);
  else
Martin Mareš's avatar
Martin Mareš committed
379
    BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop);
380 381
}

382 383 384
static int
bgp_start(struct proto *P)
{
385 386 387
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct object_lock *lock;

388 389 390
  DBG("BGP: Startup.\n");
  p->outgoing_conn.state = BS_IDLE;
  p->incoming_conn.state = BS_IDLE;
391
  p->startup_delay = 0;
392

393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
  /*
   *  Before attempting to create the connection, we need to lock the
   *  port, so that are sure we're the only instance attempting to talk
   *  with that neighbor.
   */

  lock = p->lock = olock_new(P->pool);
  lock->addr = p->cf->remote_ip;
  lock->type = OBJLOCK_TCP;
  lock->port = BGP_PORT;
  lock->iface = NULL;
  lock->hook = bgp_start_locked;
  lock->data = p;
  olock_acquire(lock);
  return PS_START;
408 409 410 411 412
}

static int
bgp_shutdown(struct proto *P)
{
413 414
  struct bgp_proto *p = (struct bgp_proto *) P;

Martin Mareš's avatar
Martin Mareš committed
415
  BGP_TRACE(D_EVENTS, "Shutdown requested");
416

417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
  /*
   *  We want to send the Cease notification message to all connections
   *  we have open, but we don't want to wait for all of them to complete.
   *  We are willing to handle the primary connection carefully, but for
   *  the others we just try to send the packet and if there is no buffer
   *  space free, we'll gracefully finish.
   */

  proto_notify_state(&p->p, PS_STOP);
  if (!p->conn)
    {
      if (p->outgoing_conn.state != BS_IDLE)
	p->outgoing_conn.primary = 1;	/* Shuts protocol down after connection close */
      else if (p->incoming_conn.state != BS_IDLE)
	p->incoming_conn.primary = 1;
    }
433
  if (bgp_graceful_close_conn(&p->outgoing_conn) || bgp_graceful_close_conn(&p->incoming_conn))
434 435 436 437 438 439 440
    return p->p.proto_state;
  else
    {
      /* No connections open, shutdown automatically */
      bgp_close(p);
      return PS_DOWN;
    }
441 442
}

443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
/*
 * bgp_init - init hook of the protocol
 *
 * Allocates a protocol instance for configuration @C, installs the
 * BGP hooks and copies the AS numbers from the configuration. The
 * session is internal when both AS numbers are equal.
 */
static struct proto *
bgp_init(struct proto_config *C)
{
  struct bgp_config *cf = (struct bgp_config *) C;
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
  struct bgp_proto *p = (struct bgp_proto *) P;

  /* Hooks of the generic protocol structure */
  P->rt_notify = bgp_rt_notify;
  P->rte_better = bgp_rte_better;
  P->import_control = bgp_import_control;
  P->neigh_notify = bgp_neigh_notify;

  /* BGP specific part */
  p->cf = cf;
  p->local_as = cf->local_as;
  p->remote_as = cf->remote_as;
  p->is_internal = (cf->local_as == cf->remote_as);

  return P;
}

461
void
462
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
463 464 465
{
  if (c->error_flag)
    return;
466
  bgp_log_error(c->bgp, "Error", code, subcode, data, (len > 0) ? len : -len);
467
  c->error_flag = 1 + (code != 6);
468 469
  c->notify_code = code;
  c->notify_subcode = subcode;
470 471
  c->notify_data = data;
  c->notify_size = (len > 0) ? len : 0;
472 473
  if (c->primary)
    proto_notify_state(&c->bgp->p, PS_STOP);
474 475 476
  bgp_schedule_packet(c, PKT_NOTIFICATION);
}

477 478 479 480 481 482 483 484 485
/*
 * bgp_check - check a BGP configuration for mandatory items
 *
 * Reports a configuration error through cf_error() when the local AS
 * number or the remote AS number is zero. The local AS is checked first.
 */
void
bgp_check(struct bgp_config *c)
{
  if (!c->local_as)
    {
      cf_error("Local AS number must be set");
    }

  if (!c->remote_as)
    {
      cf_error("Neighbor must be configured");
    }
}

486
static void
487 488 489 490 491 492 493
bgp_get_status(struct proto *P, byte *buf)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

  strcpy(buf, bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)]);
}

494 495 496 497 498 499 500 501 502 503 504 505
static int
bgp_reconfigure(struct proto *P, struct proto_config *C)
{
  struct bgp_config *new = (struct bgp_config *) C;
  struct bgp_proto *p = (struct bgp_proto *) P;
  struct bgp_config *old = p->cf;

  return !memcmp(((byte *) old) + sizeof(struct proto_config),
		 ((byte *) new) + sizeof(struct proto_config),
		 sizeof(struct bgp_config) - sizeof(struct proto_config));
}

506 507 508
struct protocol proto_bgp = {
  name:			"BGP",
  template:		"bgp%d",
509
  attr_class:		EAP_BGP,
510 511 512
  init:			bgp_init,
  start:		bgp_start,
  shutdown:		bgp_shutdown,
513
  get_status:		bgp_get_status,
514
  get_attr:		bgp_get_attr,
515
  reconfigure:		bgp_reconfigure,
516 517 518 519 520
#if 0
  dump:			bgp_dump,
  get_route_info:	bgp_get_route_info,
#endif
};