/*
 *	BIRD -- BGP Packet Processing
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *	(c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
 *	(c) 2008--2016 CZ.NIC z.s.p.o.
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
11
#undef LOCAL_DEBUG
Martin Mareš's avatar
Martin Mareš committed
12

Ondřej Zajíček's avatar
Ondřej Zajíček committed
13 14
#include <stdlib.h>

15 16 17 18
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
19
#include "nest/attrs.h"
20
#include "nest/mrtdump.h"
21
#include "conf/conf.h"
Martin Mareš's avatar
Martin Mareš committed
22
#include "lib/unaligned.h"
23
#include "lib/flowspec.h"
Martin Mareš's avatar
Martin Mareš committed
24
#include "lib/socket.h"
25

Ondřej Zajíček's avatar
Ondřej Zajíček committed
26 27
#include "nest/cli.h"

28
#include "bgp.h"
Martin Mareš's avatar
Martin Mareš committed
29

30 31 32 33 34

#define BGP_RR_REQUEST		0
#define BGP_RR_BEGIN		1
#define BGP_RR_END		2

35 36 37 38 39 40 41
#define BGP_NLRI_MAX		(4 + 1 + 32)

#define BGP_MPLS_BOS		1	/* Bottom-of-stack bit */
#define BGP_MPLS_MAX		10	/* Max number of labels that 24*n <= 255 */
#define BGP_MPLS_NULL		3	/* Implicit NULL label */
#define BGP_MPLS_MAGIC		0x800000 /* Magic withdraw label value, RFC 3107 3 */

42

43 44
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
45

46 47 48 49 50 51 52
/* Table for state -> RFC 6608 FSM error subcodes */
static byte fsm_err_subcode[BS_MAX] = {
  [BS_OPENSENT] = 1,
  [BS_OPENCONFIRM] = 2,
  [BS_ESTABLISHED] = 3
};

Ondřej Zajíček's avatar
Ondřej Zajíček committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92

/*
 * bgp_get_channel - find the channel registered for an address family id
 * @p: BGP protocol instance
 * @afi: address family id to look for
 *
 * Scans the first p->channel_count entries of p->afi_map and returns the
 * p->channel_map entry at the first matching index, or NULL if none matches.
 */
static struct bgp_channel *
bgp_get_channel(struct bgp_proto *p, u32 afi)
{
  struct bgp_channel *found = NULL;
  uint idx = 0;

  while (idx < p->channel_count)
  {
    if (p->afi_map[idx] == afi)
    {
      found = p->channel_map[idx];
      break;
    }

    idx++;
  }

  return found;
}

/*
 * put_af3 - store an address family id in 3 bytes
 *
 * The upper 16 bits of @id go to buf[0..1] via put_u16(), the lowest 8 bits
 * go to buf[2].
 */
static inline void
put_af3(byte *buf, u32 id)
{
  const u32 upper = id >> 16;
  const u32 low_byte = id & 0xff;

  put_u16(buf, upper);
  buf[2] = low_byte;
}

/*
 * put_af4 - store an address family id in 4 bytes
 *
 * Same as the 3-byte form, but with a zero byte at buf[2] and the lowest
 * 8 bits of @id at buf[3].
 */
static inline void
put_af4(byte *buf, u32 id)
{
  const u32 upper = id >> 16;
  const u32 low_byte = id & 0xff;

  put_u16(buf, upper);
  buf[2] = 0;
  buf[3] = low_byte;
}

/*
 * get_af3 - read an address family id stored in 3 bytes
 *
 * Combines the 16-bit value at buf[0..1] (upper half of the result) with the
 * byte at buf[2] (lowest 8 bits). Inverse of put_af3().
 */
static inline u32
get_af3(byte *buf)
{
  /*
   * Widen to u32 before shifting. If get_u16() yields a 16-bit type, it would
   * be promoted to (signed) int and shifting a value >= 0x8000 left by 16 is
   * undefined behavior. The input comes from the peer, so any value may occur.
   */
  return ((u32) get_u16(buf) << 16) | buf[2];
}

/*
 * get_af4 - read an address family id stored in 4 bytes
 *
 * Combines the 16-bit value at buf[0..1] (upper half of the result) with the
 * byte at buf[3] (lowest 8 bits); buf[2] is not examined. Inverse of put_af4().
 */
static inline u32
get_af4(byte *buf)
{
  /*
   * Widen to u32 before shifting. If get_u16() yields a 16-bit type, it would
   * be promoted to (signed) int and shifting a value >= 0x8000 left by 16 is
   * undefined behavior. The input comes from the peer, so any value may occur.
   */
  return ((u32) get_u16(buf) << 16) | buf[3];
}

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
/*
 * MRT Dump format is not semantically specified.
 * We will use these values in appropriate fields:
 *
 * Local AS, Remote AS - configured AS numbers for given BGP instance.
 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
 *
 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
 * changes) and MESSAGE (for received BGP messages).
 *
 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
 * only when AS4 session is established and even in that case MESSAGE
 * does not use AS4 variant for initial OPEN message. This strange
 * behavior is here for compatibility with Quagga and Bgpdump.
 */

/*
 * mrt_put_bgp4_hdr - write the common BGP4MP header fields
 * @buf: output position
 * @conn: BGP connection the record is about
 * @as4: nonzero to write 4-byte AS numbers, zero for 2-byte ones
 *
 * Writes, in this order: remote AS, our public AS, interface index (0 when
 * there is no neighbor/interface), address family, and two addresses taken
 * from the connection socket (daddr first, then saddr; the *_NONE address
 * when there is no socket). The address family follows the configured
 * remote IP. Returns the position just past the written header.
 */
static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
{
  struct bgp_proto *p = conn->bgp;
  uint is_v4 = ipa_is_ip4(p->cf->remote_ip);

  if (!as4)
  {
    /* 2-byte variant: AS numbers that do not fit are replaced by AS_TRANS */
    put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
    put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
    buf += 4;
  }
  else
  {
    put_u32(buf+0, p->remote_as);
    put_u32(buf+4, p->public_as);
    buf += 8;
  }

  /* Interface index */
  if (p->neigh && p->neigh->iface)
    put_u16(buf+0, p->neigh->iface->index);
  else
    put_u16(buf+0, 0);

  /* Address family of the two addresses that follow */
  if (is_v4)
    put_u16(buf+2, BGP_AFI_IPV4);
  else
    put_u16(buf+2, BGP_AFI_IPV6);

  buf += 4;

  if (is_v4)
  {
    if (conn->sk)
    {
      buf = put_ip4(buf, ipa_to_ip4(conn->sk->daddr));
      buf = put_ip4(buf, ipa_to_ip4(conn->sk->saddr));
    }
    else
    {
      buf = put_ip4(buf, IP4_NONE);
      buf = put_ip4(buf, IP4_NONE);
    }
  }
  else
  {
    if (conn->sk)
    {
      buf = put_ip6(buf, ipa_to_ip6(conn->sk->daddr));
      buf = put_ip6(buf, ipa_to_ip6(conn->sk->saddr));
    }
    else
    {
      buf = put_ip6(buf, IP6_NONE);
      buf = put_ip6(buf, IP6_NONE);
    }
  }

  return buf;
}

static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
147
mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
148
{
149
  byte *buf = alloca(128+len);	/* 128 is enough for MRT headers */
150 151 152 153 154 155 156 157 158 159 160
  byte *bp = buf + MRTDUMP_HDR_LENGTH;
  int as4 = conn->bgp->as4_session;

  bp = mrt_put_bgp4_hdr(bp, conn, as4);
  memcpy(bp, pkt, len);
  bp += len;
  mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
		   buf, bp-buf);
}

static inline u16
Ondřej Zajíček's avatar
Ondřej Zajíček committed
161
convert_state(uint state)
162 163 164 165 166 167
{
  /* Convert state from our BS_* values to values used in MRTDump */
  return (state == BS_CLOSE) ? 1 : state + 1;
}

void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
168
mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
169 170 171 172 173 174 175 176 177 178 179
{
  byte buf[128];
  byte *bp = buf + MRTDUMP_HDR_LENGTH;

  bp = mrt_put_bgp4_hdr(bp, conn, 1);
  put_u16(bp+0, convert_state(old));
  put_u16(bp+2, convert_state(new));
  bp += 4;
  mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
}

Martin Mareš's avatar
Martin Mareš committed
180 181 182
/*
 * bgp_create_notification - write the body of a NOTIFICATION message
 * @conn: connection holding the pending notify_code/subcode/data/size
 * @buf: output position
 *
 * Writes the error code, the subcode and then conn->notify_size bytes copied
 * verbatim from conn->notify_data. Returns the position past the last byte.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* Not used directly; presumably needed by BGP_TRACE() */
  byte *payload = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);

  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;
  memcpy(payload, conn->notify_data, conn->notify_size);

  return payload + conn->notify_size;
}

192

Ondřej Zajíček's avatar
Ondřej Zajíček committed
193
/* Capability negotiation as per RFC 5492 */
194

Ondřej Zajíček's avatar
Ondřej Zajíček committed
195 196
/*
 * bgp_find_af_caps - look up per-AF capabilities without creating them
 * @caps: capability set to search
 * @afi: address family id
 *
 * Returns the entry whose afi equals @afi, or NULL when there is none.
 */
const struct bgp_af_caps *
bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
{
  struct bgp_af_caps *cur;

  WALK_AF_CAPS(caps, cur)
  {
    if (cur->afi == afi)
      return cur;
  }

  return NULL;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
207 208
/*
 * bgp_get_af_caps - look up per-AF capabilities, creating them if missing
 * @caps: capability set
 * @afi: address family id
 *
 * Returns the existing entry for @afi if there is one. Otherwise takes the
 * slot at index caps->af_count (incrementing the count), zeroes it, sets its
 * afi and returns it. No bounds check is done here; the caller must have
 * provided enough room in caps->af_data.
 */
static struct bgp_af_caps *
bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
{
  struct bgp_af_caps *entry;

  WALK_AF_CAPS(caps, entry)
  {
    if (entry->afi == afi)
      return entry;
  }

  /* Not present yet, claim the next free slot */
  entry = caps->af_data + caps->af_count;
  caps->af_count++;

  memset(entry, 0, sizeof(*entry));
  entry->afi = afi;

  return entry;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
223 224
static int
bgp_af_caps_cmp(const void *X, const void *Y)
225
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
226 227
  const struct bgp_af_caps *x = X, *y = Y;
  return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
228 229
}

230

231
/*
 * bgp_write_capabilities - build the local capability set and serialize it
 * @conn: connection an OPEN message is being prepared for
 * @buf: output position for the capability list
 *
 * Allocates a zeroed struct bgp_caps with one bgp_af_caps slot per channel
 * from the protocol pool, stores it in conn->local_caps and fills it from the
 * protocol and channel configuration. Then it writes the corresponding
 * capability entries (code, length, data) to @buf.
 *
 * Returns the position just past the last byte written.
 */
static byte *
bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_channel *c;
  struct bgp_caps *caps;
  struct bgp_af_caps *ac;
  uint want_ext_next_hop = 0;
  uint want_add_path = 0;
  byte *len_byte;		/* Length byte of a variable-length capability */

  /* Prepare bgp_caps structure */

  int n = list_length(&p->p.channels);
  caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
  conn->local_caps = caps;

  caps->as4_support = p->cf->enable_as4;
  caps->ext_messages = p->cf->enable_extended_messages;
  caps->route_refresh = p->cf->enable_refresh;
  caps->enhanced_refresh = p->cf->enable_refresh;

  if (caps->as4_support)
    caps->as4_number = p->public_as;

  if (p->cf->gr_mode)
  {
    caps->gr_aware = 1;
    caps->gr_time = p->cf->gr_time;

    if (p->p.gr_recovery)
      caps->gr_flags = BGP_GRF_RESTART;
    else
      caps->gr_flags = 0;
  }

  /* Allocate and fill per-AF fields */
  WALK_LIST(c, p->p.channels)
  {
    ac = &caps->af_data[caps->af_count++];
    ac->afi = c->afi;
    ac->ready = 1;

    /* Extended next hop is announced for IPv4 channels only */
    ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
    want_ext_next_hop |= ac->ext_next_hop;

    ac->add_path = c->cf->add_path;
    want_add_path |= ac->add_path;

    if (c->cf->gr_able)
    {
      ac->gr_able = 1;

      if (p->p.gr_recovery)
        ac->gr_af_flags |= BGP_GRF_FORWARDING;
    }
  }

  /* Sort capability fields by AFI/SAFI */
  qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);


  /* Create capability list in buffer */

  /*
   * Note that max length is ~ 20+14*af_count. With max 12 channels that is
   * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow
   * unless we add new capabilities or more AFs.
   */

  /* Capability 1: Multiprotocol extensions, one entry per ready AF */
  WALK_AF_CAPS(caps, ac)
  {
    if (ac->ready)
    {
      *buf++ = 1;		/* Capability code */
      *buf++ = 4;		/* Capability data length */
      put_af4(buf, ac->afi);
      buf += 4;
    }
  }

  /* Capability 2: Support for route refresh */
  if (caps->route_refresh)
  {
    *buf++ = 2;			/* Capability code */
    *buf++ = 0;			/* Capability data length */
  }

  /* Capability 5: Support for extended next hop */
  if (want_ext_next_hop)
  {
    *buf++ = 5;			/* Capability code */
    len_byte = buf++;		/* Capability data length, fixed below */
    *len_byte = 0;

    WALK_AF_CAPS(caps, ac)
    {
      if (ac->ext_next_hop)
      {
        put_af4(buf, ac->afi);
        put_u16(buf+4, BGP_AFI_IPV6);
        buf += 6;
      }
    }

    *len_byte = buf - (len_byte + 1);
  }

  /* Capability 6: Support for extended messages */
  if (caps->ext_messages)
  {
    *buf++ = 6;			/* Capability code */
    *buf++ = 0;			/* Capability data length */
  }

  /* Capability 64: Support for graceful restart */
  if (caps->gr_aware)
  {
    *buf++ = 64;		/* Capability code */
    len_byte = buf++;		/* Capability data length, fixed below */
    *len_byte = 0;

    /* Restart time, with the restart flags OR-ed into the first byte */
    put_u16(buf, caps->gr_time);
    buf[0] |= caps->gr_flags;
    buf += 2;

    WALK_AF_CAPS(caps, ac)
    {
      if (ac->gr_able)
      {
        put_af3(buf, ac->afi);
        buf[3] = ac->gr_af_flags;
        buf += 4;
      }
    }

    *len_byte = buf - (len_byte + 1);
  }

  /* Capability 65: Support for 4-octet AS number */
  if (caps->as4_support)
  {
    *buf++ = 65;		/* Capability code */
    *buf++ = 4;			/* Capability data length */
    put_u32(buf, p->public_as);
    buf += 4;
  }

  /* Capability 69: Support for ADD-PATH */
  if (want_add_path)
  {
    *buf++ = 69;		/* Capability code */
    len_byte = buf++;		/* Capability data length, fixed below */
    *len_byte = 0;

    WALK_AF_CAPS(caps, ac)
    {
      if (ac->add_path)
      {
        put_af3(buf, ac->afi);
        buf[3] = ac->add_path;
        buf += 4;
      }
    }

    *len_byte = buf - (len_byte + 1);
  }

  /* Capability 70: Support for enhanced route refresh */
  if (caps->enhanced_refresh)
  {
    *buf++ = 70;		/* Capability code */
    *buf++ = 0;			/* Capability data length */
  }

  return buf;
}

390
static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
391
bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
392
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
393 394 395 396
  struct bgp_proto *p = conn->bgp;
  struct bgp_af_caps *ac;
  int i, cl;
  u32 af;
397

Ondřej Zajíček's avatar
Ondřej Zajíček committed
398 399 400 401
  while (len > 0)
  {
    if (len < 2 || len < (2 + pos[1]))
      goto err;
Martin Mareš's avatar
Martin Mareš committed
402

Ondřej Zajíček's avatar
Ondřej Zajíček committed
403 404
    /* Capability length */
    cl = pos[1];
405

Ondřej Zajíček's avatar
Ondřej Zajíček committed
406 407
    /* Capability type */
    switch (pos[0])
408
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
    case  1: /* Multiprotocol capability, RFC 4760 */
      if (cl != 4)
	goto err;

      af = get_af4(pos+2);
      ac = bgp_get_af_caps(caps, af);
      ac->ready = 1;
      break;

    case  2: /* Route refresh capability, RFC 2918 */
      if (cl != 0)
	goto err;

      caps->route_refresh = 1;
      break;

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441
    case  5: /* Extended next hop encoding capability, RFC 5549 */
      if (cl % 6)
	goto err;

      for (i = 0; i < cl; i += 6)
      {
	/* Specified only for IPv4 prefixes with IPv6 next hops */
	if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
	    (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
	  continue;

	af = get_af4(pos+2+i);
	ac = bgp_get_af_caps(caps, af);
	ac->ext_next_hop = 1;
      }
      break;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
    case  6: /* Extended message length capability, RFC draft */
      if (cl != 0)
	goto err;

      caps->ext_messages = 1;
      break;

    case 64: /* Graceful restart capability, RFC 4724 */
      if (cl % 4 != 2)
	goto err;

      /* Only the last instance is valid */
      WALK_AF_CAPS(caps, ac)
      {
	ac->gr_able = 0;
	ac->gr_af_flags = 0;
      }

      caps->gr_aware = 1;
      caps->gr_flags = pos[2] & 0xf0;
      caps->gr_time = get_u16(pos + 2) & 0x0fff;

      for (i = 2; i < cl; i += 4)
      {
	af = get_af3(pos+2+i);
	ac = bgp_get_af_caps(caps, af);
	ac->gr_able = 1;
	ac->gr_af_flags = pos[2+i+3];
      }
      break;

473
    case 65: /* AS4 capability, RFC 6793 */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
      if (cl != 4)
	goto err;

      caps->as4_support = 1;
      caps->as4_number = get_u32(pos + 2);
      break;

    case 69: /* ADD-PATH capability, RFC 7911 */
      if (cl % 4)
	goto err;

      for (i = 0; i < cl; i += 4)
      {
	byte val = pos[2+i+3];
	if (!val || (val > BGP_ADD_PATH_FULL))
489
	{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
490 491
	  log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
	      p->p.name, val);
492 493
	  break;
	}
Ondřej Zajíček's avatar
Ondřej Zajíček committed
494
      }
Martin Mareš's avatar
Martin Mareš committed
495

Ondřej Zajíček's avatar
Ondřej Zajíček committed
496 497 498 499 500 501 502
      for (i = 0; i < cl; i += 4)
      {
	af = get_af3(pos+2+i);
	ac = bgp_get_af_caps(caps, af);
	ac->add_path = pos[2+i+3];
      }
      break;
503

Ondřej Zajíček's avatar
Ondřej Zajíček committed
504 505 506
    case 70: /* Enhanced route refresh capability, RFC 7313 */
      if (cl != 0)
	goto err;
507

Ondřej Zajíček's avatar
Ondřej Zajíček committed
508 509
      caps->enhanced_refresh = 1;
      break;
Martin Mareš's avatar
Martin Mareš committed
510

Ondřej Zajíček's avatar
Ondřej Zajíček committed
511 512 513 514 515 516 517 518 519 520
      /* We can safely ignore all other capabilities */
    }

    ADVANCE(pos, len, 2 + cl);
  }
  return;

err:
  bgp_error(conn, 2, 0, NULL, 0);
  return;
521 522
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
523 524
static int
bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
Martin Mareš's avatar
Martin Mareš committed
525
{
526
  struct bgp_proto *p = conn->bgp;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
527 528
  struct bgp_caps *caps;
  int ol;
529

Ondřej Zajíček's avatar
Ondřej Zajíček committed
530 531 532
  /* Max number of announced AFIs is limited by max option length (255) */
  caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
  memset(caps, 0, sizeof(struct bgp_caps));
533

Ondřej Zajíček's avatar
Ondřej Zajíček committed
534 535 536 537 538 539 540
  while (len > 0)
  {
    if ((len < 2) || (len < (2 + pos[1])))
    { bgp_error(conn, 2, 0, NULL, 0); return -1; }

    ol = pos[1];
    if (pos[0] == 2)
541
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
542 543 544
      /* BGP capabilities, RFC 5492 */
      if (p->cf->capabilities)
	bgp_read_capabilities(conn, caps, pos + 2, ol);
545
    }
Ondřej Zajíček's avatar
Ondřej Zajíček committed
546
    else
547
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
548 549 550
      /* Unknown option */
      bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
      return -1;
551 552
    }

Ondřej Zajíček's avatar
Ondřej Zajíček committed
553 554 555 556 557 558 559 560
    ADVANCE(pos, len, 2 + ol);
  }

  uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
  conn->remote_caps = mb_allocz(p->p.pool, n);
  memcpy(conn->remote_caps, caps, n);

  return 0;
Martin Mareš's avatar
Martin Mareš committed
561 562
}

563
/*
 * bgp_create_open - write the body of an OPEN message
 * @conn: connection the message is prepared for
 * @buf: output position
 *
 * Writes version, 2-byte AS number (AS_TRANS when our AS does not pass the
 * range test below), hold time and router ID, followed by the optional
 * parameters. With capabilities enabled, a single Capabilities option is
 * written by bgp_write_capabilities(), which also sets conn->local_caps;
 * otherwise an empty conn->local_caps is allocated and no optional
 * parameters are sent.
 *
 * Returns the position just past the last byte written.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start = buf + 12;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);

  buf[0] = BGP_VERSION;
  /* NOTE(review): strict '<' here, while mrt_put_bgp4_hdr() uses '<= 0xFFFF' -- confirm intended */
  put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (!p->cf->capabilities)
  {
    /* Prepare empty local_caps */
    conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));

    buf[9] = 0;			/* No optional parameters */
    return buf + 10;
  }

  /* Prepare local_caps and write capabilities to buffer */
  byte *caps_end = bgp_write_capabilities(conn, caps_start);
  uint caps_len = caps_end - caps_start;

  buf[9] = caps_len + 2;	/* Optional parameters length */
  buf[10] = 2;			/* Option 2: Capability list */
  buf[11] = caps_len;		/* Option data length */

  return caps_end;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
600 601
/*
 * bgp_rx_open - process a received OPEN message
 * @conn: connection the message arrived on
 * @pkt: whole message including the BGP header
 * @len: message length in bytes
 *
 * Validates the connection state, the message length, the version, the
 * optional parameters, the hold time, the router ID and the AS number, and
 * reports any problem through bgp_error(). Then it resolves a possible
 * collision with the other connection of the same protocol instance. If the
 * connection survives, the negotiated parameters are stored, a KEEPALIVE is
 * scheduled, the hold timer is started and the connection enters the
 * OpenConfirm state.
 */
static void
bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_conn *other;
  u32 asn, hold, id;

  /* Check state */
  if (conn->state != BS_OPENSENT)
  { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }

  /* Check message contents */
  if (len < 29 || len != 29 + (uint) pkt[28])
  { bgp_error(conn, 1, 2, pkt+16, 2); return; }

  if (pkt[19] != BGP_VERSION)
  {
    /*
     * The error data is copied verbatim into the NOTIFICATION message, so it
     * has to be in network byte order (as the AS number below is). A plain
     * host-order u16 would be sent byte-swapped on little-endian hosts.
     *
     * NOTE(review): like the AS number case below, this passes a pointer to
     * a local variable -- confirm bgp_error() does not keep it past return.
     */
    byte ver[2];
    put_u16(ver, BGP_VERSION);
    bgp_error(conn, 2, 1, ver, 2);
    return;
  }

  asn = get_u16(pkt+20);
  hold = get_u16(pkt+22);
  id = get_u32(pkt+24);
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);

  /* Sets conn->remote_caps on success; the error is already reported on failure */
  if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
    return;

  /* Hold time must be either zero or at least three */
  if (hold > 0 && hold < 3)
  { bgp_error(conn, 2, 6, pkt+22, 2); return; }

  /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
  if (!id || (p->is_internal && id == p->local_id))
  { bgp_error(conn, 2, 3, pkt+24, -4); return; }

  struct bgp_caps *caps = conn->remote_caps;

  if (caps->as4_support)
  {
    u32 as4 = caps->as4_number;

    if ((as4 != asn) && (asn != AS_TRANS))
      log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);

    if (as4 != p->remote_as)
    { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
  }
  else
  {
    if (asn != p->remote_as)
    { bgp_error(conn, 2, 2, pkt+20, 2); return; }
  }

  /* Check the other connection */
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
  switch (other->state)
  {
  case BS_CONNECT:
  case BS_ACTIVE:
    /* Stop outgoing connection attempts */
    bgp_conn_enter_idle_state(other);
    break;

  case BS_IDLE:
  case BS_OPENSENT:
  case BS_CLOSE:
    break;

  case BS_OPENCONFIRM:
    /*
     * Description of collision detection rules in RFC 4271 is confusing and
     * contradictory, but it is essentially:
     *
     * 1. Router with higher ID is dominant
     * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
     * 3. When both connections are in OpenConfirm state, one initiated by
     *    the dominant router is kept.
     *
     * The first line in the expression below evaluates whether the neighbor
     * is dominant, the second line whether the new connection was initiated
     * by the neighbor. If both are true (or both are false), we keep the new
     * connection, otherwise we keep the old one.
     */
    if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
        == (conn == &p->incoming_conn))
    {
      /* Should close the other connection */
      BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
      bgp_error(other, 6, 7, NULL, 0);
      break;
    }
    /* Fall thru */
  case BS_ESTABLISHED:
    /* Should close this connection */
    BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
    bgp_error(conn, 6, 7, NULL, 0);
    return;

  default:
    bug("bgp_rx_open: Unknown state");
  }

  /* Update our local variables */
  conn->hold_time = MIN(hold, p->cf->hold_time);
  /* Configured keepalive time, or one third of the hold time when it is zero */
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
  /* These features are used only when both sides announced them */
  conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
  conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
  p->remote_id = id;

  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
      conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);

  bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
  bgp_start_timer(conn->hold_timer, conn->hold_time);
  bgp_conn_enter_openconfirm_state(conn);
}


/*
 *	Next hop handling
 */

/*
 * Error reporting helpers. They expect a variable 's' in the calling
 * function with a 'proto' member; WITHDRAW() also needs 's->err_withdraw'.
 * DISCARD() and WITHDRAW() contain a bare 'return', so they can be used
 * only in functions returning void.
 */

/* Log a remote-error message prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log the message and leave the calling function */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log the message, set s->err_withdraw and leave the calling function */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Message strings for the helpers above */
#define BAD_AFI		"Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
#define NO_LABEL_STACK	"Missing MPLS stack"
Ondřej Zajíček committed
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753


/*
 * bgp_apply_next_hop - set route destination from a received next hop
 * @s: parse state
 * @a: route attributes to update
 * @gw: first next hop address
 * @ll: second next hop address, may be zero (callers in this file pass a
 *      link-local address here)
 *
 * In GW_DIRECT mode the next hop must resolve to a neighbor that is not
 * of host scope; the route then becomes a unicast route via that neighbor.
 * In any other (recursive) mode a hostentry is looked up in the IGP table
 * matching the address family of @gw and stored in @s; it is applied to @a
 * right away unless MPLS is in use. Invalid next hops mark the route for
 * withdrawal.
 */
static void
bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
{
  struct bgp_proto *p = s->proto;
  struct bgp_channel *c = s->channel;

  if (c->cf->gw_mode != GW_DIRECT)
  {
    /* GW_RECURSIVE */
    if (ipa_zero(gw))
      WITHDRAW(BAD_NEXT_HOP);

    rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
    s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);

    /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
    if (!s->mpls)
      rta_apply_hostentry(a, s->hostentry, NULL);

    return;
  }

  /* GW_DIRECT -> single_hop -> p->neigh != NULL */
  neighbor *nbr = NULL;

  if (ipa_nonzero(gw))
    nbr = neigh_find2(&p->p, &gw, NULL, 0);
  else if (ipa_nonzero(ll))
    nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);

  /* No usable neighbor, or the next hop is one of our own addresses */
  if (!nbr || (nbr->scope == SCOPE_HOST))
    WITHDRAW(BAD_NEXT_HOP);

  a->dest = RTD_UNICAST;
  a->nh.gw = nbr->addr;
  a->nh.iface = nbr->iface;
}

/*
 * bgp_apply_mpls_labels - attach a received MPLS label stack to a route
 * @s: parse state
 * @a: route attributes to update
 * @labels: label stack
 * @lnum: number of labels in @labels
 *
 * A stack longer than MPLS_MAX_LABEL_STACK is reported and the route is made
 * unreachable. A stack consisting of the single implicit NULL label is
 * handled as an empty stack. In GW_DIRECT mode the labels are stored in the
 * next hop directly, otherwise they are applied together with the hostentry
 * stored in @s.
 */
static void
bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
{
  if (lnum > MPLS_MAX_LABEL_STACK)
  {
    /* Fixed format directive: was "$u", which never printed the count */
    REPORT("Too many MPLS labels (%u)", lnum);

    a->dest = RTD_UNREACHABLE;
    a->hostentry = NULL;
    a->nh = (struct nexthop) { };
    return;
  }

  /* Handle implicit NULL as empty MPLS stack */
  if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
    lnum = 0;

  if (s->channel->cf->gw_mode == GW_DIRECT)
  {
    a->nh.labels = lnum;
    memcpy(a->nh.label, labels, 4*lnum);
  }
  else /* GW_RECURSIVE */
  {
    mpls_label_stack ms;

    ms.len = lnum;
    memcpy(ms.stack, labels, 4*lnum);
    rta_apply_hostentry(a, s->hostentry, &ms);
  }
}

805

Ondřej Zajíček's avatar
Ondřej Zajíček committed
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839
/*
 * bgp_use_next_hop - decide whether an existing NEXT_HOP attribute is kept
 * @s: export state
 * @a: the NEXT_HOP attribute
 *
 * Returns nonzero when the attribute should be exported as is, zero when it
 * should be replaced. The rules are evaluated in the order written below.
 */
static inline int
bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
{
  struct bgp_proto *p = s->proto;

  /* 'next hop self' always replaces it */
  if (s->channel->cf->next_hop_self)
    return 0;

  /* 'next hop keep' always keeps it */
  if (s->channel->cf->next_hop_keep)
    return 1;

  /* Keep it when explicitly set in export filter */
  if (a->type & EAF_FRESH)
    return 1;

  /* Keep it when exported to internal peers */
  if (p->is_interior)
  {
    ip_addr *nh = (void *) a->u.ptr->data;

    if (ipa_nonzero(*nh))
      return 1;
  }

  /* Keep it when forwarded between single-hop BGPs on the same iface */
  struct iface *src_iface = NULL;

  if (s->src && s->src->neigh)
    src_iface = s->src->neigh->iface;

  if (!p->neigh)
    return 0;

  return p->neigh->iface == src_iface;
}

static inline int
bgp_use_gateway(struct bgp_export_state *s)
{
  struct bgp_proto *p = s->proto;
  rta *ra = s->route->attrs;

  if (s->channel->cf->next_hop_self)
    return 0;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
840 841
  /* We need one valid global gateway */
  if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
Ondřej Zajíček's avatar
Ondřej Zajíček committed
842 843 844 845 846 847 848
    return 0;

  /* Use it when exported to internal peers */
  if (p->is_interior)
    return 1;

  /* Use it when forwarded to single-hop BGP peer on on the same iface */
849
  return p->neigh && (p->neigh->iface == ra->nh.iface);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
850 851 852 853 854 855 856 857
}

/*
 * bgp_update_next_hop_ip - prepare NEXT_HOP (and MPLS labels) for export
 * @s: export state
 * @a: current NEXT_HOP attribute, or NULL when the route has none
 * @to: attribute list being built
 *
 * Unless the existing attribute is kept (see bgp_use_next_hop()), NEXT_HOP
 * is set either to the route's gateway (see bgp_use_gateway()) or to the
 * channel's own next hop address. The resulting attribute is then validated;
 * a route with a missing or invalid next hop is marked for withdrawal.
 */
static void
bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{
  int keep_attr = a && bgp_use_next_hop(s, a);

  if (!keep_attr && bgp_use_gateway(s))
  {
    /* Announce the gateway of the route */
    rta *ra = s->route->attrs;
    ip_addr nh[1] = { ra->nh.gw };
    bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);

    if (s->mpls)
    {
      /* Route labels, or a single implicit NULL when the route has none */
      u32 implicit_null = BGP_MPLS_NULL;
      u32 *labels = &implicit_null;
      uint lnum = 1;

      if (ra->nh.labels)
      {
        labels = ra->nh.label;
        lnum = ra->nh.labels;
      }

      bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
    }
  }
  else if (!keep_attr)
  {
    /* Announce our own address, with the link address appended if set */
    ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
    bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);

    /* TODO: Use local MPLS assigned label */
    if (s->mpls)
      bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
  }

  /* Check if next hop is valid */
  a = bgp_find_attr(*to, BA_NEXT_HOP);
  if (!a)
    WITHDRAW(NO_NEXT_HOP);

  ip_addr *nh = (void *) a->u.ptr->data;
  ip_addr peer = s->proto->cf->remote_ip;
  uint len = a->u.ptr->length;

  /*
   * nh[1] exists only when the attribute is 32 bytes long, so the length is
   * always tested before nh[1] is read.
   */

  /* Forbid zero next hop */
  if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
    WITHDRAW(BAD_NEXT_HOP);

  /* Forbid next hop equal to neighbor IP */
  if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
    WITHDRAW(BAD_NEXT_HOP);

  /* Forbid next hop with non-matching AF */
  if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
      !s->channel->ext_next_hop)
    WITHDRAW(BAD_NEXT_HOP);

  /* With MPLS, the label stack attribute must be present */
  if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
    WITHDRAW(NO_LABEL_STACK);
}

909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064
static uint
bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
{
  /*
   * MP-BGP next hop encoder; plain IPv4 BGP goes through bgp_encode_next_hop()
   * instead. The attribute payload is read as an array of ip_addr with a
   * length of either 16 or 32 bytes. Returns the number of bytes written to
   * @buf (4, 16 or 32).
   */
  uint alen = a->u.ptr->length;
  ip_addr *hops = (void *) a->u.ptr->data;

  ASSERT((alen == 16) || (alen == 32));

  /*
   * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
   * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. An IPv4 address
   * goes out as plain 4 bytes only together with IPv4 NLRI; with IPv6 NLRI it
   * is written in its IPv6 (IPv4-mapped) form.
   */
  int native_ip4 = bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(hops[0]);

  if (!native_ip4)
  {
    put_ip6(buf, ipa_to_ip6(hops[0]));

    /* Second address is present only in the 32-byte form */
    if (alen == 32)
      put_ip6(buf + 16, ipa_to_ip6(hops[1]));

    return alen;
  }

  put_ip4(buf, ipa_to_ip4(hops[0]));
  return 4;
}

static void
bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  /*
   * Parse an MP-BGP next hop of 4, 16 or 32 bytes into a two-slot ip_addr
   * array, attach it as BA_NEXT_HOP and pass both addresses on to
   * bgp_apply_next_hop(). Any other length is reported through
   * bgp_parse_error() with subcode 9.
   */
  struct bgp_channel *c = s->channel;
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  switch (len)
  {
  case 4:
    /* Single IPv4 address */
    nh[0] = ipa_from_ip4(get_ip4(data));
    nh[1] = IPA_NONE;
    break;

  case 16:
    /* Single IPv6 address; a link-local one is moved to the second slot */
    nh[0] = ipa_from_ip6(get_ip6(data));
    nh[1] = IPA_NONE;

    if (ipa_is_link_local(nh[0]))
    {
      nh[1] = nh[0];
      nh[0] = IPA_NONE;
    }
    break;

  case 32:
    /*
     * Two IPv6 addresses; the second one is kept only when it is link-local
     * and the first one is not an IPv4 address.
     */
    nh[0] = ipa_from_ip6(get_ip6(data));
    nh[1] = ipa_from_ip6(get_ip6(data+16));

    if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
      nh[1] = IPA_NONE;
    break;

  default:
    bgp_parse_error(s, 9);
    break;
  }

  /* Without a second address only the first 16 bytes of the data are used */
  if (ipa_zero(nh[1]))
    ad->length = 16;

  /* Address family of the next hop must match the channel unless ext_next_hop is set */
  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
    WITHDRAW(BAD_NEXT_HOP);

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}

static uint
bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
{
  /*
   * VPN flavour of the next hop encoder: every address written to @buf is
   * preceded by an 8-byte zero route distinguisher. Returns the number of
   * bytes written (12, 24 or 48).
   */
  uint alen = a->u.ptr->length;
  ip_addr *hops = (void *) a->u.ptr->data;

  ASSERT((alen == 16) || (alen == 32));

  /*
   * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
   * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. An IPv4
   * address goes out as plain 4 bytes only together with VPNv4 NLRI; with VPNv6
   * NLRI it is written in its IPv6 (IPv4-mapped) form.
   */
  int native_ip4 = bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(hops[0]);

  /* VPN RD of the first address is always 0 */
  put_u64(buf, 0);

  if (native_ip4)
  {
    put_ip4(buf + 8, ipa_to_ip4(hops[0]));
    return 12;
  }

  put_ip6(buf + 8, ipa_to_ip6(hops[0]));

  if (alen != 16)
  {
    /* Second address, again with zero VPN RD */
    put_u64(buf + 24, 0);
    put_ip6(buf + 32, ipa_to_ip6(hops[1]));
    return 48;
  }

  return 24;
}

static void
bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  /*
   * Parse a VPN next hop of 12, 24 or 48 bytes, where each address follows an
   * 8-byte route distinguisher, into a two-slot ip_addr array, attach it as
   * BA_NEXT_HOP and pass both addresses on to bgp_apply_next_hop(). Any other
   * length or a non-zero RD is reported through bgp_parse_error() with
   * subcode 9.
   */
  struct bgp_channel *c = s->channel;
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  switch (len)
  {
  case 12:
    /* RD + single IPv4 address */
    nh[0] = ipa_from_ip4(get_ip4(data+8));
    nh[1] = IPA_NONE;
    break;

  case 24:
    /* RD + single IPv6 address; a link-local one is moved to the second slot */
    nh[0] = ipa_from_ip6(get_ip6(data+8));
    nh[1] = IPA_NONE;

    if (ipa_is_link_local(nh[0]))
    {
      nh[1] = nh[0];
      nh[0] = IPA_NONE;
    }
    break;

  case 48:
    /*
     * Two RD + IPv6 address pairs; the second address is kept only when it is
     * link-local and the first one is not an IPv4 address.
     */
    nh[0] = ipa_from_ip6(get_ip6(data+8));
    nh[1] = ipa_from_ip6(get_ip6(data+32));

    if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
      nh[1] = IPA_NONE;
    break;

  default:
    bgp_parse_error(s, 9);
    break;
  }

  /* Without a second address only the first 16 bytes of the data are used */
  if (ipa_zero(nh[1]))
    ad->length = 16;

  /* XXXX which error */
  /* Route distinguishers in front of the addresses have to be zero */
  if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
    bgp_parse_error(s, 9);

  /* Address family of the next hop must match the channel unless ext_next_hop is set */
  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
    WITHDRAW(BAD_NEXT_HOP);

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}



1065 1066 1067 1068 1069 1070 1071 1072 1073
/* Next hop encoder that writes nothing: all arguments are unused and the reported length is 0 */
static uint
bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
{
  return 0;
}

/* Next hop decoder that deliberately ignores whatever next hop data was received */
static void
bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
{
  /*
   * Although we expect no next hop and RFC 7606 7.11 states that attribute
   * MP_REACH_NLRI with unexpected next hop length is considered malformed,
   * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
   */

  return;
}

static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1084
bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
1085
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1086 1087 1088
  /* NEXT_HOP shall not pass */
  if (a)
    bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
1089 1090
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133

/*
 *	UPDATE
 */

/*
 * Hand one decoded NLRI entry over to the routing table code.
 *
 * @s:       parser state; carries the last seen path ID / route source and a
 *           cached rta
 * @n:       network address of the route
 * @path_id: path ID of the entry (the NLRI decoders pass 0 when no path ID
 *           was decoded)
 * @a0:      route attributes, or NULL for a withdraw
 */
static void
bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
{
  /* Path ID changed: look up the matching route source and drop the cached rta */
  if (path_id != s->last_id)
  {
    s->last_src = rt_get_source(&s->proto->p, path_id);
    s->last_id = path_id;

    rta_free(s->cached_rta);
    s->cached_rta = NULL;
  }

  if (!a0)
  {
    /* Route withdraw */
    rte_update2(&s->channel->c, n, NULL, s->last_src);
    return;
  }

  /* Prepare cached route attributes */
  if (s->cached_rta == NULL)
  {
    a0->src = s->last_src;

    /* Workaround for rta_lookup() breaking eattrs */
    /* The original eattrs pointer is saved and put back after the lookup */
    ea_list *ea = a0->eattrs;
    s->cached_rta = rta_lookup(a0);
    a0->eattrs = ea;
  }

  /* Build a temporary rte on top of rta_clone() of the cached attributes */
  rta *a = rta_clone(s->cached_rta);
  rte *e = rte_get_temp(a);

  e->pflags = 0;
  e->u.bgp.suppressed = 0;
  rte_update2(&s->channel->c, n, e, s->last_src);
}

1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166
/*
 * Write an MPLS label stack in front of a prefix body.
 *
 * @mpls:  label stack attribute data (array of u32 labels), may be NULL
 * @pos:   write position, advanced by 3 bytes per label
 * @size:  remaining buffer space, decreased accordingly
 * @pxlen: prefix length byte of the NLRI entry, increased by 24 per label
 *
 * When no usable label is supplied (@mpls is NULL or holds less than one
 * whole label), a single zero label is written instead. The last written
 * label gets the bottom-of-stack bit.
 */
static void
bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
{
  u32 dummy = 0;
  u32 *labels = &dummy;
  uint lnum = 1;

  /*
   * Use the attribute only if it carries at least one label. With zero labels
   * nothing would be written and the bottom-of-stack bit below would be set
   * in the byte preceding the label area instead.
   */
  if (mpls && (mpls->length >= 4))
  {
    labels = (u32 *) mpls->data;
    lnum = mpls->length / 4;
  }

  for (uint i = 0; i < lnum; i++)
  {
    /* The label is shifted by 4 bits within the 24-bit field */
    put_u24(*pos, labels[i] << 4);
    ADVANCE(*pos, *size, 3);
  }

  /* Add bottom-of-stack flag */
  (*pos)[-1] |= BGP_MPLS_BOS;

  *pxlen += 24 * lnum;
}

/*
 * Read an MPLS label stack located in front of a prefix body.
 *
 * @pos:   read position, advanced by 3 bytes per label
 * @len:   remaining data length, decreased accordingly
 * @pxlen: remaining prefix length in bits, decreased by 24 per label
 * @a:     route attributes, or NULL for a withdraw
 *
 * Labels are read until one with the bottom-of-stack bit is found. For
 * announcements (@a set) the labels are stored in the BA_MPLS_LABEL_STACK
 * attribute and applied to the next hop in @a.
 */
static void
bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
{
  u32 labels[BGP_MPLS_MAX], label;
  uint lnum = 0;

  do {
    /*
     * Each label needs 24 more prefix bits, 3 more bytes of data and a free
     * slot in labels[]. The latter two are expected to follow from the
     * callers' checks and their one-byte prefix length, but are enforced here
     * as well. NOTE(review): this relies on bgp_parse_error() not returning -
     * confirm.
     */
    if ((*pxlen < 24) || (*len < 3) || (lnum >= BGP_MPLS_MAX))
      bgp_parse_error(s, 1);

    label = get_u24(*pos);
    labels[lnum++] = label >> 4;
    ADVANCE(*pos, *len, 3);
    *pxlen -= 24;

    /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */
    if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC))
      break;
  }
  while (!(label & BGP_MPLS_BOS));

  /* Nothing to attach the labels to in case of a withdraw */
  if (!a)
    return;

  /* Attach MPLS attribute unless we already have one */
  if (!s->mpls_labels)
  {
    s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
    bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
  }

  /* Overwrite data in the attribute */
  s->mpls_labels->length = 4*lnum;
  memcpy(s->mpls_labels->data, labels, 4*lnum);

  /* Update next hop entry in rta */
  bgp_apply_mpls_labels(s, a, labels, lnum);

  /* Attributes were changed, invalidate cached entry */
  rta_free(s->cached_rta);
  s->cached_rta = NULL;

  return;
}
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1197 1198 1199 1200 1201 1202

/*
 * Encode IPv4 prefixes from the head of @buck->prefixes into @buf as NLRI
 * entries: optional path ID, prefix length byte, optional MPLS labels and the
 * prefix body. Each encoded prefix is released by bgp_free_prefix(). Encoding
 * stops when the list is empty or less than BGP_NLRI_MAX bytes remain.
 * Returns the number of bytes written.
 *
 * NOTE(review): BGP_NLRI_MAX is 4 + 1 + 32 bytes; an entry with path ID and a
 * long label stack (3 bytes per label) may need more - confirm that the
 * caller leaves enough slack in @size.
 */
static uint
bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_ip4 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Encode prefix length */
    *pos = net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    /* pos - 1 is the length byte just written; it is increased by the label bits */
    if (s->mpls)
      bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode prefix body */
    /* Only the leading bytes covered by the prefix length are copied */
    ip4_addr a = ip4_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

/*
 * Decode a block of IPv4 NLRI entries (@pos, @len) and pass each prefix to
 * bgp_rte_update() together with attributes @a (NULL for withdraws).
 * Truncated entries are reported through bgp_parse_error() with subcode 1,
 * too long prefix lengths with subcode 10.
 */
static void
bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    net_addr_ip4 net;
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      /* 4 bytes of path ID plus the prefix length byte */
      if (len < 5)
	bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Decode prefix length */
    uint l = *pos;
    ADVANCE(pos, len, 1);

    /* The rest of the data must hold all bits announced by the length byte */
    if (len < ((l + 7) / 8))
      bgp_parse_error(s, 1);

    /* Decode MPLS labels */
    /* Bits consumed by the labels are subtracted from l */
    if (s->mpls)
      bgp_decode_mpls_labels(s, &pos, &len, &l, a);

    if (l > IP4_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Decode prefix body */
    ip4_addr addr = IP4_NONE;
    uint b = (l + 7) / 8;
    memcpy(&addr, pos, b);
    ADVANCE(pos, len, b);

    net = NET_ADDR_IP4(ip4_ntoh(addr), l);
    net_normalize_ip4(&net);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &net, path_id, a);
  }
}


/*
 * Encode IPv6 prefixes from the head of @buck->prefixes into @buf as NLRI
 * entries: optional path ID, prefix length byte, optional MPLS labels and the
 * prefix body. Each encoded prefix is released by bgp_free_prefix(). Encoding
 * stops when the list is empty or less than BGP_NLRI_MAX bytes remain.
 * Returns the number of bytes written.
 *
 * NOTE(review): BGP_NLRI_MAX is 4 + 1 + 32 bytes; an entry with path ID, a
 * label stack (3 bytes per label) and a full 16-byte prefix may need more -
 * confirm that the caller leaves enough slack in @size.
 */
static uint
bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_ip6 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Encode prefix length */
    *pos = net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    /* pos - 1 is the length byte just written; it is increased by the label bits */
    if (s->mpls)
      bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode prefix body */
    /* Only the leading bytes covered by the prefix length are copied */
    ip6_addr a = ip6_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

/*
 * Decode a block of IPv6 NLRI entries (@pos, @len) and pass each prefix to
 * bgp_rte_update() together with attributes @a (NULL for withdraws).
 * Truncated entries are reported through bgp_parse_error() with subcode 1,
 * too long prefix lengths with subcode 10.
 */
static void
bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    net_addr_ip6 net;
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      /* 4 bytes of path ID plus the prefix length byte */
      if (len < 5)
	bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Decode prefix length */
    uint l = *pos;
    ADVANCE(pos, len, 1);

    /* The rest of the data must hold all bits announced by the length byte */
    if (len < ((l + 7) / 8))
      bgp_parse_error(s, 1);

    /* Decode MPLS labels */
    /* Bits consumed by the labels are subtracted from l */
    if (s->mpls)
      bgp_decode_mpls_labels(s, &pos, &len, &l, a);

    if (l > IP6_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Decode prefix body */
    ip6_addr addr = IP6_NONE;
    uint b = (l + 7) / 8;
    memcpy(&addr, pos, b);
    ADVANCE(pos, len, b);

    net = NET_ADDR_IP6(ip6_ntoh(addr), l);
    net_normalize_ip6(&net);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &net, path_id, a);
  }
}

1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384
/*
 * Encode VPNv4 prefixes from the head of @buck->prefixes into @buf as NLRI
 * entries: optional path ID, length byte (64 RD bits + prefix bits, plus label
 * bits), optional MPLS labels, 8-byte route distinguisher and the prefix
 * body. Each encoded prefix is released by bgp_free_prefix(). Encoding stops
 * when the list is empty or less than BGP_NLRI_MAX bytes remain. Returns the
 * number of bytes written.
 *
 * NOTE(review): BGP_NLRI_MAX is 4 + 1 + 32 bytes; an entry with path ID, a
 * long label stack (3 bytes per label), RD and prefix may need more - confirm
 * that the caller leaves enough slack in @size.
 */
static uint
bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_vpn4 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Encode prefix length */
    /* The 64 bits of the route distinguisher count as a part of the prefix */
    *pos = 64 + net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    /* pos - 1 is the length byte just written; it is increased by the label bits */
    if (s->mpls)
      bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode route distinguisher */
    put_u64(pos, net->rd);
    ADVANCE(pos, size, 8);

    /* Encode prefix body */
    /* Only the leading bytes covered by the prefix length are copied */
    ip4_addr a = ip4_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

/*
 * Decode a block of VPNv4 NLRI entries (@pos, @len) and pass each prefix to
 * bgp_rte_update() together with attributes @a (NULL for withdraws).
 * Truncated entries and entries without a complete route distinguisher are
 * reported through bgp_parse_error() with subcode 1, too long prefix lengths
 * with subcode 10.
 */
static void
bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    net_addr_vpn4 net;
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      /* 4 bytes of path ID plus the prefix length byte */
      if (len < 5)
	bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Decode prefix length */
    uint l = *pos;
    ADVANCE(pos, len, 1);

    /* The rest of the data must hold all bits announced by the length byte */
    if (len < ((l + 7) / 8))
      bgp_parse_error(s, 1);

    /* Decode MPLS labels */
    /* Bits consumed by the labels are subtracted from l */
    if (s->mpls)
      bgp_decode_mpls_labels(s, &pos, &len, &l, a);

    /* Decode route distinguisher */
    /* The remaining length has to cover the 64-bit RD */
    if (l < 64)
      bgp_parse_error(s, 1);

    u64 rd = get_u64(pos);
    ADVANCE(pos, len, 8);
    l -= 64;

    if (l > IP4_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Decode prefix body */
    ip4_addr addr = IP4_NONE;
    uint b = (l + 7) / 8;
    memcpy(&addr, pos, b);
    ADVANCE(pos, len, b);

    net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
    net_normalize_vpn4(&net);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &net, path_id, a);
  }
}


/*
 * Encode VPNv6 prefixes from the head of @buck->prefixes into @buf as NLRI
 * entries: optional path ID, length byte (64 RD bits + prefix bits, plus label
 * bits), optional MPLS labels, 8-byte route distinguisher and the prefix
 * body. Each encoded prefix is released by bgp_free_prefix(). Encoding stops
 * when the list is empty or less than BGP_NLRI_MAX bytes remain. Returns the
 * number of bytes written.
 *
 * NOTE(review): BGP_NLRI_MAX is 4 + 1 + 32 bytes; path ID, length byte, RD and
 * a full 16-byte prefix already take 29 bytes, so a stack of three or more
 * labels (3 bytes each) would not fit - confirm that the caller leaves enough
 * slack in @size.
 */
static uint
bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_vpn6 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Encode prefix length */
    /* The 64 bits of the route distinguisher count as a part of the prefix */
    *pos = 64 + net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    /* pos - 1 is the length byte just written; it is increased by the label bits */
    if (s->mpls)
      bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode route distinguisher */
    put_u64(pos, net->rd);
    ADVANCE(pos, size, 8);

    /* Encode prefix body */
    /* Only the leading bytes covered by the prefix length are copied */
    ip6_addr a = ip6_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

static void
bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  /*
   * Walk through a block of VPNv6 NLRI entries and pass every prefix to
   * bgp_rte_update() together with attributes @a. Truncated entries and
   * entries without a complete route distinguisher are reported through
   * bgp_parse_error() with subcode 1, too long prefix lengths with subcode 10.
   */
  while (len)
  {
    net_addr_vpn6 net;
    u32 path_id = 0;

    /* Optional path ID: 4 bytes, followed by the prefix length byte */
    if (s->add_path)
    {
      if (len < 5)
	bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Length byte; counts label, RD and prefix bits together */
    uint bits = *pos;
    ADVANCE(pos, len, 1);

    if (len < ((bits + 7) / 8))
      bgp_parse_error(s, 1);

    /* Label stack, its bits are subtracted from the remaining length */
    if (s->mpls)
      bgp_decode_mpls_labels(s, &pos, &len, &bits, a);

    /* Route distinguisher takes the next 64 bits */
    if (bits < 64)
      bgp_parse_error(s, 1);

    u64 rd = get_u64(pos);
    ADVANCE(pos, len, 8);
    bits -= 64;

    if (bits > IP6_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Prefix body; bytes not present in the NLRI stay zero */
    uint nbytes = (bits + 7) / 8;
    ip6_addr raw = IP6_NONE;
    memcpy(&raw, pos, nbytes);
    ADVANCE(pos, len, nbytes);

    net = NET_ADDR_VPN6(ip6_ntoh(raw), bits, rd);
    net_normalize_vpn6(&net);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &net, path_id, a);
  }
}


1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639
/*
 * Encode IPv4 flowspec entries from the head of @buck->prefixes into @buf.
 * Each entry is an optional 4-byte path ID followed by the stored flow data
 * (including its length field). Each encoded entry is released by
 * bgp_free_prefix(). Encoding stops at the first entry that does not fit
 * completely into the remaining @size. Returns the number of bytes written.
 */
static uint
bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_flow4 *net = (void *) px->net;
    uint flen = net->length - sizeof(net_addr_flow4);
    uint idlen = s->add_path ? 4 : 0;

    /*
     * Check the whole entry (path ID + flow) before writing anything, so that
     * no path ID without its flow is left in the output. The subtraction
     * cannot wrap as size >= 4 here.
     */
    if (flen > (size - idlen))
      break;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Copy whole flow data including length */
    memcpy(pos, net->data, flen);
    ADVANCE(pos, size, flen);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

/*
 * Decode a block of IPv4 flowspec NLRI entries (@pos, @len) and pass each
 * flow to bgp_rte_update() together with attributes @a (NULL for withdraws).
 * Truncated or invalid flows, and flows that do not start with a destination
 * prefix component, are reported through bgp_parse_error() with subcode 1.
 */
static void
bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      if (len < 4)
	bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    if (len < 2)
      bgp_parse_error(s, 1);

    /* Decode flow length */
    uint hlen = flow_hdr_length(pos);
    uint dlen = flow_read_length(pos);
    uint flen = hlen + dlen;
    byte *data = pos + hlen;

    if (len < flen)
      bgp_parse_error(s, 1);

    /* Validate flow data */
    enum flow_validated_state r = flow4_validate(data, dlen);
    if (r != FLOW_ST_VALID)
    {
      log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
      bgp_parse_error(s, 1);
    }

    /*
     * The first component has to be the destination prefix. The length test
     * keeps the data[0] and data[1] reads inside the flow even if the
     * validator accepted a flow shorter than type + prefix length.
     */
    if ((dlen < 2) || (data[0] != FLOW_TYPE_DST_PREFIX))
    {
      log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
      bgp_parse_error(s, 1);
    }

    /* Decode dst prefix */
    ip4_addr px = IP4_NONE;
    uint pxlen = data[1];

    // FIXME: Use some generic function
    /* NOTE(review): presumably flow4_validate() bounds pxlen so the copy fits into px - confirm */
    memcpy(&px, data+2, BYTES(pxlen));
    px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));

    /* Prepare the flow */
    net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
    net_fill_flow4(n, px, pxlen, pos, flen);
    ADVANCE(pos, len, flen);

    bgp_rte_update(s, n, path_id, a);
  }
}


/*
 * Encode IPv6 flowspec entries from the head of @buck->prefixes into @buf.
 * Each entry is an optional 4-byte path ID followed by the stored flow data
 * (including its length field). Each encoded entry is released by
 * bgp_free_prefix(). Encoding stops at the first entry that does not fit
 * completely into the remaining @size. Returns the number of bytes written.
 */
static uint
bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_flow6 *net = (void *) px->net;
    uint flen = net->length - sizeof(net_addr_flow6);
    uint idlen = s->add_path ? 4 : 0;

    /*
     * Check the whole entry (path ID + flow) before writing anything, so that
     * no path ID without its flow is left in the output. The subtraction
     * cannot wrap as size >= 4 here.
     */
    if (flen > (size - idlen))
      break;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /* Copy whole flow data including length */
    memcpy(pos, net->data, flen);
    ADVANCE(pos, size, flen);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}

static void
bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {