netlink.c 21.6 KB
Newer Older
1 2 3
/*
 *	BIRD -- Linux Netlink Interface
 *
 *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <errno.h>

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/timer.h"
#include "lib/unix.h"
#include "lib/krt.h"
#include "lib/socket.h"
#include "lib/string.h"
#include "conf/conf.h"

#include <asm/types.h>
#include <linux/if.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

Martin Mareš's avatar
Martin Mareš committed
33
/* Hack: several versions of glibc do not define MSG_TRUNC, so supply it here. */
#if !defined(MSG_TRUNC)
#  define MSG_TRUNC 0x20
#endif

/* Fallback for system headers that do not define IFF_LOWER_UP. */
#if !defined(IFF_LOWER_UP)
#  define IFF_LOWER_UP 0x10000
#endif

41 42 43 44
/*
 *	Synchronous Netlink interface
 */

45 46 47 48
struct nl_sock
{
  int fd;
  u32 seq;
49 50 51
  byte *rx_buffer;			/* Receive buffer */
  struct nlmsghdr *last_hdr;		/* Recently received packet */
  unsigned int last_size;
52 53
};

54
#define NL_RX_SIZE 8192
55

56 57 58
static struct nl_sock nl_scan = {.fd = -1};	/* Netlink socket for synchronous scan */
static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */

59
static void
60
nl_open_sock(struct nl_sock *nl)
61
{
62
  if (nl->fd < 0)
63
    {
64 65
      nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
      if (nl->fd < 0)
66
	die("Unable to open rtnetlink socket: %m");
67
      nl->seq = now;
68 69 70
      nl->rx_buffer = xmalloc(NL_RX_SIZE);
      nl->last_hdr = NULL;
      nl->last_size = 0;
71 72 73
    }
}

74
static void
75 76 77 78 79 80 81 82
nl_open(void)
{
  nl_open_sock(&nl_scan);
  nl_open_sock(&nl_req);
}

static void
nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
83 84 85 86 87 88
{
  struct sockaddr_nl sa;

  memset(&sa, 0, sizeof(sa));
  sa.nl_family = AF_NETLINK;
  nh->nlmsg_pid = 0;
89 90
  nh->nlmsg_seq = ++(nl->seq);
  if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
91
    die("rtnetlink sendto: %m");
92
  nl->last_hdr = NULL;
93 94 95 96 97 98 99 100 101 102
}

static void
nl_request_dump(int cmd)
{
  struct {
    struct nlmsghdr nh;
    struct rtgenmsg g;
  } req;
  req.nh.nlmsg_type = cmd;
103
  req.nh.nlmsg_len = sizeof(req);
104
  req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
105 106 107
  /* Is it important which PF_* is used for link-level interface scan?
     It seems that some information is available only when PF_INET is used. */
  req.g.rtgen_family = (cmd == RTM_GETLINK) ? PF_INET : BIRD_PF;
108
  nl_send(&nl_scan, &req.nh);
109 110 111
}

static struct nlmsghdr *
112
nl_get_reply(struct nl_sock *nl)
113 114 115
{
  for(;;)
    {
116
      if (!nl->last_hdr)
117
	{
118
	  struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
119 120
	  struct sockaddr_nl sa;
	  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
121
	  int x = recvmsg(nl->fd, &m, 0);
122 123
	  if (x < 0)
	    die("nl_get_reply: %m");
124 125 126 127 128
	  if (sa.nl_pid)		/* It isn't from the kernel */
	    {
	      DBG("Non-kernel packet\n");
	      continue;
	    }
129 130
	  nl->last_size = x;
	  nl->last_hdr = (void *) nl->rx_buffer;
131 132 133
	  if (m.msg_flags & MSG_TRUNC)
	    bug("nl_get_reply: got truncated reply which should be impossible");
	}
134
      if (NLMSG_OK(nl->last_hdr, nl->last_size))
135
	{
136 137
	  struct nlmsghdr *h = nl->last_hdr;
	  nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
138
	  if (h->nlmsg_seq != nl->seq)
139 140
	    {
	      log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
141
		  h->nlmsg_seq, nl->seq);
142 143 144 145
	      continue;
	    }
	  return h;
	}
146 147 148
      if (nl->last_size)
	log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
      nl->last_hdr = NULL;
149 150 151
    }
}

152 153
static struct rate_limit rl_netlink_err;

154
static int
155 156
nl_error(struct nlmsghdr *h)
{
157 158 159
  struct nlmsgerr *e;
  int ec;

160
  if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
161 162 163 164 165 166 167
    {
      log(L_WARN "Netlink: Truncated error message received");
      return ENOBUFS;
    }
  e = (struct nlmsgerr *) NLMSG_DATA(h);
  ec = -e->error;
  if (ec)
168
    log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
169
  return ec;
170 171 172 173 174
}

static struct nlmsghdr *
nl_get_scan(void)
{
175
  struct nlmsghdr *h = nl_get_reply(&nl_scan);
176 177 178 179 180

  if (h->nlmsg_type == NLMSG_DONE)
    return NULL;
  if (h->nlmsg_type == NLMSG_ERROR)
    {
181
      nl_error(h);
182 183 184 185 186
      return NULL;
    }
  return h;
}

187 188 189 190 191
static int
nl_exchange(struct nlmsghdr *pkt)
{
  struct nlmsghdr *h;

192
  nl_send(&nl_req, pkt);
193 194
  for(;;)
    {
195
      h = nl_get_reply(&nl_req);
196 197 198 199 200 201 202
      if (h->nlmsg_type == NLMSG_ERROR)
	break;
      log(L_WARN "nl_exchange: Unexpected reply received");
    }
  return nl_error(h);
}

203
/*
 *	Netlink attributes
 */

/* Bytes of attribute data not yet consumed in the message being parsed;
   set by nl_checkin(), decremented by nl_parse_attrs() */
static int nl_attr_len;

/*
 * Start parsing message @h whose fixed part is @lsize bytes long.
 * Records the length of the attribute area in nl_attr_len and returns a
 * pointer to the fixed part, or NULL (after logging) if the message is
 * shorter than the fixed part.
 */
static void *
nl_checkin(struct nlmsghdr *h, int lsize)
{
  nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
  if (nl_attr_len >= 0)
    return NLMSG_DATA(h);

  log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
  return NULL;
}

/*
 * Index the attributes starting at @a by their type.
 * @k is an array of @ksize bytes; k[type] is set to the (last) attribute of
 * that type, types not fitting into the array are ignored, missing types
 * stay NULL. Uses and consumes nl_attr_len.
 * Returns 1 on success, 0 (after logging) if unparsable bytes remain.
 */
static int
nl_parse_attrs(struct rtattr *a, struct rtattr **k, int ksize)
{
  int slots = ksize / sizeof(struct rtattr *);

  bzero(k, ksize);
  for (; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
    if (a->rta_type < slots)
      k[a->rta_type] = a;

  if (!nl_attr_len)
    return 1;

  log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
  return 0;
}

241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
/*
 * Append a 4-byte attribute of type @code with value @data to message @h.
 * @maxsize is the size of the buffer holding the message; exceeding it is
 * reported as an internal bug. h->nlmsg_len is updated.
 */
static void
nl_add_attr_u32(struct nlmsghdr *h, unsigned maxsize, int code, u32 data)
{
  unsigned start = NLMSG_ALIGN(h->nlmsg_len);	/* aligned end of current message */
  unsigned len = RTA_LENGTH(4);
  struct rtattr *attr;

  if (start + len > maxsize)
    bug("nl_add_attr32: packet buffer overflow");

  attr = (struct rtattr *)((char *)h + start);
  attr->rta_type = code;
  attr->rta_len = len;
  memcpy(RTA_DATA(attr), &data, 4);

  h->nlmsg_len = start + len;
}

/*
 * Append an IP address attribute of type @code to message @h.
 * The address is converted with ipa_hton() before being copied.
 * @maxsize is the size of the buffer holding the message; exceeding it is
 * reported as an internal bug. h->nlmsg_len is updated.
 */
static void
nl_add_attr_ipa(struct nlmsghdr *h, unsigned maxsize, int code, ip_addr ipa)
{
  unsigned start = NLMSG_ALIGN(h->nlmsg_len);	/* aligned end of current message */
  unsigned len = RTA_LENGTH(sizeof(ipa));
  struct rtattr *attr;

  if (start + len > maxsize)
    bug("nl_add_attr_ipa: packet buffer overflow");

  attr = (struct rtattr *)((char *)h + start);
  attr->rta_type = code;
  attr->rta_len = len;
  ipa_hton(ipa);
  memcpy(RTA_DATA(attr), &ipa, sizeof(ipa));

  h->nlmsg_len = start + len;
}

272 273 274 275 276
/*
 *	Scanning of interfaces
 */

static void
277
nl_parse_link(struct nlmsghdr *h, int scan)
278 279
{
  struct ifinfomsg *i;
280
  struct rtattr *a[IFLA_WIRELESS+1];
281 282 283 284 285 286
  int new = h->nlmsg_type == RTM_NEWLINK;
  struct iface f;
  struct iface *ifi;
  char *name;
  u32 mtu;
  unsigned int fl;
287 288 289

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, sizeof(a)))
    return;
290 291 292
  if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 ||
      !a[IFLA_MTU] || RTA_PAYLOAD(a[IFLA_MTU]) != 4)
    {
293
      if (scan || !a[IFLA_WIRELESS])
294
        log(L_ERR "nl_parse_link: Malformed message received");
295 296 297 298 299 300 301 302
      return;
    }
  name = RTA_DATA(a[IFLA_IFNAME]);
  memcpy(&mtu, RTA_DATA(a[IFLA_MTU]), sizeof(u32));

  ifi = if_find_by_index(i->ifi_index);
  if (!new)
    {
303
      DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
304 305 306
      if (ifi && !scan)
	{
	  memcpy(&f, ifi, sizeof(struct iface));
307
	  f.flags |= IF_SHUTDOWN;
308 309 310 311 312
	  if_update(&f);
	}
    }
  else
    {
313
      DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
314 315 316 317 318 319 320 321 322 323 324 325
      if (ifi)
	memcpy(&f, ifi, sizeof(f));
      else
	{
	  bzero(&f, sizeof(f));
	  f.index = i->ifi_index;
	}
      strncpy(f.name, RTA_DATA(a[IFLA_IFNAME]), sizeof(f.name)-1);
      f.mtu = mtu;
      f.flags = 0;
      fl = i->ifi_flags;
      if (fl & IFF_UP)
326 327
	f.flags |= IF_ADMIN_UP;
      if (fl & IFF_LOWER_UP)
328
	f.flags |= IF_LINK_UP;
329 330 331 332 333 334 335 336
      if (fl & IFF_LOOPBACK)		/* Loopback */
	f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
      else if (fl & IFF_POINTOPOINT)	/* PtP */
	f.flags |= IF_MULTICAST;
      else if (fl & IFF_BROADCAST)	/* Broadcast */
	f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
      else
	f.flags |= IF_MULTIACCESS;	/* NBMA */
337 338
      if_update(&f);
    }
339 340 341 342 343 344 345
}

static void
nl_parse_addr(struct nlmsghdr *h)
{
  struct ifaddrmsg *i;
  struct rtattr *a[IFA_ANYCAST+1];
346
  int new = h->nlmsg_type == RTM_NEWADDR;
347
  struct ifa ifa;
348
  struct iface *ifi;
349
  int scope;
350 351 352

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, sizeof(a)))
    return;
353
  if (i->ifa_family != BIRD_AF)
354
    return;
355 356 357 358 359 360 361 362
  if (!a[IFA_ADDRESS] || RTA_PAYLOAD(a[IFA_ADDRESS]) != sizeof(ip_addr)
#ifdef IPV6
      || a[IFA_LOCAL] && RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
#else
      || !a[IFA_LOCAL] || RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
      || (a[IFA_BROADCAST] && RTA_PAYLOAD(a[IFA_BROADCAST]) != sizeof(ip_addr))
#endif
      )
363 364 365 366 367 368 369 370
    {
      log(L_ERR "nl_parse_addr: Malformed message received");
      return;
    }

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
371
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
372 373 374
      return;
    }

375 376 377 378
  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (i->ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;
379

380 381
  /* IFA_LOCAL can be unset for IPv6 interfaces */
  memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
382
  ipa_ntoh(ifa.ip);
383
  ifa.pxlen = i->ifa_prefixlen;
384
  if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
385
    {
386 387 388 389 390
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      new = 0;
    }
  if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
    {
391 392 393 394 395 396 397 398 399 400 401
      ip_addr addr;
      memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
      ipa_ntoh(addr);
      ifa.prefix = ifa.brd = addr;

      /* It is either a peer address, or loopback/dummy address */
      if (!ipa_equal(ifa.ip, addr))
	{
	  ifa.flags |= IA_UNNUMBERED;
	  ifa.opposite = addr;
	}
402
    }
403
  else
404
    {
405
      ip_addr netmask = ipa_mkmask(ifa.pxlen);
406 407
      ifa.prefix = ipa_and(ifa.ip, netmask);
      ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
408 409 410
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);

411
#ifndef IPV6
412
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
413 414
	ifa.opposite = ipa_opposite_m2(ifa.ip);

415 416
      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
	{
417
	  ip_addr xbrd;
418 419 420 421
	  memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
	  ipa_ntoh(xbrd);
	  if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
	    ifa.brd = xbrd;
422
	  else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
423
	    log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
424
	}
425
#endif
426
    }
427

428 429 430
  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
431
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
432 433 434 435
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

436
  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
437 438
      ifi->index, ifi->name,
      new ? "added" : "removed",
439
      ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
440 441 442 443
  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);
444 445
}

446
void
447
krt_if_scan(struct kif_proto *p UNUSED)
448 449 450
{
  struct nlmsghdr *h;

451 452
  if_start_update();

453 454 455
  nl_request_dump(RTM_GETLINK);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
456
      nl_parse_link(h, 1);
457 458 459 460 461 462 463 464 465 466
    else
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);

  nl_request_dump(RTM_GETADDR);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
      nl_parse_addr(h);
    else
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);

467
  if_end_update();
468 469 470
}

/*
 *	Routes
 */

474
/* Maps a kernel routing table ID to the krt protocol instance registered for
   it in krt_scan_start(); consulted by nl_parse_route() */
static struct krt_proto *nl_table_map[NL_NUM_TABLES];
475

476
int
477 478
krt_capable(rte *e)
{
Martin Mareš's avatar
Martin Mareš committed
479 480
  rta *a = e->attrs;

481
  if (a->cast != RTC_UNICAST)
482
    return 0;
483

Martin Mareš's avatar
Martin Mareš committed
484 485 486
  switch (a->dest)
    {
    case RTD_ROUTER:
487 488
      if (ipa_has_link_scope(a->gw) && (a->iface == NULL))
	return 0;
Martin Mareš's avatar
Martin Mareš committed
489 490 491 492 493 494 495 496 497 498 499 500
    case RTD_DEVICE:
    case RTD_BLACKHOLE:
    case RTD_UNREACHABLE:
    case RTD_PROHIBIT:
      break;
    default:
      return 0;
    }
  return 1;
}

static void
501
nl_send_route(struct krt_proto *p, rte *e, int new)
Martin Mareš's avatar
Martin Mareš committed
502
{
503 504 505 506 507 508 509 510
  net *net = e->net;
  rta *a = e->attrs;
  struct {
    struct nlmsghdr h;
    struct rtmsg r;
    char buf[128];
  } r;

511 512
  DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);

513 514 515 516
  bzero(&r.h, sizeof(r.h));
  bzero(&r.r, sizeof(r.r));
  r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
  r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
517
  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
518

519
  r.r.rtm_family = BIRD_AF;
520
  r.r.rtm_dst_len = net->n.pxlen;
521 522
  r.r.rtm_tos = 0;
  r.r.rtm_table = KRT_CF->scan.table_id;
523
  r.r.rtm_protocol = RTPROT_BIRD;
524
  r.r.rtm_scope = RT_SCOPE_UNIVERSE;
525 526 527 528 529 530
  nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
  switch (a->dest)
    {
    case RTD_ROUTER:
      r.r.rtm_type = RTN_UNICAST;
      nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
531 532 533 534 535

      /* a->iface != NULL checked in krt_capable() */
      if (ipa_has_link_scope(a->gw))
	nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);

536 537
      break;
    case RTD_DEVICE:
538 539
      if (!a->iface)
	return;
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
      r.r.rtm_type = RTN_UNICAST;
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
      break;
    case RTD_BLACKHOLE:
      r.r.rtm_type = RTN_BLACKHOLE;
      break;
    case RTD_UNREACHABLE:
      r.r.rtm_type = RTN_UNREACHABLE;
      break;
    case RTD_PROHIBIT:
      r.r.rtm_type = RTN_PROHIBIT;
      break;
    default:
      bug("krt_capable inconsistent with nl_send_route");
    }
Martin Mareš's avatar
Martin Mareš committed
555

556
  nl_exchange(&r.h);
557
}
558

559
void
560
krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
561
{
562 563
  if (old)
    nl_send_route(p, old, 0);
564

565 566
  if (new)
    nl_send_route(p, new, 1);
Martin Mareš's avatar
Martin Mareš committed
567 568
}

569
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
Martin Mareš's avatar
Martin Mareš committed
570 571

static void
572
nl_parse_route(struct nlmsghdr *h, int scan)
Martin Mareš's avatar
Martin Mareš committed
573
{
574
  struct krt_proto *p;
Martin Mareš's avatar
Martin Mareš committed
575 576 577 578 579 580 581
  struct rtmsg *i;
  struct rtattr *a[RTA_CACHEINFO+1];
  int new = h->nlmsg_type == RTM_NEWROUTE;
  ip_addr dst;
  rte *e;
  net *net;
  u32 oif;
582
  int src;
Martin Mareš's avatar
Martin Mareš committed
583 584 585

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a)))
    return;
586
  if (i->rtm_family != BIRD_AF)
Martin Mareš's avatar
Martin Mareš committed
587 588 589
    return;
  if ((a[RTA_DST] && RTA_PAYLOAD(a[RTA_DST]) != sizeof(ip_addr)) ||
      (a[RTA_OIF] && RTA_PAYLOAD(a[RTA_OIF]) != 4) ||
590
      (a[RTA_PRIORITY] && RTA_PAYLOAD(a[RTA_PRIORITY]) != 4) ||
591 592 593
#ifdef IPV6
      (a[RTA_IIF] && RTA_PAYLOAD(a[RTA_IIF]) != 4) ||
#endif
Martin Mareš's avatar
Martin Mareš committed
594 595
      (a[RTA_GATEWAY] && RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr)))
    {
596
      log(L_ERR "KRT: Malformed message received");
Martin Mareš's avatar
Martin Mareš committed
597 598 599 600 601 602
      return;
    }

  if (a[RTA_DST])
    {
      memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
603
      ipa_ntoh(dst);
Martin Mareš's avatar
Martin Mareš committed
604 605 606
    }
  else
    dst = IPA_NONE;
607

Martin Mareš's avatar
Martin Mareš committed
608 609 610 611 612
  if (a[RTA_OIF])
    memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif));
  else
    oif = ~0;

613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
  DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p->p.name);

  p = nl_table_map[i->rtm_table];	/* Do we know this table? */
  if (!p)
    SKIP("unknown table %d", i->rtm_table);

#ifdef IPV6
  if (a[RTA_IIF])
    SKIP("IIF set\n");
#else
  if (i->rtm_tos != 0)			/* We don't support TOS */
    SKIP("TOS %02x\n", i->rtm_tos);
#endif

  if (scan && !new)
    SKIP("RTM_DELROUTE in scan\n");

  int c = ipa_classify_net(dst);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
    SKIP("strange class/scope\n");

  // ignore rtm_scope, it is not a real scope
  // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
  //   SKIP("scope %u\n", i->rtm_scope);
Martin Mareš's avatar
Martin Mareš committed
637

638 639
  switch (i->rtm_protocol)
    {
640 641 642
    case RTPROT_UNSPEC:
      SKIP("proto unspec\n");

643 644 645
    case RTPROT_REDIRECT:
      src = KRT_SRC_REDIRECT;
      break;
646

647
    case RTPROT_KERNEL:
648
      src = KRT_SRC_KERNEL;
649
      return;
650

651 652
    case RTPROT_BIRD:
      if (!scan)
653
	SKIP("echo\n");
654 655
      src = KRT_SRC_BIRD;
      break;
656 657

    case RTPROT_BOOT:
658 659 660 661
    default:
      src = KRT_SRC_ALIEN;
    }

662
  net = net_get(p->p.table, dst, i->rtm_dst_len);
663 664 665 666 667 668 669

  rta ra = {
    .proto = &p->p,
    .source = RTS_INHERIT,
    .scope = SCOPE_UNIVERSE,
    .cast = RTC_UNICAST
  };
Martin Mareš's avatar
Martin Mareš committed
670 671 672 673

  switch (i->rtm_type)
    {
    case RTN_UNICAST:
674 675
      ra.iface = if_find_by_index(oif);
      if (!ra.iface)
Martin Mareš's avatar
Martin Mareš committed
676
	{
677 678
	  log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
	      net->n.prefix, net->n.pxlen, oif);
Martin Mareš's avatar
Martin Mareš committed
679 680
	  return;
	}
681

Martin Mareš's avatar
Martin Mareš committed
682 683 684 685 686
      if (a[RTA_GATEWAY])
	{
	  neighbor *ng;
	  ra.dest = RTD_ROUTER;
	  memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
687
	  ipa_ntoh(ra.gw);
688

689 690 691
	  ng = neigh_find2(&p->p, &ra.gw, ra.iface,
			   (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
	  if (!ng || (ng->scope == SCOPE_HOST))
692
	    {
693 694 695
	      log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
		  net->n.prefix, net->n.pxlen, ra.gw);
	      return;
696
	    }
Martin Mareš's avatar
Martin Mareš committed
697 698 699 700
	}
      else
	{
	  ra.dest = RTD_DEVICE;
701 702 703 704 705 706 707 708 709 710 711 712 713

	  /*
	   * In Linux IPv6, 'native' device routes have proto
	   * RTPROT_BOOT and not RTPROT_KERNEL (which they have in
	   * IPv4 and which is expected). We cannot distinguish
	   * 'native' and user defined device routes, so we ignore all
	   * such device routes and for consistency, we have the same
	   * behavior in IPv4. Anyway, users should use RTPROT_STATIC
	   * for their 'alien' routes.
	   */

	  if (i->rtm_protocol == RTPROT_BOOT)
	    src = KRT_SRC_KERNEL;
Martin Mareš's avatar
Martin Mareš committed
714
	}
715

Martin Mareš's avatar
Martin Mareš committed
716 717 718 719 720 721 722 723 724 725 726 727
      break;
    case RTN_BLACKHOLE:
      ra.dest = RTD_BLACKHOLE;
      break;
    case RTN_UNREACHABLE:
      ra.dest = RTD_UNREACHABLE;
      break;
    case RTN_PROHIBIT:
      ra.dest = RTD_PROHIBIT;
      break;
    /* FIXME: What about RTN_THROW? */
    default:
728
      SKIP("type %d\n", i->rtm_type);
729 730 731
      return;
    }

Martin Mareš's avatar
Martin Mareš committed
732 733
  e = rte_get_temp(&ra);
  e->net = net;
734 735 736 737 738 739 740
  e->u.krt.src = src;
  e->u.krt.proto = i->rtm_protocol;
  e->u.krt.type = i->rtm_type;
  if (a[RTA_PRIORITY])
    memcpy(&e->u.krt.metric, RTA_DATA(a[RTA_PRIORITY]), sizeof(e->u.krt.metric));
  else
    e->u.krt.metric = 0;
741 742 743 744
  if (scan)
    krt_got_route(p, e);
  else
    krt_got_route_async(p, e, new);
745 746 747
}

void
748
krt_scan_fire(struct krt_proto *p UNUSED)	/* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
749
{
Martin Mareš's avatar
Martin Mareš committed
750 751 752 753 754
  struct nlmsghdr *h;

  nl_request_dump(RTM_GETROUTE);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
755
      nl_parse_route(h, 1);
Martin Mareš's avatar
Martin Mareš committed
756 757
    else
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
758 759 760
}

/*
 *	Asynchronous Netlink interface
 */

764
static sock *nl_async_sk;		/* BIRD socket for asynchronous notifications */
765 766 767
static byte *nl_async_rx_buffer;	/* Receive buffer */

/*
 * Dispatch one asynchronously received netlink message to the parser
 * matching its type; messages of other types are only noted in the debug
 * output.
 */
static void
nl_async_msg(struct nlmsghdr *h)
{
  switch (h->nlmsg_type)
    {
    case RTM_NEWROUTE:
    case RTM_DELROUTE:
      /* Route change */
      DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
      nl_parse_route(h, 0);
      break;

    case RTM_NEWLINK:
    case RTM_DELLINK:
      /* Interface change */
      DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
      nl_parse_link(h, 0);
      break;

    case RTM_NEWADDR:
    case RTM_DELADDR:
      /* Interface address change */
      DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
      nl_parse_addr(h);
      break;

    default:
      DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
    }
}
791

792
static int
793
nl_async_hook(sock *sk, int size UNUSED)
794
{
795 796 797 798 799 800 801 802 803 804
  struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
  struct sockaddr_nl sa;
  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
  struct nlmsghdr *h;
  int x;
  unsigned int len;

  x = recvmsg(sk->fd, &m, 0);
  if (x < 0)
    {
805 806 807 808 809 810 811 812 813 814
      if (errno == ENOBUFS)
	{
	  /*
	   *  Netlink reports some packets have been thrown away.
	   *  One day we might react to it by asking for route table
	   *  scan in near future.
	   */
	  return 1;	/* More data are likely to be ready */
	}
      else if (errno != EWOULDBLOCK)
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
	log(L_ERR "Netlink recvmsg: %m");
      return 0;
    }
  if (sa.nl_pid)		/* It isn't from the kernel */
    {
      DBG("Non-kernel packet\n");
      return 1;
    }
  h = (void *) nl_async_rx_buffer;
  len = x;
  if (m.msg_flags & MSG_TRUNC)
    {
      log(L_WARN "Netlink got truncated asynchronous message");
      return 1;
    }
  while (NLMSG_OK(h, len))
    {
832
      nl_async_msg(h);
833 834 835 836 837
      h = NLMSG_NEXT(h, len);
    }
  if (len)
    log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
  return 1;
838 839
}

840
static void
841
nl_open_async(void)
842 843 844
{
  sock *sk;
  struct sockaddr_nl sa;
845
  int fd;
846 847 848 849 850
  static int nl_open_tried = 0;

  if (nl_open_tried)
    return;
  nl_open_tried = 1;
851

852 853
  DBG("KRT: Opening async netlink socket\n");

854 855 856
  fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (fd < 0)
    {
857
      log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
858 859
      return;
    }
860 861 862

  bzero(&sa, sizeof(sa));
  sa.nl_family = AF_NETLINK;
863 864 865
#ifdef IPV6
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
#else
866
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
867
#endif
868 869
  if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
    {
870
      log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
871 872 873
      return;
    }

874
  sk = nl_async_sk = sk_new(krt_pool);
875 876 877 878 879 880 881 882
  sk->type = SK_MAGIC;
  sk->rx_hook = nl_async_hook;
  sk->fd = fd;
  if (sk_open(sk))
    bug("Netlink: sk_open failed");

  if (!nl_async_rx_buffer)
    nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
883 884
}

885 886 887
/*
 *	Interface to the UNIX krt module
 */
888

889
static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8];
890

891
void
892
krt_scan_preconfig(struct config *c UNUSED)
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908
{
  bzero(&nl_cf_table, sizeof(nl_cf_table));
}

void
krt_scan_postconfig(struct krt_config *x)
{
  int id = x->scan.table_id;

  if (nl_cf_table[id/8] & (1 << (id%8)))
    cf_error("Multiple kernel syncers defined for table #%d", id);
  nl_cf_table[id/8] |= (1 << (id%8));
}

void
krt_scan_construct(struct krt_config *x)
909
{
910
#ifndef IPV6
911
  x->scan.table_id = RT_TABLE_MAIN;
912 913
#else
  x->scan.table_id = 254;
914
#endif
915 916
}

917
void
918
krt_scan_start(struct krt_proto *p, int first)
919
{
Martin Mareš's avatar
Martin Mareš committed
920
  init_list(&p->scan.temp_ifs);
921 922 923 924
  nl_table_map[KRT_CF->scan.table_id] = p;
  if (first)
    {
      nl_open();
925
      nl_open_async();
926
    }
927 928 929
}

void
930
krt_scan_shutdown(struct krt_proto *p UNUSED, int last UNUSED)
931 932
{
}
933 934

void
935
krt_if_start(struct kif_proto *p UNUSED)
936 937
{
  nl_open();
938
  nl_open_async();
939
}