/*
 *	BIRD -- Linux Netlink Interface
 *
 *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

#include <stdio.h>
#include <fcntl.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <errno.h>
#include <unistd.h>

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/timer.h"
#include "lib/unix.h"
#include "lib/krt.h"
#include "lib/socket.h"
#include "lib/string.h"
#include "conf/conf.h"

#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

Martin Mareš's avatar
Martin Mareš committed
33
/* Hack: Several versions of glibc miss this one :( */
#ifndef MSG_TRUNC
#define MSG_TRUNC 0x20
#endif
36

37 38 39 40
/*
 *	Synchronous Netlink interface
 */

41 42 43 44
struct nl_sock
{
  int fd;
  u32 seq;
45 46 47
  byte *rx_buffer;			/* Receive buffer */
  struct nlmsghdr *last_hdr;		/* Recently received packet */
  unsigned int last_size;
48 49
};

50
/* Size in bytes of each netlink receive buffer allocated in this file */
#define NL_RX_SIZE 8192
51

52 53 54
/* Both start with fd = -1, i.e. not opened yet; nl_open_sock() opens them on first use. */
static struct nl_sock nl_scan = {.fd = -1};	/* Netlink socket for synchronous scan */
static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */

55
static void
56
nl_open_sock(struct nl_sock *nl)
57
{
58
  if (nl->fd < 0)
59
    {
60 61
      nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
      if (nl->fd < 0)
62
	die("Unable to open rtnetlink socket: %m");
63
      nl->seq = now;
64 65 66
      nl->rx_buffer = xmalloc(NL_RX_SIZE);
      nl->last_hdr = NULL;
      nl->last_size = 0;
67 68 69
    }
}

70
static void
71 72 73 74 75 76 77 78
nl_open(void)
{
  nl_open_sock(&nl_scan);
  nl_open_sock(&nl_req);
}

static void
nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
79 80 81 82 83 84
{
  struct sockaddr_nl sa;

  memset(&sa, 0, sizeof(sa));
  sa.nl_family = AF_NETLINK;
  nh->nlmsg_pid = 0;
85 86
  nh->nlmsg_seq = ++(nl->seq);
  if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
87
    die("rtnetlink sendto: %m");
88
  nl->last_hdr = NULL;
89 90 91 92 93 94 95 96 97 98
}

static void
nl_request_dump(int cmd)
{
  struct {
    struct nlmsghdr nh;
    struct rtgenmsg g;
  } req;
  req.nh.nlmsg_type = cmd;
99
  req.nh.nlmsg_len = sizeof(req);
100
  req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
101 102 103
  /* Is it important which PF_* is used for link-level interface scan?
     It seems that some information is available only when PF_INET is used. */
  req.g.rtgen_family = (cmd == RTM_GETLINK) ? PF_INET : BIRD_PF;
104
  nl_send(&nl_scan, &req.nh);
105 106 107
}

static struct nlmsghdr *
108
nl_get_reply(struct nl_sock *nl)
109 110 111
{
  for(;;)
    {
112
      if (!nl->last_hdr)
113
	{
114
	  struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
115 116
	  struct sockaddr_nl sa;
	  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
117
	  int x = recvmsg(nl->fd, &m, 0);
118 119
	  if (x < 0)
	    die("nl_get_reply: %m");
120 121 122 123 124
	  if (sa.nl_pid)		/* It isn't from the kernel */
	    {
	      DBG("Non-kernel packet\n");
	      continue;
	    }
125 126
	  nl->last_size = x;
	  nl->last_hdr = (void *) nl->rx_buffer;
127 128 129
	  if (m.msg_flags & MSG_TRUNC)
	    bug("nl_get_reply: got truncated reply which should be impossible");
	}
130
      if (NLMSG_OK(nl->last_hdr, nl->last_size))
131
	{
132 133
	  struct nlmsghdr *h = nl->last_hdr;
	  nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
134
	  if (h->nlmsg_seq != nl->seq)
135 136
	    {
	      log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
137
		  h->nlmsg_seq, nl->seq);
138 139 140 141
	      continue;
	    }
	  return h;
	}
142 143 144
      if (nl->last_size)
	log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
      nl->last_hdr = NULL;
145 146 147
    }
}

148 149
/* Rate limiter passed to log_rl() for netlink error reports in nl_error() */
static struct rate_limit rl_netlink_err;

150
static int
151 152
nl_error(struct nlmsghdr *h)
{
153 154 155
  struct nlmsgerr *e;
  int ec;

156
  if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
157 158 159 160 161 162 163
    {
      log(L_WARN "Netlink: Truncated error message received");
      return ENOBUFS;
    }
  e = (struct nlmsgerr *) NLMSG_DATA(h);
  ec = -e->error;
  if (ec)
164
    log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
165
  return ec;
166 167 168 169 170
}

static struct nlmsghdr *
nl_get_scan(void)
{
171
  struct nlmsghdr *h = nl_get_reply(&nl_scan);
172 173 174 175 176

  if (h->nlmsg_type == NLMSG_DONE)
    return NULL;
  if (h->nlmsg_type == NLMSG_ERROR)
    {
177
      nl_error(h);
178 179 180 181 182
      return NULL;
    }
  return h;
}

183 184 185 186 187
static int
nl_exchange(struct nlmsghdr *pkt)
{
  struct nlmsghdr *h;

188
  nl_send(&nl_req, pkt);
189 190
  for(;;)
    {
191
      h = nl_get_reply(&nl_req);
192 193 194 195 196 197 198
      if (h->nlmsg_type == NLMSG_ERROR)
	break;
      log(L_WARN "nl_exchange: Unexpected reply received");
    }
  return nl_error(h);
}

199
/*
 *	Netlink attributes
 */

/* Bytes of attribute data not yet consumed; set by nl_checkin(), decremented by nl_parse_attrs() */
static int nl_attr_len;

/*
 * nl_checkin - validate the fixed part of a message and locate its payload
 * @h: received message
 * @lsize: size of the fixed, type-specific header expected after the nlmsghdr
 *
 * Stores the length of the attribute area in nl_attr_len. Returns a pointer
 * to the message payload, or NULL (after logging) when the message is too
 * short to hold @lsize bytes.
 */
static void *
nl_checkin(struct nlmsghdr *h, int lsize)
{
  nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
  if (nl_attr_len >= 0)
    return NLMSG_DATA(h);

  log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
  return NULL;
}

/*
 * nl_parse_attrs - index the attributes of a message by type
 * @a: first attribute
 * @k: output table, cleared first; k[type] points at the last attribute of
 *     that type, attributes with a type beyond the table are ignored
 * @ksize: size of @k in bytes
 *
 * Consumes nl_attr_len as set up by nl_checkin(). Returns 1 on success and
 * 0 (after logging) when bytes are left over after the last attribute.
 */
static int
nl_parse_attrs(struct rtattr *a, struct rtattr **k, int ksize)
{
  int slots = ksize / sizeof(struct rtattr *);

  bzero(k, ksize);
  for (; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
    if (a->rta_type < slots)
      k[a->rta_type] = a;

  if (nl_attr_len == 0)
    return 1;

  log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
  return 0;
}

237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
/*
 * nl_add_attr_u32 - append a 32-bit attribute to a message under construction
 * @h: message header; nlmsg_len is advanced past the new attribute
 * @maxsize: total size of the buffer holding the message
 * @code: attribute type
 * @data: attribute value, copied as is
 *
 * Calls bug() when the attribute would not fit into @maxsize bytes.
 */
static void
nl_add_attr_u32(struct nlmsghdr *h, unsigned maxsize, int code, u32 data)
{
  unsigned len = RTA_LENGTH(sizeof(data));
  unsigned pos = NLMSG_ALIGN(h->nlmsg_len);	/* Attributes start on aligned offsets */
  struct rtattr *a;

  if (pos + len > maxsize)
    bug("nl_add_attr_u32: packet buffer overflow");
  a = (struct rtattr *)((char *)h + pos);
  a->rta_type = code;
  a->rta_len = len;
  memcpy(RTA_DATA(a), &data, sizeof(data));
  h->nlmsg_len = pos + len;
}

/*
 * nl_add_attr_ipa - append an IP address attribute to a message
 * @h: message header; nlmsg_len is advanced past the new attribute
 * @maxsize: total size of the buffer holding the message
 * @code: attribute type
 * @ipa: address; passed through ipa_hton() before being copied
 *
 * Calls bug() when the attribute would not fit into @maxsize bytes.
 */
static void
nl_add_attr_ipa(struct nlmsghdr *h, unsigned maxsize, int code, ip_addr ipa)
{
  unsigned attr_len = RTA_LENGTH(sizeof(ipa));
  unsigned offset = NLMSG_ALIGN(h->nlmsg_len);
  struct rtattr *attr;

  if (offset + attr_len > maxsize)
    bug("nl_add_attr_ipa: packet buffer overflow");

  attr = (struct rtattr *)((char *)h + offset);
  attr->rta_len = attr_len;
  attr->rta_type = code;
  ipa_hton(ipa);
  memcpy(RTA_DATA(attr), &ipa, sizeof(ipa));
  h->nlmsg_len = offset + attr_len;
}

268 269 270 271 272
/*
 *	Scanning of interfaces
 */

static void
273
nl_parse_link(struct nlmsghdr *h, int scan)
274 275
{
  struct ifinfomsg *i;
276
  struct rtattr *a[IFLA_WIRELESS+1];
277 278 279 280 281 282
  int new = h->nlmsg_type == RTM_NEWLINK;
  struct iface f;
  struct iface *ifi;
  char *name;
  u32 mtu;
  unsigned int fl;
283 284 285

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, sizeof(a)))
    return;
286 287 288
  if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 ||
      !a[IFLA_MTU] || RTA_PAYLOAD(a[IFLA_MTU]) != 4)
    {
289
      if (scan || !a[IFLA_WIRELESS])
290
        log(L_ERR "nl_parse_link: Malformed message received");
291 292 293 294 295 296 297 298
      return;
    }
  name = RTA_DATA(a[IFLA_IFNAME]);
  memcpy(&mtu, RTA_DATA(a[IFLA_MTU]), sizeof(u32));

  ifi = if_find_by_index(i->ifi_index);
  if (!new)
    {
299
      DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
300 301 302 303 304 305 306 307 308
      if (ifi && !scan)
	{
	  memcpy(&f, ifi, sizeof(struct iface));
	  f.flags |= IF_ADMIN_DOWN;
	  if_update(&f);
	}
    }
  else
    {
309
      DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
310 311 312 313 314 315 316 317 318 319 320 321 322
      if (ifi)
	memcpy(&f, ifi, sizeof(f));
      else
	{
	  bzero(&f, sizeof(f));
	  f.index = i->ifi_index;
	}
      strncpy(f.name, RTA_DATA(a[IFLA_IFNAME]), sizeof(f.name)-1);
      f.mtu = mtu;
      f.flags = 0;
      fl = i->ifi_flags;
      if (fl & IFF_UP)
	f.flags |= IF_LINK_UP;
323 324 325 326 327 328 329 330
      if (fl & IFF_LOOPBACK)		/* Loopback */
	f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
      else if (fl & IFF_POINTOPOINT)	/* PtP */
	f.flags |= IF_MULTICAST;
      else if (fl & IFF_BROADCAST)	/* Broadcast */
	f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
      else
	f.flags |= IF_MULTIACCESS;	/* NBMA */
331 332
      if_update(&f);
    }
333 334 335 336 337 338 339
}

static void
nl_parse_addr(struct nlmsghdr *h)
{
  struct ifaddrmsg *i;
  struct rtattr *a[IFA_ANYCAST+1];
340
  int new = h->nlmsg_type == RTM_NEWADDR;
341
  struct ifa ifa;
342
  struct iface *ifi;
343
  int scope;
344 345 346

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, sizeof(a)))
    return;
347
  if (i->ifa_family != BIRD_AF)
348
    return;
349 350 351 352 353 354 355 356
  if (!a[IFA_ADDRESS] || RTA_PAYLOAD(a[IFA_ADDRESS]) != sizeof(ip_addr)
#ifdef IPV6
      || a[IFA_LOCAL] && RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
#else
      || !a[IFA_LOCAL] || RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
      || (a[IFA_BROADCAST] && RTA_PAYLOAD(a[IFA_BROADCAST]) != sizeof(ip_addr))
#endif
      )
357 358 359 360 361 362 363 364
    {
      log(L_ERR "nl_parse_addr: Malformed message received");
      return;
    }

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
365
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
366 367 368
      return;
    }

369 370 371 372
  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (i->ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;
373

374 375
  /* IFA_LOCAL can be unset for IPv6 interfaces */
  memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
376
  ipa_ntoh(ifa.ip);
377
  ifa.pxlen = i->ifa_prefixlen;
378
  if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
379
    {
380 381 382 383 384
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      new = 0;
    }
  if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
    {
385 386 387 388 389 390 391 392 393 394 395
      ip_addr addr;
      memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
      ipa_ntoh(addr);
      ifa.prefix = ifa.brd = addr;

      /* It is either a peer address, or loopback/dummy address */
      if (!ipa_equal(ifa.ip, addr))
	{
	  ifa.flags |= IA_UNNUMBERED;
	  ifa.opposite = addr;
	}
396
    }
397
  else
398
    {
399
      ip_addr netmask = ipa_mkmask(ifa.pxlen);
400 401
      ifa.prefix = ipa_and(ifa.ip, netmask);
      ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
402 403 404
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);

405
#ifndef IPV6
406
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
407 408
	ifa.opposite = ipa_opposite_m2(ifa.ip);

409 410
      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
	{
411
	  ip_addr xbrd;
412 413 414 415
	  memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
	  ipa_ntoh(xbrd);
	  if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
	    ifa.brd = xbrd;
416
	  else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
417
	    log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
418
	}
419
#endif
420
    }
421

422 423 424
  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
425
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
426 427 428 429
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

430
  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
431 432
      ifi->index, ifi->name,
      new ? "added" : "removed",
433
      ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
434 435 436 437
  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);
438 439
}

440
void
441
krt_if_scan(struct kif_proto *p UNUSED)
442 443 444
{
  struct nlmsghdr *h;

445 446
  if_start_update();

447 448 449
  nl_request_dump(RTM_GETLINK);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
450
      nl_parse_link(h, 1);
451 452 453 454 455 456 457 458 459 460
    else
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);

  nl_request_dump(RTM_GETADDR);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
      nl_parse_addr(h);
    else
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);

461
  if_end_update();
462 463 464
}

/*
 *	Routes
 */

468
/* Kernel table id -> krt protocol instance syncing it; filled by krt_scan_start(), NULL = unknown table */
static struct krt_proto *nl_table_map[NL_NUM_TABLES];
469

470
int
471 472
krt_capable(rte *e)
{
Martin Mareš's avatar
Martin Mareš committed
473 474
  rta *a = e->attrs;

475
  if (a->cast != RTC_UNICAST)
476
    return 0;
477

Martin Mareš's avatar
Martin Mareš committed
478 479 480
  switch (a->dest)
    {
    case RTD_ROUTER:
481 482
      if (ipa_has_link_scope(a->gw) && (a->iface == NULL))
	return 0;
Martin Mareš's avatar
Martin Mareš committed
483 484 485 486 487 488 489 490 491 492 493 494
    case RTD_DEVICE:
    case RTD_BLACKHOLE:
    case RTD_UNREACHABLE:
    case RTD_PROHIBIT:
      break;
    default:
      return 0;
    }
  return 1;
}

static void
495
nl_send_route(struct krt_proto *p, rte *e, int new)
Martin Mareš's avatar
Martin Mareš committed
496
{
497 498 499 500 501 502 503 504
  net *net = e->net;
  rta *a = e->attrs;
  struct {
    struct nlmsghdr h;
    struct rtmsg r;
    char buf[128];
  } r;

505 506
  DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);

507 508 509 510
  bzero(&r.h, sizeof(r.h));
  bzero(&r.r, sizeof(r.r));
  r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
  r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
511
  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
512

513
  r.r.rtm_family = BIRD_AF;
514
  r.r.rtm_dst_len = net->n.pxlen;
515 516
  r.r.rtm_tos = 0;
  r.r.rtm_table = KRT_CF->scan.table_id;
517
  r.r.rtm_protocol = RTPROT_BIRD;
518
  r.r.rtm_scope = RT_SCOPE_UNIVERSE;
519 520 521 522 523 524
  nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
  switch (a->dest)
    {
    case RTD_ROUTER:
      r.r.rtm_type = RTN_UNICAST;
      nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
525 526 527 528 529

      /* a->iface != NULL checked in krt_capable() */
      if (ipa_has_link_scope(a->gw))
	nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);

530 531
      break;
    case RTD_DEVICE:
532 533
      if (!a->iface)
	return;
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
      r.r.rtm_type = RTN_UNICAST;
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
      break;
    case RTD_BLACKHOLE:
      r.r.rtm_type = RTN_BLACKHOLE;
      break;
    case RTD_UNREACHABLE:
      r.r.rtm_type = RTN_UNREACHABLE;
      break;
    case RTD_PROHIBIT:
      r.r.rtm_type = RTN_PROHIBIT;
      break;
    default:
      bug("krt_capable inconsistent with nl_send_route");
    }
Martin Mareš's avatar
Martin Mareš committed
549

550
  nl_exchange(&r.h);
551
}
552

553
void
554
krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
555
{
556 557
  if (old)
    nl_send_route(p, old, 0);
558

559 560
  if (new)
    nl_send_route(p, new, 1);
Martin Mareš's avatar
Martin Mareš committed
561 562
}

563
/* Emit a debug message explaining why a route is ignored and return from the enclosing (void) function */
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
Martin Mareš's avatar
Martin Mareš committed
564 565

static void
566
nl_parse_route(struct nlmsghdr *h, int scan)
Martin Mareš's avatar
Martin Mareš committed
567
{
568
  struct krt_proto *p;
Martin Mareš's avatar
Martin Mareš committed
569 570 571 572 573 574 575
  struct rtmsg *i;
  struct rtattr *a[RTA_CACHEINFO+1];
  int new = h->nlmsg_type == RTM_NEWROUTE;
  ip_addr dst;
  rte *e;
  net *net;
  u32 oif;
576
  int src;
Martin Mareš's avatar
Martin Mareš committed
577 578 579

  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a)))
    return;
580
  if (i->rtm_family != BIRD_AF)
Martin Mareš's avatar
Martin Mareš committed
581 582 583
    return;
  if ((a[RTA_DST] && RTA_PAYLOAD(a[RTA_DST]) != sizeof(ip_addr)) ||
      (a[RTA_OIF] && RTA_PAYLOAD(a[RTA_OIF]) != 4) ||
584
      (a[RTA_PRIORITY] && RTA_PAYLOAD(a[RTA_PRIORITY]) != 4) ||
585 586 587
#ifdef IPV6
      (a[RTA_IIF] && RTA_PAYLOAD(a[RTA_IIF]) != 4) ||
#endif
Martin Mareš's avatar
Martin Mareš committed
588 589
      (a[RTA_GATEWAY] && RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr)))
    {
590
      log(L_ERR "KRT: Malformed message received");
Martin Mareš's avatar
Martin Mareš committed
591 592 593 594 595 596
      return;
    }

  if (a[RTA_DST])
    {
      memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
597
      ipa_ntoh(dst);
Martin Mareš's avatar
Martin Mareš committed
598 599 600
    }
  else
    dst = IPA_NONE;
601

Martin Mareš's avatar
Martin Mareš committed
602 603 604 605 606
  if (a[RTA_OIF])
    memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif));
  else
    oif = ~0;

607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
  DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p->p.name);

  p = nl_table_map[i->rtm_table];	/* Do we know this table? */
  if (!p)
    SKIP("unknown table %d", i->rtm_table);

#ifdef IPV6
  if (a[RTA_IIF])
    SKIP("IIF set\n");
#else
  if (i->rtm_tos != 0)			/* We don't support TOS */
    SKIP("TOS %02x\n", i->rtm_tos);
#endif

  if (scan && !new)
    SKIP("RTM_DELROUTE in scan\n");

  int c = ipa_classify_net(dst);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
    SKIP("strange class/scope\n");

  // ignore rtm_scope, it is not a real scope
  // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
  //   SKIP("scope %u\n", i->rtm_scope);
Martin Mareš's avatar
Martin Mareš committed
631

632 633
  switch (i->rtm_protocol)
    {
634 635 636
    case RTPROT_UNSPEC:
      SKIP("proto unspec\n");

637 638 639
    case RTPROT_REDIRECT:
      src = KRT_SRC_REDIRECT;
      break;
640

641
    case RTPROT_KERNEL:
642
      src = KRT_SRC_KERNEL;
643
      return;
644

645 646
    case RTPROT_BIRD:
      if (!scan)
647
	SKIP("echo\n");
648 649
      src = KRT_SRC_BIRD;
      break;
650 651

    case RTPROT_BOOT:
652 653 654 655
    default:
      src = KRT_SRC_ALIEN;
    }

656
  net = net_get(p->p.table, dst, i->rtm_dst_len);
657 658 659 660 661 662 663

  rta ra = {
    .proto = &p->p,
    .source = RTS_INHERIT,
    .scope = SCOPE_UNIVERSE,
    .cast = RTC_UNICAST
  };
Martin Mareš's avatar
Martin Mareš committed
664 665 666 667

  switch (i->rtm_type)
    {
    case RTN_UNICAST:
668 669
      ra.iface = if_find_by_index(oif);
      if (!ra.iface)
Martin Mareš's avatar
Martin Mareš committed
670
	{
671 672
	  log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
	      net->n.prefix, net->n.pxlen, oif);
Martin Mareš's avatar
Martin Mareš committed
673 674
	  return;
	}
675

Martin Mareš's avatar
Martin Mareš committed
676 677 678 679 680
      if (a[RTA_GATEWAY])
	{
	  neighbor *ng;
	  ra.dest = RTD_ROUTER;
	  memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
681
	  ipa_ntoh(ra.gw);
682

683 684 685
	  ng = neigh_find2(&p->p, &ra.gw, ra.iface,
			   (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
	  if (!ng || (ng->scope == SCOPE_HOST))
686
	    {
687 688 689
	      log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
		  net->n.prefix, net->n.pxlen, ra.gw);
	      return;
690
	    }
Martin Mareš's avatar
Martin Mareš committed
691 692 693 694
	}
      else
	{
	  ra.dest = RTD_DEVICE;
695 696 697 698 699 700 701 702 703 704 705 706 707

	  /*
	   * In Linux IPv6, 'native' device routes have proto
	   * RTPROT_BOOT and not RTPROT_KERNEL (which they have in
	   * IPv4 and which is expected). We cannot distinguish
	   * 'native' and user defined device routes, so we ignore all
	   * such device routes and for consistency, we have the same
	   * behavior in IPv4. Anyway, users should use RTPROT_STATIC
	   * for their 'alien' routes.
	   */

	  if (i->rtm_protocol == RTPROT_BOOT)
	    src = KRT_SRC_KERNEL;
Martin Mareš's avatar
Martin Mareš committed
708
	}
709

Martin Mareš's avatar
Martin Mareš committed
710 711 712 713 714 715 716 717 718 719 720 721
      break;
    case RTN_BLACKHOLE:
      ra.dest = RTD_BLACKHOLE;
      break;
    case RTN_UNREACHABLE:
      ra.dest = RTD_UNREACHABLE;
      break;
    case RTN_PROHIBIT:
      ra.dest = RTD_PROHIBIT;
      break;
    /* FIXME: What about RTN_THROW? */
    default:
722
      SKIP("type %d\n", i->rtm_type);
723 724 725
      return;
    }

Martin Mareš's avatar
Martin Mareš committed
726 727
  e = rte_get_temp(&ra);
  e->net = net;
728 729 730 731 732 733 734
  e->u.krt.src = src;
  e->u.krt.proto = i->rtm_protocol;
  e->u.krt.type = i->rtm_type;
  if (a[RTA_PRIORITY])
    memcpy(&e->u.krt.metric, RTA_DATA(a[RTA_PRIORITY]), sizeof(e->u.krt.metric));
  else
    e->u.krt.metric = 0;
735 736 737 738
  if (scan)
    krt_got_route(p, e);
  else
    krt_got_route_async(p, e, new);
739 740 741
}

void
742
krt_scan_fire(struct krt_proto *p UNUSED)	/* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
743
{
Martin Mareš's avatar
Martin Mareš committed
744 745 746 747 748
  struct nlmsghdr *h;

  nl_request_dump(RTM_GETROUTE);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
749
      nl_parse_route(h, 1);
Martin Mareš's avatar
Martin Mareš committed
750 751
    else
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
752 753 754
}

/*
 *	Asynchronous Netlink interface
 */

758
/* Created once by nl_open_async() */
static sock *nl_async_sk;		/* BIRD socket for asynchronous notifications */
759 760 761
/* NL_RX_SIZE bytes, allocated by nl_open_async() and filled by nl_async_hook() */
static byte *nl_async_rx_buffer;	/* Receive buffer */

/*
 * nl_async_msg - dispatch one asynchronous notification
 * @h: the message
 *
 * Route, link and address notifications go to their parsers (in non-scan
 * mode where the parser distinguishes); anything else is only mentioned in
 * the debug output.
 */
static void
nl_async_msg(struct nlmsghdr *h)
{
  int type = h->nlmsg_type;

  if (type == RTM_NEWROUTE || type == RTM_DELROUTE)
    {
      DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
      nl_parse_route(h, 0);
    }
  else if (type == RTM_NEWLINK || type == RTM_DELLINK)
    {
      DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
      nl_parse_link(h, 0);
    }
  else if (type == RTM_NEWADDR || type == RTM_DELADDR)
    {
      DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
      nl_parse_addr(h);
    }
  else
    {
      DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
    }
}
785

786
static int
787
nl_async_hook(sock *sk, int size UNUSED)
788
{
789 790 791 792 793 794 795 796 797 798
  struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
  struct sockaddr_nl sa;
  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
  struct nlmsghdr *h;
  int x;
  unsigned int len;

  x = recvmsg(sk->fd, &m, 0);
  if (x < 0)
    {
799 800 801 802 803 804 805 806 807 808
      if (errno == ENOBUFS)
	{
	  /*
	   *  Netlink reports some packets have been thrown away.
	   *  One day we might react to it by asking for route table
	   *  scan in near future.
	   */
	  return 1;	/* More data are likely to be ready */
	}
      else if (errno != EWOULDBLOCK)
809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
	log(L_ERR "Netlink recvmsg: %m");
      return 0;
    }
  if (sa.nl_pid)		/* It isn't from the kernel */
    {
      DBG("Non-kernel packet\n");
      return 1;
    }
  h = (void *) nl_async_rx_buffer;
  len = x;
  if (m.msg_flags & MSG_TRUNC)
    {
      log(L_WARN "Netlink got truncated asynchronous message");
      return 1;
    }
  while (NLMSG_OK(h, len))
    {
826
      nl_async_msg(h);
827 828 829 830 831
      h = NLMSG_NEXT(h, len);
    }
  if (len)
    log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
  return 1;
832 833
}

834
static void
835
nl_open_async(void)
836 837 838
{
  sock *sk;
  struct sockaddr_nl sa;
839
  int fd;
840 841 842 843 844
  static int nl_open_tried = 0;

  if (nl_open_tried)
    return;
  nl_open_tried = 1;
845

846 847
  DBG("KRT: Opening async netlink socket\n");

848 849 850
  fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (fd < 0)
    {
851
      log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
852 853
      return;
    }
854 855 856

  bzero(&sa, sizeof(sa));
  sa.nl_family = AF_NETLINK;
857 858 859
#ifdef IPV6
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
#else
860
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
861
#endif
862 863
  if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
    {
864
      log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
865 866 867
      return;
    }

868
  sk = nl_async_sk = sk_new(krt_pool);
869 870 871 872 873 874 875 876
  sk->type = SK_MAGIC;
  sk->rx_hook = nl_async_hook;
  sk->fd = fd;
  if (sk_open(sk))
    bug("Netlink: sk_open failed");

  if (!nl_async_rx_buffer)
    nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
877 878
}

879 880 881
/*
 *	Interface to the UNIX krt module
 */
882

883
/* Bitmap with one bit per kernel table id, set by krt_scan_postconfig() once a syncer is configured for it */
static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8];
884

885
void
886
krt_scan_preconfig(struct config *c UNUSED)
887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
{
  bzero(&nl_cf_table, sizeof(nl_cf_table));
}

void
krt_scan_postconfig(struct krt_config *x)
{
  int id = x->scan.table_id;

  if (nl_cf_table[id/8] & (1 << (id%8)))
    cf_error("Multiple kernel syncers defined for table #%d", id);
  nl_cf_table[id/8] |= (1 << (id%8));
}

void
krt_scan_construct(struct krt_config *x)
903
{
904
#ifndef IPV6
905
  x->scan.table_id = RT_TABLE_MAIN;
906 907
#else
  x->scan.table_id = 254;
908
#endif
909 910
}

911
void
912
krt_scan_start(struct krt_proto *p, int first)
913
{
Martin Mareš's avatar
Martin Mareš committed
914
  init_list(&p->scan.temp_ifs);
915 916 917 918
  nl_table_map[KRT_CF->scan.table_id] = p;
  if (first)
    {
      nl_open();
919
      nl_open_async();
920
    }
921 922 923
}

void
924
krt_scan_shutdown(struct krt_proto *p UNUSED, int last UNUSED)
925 926
{
}
927 928

void
929
krt_if_start(struct kif_proto *p UNUSED)
930 931
{
  nl_open();
932
  nl_open_async();
933
}