udp-handler.c 14.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*  Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

17 18 19 20 21
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* Required for RTLD_DEFAULT. */
#endif

#include <dlfcn.h>
22
#include <config.h>
23
#include <time.h>
24 25
#include <unistd.h>
#include <errno.h>
26
#include <arpa/inet.h>
27
#include <sys/socket.h>
28
#include <sys/poll.h>
29
#include <sys/syscall.h>
30
#include <netinet/in.h>
31 32
#include <string.h>
#include <assert.h>
33
#include <errno.h>
34 35 36
#ifdef HAVE_CAP_NG_H
#include <cap-ng.h>
#endif /* HAVE_CAP_NG_H */
37

38
#include "common/sockaddr.h"
39
#include "knot/common.h"
40
#include "knot/other/error.h"
41
#include "knot/server/udp-handler.h"
Lubos Slovak's avatar
Lubos Slovak committed
42
#include "libknot/nameserver/name-server.h"
43 44
#include "knot/stat/stat.h"
#include "knot/server/server.h"
Lubos Slovak's avatar
Lubos Slovak committed
45
#include "libknot/util/wire.h"
Ondřej Surý's avatar
Ondřej Surý committed
46
#include "libknot/consts.h"
Lubos Slovak's avatar
Lubos Slovak committed
47
#include "libknot/packet/packet.h"
48 49
#include "knot/server/zones.h"
#include "knot/server/notify.h"
50
#include "libknot/util/error.h"
51

52
/* Check for sendmmsg syscall. */
53 54 55 56 57 58
#ifdef HAVE_SENDMMSG
  #define ENABLE_SENDMMSG 1
#else
  #ifdef SYS_sendmmsg
    #define ENABLE_SENDMMSG 1
  #endif
59 60 61 62 63
#endif

/*!
 * \brief Pointer to the selected UDP master implementation.
 *
 * Assigned by udp_master_init() and invoked from udp_master().
 */
static int (*_udp_master)(dthread_t *, stat_t *) = 0;

64 65 66 67 68
///*! \brief Wrapper for UDP send. */
//static int xfr_send_udp(int session, sockaddr_t *addr, uint8_t *msg, size_t msglen)
//{
//	return sendto(session, msg, msglen, 0, addr->ptr, addr->len);
//}
69 70

int udp_handle(int fd, uint8_t *qbuf, size_t qbuflen, size_t *resp_len,
71
	       sockaddr_t* addr, knot_nameserver_t *ns)
72
{
73 74 75
#ifdef DEBUG_ENABLE_BRIEF
	char strfrom[SOCKADDR_STRLEN];
	sockaddr_tostr(addr, strfrom, sizeof(strfrom));
76
	dbg_net("udp: fd=%d received %zd bytes from '%s@%d'.\n", fd, qbuflen,
77 78
	        strfrom, sockaddr_portnum(addr));
#endif
79
	
80
	knot_packet_type_t qtype = KNOT_QUERY_NORMAL;
81
	*resp_len = SOCKET_MTU_SZ;
82 83 84 85

	knot_packet_t *packet =
		knot_packet_new(KNOT_PACKET_PREALLOC_QUERY);
	if (packet == NULL) {
86
		dbg_net("udp: failed to create packet on fd=%d\n", fd);
87 88 89 90 91 92 93 94 95

		int ret = knot_ns_error_response_from_query(ns, qbuf, qbuflen,
		                                            KNOT_RCODE_SERVFAIL,
		                                            qbuf, resp_len);

		if (ret != KNOT_EOK) {
			return KNOTD_EMALF;
		}

96
		return KNOTD_EOK; /* Created error response. */
Marek Vavrusa's avatar
Marek Vavrusa committed
97 98
	}

99 100 101
	/* Parse query. */
	int res = knot_ns_parse_packet(qbuf, qbuflen, packet, &qtype);
	if (unlikely(res != KNOTD_EOK)) {
102
		dbg_net("udp: failed to parse packet on fd=%d\n", fd);
103
		if (res > 0) { /* Returned RCODE */
104 105 106 107 108 109 110 111 112
			int ret = knot_ns_error_response_from_query(ns, qbuf,
			                                            qbuflen,
			                                            res, qbuf,
			                                            resp_len);

			if (ret != KNOT_EOK) {
				knot_packet_free(&packet);
				return KNOTD_EMALF;
			}
113
		}
114

115
		knot_packet_free(&packet);
116
		return KNOTD_EOK; /* Created error response. */
117 118
	}

119
	/* Handle query. */
120 121
//	server_t *srv = (server_t *)knot_ns_get_data(ns);
//	knot_ns_xfr_t xfr;
122 123
	res = KNOTD_ERROR;
	switch(qtype) {
124

125 126
	/* Response types. */
	case KNOT_RESPONSE_NORMAL:
127
		res = zones_process_response(ns, addr, packet, qbuf, resp_len);
128
		break;
129
	case KNOT_RESPONSE_NOTIFY:
130
		res = notify_process_response(ns, packet, addr, qbuf, resp_len);
131
		break;
132
	
133 134
	/* Query types. */
	case KNOT_QUERY_NORMAL:
135
		res = zones_normal_query_answer(ns, packet, addr, qbuf,
136
		                                resp_len, NS_TRANSPORT_UDP);
137 138
		break;
	case KNOT_QUERY_AXFR:
139 140 141
		/* RFC1034, p.28 requires reliable transfer protocol.
		 * Bind responds with FORMERR.
 		 */
Marek Vavrusa's avatar
Marek Vavrusa committed
142
		/*! \note Draft exists for AXFR/UDP, but has not been standardized. */
143
		knot_ns_error_response(ns, knot_packet_id(packet),
144 145
		                       &packet->header.flags1,
		                       KNOT_RCODE_FORMERR, qbuf, resp_len);
146
		res = KNOTD_EOK;
147
		break;
148 149 150 151 152 153 154
	case KNOT_QUERY_IXFR:
		/* According to RFC1035, respond with SOA. 
		 * Draft proposes trying to fit response into one packet,
		 * but I have found no tool or slave server to actually attempt
		 * IXFR/UDP.
		 */
		knot_packet_set_qtype(packet, KNOT_RRTYPE_SOA);
155
		res = zones_normal_query_answer(ns, packet, addr,
156 157
		                                qbuf, resp_len, 
		                                NS_TRANSPORT_UDP);
158
		break;
159 160
	case KNOT_QUERY_NOTIFY:
		res = notify_process_request(ns, packet, addr,
161
					     qbuf, resp_len);
162
		break;
163
		
164
	case KNOT_QUERY_UPDATE:
165
		dbg_net("udp: UPDATE query on fd=%d not implemented\n", fd);
166
		knot_ns_error_response(ns, knot_packet_id(packet),
167 168
		                       &packet->header.flags1,
		                       KNOT_RCODE_NOTIMPL, qbuf, resp_len);
169
		res = KNOTD_EOK;
170
		break;
171 172 173 174 175
		
	/* Unhandled opcodes. */
	case KNOT_RESPONSE_AXFR: /*!< Processed in XFR handler. */
	case KNOT_RESPONSE_IXFR: /*!< Processed in XFR handler. */
		knot_ns_error_response(ns, knot_packet_id(packet),
176
		                       &packet->header.flags1,
177 178 179 180 181 182 183 184
		                       KNOT_RCODE_REFUSED, qbuf,
		                       resp_len);
		res = KNOTD_EOK;
		break;
			
	/* Unknown opcodes */
	default:
		knot_ns_error_response(ns, knot_packet_id(packet),
185 186
		                       &packet->header.flags1,
		                       KNOT_RCODE_FORMERR, qbuf, resp_len);
187 188
		res = KNOTD_EOK;
		break;
189 190
	}

191 192
	knot_packet_free(&packet);

193
	return res;
194
}
Marek Vavrusa's avatar
Marek Vavrusa committed
195

196 197 198
/*!
 * \brief UDP worker loop based on recvfrom()/sendto().
 *
 * Receives one datagram at a time, lets udp_handle() build the answer in the
 * same buffer and sends it back to the originating address.
 *
 * \param thread      Worker thread; thread->data holds the iohandler_t.
 * \param thread_stat Statistics handle (not used by this implementation).
 *
 * \retval KNOTD_EOK     The loop ended (cancellation, server stop or a
 *                       receive error other than EINTR).
 * \retval KNOTD_EINVAL  Handler, server or name server is missing.
 * \retval KNOTD_ENOTSUP The socket family of the handler is not supported.
 * \retval KNOTD_ENOMEM  The receive buffer could not be allocated.
 */
static inline int udp_master_recvfrom(dthread_t *thread, stat_t *thread_stat)
{
	iohandler_t *h = (iohandler_t *)thread->data;
	if (h == NULL || h->server == NULL || h->server->nameserver == NULL) {
		dbg_net("udp: invalid parameters for udp_master_recvfrom\n");
		return KNOTD_EINVAL;
	}

	/* Set CPU affinity to improve load distribution on multicore systems.
	 * Partial overlapping mask to be nice to scheduler.
	 */
	int cpcount = dt_online_cpus();
	if (cpcount > 0) {
		unsigned cpu[2];
		cpu[0] = dt_get_id(thread);
		cpu[1] = (cpu[0] + 1) % cpcount;
		cpu[0] = cpu[0] % cpcount;
		dt_setaffinity(thread, cpu, 2);
	}

	knot_nameserver_t *ns = h->server->nameserver;

	/* Initialize remote party address. */
	sockaddr_t addr;
	if (sockaddr_init(&addr, h->type) != KNOTD_EOK) {
		log_server_error("Socket type %d is not supported, "
				 "IPv6 support is probably disabled.\n",
				 h->type);
		return KNOTD_ENOTSUP;
	}

	/* Allocate buffer for answering. */
	uint8_t *qbuf = malloc(SOCKET_MTU_SZ);
	if (qbuf == NULL) {
		dbg_net("udp: out of memory when allocating buffer.\n");
		return KNOTD_ENOMEM;
	}

	/* Duplicate socket for performance reasons on some OS's.
	 * Fall back to the original descriptor if dup() fails. */
	int sock = h->fd;
	int sock_dup = dup(h->fd);
	if (sock_dup < 0) {
		log_server_warning("Couldn't duplicate UDP socket for listening.\n");
	} else {
		sock = sock_dup;
	}

	/* Serve until cancelled, stopped or a hard receive error occurs. */
	for (;;) {

		/* Receive packet. */
		ssize_t n = recvfrom(sock, qbuf, SOCKET_MTU_SZ, 0,
		                     addr.ptr, &addr.len);

		/* errno is meaningful only for a failed call; save it before
		 * any other call may overwrite it. */
		const int err = (n < 0) ? errno : 0;

		/* Cancellation point. */
		if (dt_is_cancelled(thread)) {
			break;
		}

		/* Error and interrupt handling. */
		if (unlikely(n <= 0)) {
			const int hard_error = (n < 0 && err != EINTR);
			if (hard_error) {
				dbg_net("udp: recvfrom() failed: %d\n", err);
			}

			/* Stop when the server is going down or the socket is
			 * unusable; an interrupted call or an empty datagram
			 * is simply retried. */
			if (!(h->state & ServerRunning) || hard_error) {
				break;
			}
			continue;
		}

		/* Handle received pkt. */
		size_t resp_len = 0;
		int rc = udp_handle(sock, qbuf, n, &resp_len, &addr, ns);

		/* Send response. */
		if (rc == KNOTD_EOK && resp_len > 0) {

			dbg_net("udp: on fd=%d, sending answer size=%zu.\n",
			        sock, resp_len);

			/* Send datagram. */
			rc = sendto(sock, qbuf, resp_len,
				    0, addr.ptr, addr.len);

			/* Check result. */
			if (rc != (int)resp_len) {
				dbg_net("udp: sendto(): failed: %d - %d.\n",
				        rc, errno);
			}
		}
	}

	/* Free allocd resources. */
	if (sock_dup >= 0) {
		close(sock_dup);
	}

	free(qbuf);

	return KNOTD_EOK;
}
300

301 302
#ifdef ENABLE_RECVMMSG
#ifdef MSG_WAITFORONE
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339

/*!
 * \brief Pointer to the selected UDP batch-send implementation.
 *
 * Assigned by udp_master_init() (udp_sendto() or udp_sendmmsg()) and invoked
 * from udp_master_recvmmsg().
 */
static int (*_send_mmsg)(int, sockaddr_t *, struct mmsghdr *, size_t) = 0;

/*!
 * \brief Send a batch of prepared answers one by one.
 *
 * Portable variant built on sendto(). Entries whose msg_len is zero are
 * skipped; a failed or short send is only reported via debug output.
 *
 * \param sock  Socket to send from.
 * \param addrs Destination addresses, addrs[i] belongs to msgs[i].
 * \param msgs  Messages; the first iovec of each entry holds the payload and
 *              msg_len its size.
 * \param count Number of entries in \a addrs and \a msgs.
 *
 * \return Always KNOTD_EOK.
 */
int udp_sendto(int sock, sockaddr_t * addrs, struct mmsghdr *msgs, size_t count)
{
	for (unsigned pos = 0; pos < count; ++pos) {

		/* Nothing prepared for this slot. */
		const size_t len = msgs[pos].msg_len;
		if (len == 0) {
			continue;
		}

		dbg_net("udp: on fd=%d, sending answer size=%zd.\n",
		        sock, len);

		/* Send datagram to the matching remote address. */
		sockaddr_t *peer = &addrs[pos];
		struct iovec *payload = msgs[pos].msg_hdr.msg_iov;
		int sent = sendto(sock, payload->iov_base, len,
		                  0, peer->ptr, peer->len);

		/* Check result. */
		if (sent != (int)len) {
			dbg_net("udp: sendto(): failed: %d - %d.\n",
			        sent, errno);
		}
	}

	return KNOTD_EOK;
}

#ifdef ENABLE_SENDMMSG
/*! \brief sendmmsg() syscall interface. */
340
#ifndef HAVE_SENDMMSG
341 342 343 344 345
/*! \brief Fallback sendmmsg() wrapper that issues the raw SYS_sendmmsg syscall. */
static inline int sendmmsg(int fd, struct mmsghdr *mmsg, unsigned vlen,
                           unsigned flags)
{
	/* The trailing NULL is passed through unchanged, as before. */
	const long rc = syscall(SYS_sendmmsg, fd, mmsg, vlen, flags, NULL);

	return (int)rc;
}
346
#endif
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364

/*!
 * \brief Send a batch of prepared answers with sendmmsg().
 *
 * sendmmsg() may transmit only a leading part of the batch; in that case the
 * call is repeated with the remaining messages so that no answer is dropped
 * silently.
 *
 * \param sock  Socket to send from.
 * \param _     Unused (destinations are taken from the message headers).
 * \param msgs  Messages to send.
 * \param count Number of entries in \a msgs.
 *
 * \retval KNOTD_EOK   The batch was handed over to the kernel.
 * \retval KNOTD_ERROR sendmmsg() reported an error.
 */
int udp_sendmmsg(int sock, sockaddr_t *_, struct mmsghdr *msgs, size_t count)
{
	UNUSED(_);
	dbg_net("udp: sending multiple responses\n");

	size_t done = 0;
	while (done < count) {
		int sent = sendmmsg(sock, msgs + done, count - done, 0);
		if (sent < 0) {
			return KNOTD_ERROR;
		}
		if (sent == 0) {
			/* No progress; give up instead of spinning. */
			break;
		}
		done += (size_t)sent;
	}

	return KNOTD_EOK;
}
#endif

365 366 367 368 369
/*!
 * \brief UDP worker loop based on recvmmsg().
 *
 * Receives up to RECVMMSG_BATCHLEN datagrams per call, lets udp_handle()
 * build each answer in place and sends the whole batch through _send_mmsg.
 *
 * \param thread      Worker thread; thread->data holds the iohandler_t.
 * \param thread_stat Statistics handle (not used by this implementation).
 *
 * \retval KNOTD_EOK     The loop ended (cancellation, server stop or a
 *                       receive error other than EINTR).
 * \retval KNOTD_EINVAL  Handler, server or name server is missing, or the
 *                       socket could not be duplicated.
 * \retval KNOTD_ENOMEM  Batch buffers could not be allocated.
 * \retval KNOTD_ENOTSUP The socket family of the handler is not supported.
 */
static inline int udp_master_recvmmsg(dthread_t *thread, stat_t *thread_stat)
{
	iohandler_t *h = (iohandler_t *)thread->data;
	if (h == NULL || h->server == NULL || h->server->nameserver == NULL) {
		dbg_net("udp: invalid parameters for udp_master_recvmmsg\n");
		return KNOTD_EINVAL;
	}

	knot_nameserver_t *ns = h->server->nameserver;
	int sock = dup(h->fd);

	/* Check socket. */
	if (sock < 0) {
		dbg_net("udp: unable to dup() socket, finishing.\n");
		return KNOTD_EINVAL;
	}

	int ret = KNOTD_EOK;

	/* Allocate batch for N packets. */
	char *iobuf = malloc(SOCKET_MTU_SZ * RECVMMSG_BATCHLEN);
	sockaddr_t *addrs = malloc(sizeof(sockaddr_t) * RECVMMSG_BATCHLEN);
	struct iovec *iov = malloc(sizeof(struct iovec) * RECVMMSG_BATCHLEN);
	struct mmsghdr *msgs = malloc(sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);

	/* free(NULL) is valid, so a partial failure is handled by the common
	 * cleanup below, which also closes the duplicated socket. */
	if (iobuf == NULL || addrs == NULL || iov == NULL || msgs == NULL) {
		ret = KNOTD_ENOMEM;
		goto cleanup;
	}

	/* Prepare batch: one buffer slice, one iovec and one address per slot. */
	memset(msgs, 0, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
	for (unsigned i = 0; i < RECVMMSG_BATCHLEN; ++i) {
		if (sockaddr_init(addrs + i, h->type) != KNOTD_EOK) {
			log_server_error("Socket type %d is not supported, "
					 "IPv6 support is probably disabled.\n",
					 h->type);
			ret = KNOTD_ENOTSUP;
			goto cleanup;
		}
		iov[i].iov_base = iobuf + i * SOCKET_MTU_SZ;
		iov[i].iov_len = SOCKET_MTU_SZ;
		msgs[i].msg_hdr.msg_iov = iov + i;
		msgs[i].msg_hdr.msg_iovlen = 1;
		msgs[i].msg_hdr.msg_name = addrs[i].ptr;
		msgs[i].msg_hdr.msg_namelen = addrs[i].len;
	}

	/* Set CPU affinity to improve load distribution on multicore systems.
	 * Partial overlapping mask to be nice to scheduler.
	 */
	int cpcount = dt_online_cpus();
	if (cpcount > 0) {
		unsigned cpu[2];
		cpu[0] = dt_get_id(thread);
		cpu[1] = (cpu[0] + 1) % cpcount;
		cpu[0] = cpu[0] % cpcount;
		dt_setaffinity(thread, cpu, 2);
	}

	/* Serve until cancelled, stopped or a hard receive error occurs. */
	for (;;) {

		/* Receive multiple messages. */
		ssize_t n = recvmmsg(sock, msgs, RECVMMSG_BATCHLEN,
		                     MSG_WAITFORONE, NULL);

		/* errno is meaningful only for a failed call; save it before
		 * any other call may overwrite it. */
		const int err = (n < 0) ? errno : 0;

		/* Cancellation point. */
		if (dt_is_cancelled(thread)) {
			break;
		}

		/* Error and interrupt handling. */
		if (unlikely(n <= 0)) {
			const int hard_error = (n < 0 && err != EINTR);
			if (hard_error) {
				dbg_net("udp: recvmmsg() failed: %d\n", err);
			}

			/* Stop when the server is going down or the socket is
			 * unusable; an interrupted call is simply retried. */
			if (!(h->state & ServerRunning) || hard_error) {
				break;
			}
			continue;
		}

		const size_t count = (size_t)n;

		/* Handle each received msg. */
		for (size_t i = 0; i < count; ++i) {
			struct iovec *cvec = msgs[i].msg_hdr.msg_iov;
			size_t resp_len = msgs[i].msg_len;
			int rc = udp_handle(sock, cvec->iov_base, resp_len,
			                    &resp_len, addrs + i, ns);
			if (rc == KNOTD_EOK) {
				msgs[i].msg_len = resp_len;
				iov[i].iov_len = resp_len;
			} else {
				msgs[i].msg_len = 0;
				iov[i].iov_len = 0;
			}
		}

		/* Gather results. */
		_send_mmsg(sock, addrs, msgs, count);

		/* Restore receive capacity of the used slots; msg_namelen is
		 * a value-result field overwritten by recvmmsg(). */
		for (size_t i = 0; i < count; ++i) {
			iov[i].iov_len = SOCKET_MTU_SZ;
			msgs[i].msg_hdr.msg_namelen = addrs[i].len;
		}
	}

cleanup:
	/* Free allocd resources. */
	free(iobuf);
	free(addrs);
	free(iov);
	free(msgs);
	close(sock);
	return ret;
}
476 477
#endif
#endif
Marek Vavrusa's avatar
Marek Vavrusa committed
478

479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
/*!
 * \brief Select the UDP master and batch-send implementations at run-time.
 *
 * Runs as a constructor. The recvfrom() based master is the default; when
 * recvmmsg() support is compiled in and the symbol can be resolved, the
 * batched master is used instead. sendmmsg() is used for batch sending only
 * if a probe call shows that the syscall is implemented.
 */
void __attribute__ ((constructor)) udp_master_init(void)
{
	/* Initialize defaults. */
	_udp_master = udp_master_recvfrom;

	/* Optimized functions. */
#ifdef ENABLE_RECVMMSG
#ifdef MSG_WAITFORONE
	/* Check for recvmmsg() support. */
	if (dlsym(RTLD_DEFAULT, "recvmmsg") != NULL) {
		_udp_master = udp_master_recvmmsg;
	}

	/* Check for sendmmsg() support. */
	_send_mmsg = udp_sendto;
#ifdef ENABLE_SENDMMSG
	/* Probe with an empty batch. Only ENOSYS reported by this very call
	 * means the syscall is missing, so clear errno first and look at the
	 * return value instead of trusting a possibly stale errno. */
	const int saved_errno = errno;
	errno = 0;
	if (sendmmsg(0, NULL, 0, 0) >= 0 || errno != ENOSYS) {
		_send_mmsg = udp_sendmmsg;
	}
	errno = saved_errno;
#endif /* ENABLE_SENDMMSG */
#endif /* MSG_WAITFORONE */
#endif /* ENABLE_RECVMMSG */
}
	
	
506 507 508 509
/*!
 * \brief Entry point of a UDP worker thread.
 *
 * Applies socket options, sets up per-thread statistics, drops capabilities
 * (when libcap-ng is available) and runs the implementation selected in
 * _udp_master.
 *
 * \param thread Worker thread; thread->data holds the iohandler_t.
 *
 * \retval KNOTD_EINVAL The handler carries an invalid socket.
 * \return Otherwise the result of the selected master implementation.
 */
int udp_master(dthread_t *thread)
{
	iohandler_t *ioh = (iohandler_t *)thread->data;
	int fd = ioh->fd;

	/* Refuse to run without a valid socket. */
	if (fd < 0) {
		dbg_net("udp: null socket recevied, finishing.\n");
		return KNOTD_EINVAL;
	}

	/* Socket options. */
	int on = 1;
#ifndef DISABLE_IPV6
	if (ioh->type == AF_INET6) {
		/* Disable dual-stack for performance reasons. */
		setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
	}
#endif
	/* No IPv4-specific options are applied (the IP_MTU_DISCOVER tweak is
	 * disabled in this version). */

	/* Per-thread statistics; with STAT_COMPILE the macro below is said to
	 * declare/initialize thread_stat itself. A new instance is created on
	 * every start. */
	stat_t *thread_stat = 0;
	STAT_INIT(thread_stat);
	stat_set_protocol(thread_stat, stat_UDP);

	/* Drop all capabilities on workers. */
#ifdef HAVE_CAP_NG_H
	if (capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) {
		capng_clear(CAPNG_SELECT_BOTH);
		capng_apply(CAPNG_SELECT_BOTH);
	}
#endif /* HAVE_CAP_NG_H */

	/* Run the selected implementation until it finishes. */
	dbg_net_verb("udp: thread started (worker %p).\n", thread);
	const int rc = _udp_master(thread, thread_stat);
	if (rc != KNOTD_EOK) {
		log_server_warning("UDP answering module finished "
		                   "with an error (%s).\n",
		                   knotd_strerror(rc));
	}

	stat_free(thread_stat);
	dbg_net_verb("udp: worker %p finished.\n", thread);

	return rc;
}
572