main.c 16.3 KB
Newer Older
Marek Vavruša's avatar
Marek Vavruša committed
1 2 3 4 5 6 7 8 9 10 11 12 13
/*  Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
14
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
Marek Vavruša's avatar
Marek Vavruša committed
15 16
 */

Marek Vavruša's avatar
Marek Vavruša committed
17 18 19
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
20
#include <uv.h>
21
#include <assert.h>
22 23 24
#include <contrib/cleanup.h>
#include <contrib/ucw/mempool.h>
#include <contrib/ccan/asprintf/asprintf.h>
25
#include <libknot/error.h>
26 27 28
#ifdef HAS_SYSTEMD
#include <systemd/sd-daemon.h>
#endif
Marek Vavruša's avatar
Marek Vavruša committed
29

30
#include "lib/defines.h"
31
#include "lib/resolve.h"
32
#include "lib/dnssec.h"
33 34
#include "daemon/network.h"
#include "daemon/worker.h"
35
#include "daemon/engine.h"
36
#include "daemon/bindings.h"
37

38 39 40 41 42
/* We can fork early on Linux 3.9+ and do SO_REUSEPORT for better performance.
 * CAN_FORK_EARLY is defined only when libuv exposes UV_VERSION_HEX, the
 * platform headers provide SO_REUSEPORT and the target is Linux. When it is
 * not defined, main() refuses to start more than one fork unless listening
 * descriptors were handed over by a supervisor. */
#if defined(UV_VERSION_HEX) && defined(SO_REUSEPORT) && defined(__linux__)
 #define CAN_FORK_EARLY 1
#endif

43 44 45
/*
 * Globals
 */
46 47
/* Set by -q/--quiet: suppresses the "> " prompt and the interactive-mode banner. */
static bool g_quiet = false;
/* When true the daemon reads commands from stdin; cleared by -f/--forks and
 * when sockets are passed in by a systemd-style supervisor. */
static bool g_interactive = true;
48 49 50 51

/*
 * TTY control
 */
52 53
/**
 * Read callback shared by the interactive TTY (stdin) and control sockets.
 *
 * The received bytes are executed as one command through engine_cmd() and the
 * result is printed to the standard streams. For a control socket the result
 * is additionally sent back to the peer through a duplicated descriptor, and
 * the connection is closed when the peer disconnects.
 *
 * @param stream  stream the data arrived on; stream->data holds the engine
 * @param nread   number of bytes read, or <= 0 on EOF/error
 * @param buf     buffer produced by tty_alloc(); it is always freed here
 */
static void tty_read(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf)
{
	/* Set output streams */
	FILE *out = stdout;
	bool own_out = false; /* true only if 'out' was opened here and must be closed */
	uv_os_fd_t stream_fd = 0;
	uv_fileno((uv_handle_t *)stream, &stream_fd);
	if (stream_fd != STDIN_FILENO) {
		if (nread <= 0) { /* Close if disconnected */
			uv_close((uv_handle_t *)stream, (uv_close_cb) free);
			free(buf->base);
			return;
		}
		/* Reply through a duplicate so that closing the FILE later does
		 * not close the descriptor owned by the libuv stream. */
		uv_os_fd_t dup_fd = dup(stream_fd);
		if (dup_fd >= 0) {
			FILE *remote = fdopen(dup_fd, "w");
			if (remote) {
				out = remote;
				own_out = true;
			} else {
				close(dup_fd); /* Fall back to stdout, do not leak the fd. */
			}
		}
	}
	/* Execute */
	if (nread > 0 && buf->base) {
		char *cmd = buf->base;
		if (cmd[nread - 1] == '\n') {
			cmd[nread - 1] = '\0';
		}
		struct engine *engine = stream->data;
		lua_State *L = engine->L;
		int ret = engine_cmd(L, cmd, false);
		const char *message = "";
		if (lua_gettop(L) > 0) {
			/* May yield NULL if the top of the stack is not convertible. */
			message = lua_tostring(L, -1);
		}
		/* Log to remote socket if connected */
		const char *delim = g_quiet ? "" : "> ";
		if (stream_fd != STDIN_FILENO) {
			fprintf(stdout, "%s\n", cmd); /* Duplicate command to logs */
			if (message)
				fprintf(out, "%s", message); /* Duplicate output to sender */
			if (message || !g_quiet)
				fprintf(out, "\n");
			fprintf(out, "%s", delim);
		}
		/* Log to standard streams */
		FILE *fp_out = ret ? stderr : stdout;
		if (message)
			fprintf(fp_out, "%s", message);
		if (message || !g_quiet)
			fprintf(fp_out, "\n");
		fprintf(fp_out, "%s", delim);
		lua_settop(L, 0);
	}
	fflush(out);
	free(buf->base);
	/* Close only the stream opened above; never the process-wide stdout. */
	if (own_out) {
		fclose(out);
	}
}

/**
 * libuv allocation callback for the TTY and control streams.
 *
 * One extra, zeroed byte is kept past the advertised length. tty_read()
 * treats the buffer as a C string, so this keeps the command NUL-terminated
 * even when a read fills the whole buffer without a trailing newline.
 *
 * @param handle     unused
 * @param suggested  buffer size suggested by libuv
 * @param buf        receives the buffer; len is 0 if the allocation failed
 */
static void tty_alloc(uv_handle_t *handle, size_t suggested, uv_buf_t *buf) {
	(void) handle;
	buf->base = calloc(1, suggested + 1);
	buf->len = buf->base ? suggested : 0;
}
114

115 116 117 118 119 120
/**
 * Connection callback of the control socket: accepts one client and starts
 * reading commands from it with tty_read().
 *
 * @param master  listening stream; master->data is handed on to the client
 * @param status  0 on success, negative libuv error otherwise
 */
static void tty_accept(uv_stream_t *master, int status)
{
	if (status < 0) {
		return; /* The listen operation reported an error, nothing to accept. */
	}
	uv_tcp_t *client = malloc(sizeof(*client));
	if (!client) {
		return;
	}
	if (uv_tcp_init(master->loop, client) != 0) {
		free(client);
		return;
	}
	/* From here on the handle is registered with the loop, so it has to be
	 * released through uv_close() instead of a plain free(). */
	if (uv_accept(master, (uv_stream_t *)client) != 0) {
		uv_close((uv_handle_t *)client, (uv_close_cb) free);
		return;
	}
	client->data = master->data;
	if (uv_read_start((uv_stream_t *)client, tty_alloc, tty_read) != 0) {
		uv_close((uv_handle_t *)client, (uv_close_cb) free);
		return;
	}
	/* Write command line */
	if (!g_quiet) {
		uv_buf_t buf = { "> ", 2 };
		uv_try_write((uv_stream_t *)client, &buf, 1);
	}
}

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
/* Releases the memory of a heap-allocated handle (see ipc_watch(), which
 * allocates the poll handle with malloc). Used by ipc_activity() to dispose
 * of the poller. */
static void ipc_close(uv_handle_t *handle)
{
	free(handle);
}

/* @internal AF_LOCAL reads may still be interrupted, loop it.
 *
 * Reads exactly `len` bytes from `fd` into `dst`, retrying on EAGAIN/EINTR.
 *
 * @return true when all bytes were read; false on a read error (errno set by
 *         read()) or when the peer closed the connection before `len` bytes
 *         arrived (errno set to ECONNRESET).
 */
static bool ipc_readall(int fd, char *dst, size_t len)
{
	while (len > 0) {
		ssize_t rb = read(fd, dst, len);
		if (rb > 0) {
			dst += rb;
			len -= (size_t)rb;
		} else if (rb == 0) {
			/* End of stream: read() does not touch errno here, so it
			 * must not be consulted (a stale EINTR would spin forever). */
			errno = ECONNRESET;
			return false;
		} else if (errno != EAGAIN && errno != EINTR) {
			return false;
		}
	}
	return true;
}

/**
 * Poll callback for an IPC socket shared with another fork.
 *
 * Wire format as implemented here: a uint32_t length followed by that many
 * bytes of expression text. The expression is run through engine_ipc() and
 * the reply is written back in the same length-prefixed form.
 *
 * @param handle  poll handle created by ipc_watch(); handle->data is the engine
 * @param status  0 on success, libuv error code otherwise
 * @param events  unused
 */
static void ipc_activity(uv_poll_t* handle, int status, int events)
{
	(void) events;
	struct engine *engine = handle->data;
	if (status != 0) {
		kr_log_error("[system] ipc: %s\n", uv_strerror(status));
		/* The handle is still registered with the loop: let libuv
		 * unregister it first and free it from the close callback. */
		uv_close((uv_handle_t *)handle, ipc_close);
		return;
	}
	/* Get file descriptor from handle */
	uv_os_fd_t fd = 0;
	(void) uv_fileno((uv_handle_t *)(handle), &fd);
	/* Read expression from IPC pipe */
	uint32_t len = 0;
	if (!ipc_readall(fd, (char *)&len, sizeof(len))) {
		kr_log_error("[system] ipc: %s\n", strerror(errno));
		return;
	}
	/* Widen before adding the terminator so the size cannot wrap in 32 bits. */
	const size_t alloc_len = (size_t)len + 1;
	auto_free char *rbuf = NULL;
	if (alloc_len > len) {
		rbuf = malloc(alloc_len);
	} else {
		errno = ENOMEM;
	}
	if (!rbuf) {
		kr_log_error("[system] ipc: %s\n", strerror(errno));
		engine_stop(engine); /* Panic and stop this fork. */
		return;
	}
	if (!ipc_readall(fd, rbuf, len)) {
		kr_log_error("[system] ipc: %s\n", strerror(errno));
		return;
	}
	rbuf[len] = '\0';
	/* Run expression */
	const char *message = "";
	int ret = engine_ipc(engine, rbuf);
	if (ret > 0) {
		message = lua_tostring(engine->L, -1);
		if (!message) {
			message = ""; /* Result not convertible to a string. */
		}
	}
	/* Send response back */
	len = strlen(message);
	if (write(fd, &len, sizeof(len)) != (ssize_t)sizeof(len) ||
	    write(fd, message, len) != (ssize_t)len) {
		kr_log_error("[system] ipc: %s\n", strerror(errno));
	}
	/* Clear the Lua stack */
	lua_settop(engine->L, 0);
}

/**
 * Start watching an IPC descriptor for readability; ipc_activity() is called
 * for every event.
 *
 * @param loop    event loop to register the poller with
 * @param engine  engine stored in the poller's data pointer
 * @param fd      IPC socket descriptor
 * @return true if the poller was started, false otherwise
 */
static bool ipc_watch(uv_loop_t *loop, struct engine *engine, int fd)
{
	uv_poll_t *poller = malloc(sizeof(*poller));
	if (!poller) {
		return false;
	}
	int ret = uv_poll_init(loop, poller, fd);
	if (ret != 0) {
		free(poller);
		return false;
	}
	poller->data = engine;
	ret = uv_poll_start(poller, UV_READABLE, ipc_activity);
	if (ret != 0) {
		/* The handle was initialised, so it must go through uv_close(). */
		uv_close((uv_handle_t *)poller, ipc_close);
		return false;
	}
	/* libuv sets O_NONBLOCK whether we want it or not.
	 * File status flags read with F_GETFL are written back with F_SETFL;
	 * F_SETFD would change the descriptor flags (FD_CLOEXEC) instead. */
	int flags = fcntl(fd, F_GETFL);
	if (flags != -1) {
		(void) fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
	}
	return true;
}

220
/**
 * Signal callback: stops the default event loop and stops watching the
 * signal that fired.
 *
 * @param handle  signal watcher that triggered
 * @param signum  signal number (unused)
 */
static void signal_handler(uv_signal_t *handle, int signum)
{
	(void) signum;
	uv_loop_t *default_loop = uv_default_loop();
	uv_stop(default_loop);
	uv_signal_stop(handle);
}

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
/**
 * Split an "address#port" string in place.
 *
 * If a '#' is present, the string is cut at that character and the text
 * after it is converted with atoi() into *port. Without a '#', both the
 * string and *port are left untouched.
 *
 * @param addr  writable address string, modified in place
 * @param port  receives the parsed port when one is given
 * @return addr
 */
static const char *set_addr(char *addr, int *port)
{
	char *hash = strchr(addr, '#');
	if (hash == NULL) {
		return addr;
	}
	*hash = '\0';
	*port = atoi(hash + 1);
	return addr;
}

/*
 * Server operation.
 */

241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
/**
 * Fork `forks - 1` child processes, each connected to the parent with a
 * local socket pair.
 *
 * In the parent, `ipc_set` collects one socket per child. In a child,
 * `ipc_set` is reset to contain only the socket leading to the parent.
 *
 * @param ipc_set  array of IPC descriptors, updated as described above
 * @param forks    total number of processes requested
 * @return 0 in the parent, the remaining fork counter (> 0) in a child, or a
 *         kr_error() code if socketpair() or fork() failed
 */
static int fork_workers(fd_array_t *ipc_set, int forks)
{
	/* Fork subprocesses if requested */
	while (--forks > 0) {
		int sv[2] = {-1, -1};
		if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) < 0) {
			const int err = errno; /* perror() may clobber errno */
			perror("[system] socketpair");
			return kr_error(err);
		}
		pid_t pid = fork();
		if (pid < 0) {
			const int err = errno;
			perror("[system] fork");
			close(sv[0]);
			close(sv[1]);
			return kr_error(err);
		}

		/* Forked process */
		if (pid == 0) {
			/* Drop the inherited parent-ends of previously forked
			 * workers; FD_CLOEXEC does not apply across fork(). */
			for (size_t i = 0; i < ipc_set->len; ++i) {
				close(ipc_set->at[i]);
			}
			array_clear(*ipc_set);
			array_push(*ipc_set, sv[0]);
			close(sv[1]);
			kr_crypto_reinit();
			return forks;
		}

		/* Parent process */
		array_push(*ipc_set, sv[1]);
		/* Do not share parent-end with other forks. */
		(void) fcntl(sv[1], F_SETFD, FD_CLOEXEC);
		close(sv[0]);
	}
	return 0;
}

Marek Vavruša's avatar
Marek Vavruša committed
274
/**
 * Print the usage line and the list of command-line parameters to stdout.
 *
 * @param argc  unused
 * @param argv  argv[0] is printed as the program name
 */
static void help(int argc, char *argv[])
{
	static const char parameters[] =
		"\nParameters:\n"
		" -a, --addr=[addr]    Server address (default: localhost#53).\n"
		" -S, --fd=[fd]        Listen on given fd (handed out by supervisor).\n"
		" -c, --config=[path]  Config file path (relative to [rundir]) (default: config).\n"
		" -k, --keyfile=[path] File containing trust anchors (DS or DNSKEY).\n"
		" -f, --forks=N        Start N forks sharing the configuration.\n"
		" -q, --quiet          Quiet output, no prompt in interactive mode.\n"
		" -v, --verbose        Run in verbose mode.\n"
		" -V, --version        Print version of the server.\n"
		" -h, --help           Print help and usage.\n"
		"Options:\n"
		" [rundir]             Path to the working directory (default: .)\n";

	(void) argc;
	printf("Usage: %s [parameters] [rundir]\n", argv[0]);
	fputs(parameters, stdout);
}

291
/**
 * Register the Lua bindings and create the worker context for this process.
 *
 * The worker is allocated from `pool`, zeroed, and published to Lua as the
 * light userdata global "__worker"; the fields "id", "pid" and "count" are
 * set on the global "worker" table.
 *
 * @param engine        engine whose Lua state receives the bindings
 * @param pool          memory context the worker is allocated from
 * @param worker_id     identifier of this worker
 * @param worker_count  total number of workers
 * @return the new worker, or NULL if the allocation failed
 */
static struct worker_ctx *init_worker(struct engine *engine, knot_mm_t *pool, int worker_id, int worker_count)
{
	/* Load bindings */
	engine_lualib(engine, "modules", lib_modules);
	engine_lualib(engine, "net",     lib_net);
	engine_lualib(engine, "cache",   lib_cache);
	engine_lualib(engine, "event",   lib_event);
	engine_lualib(engine, "worker",  lib_worker);

	/* Create main worker. */
	struct worker_ctx *ctx = mm_alloc(pool, sizeof(*ctx));
	if (ctx == NULL) {
		return NULL;
	}
	memset(ctx, 0, sizeof(*ctx));
	ctx->id = worker_id;
	ctx->count = worker_count;
	ctx->engine = engine;
	worker_reserve(ctx, MP_FREELIST_SIZE);

	/* Register worker in Lua thread */
	lua_State *L = engine->L;
	lua_pushlightuserdata(L, ctx);
	lua_setglobal(L, "__worker");

	/* Fill in worker.id, worker.pid and worker.count */
	lua_getglobal(L, "worker");
	lua_pushnumber(L, worker_id);
	lua_setfield(L, -2, "id");
	lua_pushnumber(L, getpid());
	lua_setfield(L, -2, "pid");
	lua_pushnumber(L, worker_count);
	lua_setfield(L, -2, "count");
	lua_pop(L, 1);

	return ctx;
}
323

324
/**
 * Set up the control channel and IPC watchers, then run the event loop
 * until it is stopped.
 *
 * In interactive mode commands are read from stdin. Otherwise a control
 * socket is served: either `control_fd` if one was passed in, or a socket
 * bound at "tty/<pid>" relative to the working directory, which is removed
 * again when the loop ends.
 *
 * @param loop        event loop to run
 * @param engine      engine that executes the received commands
 * @param ipc_set     IPC descriptors; copied into engine->ipc_set
 * @param leader      if false, the IPC descriptors are also polled for input
 * @param control_fd  pre-opened control socket, or -1 if none
 * @return kr_ok()
 */
static int run_worker(uv_loop_t *loop, struct engine *engine, fd_array_t *ipc_set, bool leader, int control_fd)
{
	/* Control sockets or TTY */
	auto_free char *sock_file = NULL;
	uv_pipe_t pipe;
	uv_pipe_init(loop, &pipe, 0);
	pipe.data = engine;
	if (g_interactive) {
		if (!g_quiet)
			printf("[system] interactive mode\n> ");
		fflush(stdout);
		uv_pipe_open(&pipe, 0);
		uv_read_start((uv_stream_t*) &pipe, tty_alloc, tty_read);
	} else {
		/* Stays UV_ENOMEM only if the socket path could not be formatted. */
		int pipe_ret = UV_ENOMEM;
		if (control_fd != -1) {
			pipe_ret = uv_pipe_open(&pipe, control_fd);
		} else {
			(void) mkdir("tty", S_IRWXU|S_IRWXG);
			/* pid_t is not guaranteed to be long; cast to match "%ld". */
			sock_file = afmt("tty/%ld", (long)getpid());
			if (sock_file) {
				pipe_ret = uv_pipe_bind(&pipe, sock_file);
			}
		}
		if (!pipe_ret)
			pipe_ret = uv_listen((uv_stream_t *) &pipe, 16, tty_accept);
		if (pipe_ret != 0) {
			/* Keep running without a control socket, but say so. */
			kr_log_error("[system] control socket: %s\n", uv_strerror(pipe_ret));
		}
	}
	/* Watch IPC pipes (or just assign them if leading the pgroup). */
	if (!leader) {
		for (size_t i = 0; i < ipc_set->len; ++i) {
			if (!ipc_watch(loop, engine, ipc_set->at[i])) {
				kr_log_error("[system] failed to create poller: %s\n", strerror(errno));
				close(ipc_set->at[i]);
			}
		}
	}
	memcpy(&engine->ipc_set, ipc_set, sizeof(*ipc_set));
	/* Notify supervisor. */
#ifdef HAS_SYSTEMD
	sd_notify(0, "READY=1");
#endif
	/* Run event loop */
	uv_run(loop, UV_RUN_DEFAULT);
	if (sock_file) {
		unlink(sock_file);
	}
	return kr_ok();
}

373 374 375 376 377 378 379 380
/**
 * Free an array of heap-allocated socket names together with the array.
 *
 * @param socket_names  array of `count` strings; may be NULL when count <= 0
 * @param count         number of entries; nothing is dereferenced if <= 0
 */
void free_sd_socket_names(char **socket_names, int count)
{
	while (count-- > 0) {
		free(socket_names[count]);
	}
	free(socket_names);
}

Marek Vavruša's avatar
Marek Vavruša committed
381 382
int main(int argc, char **argv)
{
Marek Vavruša's avatar
Marek Vavruša committed
383
	int forks = 1;
384 385
	array_t(char*) addr_set;
	array_init(addr_set);
386
	array_t(int) fd_set;
387
	array_init(fd_set);
388
	char *keyfile = NULL;
389
	const char *config = NULL;
390
	char *keyfile_buf = NULL;
391
	int control_fd = -1;
Marek Vavruša's avatar
Marek Vavruša committed
392 393 394 395

	/* Long options. */
	int c = 0, li = 0, ret = 0;
	struct option opts[] = {
396
		{"addr", required_argument,   0, 'a'},
397
		{"fd",   required_argument,   0, 'S'},
398
		{"config", required_argument, 0, 'c'},
399 400 401
		{"keyfile",required_argument, 0, 'k'},
		{"forks",required_argument,   0, 'f'},
		{"verbose",    no_argument,   0, 'v'},
402
		{"quiet",      no_argument,   0, 'q'},
403 404
		{"version",   no_argument,    0, 'V'},
		{"help",      no_argument,    0, 'h'},
Marek Vavruša's avatar
Marek Vavruša committed
405 406
		{0, 0, 0, 0}
	};
407
	while ((c = getopt_long(argc, argv, "a:S:c:f:k:vqVh", opts, &li)) != -1) {
Marek Vavruša's avatar
Marek Vavruša committed
408 409 410
		switch (c)
		{
		case 'a':
411
			array_push(addr_set, optarg);
Marek Vavruša's avatar
Marek Vavruša committed
412
			break;
413 414 415
		case 'S':
			array_push(fd_set,  atoi(optarg));
			break;
416 417 418
		case 'c':
			config = optarg;
			break;
Marek Vavruša's avatar
Marek Vavruša committed
419
		case 'f':
420
			g_interactive = false;
Marek Vavruša's avatar
Marek Vavruša committed
421 422
			forks = atoi(optarg);
			if (forks == 0) {
423
				kr_log_error("[system] error '-f' requires number, not '%s'\n", optarg);
Marek Vavruša's avatar
Marek Vavruša committed
424 425 426
				return EXIT_FAILURE;
			}
			break;
427
		case 'k':
428
			keyfile_buf = malloc(PATH_MAX);
429
			assert(keyfile_buf);
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
			/* Check if the path is absolute */
			if (optarg[0] == '/') {
				keyfile = strdup(optarg);
			} else {
				/* Construct absolute path, the file may not exist */
				keyfile = realpath(".", keyfile_buf);
				if (keyfile) {
					int len = strlen(keyfile);
					int namelen = strlen(optarg);
					if (len + namelen < PATH_MAX - 1) {
						keyfile[len] = '/';
						memcpy(keyfile + len + 1, optarg, namelen + 1);
						keyfile = strdup(keyfile); /* Duplicate */
					} else {
						keyfile = NULL; /* Invalidate */
					}
				}
			}
448
			free(keyfile_buf);
449
			if (!keyfile) {
450
				kr_log_error("[system] keyfile '%s': not writeable\n", optarg);
451 452 453
				return EXIT_FAILURE;
			}
			break;
454
		case 'v':
455
			kr_debug_set(true);
456
			break;
457 458 459
		case 'q':
			g_quiet = true;
			break;
460
		case 'V':
461
			kr_log_info("%s, version %s\n", "Knot DNS Resolver", PACKAGE_VERSION);
Marek Vavruša's avatar
Marek Vavruša committed
462 463 464
			return EXIT_SUCCESS;
		case 'h':
		case '?':
Marek Vavruša's avatar
Marek Vavruša committed
465
			help(argc, argv);
Marek Vavruša's avatar
Marek Vavruša committed
466 467
			return EXIT_SUCCESS;
		default:
Marek Vavruša's avatar
Marek Vavruša committed
468
			help(argc, argv);
Marek Vavruša's avatar
Marek Vavruša committed
469 470 471 472
			return EXIT_FAILURE;
		}
	}

473 474
#ifdef HAS_SYSTEMD
	/* Accept passed sockets from systemd supervisor. */
475 476
	char **socket_names = NULL;
	int sd_nsocks = sd_listen_fds_with_names(0, &socket_names);
477 478
	for (int i = 0; i < sd_nsocks; ++i) {
		int fd = SD_LISTEN_FDS_START + i;
479 480 481 482 483 484 485 486 487 488 489 490 491
		/* when run under systemd supervision, do not use interactive mode */
		g_interactive = false;
		if (forks != 1) {
			kr_log_error("[system] when run under systemd-style supervision, "
				     "use single-process only (bad: --fork=%d).\n", forks);
			free_sd_socket_names(socket_names, sd_nsocks);
			return EXIT_FAILURE;
		}
		if (!strcasecmp("control",socket_names[i])) {
			control_fd = fd;
		} else {
			array_push(fd_set, fd);
		}
492
	}
493
	free_sd_socket_names(socket_names, sd_nsocks);
494 495
#endif

496 497
	/* Switch to rundir. */
	if (optind < argc) {
498 499
		const char *rundir = argv[optind];
		if (access(rundir, W_OK) != 0) {
500
			kr_log_error("[system] rundir '%s': %s\n", rundir, strerror(errno));
501 502 503
			return EXIT_FAILURE;
		}
		ret = chdir(rundir);
504
		if (ret != 0) {
505
			kr_log_error("[system] rundir '%s': %s\n", rundir, strerror(errno));
506 507
			return EXIT_FAILURE;
		}
508
		if(config && strcmp(config, "-") != 0 && access(config, R_OK) != 0) {
509 510
			kr_log_error("[system] rundir '%s'\n", rundir);
			kr_log_error("[system] config '%s': %s\n", config, strerror(errno));
511 512
			return EXIT_FAILURE;
		}
513
	}
514 515 516 517 518 519 520 521 522 523
#ifndef CAN_FORK_EARLY
	/* Forking is currently broken with libuv. We need libuv to bind to
	 * sockets etc. before forking, but at the same time can't touch it before
	 * forking otherwise it crashes, so it's a chicken and egg problem.
	 * Disabling until https://github.com/libuv/libuv/pull/846 is done. */
	 if (forks > 1 && fd_set.len == 0) {
	 	kr_log_error("[system] forking >1 workers supported only on Linux 3.9+ or with supervisor\n");
	 	return EXIT_FAILURE;
	 }
#endif
524

525 526
	kr_crypto_init();

527 528 529
	/* Connect forks with local socket */
	fd_array_t ipc_set;
	array_init(ipc_set);
530
	/* Fork subprocesses if requested */
531 532 533
	int fork_id = fork_workers(&ipc_set, forks);
	if (fork_id < 0) {
		return EXIT_FAILURE;
534 535
	}

536
	/* Create a server engine. */
537
	knot_mm_t pool = {
538
		.ctx = mp_new (4096),
539
		.alloc = (knot_mm_alloc_t) mp_alloc
540
	};
541 542 543
	struct engine engine;
	ret = engine_init(&engine, &pool);
	if (ret != 0) {
544
		kr_log_error("[system] failed to initialize engine: %s\n", kr_strerror(ret));
545 546
		return EXIT_FAILURE;
	}
547
	/* Create worker */
548
	struct worker_ctx *worker = init_worker(&engine, &pool, fork_id, forks);
549
	if (!worker) {
550
		kr_log_error("[system] not enough memory\n");
551 552
		return EXIT_FAILURE;
	}
553 554 555 556 557 558 559 560
	/* Bind to passed fds and run */
	for (size_t i = 0; i < fd_set.len; ++i) {
		ret = network_listen_fd(&engine.net, fd_set.at[i]);
		if (ret != 0) {
			kr_log_error("[system] listen on fd=%d %s\n", fd_set.at[i], kr_strerror(ret));
			ret = EXIT_FAILURE;
		}
	}
561
	/* Bind to sockets and run */
562 563 564
	for (size_t i = 0; i < addr_set.len; ++i) {
		int port = 53;
		const char *addr = set_addr(addr_set.at[i], &port);
565 566
		ret = network_listen(&engine.net, addr, (uint16_t)port, NET_UDP|NET_TCP);
		if (ret != 0) {
567
			kr_log_error("[system] bind to '%s#%d' %s\n", addr, port, kr_strerror(ret));
568 569 570
			ret = EXIT_FAILURE;
		}
	}
571 572 573 574 575 576 577 578

	/* Block signals. */
	uv_loop_t *loop = uv_default_loop();
	uv_signal_t sigint, sigterm;
	uv_signal_init(loop, &sigint);
	uv_signal_init(loop, &sigterm);
	uv_signal_start(&sigint, signal_handler, SIGINT);
	uv_signal_start(&sigterm, signal_handler, SIGTERM);
579
	/* Start the scripting engine */
580 581
	worker->loop = loop;
	loop->data = worker;
582
	if (ret == 0) {
583
		ret = engine_start(&engine, config ? config : "config");
584 585
		if (ret == 0) {
			if (keyfile) {
Marek Vavrusa's avatar
Marek Vavrusa committed
586
				auto_free char *cmd = afmt("trust_anchors.config('%s')", keyfile);
587
				if (!cmd) {
588
					kr_log_error("[system] not enough memory\n");
589 590
					return EXIT_FAILURE;
				}
591
				engine_cmd(engine.L, cmd, false);
592 593 594
				lua_settop(engine.L, 0);
			}
			/* Run the event loop */
595
			ret = run_worker(loop, &engine, &ipc_set, fork_id == 0, control_fd);
596
		}
597
	}
598
	if (ret != 0) {
599
		perror("[system] worker failed");
600 601
		ret = EXIT_FAILURE;
	}
602
	/* Cleanup. */
603
	engine_deinit(&engine);
604 605
	worker_reclaim(worker);
	mp_delete(pool.ctx);
606
	array_clear(addr_set);
607
	kr_crypto_cleanup();
608 609
	return ret;
}