net.c 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*  Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

Jan Včelák's avatar
Jan Včelák committed
17
#include <assert.h>
18
#include <errno.h>
Jan Včelák's avatar
Jan Včelák committed
19
#include <fcntl.h>
20
#include <netinet/in.h>
Jan Včelák's avatar
Jan Včelák committed
21 22
#include <stdbool.h>
#include <sys/socket.h>
23
#include <sys/uio.h>
Jan Včelák's avatar
Jan Včelák committed
24
#include <unistd.h>
25

26
#include "libknot/errcode.h"
27 28
#include "contrib/net.h"
#include "contrib/sockaddr.h"
29

30 31 32 33 34 35 36
/*
 * OS X has no MSG_NOSIGNAL flag. When it is missing there, define it as 0
 * and set the SO_NOSIGPIPE socket option instead. In every other case
 * osx_block_sigpipe() expands to KNOT_EOK and does nothing.
 */
#if defined(__APPLE__) && !defined(MSG_NOSIGNAL)
#  define MSG_NOSIGNAL 0
#  define osx_block_sigpipe(sock) (sockopt_enable((sock), SOL_SOCKET, SO_NOSIGPIPE))
#else
#  define osx_block_sigpipe(sock) (KNOT_EOK)
#endif

40 41 42
/*!
 * \brief Enable socket option.
 */
43
static int sockopt_enable(int sock, int level, int optname)
44 45
{
	const int enable = 1;
46 47 48 49 50
	if (setsockopt(sock, level, optname, &enable, sizeof(enable)) != 0) {
		return knot_map_errno();
	}

	return KNOT_EOK;
51 52
}

53 54 55 56 57 58
/*!
 * \brief Create a non-blocking socket.
 *
 * Prefer SOCK_NONBLOCK if available to save one fcntl() syscall.
 *
 */
59 60
static int socket_create(int family, int type, int proto)
{
61 62 63 64 65
#ifdef SOCK_NONBLOCK
	type |= SOCK_NONBLOCK;
#endif
	int sock = socket(family, type, proto);
	if (sock < 0) {
66
		return knot_map_errno();
67 68
	}

69 70 71 72 73 74 75 76
#ifndef SOCK_NONBLOCK
	if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
		int ret = knot_map_errno();
		close(sock);
		return ret;
	}
#endif

77 78 79 80
	int ret = osx_block_sigpipe(sock);
	if (ret != KNOT_EOK) {
		return ret;
	}
81

82
	return sock;
83 84
}

85
int net_unbound_socket(int type, const struct sockaddr_storage *ss)
86 87 88 89 90 91
{
	if (ss == NULL) {
		return KNOT_EINVAL;
	}

	/* Create socket. */
92
	return socket_create(ss->ss_family, type, 0);
93 94
}

95 96 97 98 99
/*!
 * \brief Socket option identified by its level and name.
 *
 * A zero \a name is treated as "not available" by enable_nonlocal().
 */
struct option {
	int level; /*!< Option level, forwarded to setsockopt(). */
	int name;  /*!< Option name, forwarded to setsockopt(). */
};

100 101 102
/*!
 * \brief Get setsock option for binding non-local address.
 */
103
static const struct option *nonlocal_option(int family)
104
{
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
	static const struct option ipv4 = {
		#if defined(IP_FREEBIND)
			IPPROTO_IP, IP_FREEBIND
		#elif defined(IP_BINDANY)
			IPPROTO_IP, IP_BINDANY
		#else
			0, 0
		#endif
	};

	static const struct option ipv6 = {
		#if defined(IP_FREEBIND)
			IPPROTO_IP, IP_FREEBIND
		#elif defined(IPV6_BINDANY)
			IPPROTO_IPV6, IPV6_BINDANY
		#else
			0, 0
		#endif

	};
125

126
	switch (family) {
127 128
	case AF_INET:  return &ipv4;
	case AF_INET6: return &ipv6;
129
	default:
130
		return NULL;
131 132 133
	}
}

134
static int enable_nonlocal(int sock, int family)
135
{
136
	const struct option *opt = nonlocal_option(family);
137
	if (opt == NULL || opt->name == 0) {
138
		return KNOT_ENOTSUP;
139 140
	}

141 142 143 144 145 146 147 148 149 150
	return sockopt_enable(sock, opt->level, opt->name);
}

/*!
 * \brief Enable SO_REUSEPORT on a socket.
 *
 * \return Result of sockopt_enable() when ENABLE_REUSEPORT is defined at
 *         build time, KNOT_ENOTSUP otherwise.
 */
static int enable_reuseport(int sock)
{
#ifndef ENABLE_REUSEPORT
	return KNOT_ENOTSUP;
#else
	return sockopt_enable(sock, SOL_SOCKET, SO_REUSEPORT);
#endif
}

153 154
static void unlink_unix_socket(const struct sockaddr_storage *addr)
{
Jan Včelák's avatar
Jan Včelák committed
155
	char path[SOCKADDR_STRLEN] = { 0 };
156 157 158 159 160
	sockaddr_tostr(path, sizeof(path), addr);
	unlink(path);
}

int net_bound_socket(int type, const struct sockaddr_storage *ss, enum net_flags flags)
161 162
{
	/* Create socket. */
163 164 165
	int sock = net_unbound_socket(type, ss);
	if (sock < 0) {
		return sock;
166 167
	}

168
	/* Unlink UNIX sock if exists. */
169
	if (ss->ss_family == AF_UNIX) {
170
		unlink_unix_socket(ss);
171 172
	}

173
	/* Reuse old address if taken. */
174 175 176 177 178
	int ret = sockopt_enable(sock, SOL_SOCKET, SO_REUSEADDR);
	if (ret != KNOT_EOK) {
		close(sock);
		return ret;
	}
179 180

	/* Don't bind IPv4 for IPv6 any address. */
181
	if (ss->ss_family == AF_INET6) {
182 183 184 185 186
		ret = sockopt_enable(sock, IPPROTO_IPV6, IPV6_V6ONLY);
		if (ret != KNOT_EOK) {
			close(sock);
			return ret;
		}
187 188
	}

189
	/* Allow bind to non-local address. */
190
	if (flags & NET_BIND_NONLOCAL) {
191 192 193 194 195
		ret = enable_nonlocal(sock, ss->ss_family);
		if (ret != KNOT_EOK) {
			close(sock);
			return ret;
		}
196
	}
197

198 199
	/* Allow to bind the same address by multiple threads. */
	if (flags & NET_BIND_MULTIPLE) {
200 201
		ret = enable_reuseport(sock);
		if (ret != KNOT_EOK) {
202 203 204
			close(sock);
			return ret;
		}
205 206
	}

207
	/* Bind to specified address. */
208
	const struct sockaddr *sa = (const struct sockaddr *)ss;
209
	ret = bind(sock, sa, sockaddr_len(sa));
210
	if (ret < 0) {
211
		ret = knot_map_errno();
212
		close(sock);
213 214 215
		return ret;
	}

216
	return sock;
217 218
}

219
int net_connected_socket(int type, const struct sockaddr_storage *dst_addr,
220
                         const struct sockaddr_storage *src_addr)
221 222 223 224 225
{
	if (dst_addr == NULL) {
		return KNOT_EINVAL;
	}

226 227
	/* Check port. */
	if (sockaddr_port(dst_addr) == 0) {
228
		return KNOT_NET_EADDR;
229 230
	}

231
	/* Bind to specific source address - if set. */
232
	int sock = -1;
233
	if (src_addr && src_addr->ss_family != AF_UNSPEC) {
234
		sock = net_bound_socket(type, src_addr, 0);
235
	} else {
236
		sock = net_unbound_socket(type, dst_addr);
237
	}
238 239
	if (sock < 0) {
		return sock;
240
	}
241 242

	/* Connect to destination. */
243
	const struct sockaddr *sa = (const struct sockaddr *)dst_addr;
244
	int ret = connect(sock, sa, sockaddr_len(sa));
245
	if (ret != 0 && errno != EINPROGRESS) {
246 247 248
		ret = knot_map_errno();
		close(sock);
		return ret;
249 250
	}

251
	return sock;
252
}
253

254
/*!
 * \brief Check whether a socket has a peer.
 *
 * \return true if getpeername() succeeds on \a sock, false otherwise.
 */
bool net_is_connected(int sock)
{
	struct sockaddr_storage peer;
	socklen_t peer_len = sizeof(peer);

	if (getpeername(sock, (struct sockaddr *)&peer, &peer_len) != 0) {
		return false;
	}

	return true;
}
260

261 262
/*!
 * \brief Get the type of a socket (SO_TYPE).
 *
 * \return Socket type reported by getsockopt(), or AF_UNSPEC if the
 *         query fails.
 */
int net_socktype(int sock)
{
	int socktype;
	socklen_t optlen = sizeof(socktype);

	if (getsockopt(sock, SOL_SOCKET, SO_TYPE, &socktype, &optlen) != 0) {
		return AF_UNSPEC;
	}

	return socktype;
}

/*!
 * \brief Check whether a socket is of type SOCK_STREAM.
 */
bool net_is_stream(int sock)
{
	const int socktype = net_socktype(sock);

	return (socktype == SOCK_STREAM);
}

278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
/*!
 * \brief Accept a connection and make the new socket non-blocking.
 *
 * \param sock  Listening socket.
 * \param addr  Storage passed to accept() for the peer address.
 *
 * \return Descriptor of the accepted socket, or the error code from
 *         knot_map_errno() if accepting or switching to non-blocking
 *         mode fails.
 */
int net_accept(int sock, struct sockaddr_storage *addr)
{
	struct sockaddr *sa = (struct sockaddr *)addr;
	socklen_t sa_len = sizeof(*addr);

	int remote = -1;

#if defined(HAVE_ACCEPT4) && defined(SOCK_NONBLOCK)
	remote = accept4(sock, sa, &sa_len, SOCK_NONBLOCK);
	if (remote < 0) {
		return knot_map_errno();
	}
#else
	remote = accept(sock, sa, &sa_len);
	if (remote < 0) {
		/* Report the accept() error itself; don't go on to call
		 * fcntl() and close() on an invalid descriptor. */
		return knot_map_errno();
	}
	if (fcntl(remote, F_SETFL, O_NONBLOCK) != 0) {
		/* Save the error before close() can change errno. */
		int error = knot_map_errno();
		close(remote);
		return error;
	}
#endif

	return remote;
}

302 303
/* -- I/O interface handling partial reads and writes ---------------------- */

304 305 306 307 308 309 310
/*!
 * \brief Perform \a select() on one socket.
 *
 * \param fd       Descriptor to wait for.
 * \param read     Wait for input readiness.
 * \param write    Wait for output readiness.
 * \param timeout  Timeout passed to select() (NULL is passed through).
 *
 * \return Result of select(). Returns -1 with errno set to EBADF if \a fd
 *         cannot be stored in an fd_set (negative or >= FD_SETSIZE).
 */
static int select_one(int fd, bool read, bool write, struct timeval *timeout)
{
	/* FD_SET() on an out-of-range descriptor is undefined behaviour. */
	if (fd < 0 || fd >= FD_SETSIZE) {
		errno = EBADF;
		return -1;
	}

	fd_set set;
	FD_ZERO(&set);
	FD_SET(fd, &set);

	fd_set *rfds = read ? &set : NULL;
	fd_set *wfds = write ? &set : NULL;

	return select(fd + 1, rfds, wfds, NULL, timeout);
}

322 323 324
/*!
 * \brief Decide whether a failed I/O operation should be retried after
 *        waiting for readiness.
 *
 * \param error  \a errno set by the failed I/O operation.
 *
 * \return true for EAGAIN and EWOULDBLOCK, and outside Linux also for
 *         ENOTCONN; false otherwise.
 */
static bool io_should_wait(int error)
{
	/* Socket data not ready. */
	bool wait = (error == EAGAIN || error == EWOULDBLOCK);

#ifndef __linux__
	/* FreeBSD: connection in progress. */
	wait = wait || (error == ENOTCONN);
#endif

	return wait;
}

344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
/*!
 * \brief I/O operation callbacks.
 *
 * Pairs the function performing one I/O attempt with the function that
 * waits until the socket is ready for another attempt; used by io_exec().
 */
struct io {
	/*! Perform one I/O attempt on \a sockfd using \a msg. */
	ssize_t (*process)(int sockfd, struct msghdr *msg);
	/*! Wait for \a sockfd readiness, limited by \a timeout. */
	int (*wait)(int sockfd, struct timeval *timeout);
};

/*!
 * \brief Sum the lengths of all I/O vector entries of a message.
 *
 * \param msg  Message whose msg_iov/msg_iovlen are inspected.
 *
 * \return Total number of bytes described by the I/O vector.
 */
static size_t msg_iov_len(const struct msghdr *msg)
{
	size_t sum = 0;
	int idx = 0;

	while (idx < msg->msg_iovlen) {
		sum += msg->msg_iov[idx].iov_len;
		idx++;
	}

	return sum;
}

366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
/*!
 * \brief Shift processed data out of message IO vectors.
 *
 * Entries that were processed completely are dropped from the front of the
 * vector (msg_iov is advanced, msg_iovlen decremented). A partially
 * processed entry has its base pointer advanced and its length reduced.
 *
 * \param msg   Message whose I/O vector is updated in place.
 * \param done  Number of bytes already processed; must not exceed the
 *              total length of the vector (checked by an assertion).
 */
static void msg_iov_shift(struct msghdr *msg, size_t done)
{
	struct iovec *iov = msg->msg_iov;
	int iovlen = msg->msg_iovlen;

	for (int i = 0; i < iovlen && done > 0; i++) {
		if (iov[i].iov_len > done) {
			/* Arithmetic on void * is a GNU extension; go via char *. */
			iov[i].iov_base = (char *)iov[i].iov_base + done;
			iov[i].iov_len -= done;
			done = 0;
		} else {
			done -= iov[i].iov_len;
			msg->msg_iov += 1;
			msg->msg_iovlen -= 1;
		}
	}

	assert(done == 0);
}

/*!
390
 * \brief Perform an I/O operation with a socket with waiting.
391
 *
392
 * \param oneshot  If set, doesn't wait until the buffer is fully processed.
393 394
 *
 */
395 396
static ssize_t io_exec(const struct io *io, int fd, struct msghdr *msg,
                       bool oneshot, struct timeval *timeout)
397
{
398
	size_t done = 0;
399 400
	size_t total = msg_iov_len(msg);

401 402 403 404 405 406
	for (;;) {
		/* Perform I/O. */
		ssize_t ret = io->process(fd, msg);
		if (ret == -1 && errno == EINTR) {
			continue;
		}
407
		if (ret > 0) {
408 409
			done += ret;
			if (oneshot || done == total) {
410
				break;
411
			}
412
			msg_iov_shift(msg, ret);
413
		}
414

415 416
		/* Wait for data readiness. */
		if (ret > 0 || (ret == -1 && io_should_wait(errno))) {
417 418 419
			do {
				ret = io->wait(fd, timeout);
			} while (ret == -1 && errno == EINTR);
420
			if (ret == 1) {
421
				continue;
422 423
			} else if (ret == 0) {
				return KNOT_ETIMEOUT;
424 425
			}
		}
426

427
		/* Disconnected or error. */
428
		return KNOT_ECONN;
429 430
	}

431
	return done;
432 433
}

434
/*!
 * \brief One receive attempt: recvmsg() with MSG_DONTWAIT | MSG_NOSIGNAL.
 */
static ssize_t recv_process(int fd, struct msghdr *msg)
{
	const int flags = MSG_DONTWAIT | MSG_NOSIGNAL;

	return recvmsg(fd, msg, flags);
}
438

439 440 441 442
/*!
 * \brief Wait until the socket is ready for reading.
 */
static int recv_wait(int fd, struct timeval *timeout)
{
	const bool want_read = true;
	const bool want_write = false;

	return select_one(fd, want_read, want_write, timeout);
}
443

444
static ssize_t recv_data(int sock, struct msghdr *msg, bool oneshot, struct timeval *timeout)
445 446 447 448 449
{
	static const struct io RECV_IO = {
		.process = recv_process,
		.wait = recv_wait
	};
450

451 452
	return io_exec(&RECV_IO, sock, msg, oneshot, timeout);
}
453

454 455 456 457
/*!
 * \brief One send attempt: sendmsg() with MSG_NOSIGNAL.
 */
static ssize_t send_process(int fd, struct msghdr *msg)
{
	const int flags = MSG_NOSIGNAL;

	return sendmsg(fd, msg, flags);
}
458

459 460 461 462 463
/*!
 * \brief Wait until the socket is ready for writing.
 */
static int send_wait(int fd, struct timeval *timeout)
{
	const bool want_read = false;
	const bool want_write = true;

	return select_one(fd, want_read, want_write, timeout);
}

464
static ssize_t send_data(int sock, struct msghdr *msg, struct timeval *timeout)
465 466 467 468 469 470 471
{
	static const struct io SEND_IO = {
		.process = send_process,
		.wait = send_wait
	};

	return io_exec(&SEND_IO, sock, msg, false, timeout);
472 473
}

474 475
/* -- generic stream and datagram I/O -------------------------------------- */

476 477
ssize_t net_send(int sock, const uint8_t *buffer, size_t size,
                 const struct sockaddr_storage *addr, struct timeval *timeout)
478 479 480 481 482 483
{
	if (sock < 0 || buffer == NULL) {
		return KNOT_EINVAL;
	}

	struct iovec iov = { 0 };
Jan Včelák's avatar
Jan Včelák committed
484
	iov.iov_base = (void *)buffer;
485 486 487 488
	iov.iov_len = size;

	struct msghdr msg = { 0 };
	msg.msg_name = (void *)addr;
489
	msg.msg_namelen = sockaddr_len((struct sockaddr *)addr);
490 491 492 493 494 495 496 497 498 499 500 501 502
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	int ret = send_data(sock, &msg, timeout);
	if (ret < 0) {
		return ret;
	} else if (ret != size) {
		return KNOT_ECONN;
	}

	return ret;
}

503 504
ssize_t net_recv(int sock, uint8_t *buffer, size_t size,
                 struct sockaddr_storage *addr, struct timeval *timeout)
505 506 507 508 509
{
	if (sock < 0 || buffer == NULL) {
		return KNOT_EINVAL;
	}

510
	struct iovec iov = { 0 };
511
	iov.iov_base = buffer;
512 513 514 515
	iov.iov_len = size;

	struct msghdr msg = { 0 };
	msg.msg_name = (void *)addr;
516
	msg.msg_namelen = addr ? sizeof(*addr) : 0;
517 518 519 520
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	return recv_data(sock, &msg, true, timeout);
521
}
522

523 524
/*!
 * \brief Send a datagram to the given address.
 *
 * Calls net_send() with a NULL timeout.
 */
ssize_t net_dgram_send(int sock, const uint8_t *buffer, size_t size,
                       const struct sockaddr_storage *addr)
{
	struct timeval *no_timeout = NULL;

	return net_send(sock, buffer, size, addr, no_timeout);
}

529
/*!
 * \brief Receive a datagram without recording the sender address.
 *
 * Calls net_recv() with a NULL address.
 */
ssize_t net_dgram_recv(int sock, uint8_t *buffer, size_t size, struct timeval *timeout)
{
	struct sockaddr_storage *no_addr = NULL;

	return net_recv(sock, buffer, size, no_addr, timeout);
}

534
/*!
 * \brief Send data over a stream socket.
 *
 * Calls net_send() with a NULL destination address.
 */
ssize_t net_stream_send(int sock, const uint8_t *buffer, size_t size, struct timeval *timeout)
{
	const struct sockaddr_storage *no_addr = NULL;

	return net_send(sock, buffer, size, no_addr, timeout);
}

539
/*!
 * \brief Receive data from a stream socket.
 *
 * Calls net_recv() with a NULL address.
 */
ssize_t net_stream_recv(int sock, uint8_t *buffer, size_t size, struct timeval *timeout)
{
	struct sockaddr_storage *no_addr = NULL;

	return net_recv(sock, buffer, size, no_addr, timeout);
}

544 545
/* -- DNS specific I/O ----------------------------------------------------- */

546
ssize_t net_dns_tcp_send(int sock, const uint8_t *buffer, size_t size, struct timeval *timeout)
547
{
548
	if (sock < 0 || buffer == NULL || size > UINT16_MAX) {
549 550 551
		return KNOT_EINVAL;
	}

552
	struct iovec iov[2];
553
	uint16_t pktsize = htons(size);
554 555
	iov[0].iov_base = &pktsize;
	iov[0].iov_len = sizeof(uint16_t);
556 557 558 559 560 561
	iov[1].iov_base = (void *)buffer;
	iov[1].iov_len = size;

	struct msghdr msg = { 0 };
	msg.msg_iov = iov;
	msg.msg_iovlen = 2;
562

563
	ssize_t ret = send_data(sock, &msg, timeout);
564 565
	if (ret < 0) {
		return ret;
566 567
	}

568
	return size; /* Do not count the size prefix. */
569 570
}

571
ssize_t net_dns_tcp_recv(int sock, uint8_t *buffer, size_t size, struct timeval *timeout)
572
{
573
	if (sock < 0 || buffer == NULL) {
574 575 576
		return KNOT_EINVAL;
	}

577 578 579 580 581
	struct iovec iov = { 0 };
	struct msghdr msg = { 0 };
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

582
	/* Receive size. */
583 584 585
	uint16_t pktsize = 0;
	iov.iov_base = &pktsize;
	iov.iov_len = sizeof(pktsize);
586
	int ret = recv_data(sock, &msg, false, timeout);
587 588 589 590 591 592
	if (ret != sizeof(pktsize)) {
		return ret;
	}

	pktsize = ntohs(pktsize);

593
	/* Check packet size */
594
	if (size < pktsize) {
595
		return KNOT_ESPACE;
596 597 598
	}

	/* Receive payload. */
599 600
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
601
	iov.iov_base = buffer;
602
	iov.iov_len = pktsize;
603
	return recv_data(sock, &msg, false, timeout);
604
}