/*  Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

17
#include <uv.h>
18
#include <lua.h>
19
#include <libknot/packet/pkt.h>
20
#include <libknot/descriptor.h>
21 22
#include <contrib/ucw/lib.h>
#include <contrib/ucw/mempool.h>
23
#include <contrib/wire.h>
Marek Vavruša's avatar
Marek Vavruša committed
24 25 26
#if defined(__GLIBC__) && defined(_GNU_SOURCE)
#include <malloc.h>
#endif
27
#include <assert.h>
28 29
#include <sys/types.h>
#include <unistd.h>
30
#include <gnutls/gnutls.h>
31
#include "lib/utils.h"
32
#include "lib/layer.h"
33
#include "daemon/worker.h"
34
#include "daemon/bindings.h"
35
#include "daemon/engine.h"
36
#include "daemon/io.h"
37
#include "daemon/tls.h"
38
#include "daemon/zimport.h"
39

40 41
/** Verbose logging helper: forwards the query, the "wrkr" tag and the
 *  printf-style arguments (format string first) to QRVERBOSE.
 *  Written with standard C99 variadic-macro syntax rather than the GNU
 *  named-variadic form. */
#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "wrkr", __VA_ARGS__)

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
/** Client request state.
 *
 * One instance is allocated per request by request_create(), from the
 * request's own memory pool (req.pool).
 */
struct request_ctx
{
	/** Resolver request; its `pool` member holds the mempool backing this context. */
	struct kr_request req;
	struct {
		/** Copy of the query source address; family is AF_UNSPEC when unknown. */
		union inaddr addr;
		/** Local address of the receiving handle, filled via uv_*_getsockname(). */
		union inaddr dst_addr;
		/* uv_handle_t *handle; */

		/** NULL if the request didn't come over network. */
		struct session *session;
	} source;
	/** Worker that created this request. */
	struct worker_ctx *worker;
	/** Tasks registered with this request (see request_add_tasks()). */
	qr_tasklist_t tasks;
};

/** Query resolution task.
 *
 * Allocated from the owning request's memory pool by qr_task_create() and
 * reference-counted through qr_task_ref() / qr_task_unref().
 */
struct qr_task
{
	/** Request this task belongs to. */
	struct request_ctx *ctx;
	/** Packet buffer allocated from the request pool. */
	knot_pkt_t *pktbuf;
	/** Task list; expected to be empty when the task completes.
	 *  NOTE(review): presumably the tasks waiting on this one - confirm. */
	qr_tasklist_t waiting;
	/** Handles of outstanding I/O requests created by ioreq_spawn(). */
	uv_handle_t *pending[MAX_PENDING];
	/** Number of used entries in `pending`. */
	uint16_t pending_count;
	uint16_t addrlist_count;
	uint16_t addrlist_turn;
	uint16_t timeouts;
	uint16_t iter_count;
	uint16_t bytes_remaining;
	struct sockaddr *addrlist;
	/** Reference counter; the task is freed when it drops to zero. */
	uint32_t refs;
	/** Checked by qr_task_on_send() to decide whether to complete the task. */
	bool finished : 1;
	/** Asserted to be false when the task completes. */
	bool leading  : 1;
};

77

78 79 80 81
/* Convenience macros */

/** Take one reference on `task`. */
#define qr_task_ref(task) \
	do { ++(task)->refs; } while(0)
/** Drop one reference on `task`; a NULL task is ignored and the task is
 *  released through qr_task_free() once the counter reaches zero.
 *  @note The argument is evaluated more than once, avoid side effects. */
#define qr_task_unref(task) \
	do { if ((task) && --(task)->refs == 0) { qr_task_free(task); } } while (0)
/** True when `checked` is not being closed, or when it is the handle of the
 *  session the task's request came from. */
#define qr_valid_handle(task, checked) \
	(!uv_is_closing((checked)) || (task)->ctx->source.session->handle == (checked))

/** @internal get key for tcp session
 *  @note kr_straddr() return pointer to static string
 */
#define tcpsess_key(addr) kr_straddr(addr)
90 91

/* Forward decls */

/* -- task life cycle -- */
static void qr_task_free(struct qr_task *task);
static int qr_task_step(struct qr_task *task,
			const struct sockaddr *packet_source, knot_pkt_t *packet);
static int qr_task_send(struct qr_task *task, uv_handle_t *handle,
			struct sockaddr *addr, knot_pkt_t *pkt);
static int qr_task_finalize(struct qr_task *task, int state);
static void qr_task_complete(struct qr_task *task);

/* -- worker-wide registry of connected outgoing TCP sessions -- */
static int worker_add_tcp_connected(struct worker_ctx *worker,
				    const struct sockaddr *addr, struct session *session);
static int worker_del_tcp_connected(struct worker_ctx *worker,
				    const struct sockaddr *addr);
static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
						 const struct sockaddr *addr);

/* -- worker-wide registry of outgoing TCP sessions still waiting -- */
static int worker_add_tcp_waiting(struct worker_ctx *worker,
				  const struct sockaddr *addr, struct session *session);
static int worker_del_tcp_waiting(struct worker_ctx *worker,
				  const struct sockaddr *addr);
static struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
					       const struct sockaddr *addr);

/* -- per-session task lists -- */
static int session_add_waiting(struct session *session, struct qr_task *task);
static int session_del_waiting(struct session *session, struct qr_task *task);
static int session_add_tasks(struct session *session, struct qr_task *task);
static int session_del_tasks(struct session *session, struct qr_task *task);
static void session_close(struct session *session);

/* -- timers -- */
static void on_session_idle_timeout(uv_timer_t *timer);
static int timer_start(struct session *session, uv_timer_cb cb,
		       uint64_t timeout, uint64_t repeat);
static void on_tcp_connect_timeout(uv_timer_t *timer);
static void on_tcp_watchdog_timeout(uv_timer_t *timer);
124 125 126 127 128 129 130

/** @internal Get singleton worker. */
static inline struct worker_ctx *get_worker(void)
{
	return uv_default_loop()->data;
}

131 132 133 134
static inline void *iohandle_borrow(struct worker_ctx *worker)
{
	void *h = NULL;

135
	const size_t size = sizeof(uv_handles_t);
136 137 138 139 140 141 142 143 144 145 146 147
	if (worker->pool_iohandles.len > 0) {
		h = array_tail(worker->pool_iohandles);
		array_pop(worker->pool_iohandles);
		kr_asan_unpoison(h, size);
	} else {
		h = malloc(size);
	}

	return h;
}

static inline void iohandle_release(struct worker_ctx *worker, void *h)
148
{
149 150 151 152
	assert(h);

	if (worker->pool_iohandles.len < MP_FREELIST_SIZE) {
		array_push(worker->pool_iohandles, h);
153
		kr_asan_poison(h, sizeof(uv_handles_t));
154
	} else {
155
		free(h);
156 157 158
	}
}

159
/** Public wrapper around iohandle_borrow().
 *  @return handle-sized buffer, or NULL when none could be obtained */
void *worker_iohandle_borrow(struct worker_ctx *worker)
{
	void *handle_mem = iohandle_borrow(worker);
	return handle_mem;
}

/** Public wrapper around iohandle_release(): hands the buffer `h` back to
 *  the worker. */
void worker_iohandle_release(struct worker_ctx *worker, void *h)
{
	iohandle_release(worker, h);
}

static inline void *iorequest_borrow(struct worker_ctx *worker)
170
{
171 172
	void *r = NULL;

173
	const size_t size = sizeof(uv_reqs_t);
174 175 176 177
	if (worker->pool_ioreqs.len > 0) {
		r = array_tail(worker->pool_ioreqs);
		array_pop(worker->pool_ioreqs);
		kr_asan_unpoison(r, size);
178
	} else {
179
		r = malloc(size);
180
	}
181 182

	return r;
183 184
}

185
static inline void iorequest_release(struct worker_ctx *worker, void *r)
186
{
187 188 189 190
	assert(r);

	if (worker->pool_ioreqs.len < MP_FREELIST_SIZE) {
		array_push(worker->pool_ioreqs, r);
191
		kr_asan_poison(r, sizeof(uv_reqs_t));
192
	} else {
193
		free(r);
194 195 196
	}
}

197

198 199 200
/*! @internal Create a UDP/TCP handle for an outgoing AF_INET* connection.
 *  socktype is SOCK_* */
static uv_handle_t *ioreq_spawn(struct qr_task *task, int socktype, sa_family_t family)
201
{
202 203 204
	bool precond = (socktype == SOCK_DGRAM || socktype == SOCK_STREAM)
			&& (family == AF_INET  || family == AF_INET6);
	if (!precond) {
205 206
		/* assert(false); see #245 */
		kr_log_verbose("[work] ioreq_spawn: pre-condition failed\n");
207 208 209
		return NULL;
	}

210 211 212 213
	if (task->pending_count >= MAX_PENDING) {
		return NULL;
	}
	/* Create connection for iterative query */
214
	struct worker_ctx *worker = task->ctx->worker;
215 216
	void *h = iohandle_borrow(worker);
	uv_handle_t *handle = (uv_handle_t *)h;
217 218 219
	if (!handle) {
		return NULL;
	}
220
	io_create(worker->loop, handle, socktype);
221 222 223 224

	/* Bind to outgoing address, according to IP v4/v6. */
	union inaddr *addr;
	if (family == AF_INET) {
225
		addr = (union inaddr *)&worker->out_addr4;
226
	} else {
227
		addr = (union inaddr *)&worker->out_addr6;
228 229 230 231 232
	}
	int ret = 0;
	if (addr->ip.sa_family != AF_UNSPEC) {
		assert(addr->ip.sa_family == family);
		if (socktype == SOCK_DGRAM) {
233 234 235 236 237
			uv_udp_t *udp = (uv_udp_t *)handle;
			ret = uv_udp_bind(udp, &addr->ip, 0);
		} else if (socktype == SOCK_STREAM){
			uv_tcp_t *tcp = (uv_tcp_t *)handle;
			ret = uv_tcp_bind(tcp, &addr->ip, 0);
238 239 240
		}
	}

241 242
	/* Set current handle as a subrequest type. */
	struct session *session = handle->data;
243 244
	if (ret == 0) {
		session->outgoing = true;
245
		ret = session_add_tasks(session, task);
246
	}
247
	if (ret < 0) {
248
		io_deinit(handle);
249
		iohandle_release(worker, h);
250 251 252
		return NULL;
	}
	/* Connect or issue query datagram */
253
	task->pending[task->pending_count] = handle;
254
	task->pending_count += 1;
255
	return handle;
256 257
}

258
/** Close callback of a session's I/O handle: detaches the handle from its
 *  session, deinitializes it and returns its memory to the worker found in
 *  the loop's `data` pointer. */
static void on_session_close(uv_handle_t *handle)
{
	struct session *session = handle->data;
	struct worker_ctx *worker = handle->loop->data;

	assert(session->handle == handle);
	session->handle = NULL;

	io_deinit(handle);
	iohandle_release(worker, handle);
}

/** Close callback of a session's timer: once the timer is closed, start
 *  closing the session's I/O handle unless that is already in progress. */
static void on_session_timer_close(uv_handle_t *timer)
{
	struct session *session = timer->data;
	uv_handle_t *io_handle = session->handle;

	assert(io_handle && io_handle->data == session);
	assert (session->outgoing || io_handle->type == UV_TCP);

	if (uv_is_closing(io_handle)) {
		return;
	}
	uv_close(io_handle, on_session_close);
}

280
/** Cancel an outgoing UDP subrequest of `task`: stop the session timer,
 *  detach the task from the session and close the session.
 *  Does nothing when the session is already closing. */
static void ioreq_kill_udp(uv_handle_t *req, struct qr_task *task)
{
	assert(req);
	struct session *session = req->data;
	assert(session->outgoing);

	if (!session->closing) {
		uv_timer_stop(&session->timeout);
		session_del_tasks(session, task);
		/* No other task is expected to remain on the session. */
		assert(session->tasks.len == 0);
		session_close(session);
	}
}

294
/** Detach `task` from an outgoing TCP session.
 *
 * When the session has a peer address and is left without any tasks or
 * waiting tasks, it is either kept open for reuse until the idle timer
 * fires (connected sessions), or closed right away (sessions that are not
 * connected, or when the idle timer could not be started).
 * Does nothing when the session is already closing.
 */
static void ioreq_kill_tcp(uv_handle_t *req, struct qr_task *task)
{
	assert(req);
	struct session *session = req->data;
	assert(session->outgoing);
	if (session->closing) {
		return;
	}

	session_del_waiting(session, task);
	session_del_tasks(session, task);

	const bool unused = session->outgoing &&
			    session->peer.ip.sa_family != AF_UNSPEC &&
			    session->tasks.len == 0 &&
			    session->waiting.len == 0 &&
			    !session->closing;
	if (!unused) {
		return;
	}

	assert(session->peer.ip.sa_family == AF_INET ||
	       session->peer.ip.sa_family == AF_INET6);

	int res = 1; /* non-zero means: close now */
	if (session->connected) {
		/* This is outbound TCP connection which can be reused.
		 * Close it after timeout */
		uv_timer_t *idle_timer = &session->timeout;
		idle_timer->data = session;
		uv_timer_stop(idle_timer);
		res = uv_timer_start(idle_timer, on_session_idle_timeout,
				     KR_CONN_RTT_MAX, 0);
	}

	if (res != 0) {
		/* if any errors, close the session immediately */
		session_close(session);
	}
}

330
static void ioreq_kill_pending(struct qr_task *task)
331
{
332 333 334 335 336 337 338 339
	for (uint16_t i = 0; i < task->pending_count; ++i) {
		if (task->pending[i]->type == UV_UDP) {
			ioreq_kill_udp(task->pending[i], task);
		} else if (task->pending[i]->type == UV_TCP) {
			ioreq_kill_tcp(task->pending[i], task);
		} else {
			assert(false);
		}
340 341 342 343
	}
	task->pending_count = 0;
}

344 345 346 347 348 349 350 351
static void session_close(struct session *session)
{
	assert(session->tasks.len == 0 && session->waiting.len == 0);

	if (session->closing) {
		return;
	}

Grigorii Demidov's avatar
Grigorii Demidov committed
352
	if (!session->outgoing && session->buffering != NULL) {
353 354
		qr_task_complete(session->buffering);
	}
Grigorii Demidov's avatar
Grigorii Demidov committed
355
	session->buffering = NULL;
356

357 358
	uv_handle_t *handle = session->handle;
	io_stop_read(handle);
359 360 361 362 363 364 365 366 367 368 369 370
	session->closing = true;
	if (session->outgoing &&
	    session->peer.ip.sa_family != AF_UNSPEC) {
		struct worker_ctx *worker = get_worker();
		struct sockaddr *peer = &session->peer.ip;
		worker_del_tcp_connected(worker, peer);
		session->connected = false;
	}

	if (!uv_is_closing((uv_handle_t *)&session->timeout)) {
		uv_timer_stop(&session->timeout);
		if (session->tls_client_ctx) {
371
			tls_close(&session->tls_client_ctx->c);
372
		}
373
		if (session->tls_ctx) {
374
			tls_close(&session->tls_ctx->c);
375 376
		}

377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
		session->timeout.data = session;
		uv_close((uv_handle_t *)&session->timeout, on_session_timer_close);
	}
}

/** Put `task` on the session's waiting list and take a reference for it.
 *  @return index of the task when it is already on the list (no new
 *          reference is taken), otherwise the result of array_push();
 *          a negative value means the push failed */
static int session_add_waiting(struct session *session, struct qr_task *task)
{
	int idx = 0;
	while (idx < session->waiting.len) {
		if (session->waiting.at[idx] == task) {
			return idx;
		}
		++idx;
	}

	const int pushed = array_push(session->waiting, task);
	if (pushed < 0) {
		return pushed;
	}
	qr_task_ref(task);
	return pushed;
}

/** Remove `task` from the session's waiting list and drop the reference
 *  held for it.
 *  @return kr_ok() when the task was found, kr_error(ENOENT) otherwise */
static int session_del_waiting(struct session *session, struct qr_task *task)
{
	for (int idx = 0; idx < session->waiting.len; ++idx) {
		if (session->waiting.at[idx] != task) {
			continue;
		}
		array_del(session->waiting, idx);
		qr_task_unref(task);
		return kr_ok();
	}
	return kr_error(ENOENT);
}

/** Put `task` on the session's task list and take a reference for it.
 *  @return index of the task when it is already on the list (no new
 *          reference is taken), otherwise the result of array_push();
 *          a negative value means the push failed */
static int session_add_tasks(struct session *session, struct qr_task *task)
{
	int idx = 0;
	while (idx < session->tasks.len) {
		if (session->tasks.at[idx] == task) {
			return idx;
		}
		++idx;
	}

	const int pushed = array_push(session->tasks, task);
	if (pushed < 0) {
		return pushed;
	}
	qr_task_ref(task);
	return pushed;
}

/** Remove `task` from the session's task list and drop the reference held
 *  for it.
 *  @return kr_ok() when the task was found, kr_error(ENOENT) otherwise */
static int session_del_tasks(struct session *session, struct qr_task *task)
{
	for (int idx = 0; idx < session->tasks.len; ++idx) {
		if (session->tasks.at[idx] != task) {
			continue;
		}
		array_del(session->tasks, idx);
		qr_task_unref(task);
		return kr_ok();
	}
	return kr_error(ENOENT);
}

438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
/** @cond This memory layout is internal to mempool.c, use only for debugging. */
#if defined(__SANITIZE_ADDRESS__)
struct mempool_chunk {
  struct mempool_chunk *next;
  size_t size;
};
/** Poison or unpoison (for ASAN) the data area of the mempool's last chunk.
 *  @param poison true to poison the chunk data, false to unpoison it */
static void mp_poison(struct mempool *mp, bool poison)
{
	if (!poison) { /* @note mempool is part of the first chunk, unpoison it first */
		kr_asan_unpoison(mp, sizeof(*mp));
	}
	struct mempool_chunk *chunk = mp->state.last[0];
	/* The chunk header is placed behind its data, so the data area starts
	 * chunk->size bytes before the header. Byte arithmetic goes through
	 * char * because arithmetic on void * is a GNU extension. */
	void *chunk_off = (char *)chunk - chunk->size;
	if (poison) {
		kr_asan_poison(chunk_off, chunk->size);
	} else {
		kr_asan_unpoison(chunk_off, chunk->size);
	}
}
#else
/* Without ASAN the call expands to nothing. */
#define mp_poison(mp, enable)
#endif
/** @endcond */

462
/** Get a mempool.  (Recycle if possible.)  */
463
static inline struct mempool *pool_borrow(struct worker_ctx *worker)
464 465
{
	struct mempool *mp = NULL;
466 467 468 469
	if (worker->pool_mp.len > 0) {
		mp = array_tail(worker->pool_mp);
		array_pop(worker->pool_mp);
		mp_poison(mp, 0);
470 471 472 473 474 475
	} else { /* No mempool on the freelist, create new one */
		mp = mp_new (4 * CPU_PAGE_SIZE);
	}
	return mp;
}

476
/** Return a mempool.  (Cache them up to some count.) */
477 478
static inline void pool_release(struct worker_ctx *worker, struct mempool *mp)
{
479
	if (worker->pool_mp.len < MP_FREELIST_SIZE) {
480
		mp_flush(mp);
481
		array_push(worker->pool_mp, mp);
482
		mp_poison(mp, 1);
483 484 485 486 487
	} else {
		mp_delete(mp);
	}
}

488 489 490 491 492 493 494 495 496 497 498 499
/** Create a key for an outgoing subrequest: qname, qclass, qtype.
 * @param dst Destination buffer for the key, MUST be SUBREQ_KEY_LEN bytes or larger.
 * @param pkt Packet whose question (qname, qclass, qtype) forms the key; must not be NULL.
 * @return the result of kr_rrkey(): key length if successful or an error
 * @note qtype is passed to kr_rrkey() for both of its trailing arguments.
 */
static const size_t SUBREQ_KEY_LEN = KR_RRKEY_LEN;
static int subreq_key(char *dst, knot_pkt_t *pkt)
{
	assert(pkt);
	return kr_rrkey(dst, knot_pkt_qclass(pkt), knot_pkt_qname(pkt),
			knot_pkt_qtype(pkt), knot_pkt_qtype(pkt));
}

500 501 502 503 504 505 506 507
/** Create and initialize a request_ctx (on a fresh mempool).
 *
 * handle and addr point to the source of the request, and they are NULL
 * in case the request didn't come from network.
 *
 * The context is zeroed, bound to the worker and to the session found in
 * handle->data, and the source/destination addresses are recorded in it.
 * The worker's rconcurrent counter is incremented on success.
 *
 * @return the new context, or NULL when it could not be allocated (the
 *         borrowed mempool is returned to the worker in that case)
 */
static struct request_ctx *request_create(struct worker_ctx *worker,
					  uv_handle_t *handle,
					  const struct sockaddr *addr)
{
	knot_mm_t pool = {
		.ctx = pool_borrow(worker),
		.alloc = (knot_mm_alloc_t) mp_alloc
	};

	/* Create request context */
	struct request_ctx *ctx = mm_alloc(&pool, sizeof(*ctx));
	if (!ctx) {
		pool_release(worker, pool.ctx);
		return NULL;
	}
	memset(ctx, 0, sizeof(*ctx));

	/* TODO Relocate pool to struct request */
	ctx->worker = worker;
	array_init(ctx->tasks);

	struct session *session = NULL;
	if (handle) {
		session = handle->data;
	}
	if (session) {
		assert(session->outgoing == false);
	}
	ctx->source.session = session;

	struct kr_request *req = &ctx->req;
	req->pool = pool;
	req->vars_ref = LUA_NOREF;

	/* Remember query source addr */
	const bool has_ip_source = addr &&
		(addr->sa_family == AF_INET || addr->sa_family == AF_INET6);
	if (has_ip_source) {
		const size_t src_len = (addr->sa_family == AF_INET6)
			? sizeof(struct sockaddr_in6)
			: sizeof(struct sockaddr_in);
		memcpy(&ctx->source.addr.ip, addr, src_len);
		ctx->req.qsource.addr = &ctx->source.addr.ip;
	} else {
		ctx->source.addr.ip.sa_family = AF_UNSPEC;
	}

	worker->stats.rconcurrent += 1;

	if (!handle) {
		/* Not a network request, there is no destination to record. */
		return ctx;
	}

	/* Remember the destination address. */
	int dst_len = sizeof(ctx->source.dst_addr);
	struct sockaddr *dst_addr = &ctx->source.dst_addr.ip;
	ctx->source.dst_addr.ip.sa_family = AF_UNSPEC;
	switch (handle->type) {
	case UV_UDP:
		if (uv_udp_getsockname((uv_udp_t *)handle, dst_addr, &dst_len) == 0) {
			req->qsource.dst_addr = dst_addr;
		}
		req->qsource.tcp = false;
		break;
	case UV_TCP:
		if (uv_tcp_getsockname((uv_tcp_t *)handle, dst_addr, &dst_len) == 0) {
			req->qsource.dst_addr = dst_addr;
		}
		req->qsource.tcp = true;
		break;
	default:
		break;
	}

	return ctx;
}

572 573
/** More initialization, related to the particular incoming query/packet.
 *
 * Chooses the maximum answer size, allocates the answer packet from the
 * request pool, copies the query's TSIG and OPT records when present and
 * begins the resolution.
 *
 * @return kr_ok(), or kr_error(ENOMEM) when the answer packet could not be
 *         allocated
 */
static int request_start(struct request_ctx *ctx, knot_pkt_t *query)
{
	assert(query && ctx);
	struct kr_request *req = &ctx->req;
	struct session *source = ctx->source.session;

	/* source.session can be empty if request was generated by kresd itself */
	size_t answer_max;
	if (!source || source->handle->type == UV_TCP) {
		answer_max = KNOT_WIRE_MAX_PKTSIZE;
	} else if (knot_pkt_has_edns(query)) { /* EDNS */
		answer_max = MAX(knot_edns_get_payload(query->opt_rr),
				 KNOT_WIRE_MIN_PKTSIZE);
	} else {
		answer_max = KNOT_WIRE_MIN_PKTSIZE;
	}
	req->qsource.size = query->size;

	req->answer = knot_pkt_new(NULL, answer_max, &req->pool);
	if (!req->answer) {
		return kr_error(ENOMEM);
	}

	/* Remember query source TSIG key */
	if (query->tsig_rr) {
		req->qsource.key = knot_rrset_copy(query->tsig_rr, &req->pool);
	}

	/* Remember query source EDNS data */
	if (query->opt_rr) {
		req->qsource.opt = knot_rrset_copy(query->opt_rr, &req->pool);
	}

	/* Start resolution */
	struct worker_ctx *worker = ctx->worker;
	kr_resolve_begin(req, &worker->engine->resolver, req->answer);
	worker->stats.queries += 1;

	/* Throttle outbound queries only when high pressure */
	if (worker->stats.concurrent < QUERY_RATE_THRESHOLD) {
		req->options.NO_THROTTLE = true;
	}
	return kr_ok();
}

static void request_free(struct request_ctx *ctx)
{
	struct worker_ctx *worker = ctx->worker;
618 619 620 621 622 623 624 625 626 627 628 629 630 631
	/* Dereference any Lua vars table if exists */
	if (ctx->req.vars_ref != LUA_NOREF) {
		lua_State *L = worker->engine->L;
		/* Get worker variables table */
		lua_rawgeti(L, LUA_REGISTRYINDEX, worker->vars_table_ref);
		/* Get next free element (position 0) and store it under current reference (forming a list) */
		lua_rawgeti(L, -1, 0);
		lua_rawseti(L, -2, ctx->req.vars_ref);
		/* Set current reference as the next free element */
		lua_pushinteger(L, ctx->req.vars_ref);
		lua_rawseti(L, -2, 0);
		lua_pop(L, 1);
		ctx->req.vars_ref = LUA_NOREF;
	}
632
	/* Return mempool to ring or free it if it's full */
633
	pool_release(worker, ctx->req.pool.ctx);
634
	/* @note The 'task' is invalidated from now on. */
Marek Vavruša's avatar
Marek Vavruša committed
635
	/* Decommit memory every once in a while */
636
	static int mp_delete_count = 0;
637 638 639
	if (++mp_delete_count == 100000) {
		lua_gc(worker->engine->L, LUA_GCCOLLECT, 0);
#if defined(__GLIBC__) && defined(_GNU_SOURCE)
Marek Vavruša's avatar
Marek Vavruša committed
640
		malloc_trim(0);
641
#endif
Marek Vavruša's avatar
Marek Vavruša committed
642
		mp_delete_count = 0;
643
	}
644
	worker->stats.rconcurrent -= 1;
645
}
646

647
static int request_add_tasks(struct request_ctx *ctx, struct qr_task *task)
648
{
649 650 651 652 653 654 655 656
	for (int i = 0; i < ctx->tasks.len; ++i) {
		if (ctx->tasks.at[i] == task) {
			return i;
		}
	}
	int ret = array_push(ctx->tasks, task);
	if (ret >= 0) {
		qr_task_ref(task);
657
	}
658 659
	return ret;
}
660

661 662 663 664 665 666 667 668 669 670
/** Remove `task` from the request's task list and drop the reference held
 *  for it.
 *  @return kr_ok() when the task was found, kr_error(ENOENT) otherwise */
static int request_del_tasks(struct request_ctx *ctx, struct qr_task *task)
{
	for (int idx = 0; idx < ctx->tasks.len; ++idx) {
		if (ctx->tasks.at[idx] != task) {
			continue;
		}
		array_del(ctx->tasks, idx);
		qr_task_unref(task);
		return kr_ok();
	}
	return kr_error(ENOENT);
}
674

675 676 677 678 679 680 681 682 683

static struct qr_task *qr_task_create(struct request_ctx *ctx)
{
	/* How much can client handle? */
	struct engine *engine = ctx->worker->engine;
	size_t pktbuf_max = KR_EDNS_PAYLOAD;
	if (engine->resolver.opt_rr) {
		pktbuf_max = MAX(knot_edns_get_payload(engine->resolver.opt_rr),
				 pktbuf_max);
684 685
	}

686 687 688 689
	/* Create resolution task */
	struct qr_task *task = mm_alloc(&ctx->req.pool, sizeof(*task));
	if (!task) {
		return NULL;
690
	}
691
	memset(task, 0, sizeof(*task)); /* avoid accidentally unitialized fields */
692

693 694 695 696 697
	/* Create packet buffers for answer and subrequests */
	knot_pkt_t *pktbuf = knot_pkt_new(NULL, pktbuf_max, &ctx->req.pool);
	if (!pktbuf) {
		mm_free(&ctx->req.pool, task);
		return NULL;
698
	}
699
	pktbuf->size = 0;
700

701 702 703 704 705 706 707 708 709
	task->ctx = ctx;
	task->pktbuf = pktbuf;
	array_init(task->waiting);
	task->refs = 0;
	int ret = request_add_tasks(ctx, task);
	if (ret < 0) {
		mm_free(&ctx->req.pool, task);
		mm_free(&ctx->req.pool, pktbuf);
		return NULL;
710
	}
711 712
	ctx->worker->stats.concurrent += 1;
	return task;
713 714
}

715 716 717 718 719 720 721 722 723 724 725 726
/* This is called when the task refcount is zero, free memory. */
static void qr_task_free(struct qr_task *task)
{
	struct request_ctx *ctx = task->ctx;

	assert(ctx);

	/* Process outbound session. */
	struct session *source_session = ctx->source.session;
	struct worker_ctx *worker = ctx->worker;

	/* Process source session. */
Grigorii Demidov's avatar
Grigorii Demidov committed
727 728 729 730
	if (source_session &&
	    source_session->tasks.len < worker->tcp_pipeline_max/2 &&
	    !source_session->closing && source_session->throttled) {
		uv_handle_t *handle = source_session->handle;
731 732
		/* Start reading again if the session is throttled and
		 * the number of outgoing requests is below watermark. */
Grigorii Demidov's avatar
Grigorii Demidov committed
733 734 735
		if (handle) {
			io_start_read(handle);
			source_session->throttled = false;
736 737 738 739 740 741 742 743 744 745 746 747 748
		}
	}

	if (ctx->tasks.len == 0) {
		array_clear(ctx->tasks);
		request_free(ctx);
	}

	/* Update stats */
	worker->stats.concurrent -= 1;
}

/*@ Register new qr_task within session. */
749 750
static int qr_task_register(struct qr_task *task, struct session *session)
{
751
	assert(session->outgoing == false && session->handle->type == UV_TCP);
752

753 754 755 756
	int ret = array_reserve(session->tasks, session->tasks.len + 1);
	if (ret != 0) {
		return kr_error(ENOMEM);
	}
757 758 759 760 761 762

	session_add_tasks(session, task);

	struct request_ctx *ctx = task->ctx;
	assert(ctx && (ctx->source.session == NULL || ctx->source.session == session));
	ctx->source.session = session;
763 764 765 766 767
	/* Soft-limit on parallel queries, there is no "slow down" RCODE
	 * that we could use to signalize to client, but we can stop reading,
	 * an in effect shrink TCP window size. To get more precise throttling,
	 * we would need to copy remainder of the unread buffer and reassemble
	 * when resuming reading. This is NYI.  */
768 769
	if (session->tasks.len >= task->ctx->worker->tcp_pipeline_max) {
		uv_handle_t *handle = session->handle;
Grigorii Demidov's avatar
Grigorii Demidov committed
770
		if (handle && !session->throttled && !session->closing) {
771 772 773 774
			io_stop_read(handle);
			session->throttled = true;
		}
	}
775

776 777 778 779
	return 0;
}

static void qr_task_complete(struct qr_task *task)
780
{
781
	struct request_ctx *ctx = task->ctx;
782

783
	/* Kill pending I/O requests */
784
	ioreq_kill_pending(task);
785 786
	assert(task->waiting.len == 0);
	assert(task->leading == false);
787

Grigorii Demidov's avatar
Grigorii Demidov committed
788 789 790 791 792 793
	struct session *source_session = ctx->source.session;
	if (source_session) {
		assert(source_session->outgoing == false &&
		       source_session->waiting.len == 0);
		session_del_tasks(source_session, task);
	}
794

795
	/* Release primary reference to task. */
796
	request_del_tasks(ctx, task);
797 798
}

799
/* This is called when we send subrequest / answer */
800
static int qr_task_on_send(struct qr_task *task, uv_handle_t *handle, int status)
801
{
802 803 804 805 806 807 808
	if (task->finished) {
		assert(task->leading == false);
		qr_task_complete(task);
		if (!handle || handle->type != UV_TCP) {
			return status;
		}
		struct session* session = handle->data;
Grigorii Demidov's avatar
Grigorii Demidov committed
809
		assert(session);
810 811 812 813 814 815
		if (!session->outgoing ||
		    session->waiting.len == 0) {
			return status;
		}
	}

Grigorii Demidov's avatar
Grigorii Demidov committed
816
	if (handle) {
817
		struct session* session = handle->data;
Grigorii Demidov's avatar
Grigorii Demidov committed
818 819 820
		if (!session->outgoing && task->ctx->source.session) {
			assert (task->ctx->source.session->handle == handle);
		}
821 822 823
		if (handle->type == UV_TCP && session->outgoing &&
		    session->waiting.len > 0) {
			session_del_waiting(session, task);
Grigorii Demidov's avatar
Grigorii Demidov committed
824 825 826
			if (session->closing) {
				return status;
			}
827 828 829 830 831
			/* Finalize the task, if any errors.
			 * We can't add it to the end of waiting list for retrying
			 * since it may lead endless loop in some circumstances
			 * (for instance: tls; send->tls_push->too many non-critical errors->
			 * on_send with nonzero status->re-add to waiting->send->etc).*/
Grigorii Demidov's avatar
Grigorii Demidov committed
832
			if (status != 0) {
833 834 835 836 837 838 839
				if (session->outgoing) {
					qr_task_finalize(task, KR_STATE_FAIL);
				} else {
					assert(task->ctx->source.session == session);
					task->ctx->source.session = NULL;
				}
				session_del_tasks(session, task);
840
			}
841 842
			if (session->waiting.len > 0) {
				struct qr_task *t = session->waiting.at[0];
843
				int ret = qr_task_send(t, handle, &session->peer.ip, t->pktbuf);
844
				if (ret != kr_ok()) {
845
					while (session->waiting.len > 0) {
Grigorii Demidov's avatar
Grigorii Demidov committed
846
						struct qr_task *t = session->waiting.at[0];
847
						if (session->outgoing) {
Grigorii Demidov's avatar
Grigorii Demidov committed
848
							qr_task_finalize(t, KR_STATE_FAIL);
849
						} else {
Grigorii Demidov's avatar
Grigorii Demidov committed
850 851
							assert(t->ctx->source.session == session);
							t->ctx->source.session = NULL;
852
						}
853
						array_del(session->waiting, 0);
Grigorii Demidov's avatar
Grigorii Demidov committed
854 855
						session_del_tasks(session, t);
						qr_task_unref(t);
856 857
					}
					while (session->tasks.len > 0) {
Grigorii Demidov's avatar
Grigorii Demidov committed
858
						struct qr_task *t = session->tasks.at[0];
859
						if (session->outgoing) {
Grigorii Demidov's avatar
Grigorii Demidov committed
860
							qr_task_finalize(t, KR_STATE_FAIL);
861
						} else {
Grigorii Demidov's avatar
Grigorii Demidov committed
862 863
							assert(t->ctx->source.session == session);
							t->ctx->source.session = NULL;
864
						}
Grigorii Demidov's avatar
Grigorii Demidov committed
865
						session_del_tasks(session, t);
866 867 868 869
					}
					session_close(session);
					return status;
				}
870
			}
871
		}
Grigorii Demidov's avatar
Grigorii Demidov committed
872
		if (!session->closing) {
Daniel Kahn Gillmor's avatar
Daniel Kahn Gillmor committed
873
			io_start_read(handle); /* Start reading new query */
874 875
		}
	}
876
	return status;
877 878
}

879 880
/** Completion callback of a UDP send request: notifies the task stored in
 *  req->data, drops the reference held on it and returns the request memory
 *  to the worker. */
static void on_send(uv_udp_send_t *req, int status)
{
	uv_handle_t *handle = (uv_handle_t *)(req->handle);
	struct worker_ctx *worker = handle->loop->data;
	assert(worker == get_worker());

	struct qr_task *task = req->data;
	qr_task_on_send(task, handle, status);
	qr_task_unref(task);

	iorequest_release(worker, req);
}

891
/** Completion callback of a stream write request that belongs to a task:
 *  notifies the task stored in req->data, drops the reference held on it
 *  and returns the request memory to the worker. */
static void on_task_write(uv_write_t *req, int status)
{
	uv_handle_t *handle = (uv_handle_t *)(req->handle);
	struct worker_ctx *worker = handle->loop->data;
	assert(worker == get_worker());

	struct qr_task *task = req->data;
	qr_task_on_send(task, handle, status);
	qr_task_unref(task);

	iorequest_release(worker, req);
}
}

903 904 905 906 907 908 909 910 911
/** Completion callback of a stream write request that is not tied to any
 *  task: only returns the request memory to the worker. The write status
 *  is not inspected. */
static void on_nontask_write(uv_write_t *req, int status)
{
	uv_handle_t *handle = (uv_handle_t *)(req->handle);
	struct worker_ctx *worker = handle->loop->data;
	assert(worker == get_worker());

	iorequest_release(worker, req);
}

912 913
/** Transport "push" function for TLS: writes `len` bytes from `buf` to the
 *  TCP handle of the session attached to the TLS context.
 *  NOTE(review): presumably installed as the GnuTLS push callback - confirm
 *  against the TLS setup code.
 *
 * The write request is taken from the worker's request pool, the same pool
 * the completion callbacks (on_task_write / on_nontask_write) and the error
 * path below release it to.
 *
 * After the handshake is done (TLS_HS_DONE) the write is accounted to
 * t->task, which gains a reference until the write completes; before that
 * the write is not tied to any task.
 *
 * @param h   transport pointer, a struct tls_common_ctx
 * @param buf data to send
 * @param len number of bytes in buf
 * @return len when the write was queued; -1 with errno set to EFAULT
 *         (no context or no request memory) or EIO (uv_write() failed)
 */
ssize_t worker_gnutls_push(gnutls_transport_ptr_t h, const void *buf, size_t len)
{
	struct tls_common_ctx *t = (struct tls_common_ctx *)h;
	const uv_buf_t uv_buf[1] = {
		{ (char *)buf, len }
	};

	if (t == NULL) {
		errno = EFAULT;
		return -1;
	}

	assert(t->session && t->session->handle &&
	       t->session->handle->type == UV_TCP);

	VERBOSE_MSG(NULL,"[%s] push %zu <%p>\n",
		    t->client_side ? "tls_client" : "tls", len, h);

	struct worker_ctx *worker = t->worker;
	assert(worker);

	/* Must come from the request pool: it is released with iorequest_release(). */
	void *ioreq = iorequest_borrow(worker);
	if (!ioreq) {
		errno = EFAULT;
		return -1;
	}

	uv_write_t *write_req = (uv_write_t *)ioreq;

	struct qr_task *task = t->task;
	uv_write_cb write_cb = on_task_write;
	if (t->handshake_state == TLS_HS_DONE) {
		assert(task);
	} else {
		/* Handshake traffic is not accounted to any task. */
		task = NULL;
		write_cb = on_nontask_write;
	}

	write_req->data = task;

	ssize_t ret = -1;
	int res = uv_write(write_req, (uv_stream_t *)t->session->handle, uv_buf, 1, write_cb);
	if (res == 0) {
		if (task) {
			qr_task_ref(task); /* Pending ioreq on current task */
			struct request_ctx *ctx = task->ctx;
			/* Update statistics only when writing to a handle other
			 * than the one the request came in on. */
			if (ctx && ctx->source.session &&
			    t->session->handle != ctx->source.session->handle) {
				struct sockaddr *addr = &t->session->peer.ip;
				worker->stats.tls += 1;
				if (addr->sa_family == AF_INET6)
					worker->stats.ipv6 += 1;
				else if (addr->sa_family == AF_INET)
					worker->stats.ipv4 += 1;
			}
		}
		if (worker->too_many_open &&
		    worker->stats.rconcurrent <
			worker->rconcurrent_highwatermark - 10) {
			worker->too_many_open = false;
		}
		ret = len;
	} else {
		VERBOSE_MSG(NULL,"[%s] uv_write: %s\n",
			    t->client_side ? "tls_client" : "tls", uv_strerror(res));
		iorequest_release(worker, ioreq);
		errno = EIO;
	}
	return ret;
}

983 984
/**
 * Send a packet on behalf of a task over the given handle.
 *
 * TLS sessions are served by tls_push(); plain UDP/TCP handles get an I/O
 * request and are written with uv_udp_send() / uv_write().
 *
 * @param task    task the packet belongs to
 * @param handle  handle to send on; NULL is reported via qr_task_on_send()
 *                with kr_error(EIO)
 * @param addr    destination address (passed to kr_resolve_checkout() and
 *                uv_udp_send(); also used for the address-family statistics)
 * @param pkt     packet to send
 * @return 0 when the send was queued, otherwise the error from the failing
 *         step (checkout, libuv call, or qr_task_on_send()).
 */
static int qr_task_send(struct qr_task *task, uv_handle_t *handle,
			struct sockaddr *addr, knot_pkt_t *pkt)
{
	if (!handle) {
		return qr_task_on_send(task, handle, kr_error(EIO));
	}

	/* Synchronous push to TLS context, bypassing event loop. */
	struct session *session = handle->data;
	assert(session->closing == false);
	if (session->has_tls) {
		struct kr_request *req = &task->ctx->req;
		if (session->outgoing) {
			int ret = kr_resolve_checkout(req, NULL, addr,
						      SOCK_STREAM, pkt);
			if (ret != kr_ok()) {
				return ret;
			}
		}
		return tls_push(task, handle, pkt);
	}

	int ret = 0;
	struct request_ctx *ctx = task->ctx;
	struct worker_ctx *worker = ctx->worker;
	struct kr_request *req = &ctx->req;
	void *ioreq = iorequest_borrow(worker);
	if (!ioreq) {
		return qr_task_on_send(task, handle, kr_error(ENOMEM));
	}
	if (knot_wire_get_qr(pkt->wire) == 0) {
		/*
		 * Query must be finalised using destination address before
		 * sending.
		 *
		 * Libuv does not offer a convenient way how to obtain a source
		 * IP address from a UDP handle that has been initialised using
		 * uv_udp_init(). The uv_udp_getsockname() fails because of the
		 * lazy socket initialisation.
		 *
		 * @note -- A solution might be opening a separate socket and
		 * trying to obtain the IP address from it.
		 */
		ret = kr_resolve_checkout(req, NULL, addr,
		                          handle->type == UV_UDP ? SOCK_DGRAM : SOCK_STREAM,
		                          pkt);
		if (ret != 0) {
			iorequest_release(worker, ioreq);
			return ret;
		}
	}
	/* Send using given protocol */
	if (handle->type == UV_UDP) {
		uv_udp_send_t *send_req = (uv_udp_send_t *)ioreq;
		uv_buf_t buf = { (char *)pkt->wire, pkt->size };
		send_req->data = task;
		ret = uv_udp_send(send_req, (uv_udp_t *)handle, &buf, 1, addr, &on_send);
	} else if (handle->type == UV_TCP) {
		uv_write_t *write_req = (uv_write_t *)ioreq;
		/* Two-byte length prefix in network byte order, then the message.
		 * NOTE(review): pkt_size lives on this stack frame while uv_write()
		 * is asynchronous and libuv requires buffer memory to stay valid
		 * until the write callback runs; confirm this cannot be read after
		 * return (e.g. when the write is not completed immediately). */
		uint16_t pkt_size = htons(pkt->size);
		uv_buf_t buf[2] = {
			{ (char *)&pkt_size, sizeof(pkt_size) },
			{ (char *)pkt->wire, pkt->size }
		};
		write_req->data = task;
		ret = uv_write(write_req, (uv_stream_t *)handle, buf, 2, &on_task_write);
	} else {
		assert(false);
		/* With asserts compiled out, do not pretend the send was queued:
		 * nothing was submitted, so the request must be released below
		 * and the task must not gain a reference. */
		ret = kr_error(EINVAL);
	}

	if (ret == 0) {
		qr_task_ref(task); /* Pending ioreq on current task */
		if (worker->too_many_open &&
		    worker->stats.rconcurrent <
			worker->rconcurrent_highwatermark - 10) {
			worker->too_many_open = false;
		}
	} else {
		iorequest_release(worker, ioreq);
		if (ret == UV_EMFILE) {
			/* Remember the concurrency level at which descriptors ran out. */
			worker->too_many_open = true;
			worker->rconcurrent_highwatermark = worker->stats.rconcurrent;
		}
	}

	/* Update statistics; only sends to a handle other than the request's
	 * source handle are counted, and they are counted whether or not the
	 * send above succeeded. */
	if (ctx->source.session &&
	    handle != ctx->source.session->handle &&
	    addr) {
		if (handle->type == UV_UDP)
			worker->stats.udp += 1;
		else
			worker->stats.tcp += 1;
		if (addr->sa_family == AF_INET6)
			worker->stats.ipv6 += 1;
		else if (addr->sa_family == AF_INET)
			worker->stats.ipv4 += 1;
	}
	return ret;
}

/**
 * Send the packet buffer of the first task waiting on the session to the
 * session's peer.
 *
 * @return kr_ok() when nothing is waiting, otherwise the result of
 *         qr_task_send() for the first waiting task.
 */
static int session_next_waiting_send(struct session *session)
{
	if (!(session->waiting.len > 0)) {
		return kr_ok();
	}

	struct qr_task *head = session->waiting.at[0];
	return qr_task_send(head, session->handle, &session->peer.ip, head->pktbuf);
}

static int session_tls_hs_cb(struct session *session, int status)
{
1097 1098 1099
	struct worker_ctx *worker = get_worker();
	union inaddr *peer = &session->peer;
	int deletion_res = worker_del_tcp_waiting(worker, &peer->ip);
1100
	int ret = kr_ok();
1101

1102
	if (status) {
1103
		kr_nsrep_update_rtt(NULL, &peer->ip, KR_NS_DEAD,
1104 1105
				    worker->engine->resolver.cache_rtt,
				    KR_NS_UPDATE_NORESET);
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128
		return ret;
	}

	/* handshake was completed successfully */
	struct tls_client_ctx_t *tls_client_ctx = session->tls_client_ctx;
	struct tls_client_paramlist_entry *tls_params = tls_client_ctx->params;
	gnutls_session_t tls_session = tls_client_ctx->c.tls_session;
	if (gnutls_session_is_resumed(tls_session) != 0) {
		kr_log_verbose("[tls_client] TLS session has resumed\n");
	} else {
		kr_log_verbose("[tls_client] TLS session has not resumed\n");
		/* session wasn't resumed, delete old session data ... */
		if (tls_params->session_data.data != NULL) {
			gnutls_free(tls_params->session_data.data);
			tls_params->session_data.data = NULL;
			tls_params->session_data.size = 0;
		}
		/* ... and get the new session data */
		gnutls_datum_t tls_session_data = { NULL, 0 };
		ret = gnutls_session_get_data2(tls_session, &tls_session_data);
		if (ret == 0) {
			tls_params->session_data = tls_session_data;
		}
1129
	}
1130

1131
	ret = worker_add_tcp_connected(worker, &peer->ip, session);
1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
	if (deletion_res == kr_ok() && ret == kr_ok()) {
		ret = session_next_waiting_send(session);
	} else {
		ret = kr_error(EINVAL);
	}

	if (ret != kr_ok()) {
		/* Something went wrong.
		 * Session isn't in the list of waiting sessions,
		 * or addition to the list of connected sessions failed,
		 * or write to upstream failed. */
		while (session->waiting.len > 0) {
			struct qr_task *task = session->waiting.at[0];
			session_del_tasks(session, task);
			array_del(session->waiting, 0);
			qr_task_finalize(task, KR_STATE_FAIL);
			qr_task_unref(task);
1149
		}
1150 1151 1152
		worker_del_tcp_connected(worker, &peer->ip);
		assert(session->tasks.len == 0);
		session_close(session);
1153 1154 1155 1156
	} else {
		uv_timer_stop(&session->timeout);
		session->timeout.data = session;
		timer_start(session, on_tcp_watchdog_timeout, MAX_TCP_INACTIVITY, 0);
1157 1158 1159 1160
	}
	return kr_ok();
}

/**
 * Return the last pending query in the resolution plan of the first task
 * waiting on the session.
 *
 * @return the query, or NULL when no task is waiting or that task has no
 *         pending queries.
 */
static struct kr_query *session_current_query(struct session *session)
{
	struct kr_query *current = NULL;

	if (session->waiting.len != 0) {
		struct qr_task *head = session->waiting.at[0];
		if (head->ctx->req.rplan.pending.len != 0) {
			current = array_tail(head->ctx->req.rplan.pending);
		}
	}

	return current;
}

/**
 * libuv connect callback for outgoing TCP (optionally TLS) sessions.
 *
 * The connect request `req` is released on every path. On success the
 * session is marked connected and either the TLS handshake is started
 * (has_tls) or the first waiting task is sent. On failure the waiting
 * tasks are stepped or finalized and the session is closed.
 *
 * @param req     connect request; req->handle->data is the session
 * @param status  0 on success, a libuv error code otherwise
 */
static void on_connect(uv_connect_t *req, int status)
{
	struct worker_ctx *worker = get_worker();
	uv_stream_t *handle = req->handle;
	struct session *session = handle->data;
	union inaddr *peer = &session->peer;

	/* Cancelled connect: the session is expected to be closing already
	 * with no tasks attached, so there is nothing to clean up besides the
	 * waiting-list entry and the request. */
	if (status == UV_ECANCELED) {
		worker_del_tcp_waiting(worker, &peer->ip);
		assert(session->closing && session->waiting.len == 0 && session->tasks.len == 0);
		iorequest_release(worker, req);
		return;
	}

	if (session->closing) {
		worker_del_tcp_waiting(worker, &peer->ip);
		assert(session->waiting.len == 0 && session->tasks.len == 0);
		iorequest_release(worker, req);
		return;
	}

	uv_timer_stop(&session->timeout);

	/* Connection failed: detach every waiting task from the session and
	 * step it again with no packet. */
	if (status != 0) {
		worker_del_tcp_waiting(worker, &peer->ip);
		while (session->waiting.len > 0) {
			struct qr_task *task = session->waiting.at[0];
			session_del_tasks(session, task);
			array_del(session->waiting, 0);
			/* The task must still be referenced elsewhere, as it is
			 * used right after dropping the session's reference. */
			assert(task->refs > 1);
			qr_task_unref(task);
			qr_task_step(task, NULL, NULL);
		}
		assert(session->tasks.len == 0);
		iorequest_release(worker, req);
		session_close(session);
		return;
	}

	if (!session->has_tls) {
		/* if there is a TLS, session still waiting for handshake,
		 * otherwise remove it from waiting list */
		if (worker_del_tcp_waiting(worker, &peer->ip) != 0) {
			/* session isn't in list of waiting queries, *
			 * something gone wrong */
			while (session->waiting.len > 0) {
				struct qr_task *task = session->waiting.at[0];
				session_del_tasks(session, task);
				array_del(session->waiting, 0);
				ioreq_kill_pending(task);
				assert(task->pending_count == 0);
				qr_task_finalize(task, KR_STATE_FAIL);
				qr_task_unref(task);
			}
			assert(session->tasks.len == 0);
			iorequest_release(worker, req);
			session_close(session);
			return;
		}
	}

	struct kr_query *qry = session_current_query(session);
	WITH_VERBOSE (qry) {
		char addr_str[INET6_ADDRSTRLEN];
		inet_ntop(session->peer.ip.sa_family, kr_inaddr(&session->peer.ip),
			  addr_str, sizeof(addr_str));
		VERBOSE_MSG(qry, "=> connected to '%s'\n", addr_str);
	}

	session->connected = true;
	session->handle = (uv_handle_t *)handle;

	int ret = kr_ok();
	if (session->has_tls) {
		ret = tls_client_connect_start(session->tls_client_ctx,
					       session, session_tls_hs_cb);
		if (ret == kr_error(EAGAIN)) {
			/* Handshake is in progress: start reading from the peer
			 * and arm the watchdog timer in the meantime. */
			iorequest_release(worker, req);
			io_start_read(session->handle);
			timer_start(session, on_tcp_watchdog_timeout, MAX_TCP_INACTIVITY, 0);
			return;
		}
	}

	if (ret == kr_ok()) {
		ret = session_next_waiting_send(session);
		if (ret == kr_ok()) {
			timer_start(session, on_tcp_watchdog_timeout, MAX_TCP_INACTIVITY, 0);
			worker_add_tcp_connected(worker, &session->peer.ip, session);
			iorequest_release(worker, req);
			return;
		}
	}

	/* Starting TLS or sending the first query failed: fail every waiting
	 * task and close the session. */
	while (session->waiting.len > 0) {
		struct qr_task *task = session->waiting.at[0];
		session_del_tasks(session, task);
		array_del(session->waiting, 0);
		ioreq_kill_pending(task);
		assert(task->pending_count == 0);
		qr_task_finalize(task, KR_STATE_FAIL);
		qr_task_unref(task);
	}

	assert(session->tasks.len == 0);

	iorequest_release(worker, req);
	session_close(session);
}

1285
static void on_tcp_connect_timeout(uv_timer_t *timer)
1286
{
1287 1288 1289
	struct session *session = timer->data;

	uv_timer_stop(timer);
1290
	struct worker_ctx *worker = get_worker();
1291 1292 1293

	assert (session->waiting.len == session->tasks.len);

1294 1295 1296
	union inaddr *peer = &session->peer;
	worker_del_tcp_waiting(worker, &peer->ip);

1297 1298
	struct kr_query *qry = session_current_query(session);
	WITH_VERBOSE (qry) {
1299 1300
		char addr_str[INET6_ADDRSTRLEN];
		inet_ntop(peer->ip.sa_family, kr_inaddr(&peer->ip), addr_str, sizeof(addr_str));
1301
		VERBOSE_MSG(qry, "=> connection to '%s' failed\n", addr_str);
1302
	}
1303

1304
	kr_nsrep_update_rtt(NULL, &peer->ip, KR_NS_DEAD,
1305 1306
			    worker->engine->resolver.cache_rtt,
			    KR_NS_UPDATE_NORESET);
1307

1308 1309 1310
	while (session->waiting.len > 0) {
		struct qr_task *task = session->waiting.at[0];
		struct request_ctx *ctx = task->ctx;
1311
		assert(ctx);
1312 1313 1314 1315
		task->timeouts += 1;
		worker->stats.timeout += 1;
		session_del_tasks(session, task);
		array_del(session->waiting, 0);