functions.c 13.2 KB
Newer Older
1
/*  Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2 3 4 5 6 7 8 9 10 11 12 13

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
14
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
15 16
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "knot/modules/rrl/functions.h"
21
#include "contrib/sockaddr.h"
22
#include "contrib/time.h"
23 24
#include "libdnssec/error.h"
#include "libdnssec/random.h"
25

26 27
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
28 29
/* Limits (class, ipv6 remote, dname) */
#define RRL_CLSBLK_MAXLEN (1 + 8 + 255)
30
/* CIDR block prefix lengths for v4/v6 */
31 32
#define RRL_V4_PREFIX_LEN 3 /* /24 */
#define RRL_V6_PREFIX_LEN 7 /* /56 */
33
/* Defaults */
34
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
35
#define RRL_PSIZE_LARGE 1024
36 37
#define RRL_CAPACITY 4 /* Window size in seconds */
#define RRL_LOCK_GRANULARITY 32 /* Last digit granularity */
38 39 40 41 42 43 44

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
45
	CLS_EMPTY    = 1 << 3, /* Empty response. */
46
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
47 48 49
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
50 51
};

52
/* Classification string. */
53 54 55 56 57 58
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
59 60 61 62 63 64 65 66 67 68
	{ CLS_NORMAL,   "POSITIVE" },
	{ CLS_ERROR,    "ERROR" },
	{ CLS_NXDOMAIN, "NXDOMAIN"},
	{ CLS_EMPTY,    "EMPTY"},
	{ CLS_LARGE,    "LARGE"},
	{ CLS_WILDCARD, "WILDCARD"},
	{ CLS_ANY,      "ANY"},
	{ CLS_DNSSEC,   "DNSSEC"},
	{ CLS_NULL,     "NULL"},
	{ CLS_NULL,     NULL}
69
};
70

71 72
static inline const char *rrl_clsstr(int code)
{
73 74 75 76 77 78 79
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
80 81
}

82 83 84 85 86 87 88 89 90
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
91 92
	/* Check error code */
	int ret = CLS_NULL;
93
	switch (knot_wire_get_rcode(p->wire)) {
94 95 96 97 98 99
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
Daniel Salzman's avatar
Daniel Salzman committed
100
	if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
101 102
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
103

104
	/* Check query type for spec. classes. */
105
	if (p->query) {
106
		switch(knot_pkt_qtype(p->query)) {
107 108 109 110 111 112 113 114 115 116 117 118
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
119

120
	/* Check packet size for threshold. */
121
	if (p->len >= RRL_PSIZE_LARGE) {
122 123 124 125
		return CLS_LARGE;
	}

	/* Check ancount */
126
	if (knot_wire_get_ancount(p->wire) == 0) {
127 128 129 130
		return CLS_EMPTY;
	}

	return ret;
131 132
}

133
static int rrl_clsname(uint8_t *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
134
                       const knot_dname_t *name)
135
{
136 137 138
	if (name == NULL) {
		/* Fallback for errors etc. */
		name = (const knot_dname_t *)"\x00";
139
	}
140

141 142 143 144 145 146
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
147
		/* Use QNAME */
148
		if (req->query) {
149
			name = knot_pkt_qname(req->query);
150
		}
151 152
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
153

154
	/* Write to wire */
155
	return knot_dname_to_wire(dst, name, maxlen);
156 157
}

158
static int rrl_classify(uint8_t *dst, size_t maxlen, const struct sockaddr_storage *remote,
159
                        rrl_req_t *req, const knot_dname_t *name)
160
{
161
	/* Class */
162
	uint8_t cls = rrl_clsid(req);
163
	*dst = cls;
164
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
165

166
	/* Address (in network byteorder, adjust masks). */
167 168 169
	uint64_t netblk = 0;
	if (remote->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)remote;
170
		memcpy(&netblk, &ipv6->sin6_addr, RRL_V6_PREFIX_LEN);
171
	} else {
172
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)remote;
173
		memcpy(&netblk, &ipv4->sin_addr, RRL_V4_PREFIX_LEN);
174
	}
175
	memcpy(dst + blklen, &netblk, sizeof(netblk));
176
	blklen += sizeof(netblk);
177

178
	/* Name */
179
	int ret = rrl_clsname(dst + blklen, maxlen - blklen, cls, req, name);
180 181
	if (ret < 0) {
		return ret;
182
	}
183
	uint8_t len = ret;
184
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
185

186 187 188
	return blklen;
}

189
static int bucket_free(rrl_item_t *bucket, uint32_t now)
190
{
191
	return bucket->cls == CLS_NULL || (bucket->time + 1 < now);
192 193
}

194
static int bucket_match(rrl_item_t *bucket, rrl_item_t *match)
195
{
196 197 198
	return bucket->cls    == match->cls &&
	       bucket->netblk == match->netblk &&
	       bucket->qname  == match->qname;
199
}
200

201
static int find_free(rrl_table_t *tbl, unsigned id, uint32_t now)
202
{
203 204 205
	for (int i = id; i < tbl->size; i++) {
		if (bucket_free(&tbl->arr[i], now)) {
			return i - id;
206
		}
207
	}
208 209 210
	for (int i = 0; i < id; i++) {
		if (bucket_free(&tbl->arr[i], now)) {
			return i + (tbl->size - id);
211
		}
212
	}
Jan Včelák's avatar
Jan Včelák committed
213

214
	/* this happens if table is full... force vacate current elm */
215
	return id;
216
}
217

218
static inline unsigned find_match(rrl_table_t *tbl, uint32_t id, rrl_item_t *m)
219
{
220 221 222 223 224 225 226 227
	unsigned new_id = 0;
	unsigned hop = 0;
	unsigned match_bitmap = tbl->arr[id].hop;
	while (match_bitmap != 0) {
		hop = __builtin_ctz(match_bitmap); /* offset of next potential match */
		new_id = (id + hop) % tbl->size;
		if (bucket_match(&tbl->arr[new_id], m)) {
			return hop;
228
		} else {
229
			match_bitmap &= ~(1 << hop); /* clear potential match */
230
		}
231
	}
232 233 234 235

	return HOP_LEN + 1;
}

236
static inline unsigned reduce_dist(rrl_table_t *tbl, unsigned id, unsigned dist, unsigned *free_id)
237 238 239
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
240 241 242 243 244 245 246 247 248 249 250 251 252
		unsigned vacate_id = (tbl->size + *free_id - rd) % tbl->size; /* bucket to be vacated */
		if (tbl->arr[vacate_id].hop != 0) {
			unsigned hop = __builtin_ctz(tbl->arr[vacate_id].hop);  /* offset of first valid bucket */
			if (hop < rd) { /* only offsets in <vacate_id, free_id> are interesting */
				unsigned new_id = (vacate_id + hop) % tbl->size; /* this item will be displaced to [free_id] */
				unsigned keep_hop = tbl->arr[*free_id].hop; /* unpredictable padding */
				memcpy(tbl->arr + *free_id, tbl->arr + new_id, sizeof(rrl_item_t));
				tbl->arr[*free_id].hop = keep_hop;
				tbl->arr[new_id].cls = CLS_NULL;
				tbl->arr[vacate_id].hop &= ~(1 << hop);
				tbl->arr[vacate_id].hop |= 1 << rd;
				*free_id = new_id;
				return dist - (rd - hop);
253
			}
254
		}
255
		--rd;
256
	}
Jan Včelák's avatar
Jan Včelák committed
257

258
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
259 260 261
	*free_id = id;
	dist = 0; /* force vacate initial element */
	return dist;
262 263
}

264 265
static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss,
                          uint16_t flags, uint8_t cls)
266
{
267 268 269 270
	if (mod == NULL) {
		return;
	}

271
	char addr_str[SOCKADDR_STRLEN] = {0};
272
	sockaddr_tostr(addr_str, sizeof(addr_str), (struct sockaddr *)ss);
273

274 275 276 277
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
278

279
	knotd_mod_log(mod, LOG_NOTICE, "address %s, class %s, %s limiting",
280
	              addr_str, rrl_clsstr(cls), what);
281 282
}

283
static void rrl_lock(rrl_table_t *tbl, int lk_id)
284
{
285
	assert(lk_id > -1);
286
	pthread_mutex_lock(tbl->lk + lk_id);
287 288
}

289
static void rrl_unlock(rrl_table_t *tbl, int lk_id)
290
{
291
	assert(lk_id > -1);
292
	pthread_mutex_unlock(tbl->lk + lk_id);
293 294
}

295
static int rrl_setlocks(rrl_table_t *tbl, uint32_t granularity)
296
{
297 298
	assert(!tbl->lk); /* Cannot change while locks are used. */
	assert(granularity <= tbl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
299

300
	if (pthread_mutex_init(&tbl->ll, NULL) < 0) {
301 302
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
303

304
	/* Alloc new locks. */
305 306
	tbl->lk = malloc(granularity * sizeof(pthread_mutex_t));
	if (!tbl->lk) {
307 308
		return KNOT_ENOMEM;
	}
309
	memset(tbl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
310

311 312
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
313
		if (pthread_mutex_init(tbl->lk + i, NULL) < 0) {
314 315
			break;
		}
316
		++tbl->lk_count;
317
	}
318

319
	/* Incomplete initialization */
320 321 322
	if (tbl->lk_count != granularity) {
		for (size_t i = 0; i < tbl->lk_count; ++i) {
			pthread_mutex_destroy(tbl->lk + i);
323
		}
324 325
		free(tbl->lk);
		tbl->lk_count = 0;
326 327
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
328

329 330 331
	return KNOT_EOK;
}

332 333 334 335 336 337 338
rrl_table_t *rrl_create(size_t size, uint32_t rate)
{
	if (size == 0) {
		return NULL;
	}

	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
339 340
	rrl_table_t *tbl = calloc(1, tbl_len);
	if (!tbl) {
341 342
		return NULL;
	}
343 344
	tbl->size = size;
	tbl->rate = rate;
345

346 347
	if (dnssec_random_buffer((uint8_t *)&tbl->key, sizeof(tbl->key)) != DNSSEC_EOK) {
		free(tbl);
348 349 350
		return NULL;
	}

351 352
	if (rrl_setlocks(tbl, RRL_LOCK_GRANULARITY) != KNOT_EOK) {
		free(tbl);
353 354 355
		return NULL;
	}

356
	return tbl;
357 358 359
}

/*! \brief Get bucket for current combination of parameters. */
360
static rrl_item_t *rrl_hash(rrl_table_t *tbl, const struct sockaddr_storage *remote,
361 362
                            rrl_req_t *req, const knot_dname_t *zone, uint32_t stamp,
                            int *lock)
363
{
364
	uint8_t buf[RRL_CLSBLK_MAXLEN];
365
	int len = rrl_classify(buf, sizeof(buf), remote, req, zone);
366 367 368
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
369

370
	uint32_t id = SipHash24(&tbl->key, buf, len) % tbl->size;
Jan Včelák's avatar
Jan Včelák committed
371

372
	/* Lock for lookup. */
373
	pthread_mutex_lock(&tbl->ll);
374 375

	/* Find an exact match in <id, id + HOP_LEN). */
376
	knot_dname_t *qname = buf + sizeof(uint8_t) + sizeof(uint64_t);
377 378
	uint64_t netblk;
	memcpy(&netblk, buf + sizeof(uint8_t), sizeof(netblk));
379
	rrl_item_t match = {
Daniel Salzman's avatar
Daniel Salzman committed
380
		.hop = 0,
381
		.netblk = netblk,
382
		.ntok = tbl->rate * RRL_CAPACITY,
Daniel Salzman's avatar
Daniel Salzman committed
383 384
		.cls = buf[0],
		.flags = RRL_BF_NULL,
385
		.qname = SipHash24(&tbl->key, qname, knot_dname_size(qname)),
Daniel Salzman's avatar
Daniel Salzman committed
386
		.time = stamp
387 388
	};

389 390 391
	unsigned dist = find_match(tbl, id, &match);
	if (dist > HOP_LEN) { /* not an exact match, find free element [f] */
		dist = find_free(tbl, id, stamp);
392
	}
Jan Včelák's avatar
Jan Včelák committed
393

394
	/* Reduce distance to fit <id, id + HOP_LEN) */
395 396 397
	unsigned free_id = (id + dist) % tbl->size;
	while (dist >= HOP_LEN) {
		dist = reduce_dist(tbl, id, dist, &free_id);
398
	}
Jan Včelák's avatar
Jan Včelák committed
399

400
	/* Assign granular lock and unlock lookup. */
401
	*lock = free_id % tbl->lk_count;
402 403
	rrl_lock(tbl, *lock);
	pthread_mutex_unlock(&tbl->ll);
404

405 406 407 408
	/* found free bucket which is in <id, id + HOP_LEN) */
	tbl->arr[id].hop |= (1 << dist);
	rrl_item_t *bucket = &tbl->arr[free_id];
	assert(free_id == (id + dist) % tbl->size);
409 410

	/* Inspect bucket state. */
411 412 413 414
	unsigned hop = bucket->hop;
	if (bucket->cls == CLS_NULL) {
		memcpy(bucket, &match, sizeof(rrl_item_t));
		bucket->hop = hop;
415 416
	}
	/* Check for collisions. */
417 418 419 420 421 422
	if (!bucket_match(bucket, &match)) {
		if (!(bucket->flags & RRL_BF_SSTART)) {
			memcpy(bucket, &match, sizeof(rrl_item_t));
			bucket->hop = hop;
			bucket->ntok = tbl->rate + tbl->rate / RRL_SSTART;
			bucket->flags |= RRL_BF_SSTART;
423 424
		}
	}
Jan Včelák's avatar
Jan Včelák committed
425

426
	return bucket;
427 428
}

429 430
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *remote,
              rrl_req_t *req, const knot_dname_t *zone, knotd_mod_t *mod)
431
{
432
	if (!rrl || !req || !remote) {
433 434
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
435

436 437
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
438
	int lock = -1;
439
	uint32_t now = time_now().tv_sec;
440 441
	rrl_item_t *bucket = rrl_hash(rrl, remote, req, zone, now, &lock);
	if (!bucket) {
442 443 444
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
445 446
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
447

448
	/* Calculate rate for dT */
449
	uint32_t dt = now - bucket->time;
450 451 452
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
453
	/* Visit bucket. */
454
	bucket->time = now;
455
	if (dt > 0) { /* Window moved. */
456 457

		/* Check state change. */
458 459 460
		if ((bucket->ntok > 0 || dt > 1) && (bucket->flags & RRL_BF_ELIMIT)) {
			bucket->flags &= ~RRL_BF_ELIMIT;
			rrl_log_state(mod, remote, bucket->flags, bucket->cls);
461
		}
Jan Včelák's avatar
Jan Včelák committed
462

463 464
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
465 466
		if (bucket->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			bucket->flags &= ~RRL_BF_SSTART;
467
		}
468 469 470
		bucket->ntok += dn;
		if (bucket->ntok > RRL_CAPACITY * rrl->rate) {
			bucket->ntok = RRL_CAPACITY * rrl->rate;
471
		}
472
	}
Jan Včelák's avatar
Jan Včelák committed
473

474
	/* Last item taken. */
475 476 477
	if (bucket->ntok == 1 && !(bucket->flags & RRL_BF_ELIMIT)) {
		bucket->flags |= RRL_BF_ELIMIT;
		rrl_log_state(mod, remote, bucket->flags, bucket->cls);
478
	}
479 480

	/* Decay current bucket. */
481 482 483
	if (bucket->ntok > 0) {
		--bucket->ntok;
	} else if (bucket->ntok == 0) {
484 485
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
486

487 488 489
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
490 491 492
	return ret;
}

493 494 495 496 497 498
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
499
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
500
	return (roll < threshold);
501 502
}

503
void rrl_destroy(rrl_table_t *rrl)
504
{
505
	if (rrl) {
506 507 508
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
509
		for (size_t i = 0; i < rrl->lk_count; ++i) {
510
			pthread_mutex_destroy(rrl->lk + i);
511 512 513
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
514

515
	free(rrl);
516
}