functions.c 13.3 KB
Newer Older
Daniel Salzman's avatar
Daniel Salzman committed
1
/*  Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "knot/modules/rrl/functions.h"
21
#include "contrib/murmurhash3/murmurhash3.h"
22
#include "contrib/sockaddr.h"
23
#include "dnssec/random.h"
24

25 26
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
27 28 29
/* Limits */
#define RRL_CLSBLK_MAXLEN (4 + 8 + 1 + 256)
/* CIDR block prefix lengths for v4/v6 */
30 31
#define RRL_V4_PREFIX ((uint32_t)0x00ffffff)         /* /24 */
#define RRL_V6_PREFIX ((uint64_t)0x00ffffffffffffff) /* /56 */
32
/* Defaults */
33
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
34
#define RRL_PSIZE_LARGE 1024
35 36 37 38 39 40 41

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
42
	CLS_EMPTY    = 1 << 3, /* Empty response. */
43
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
44 45 46
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
47 48
};

49
/* Classification string. */
50 51 52 53 54 55
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
56 57 58 59 60 61 62 63 64 65
	{ CLS_NORMAL,   "POSITIVE" },
	{ CLS_ERROR,    "ERROR" },
	{ CLS_NXDOMAIN, "NXDOMAIN"},
	{ CLS_EMPTY,    "EMPTY"},
	{ CLS_LARGE,    "LARGE"},
	{ CLS_WILDCARD, "WILDCARD"},
	{ CLS_ANY,      "ANY"},
	{ CLS_DNSSEC,   "DNSSEC"},
	{ CLS_NULL,     "NULL"},
	{ CLS_NULL,     NULL}
66
};
67

68 69
static inline const char *rrl_clsstr(int code)
{
70 71 72 73 74 75 76
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
77 78
}

79 80 81 82 83 84 85 86 87
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
88 89
	/* Check error code */
	int ret = CLS_NULL;
90
	switch (knot_wire_get_rcode(p->w)) {
91 92 93 94 95 96
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
Daniel Salzman's avatar
Daniel Salzman committed
97
	if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
98 99
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
100

101
	/* Check query type for spec. classes. */
102
	if (p->query) {
103
		switch(knot_pkt_qtype(p->query)) {
104 105 106 107 108 109 110 111 112 113 114 115
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
116

117
	/* Check packet size for threshold. */
118
	if (p->len >= RRL_PSIZE_LARGE) {
119 120 121 122
		return CLS_LARGE;
	}

	/* Check ancount */
123
	if (knot_wire_get_ancount(p->w) == 0) {
124 125 126 127
		return CLS_EMPTY;
	}

	return ret;
128 129
}

130 131
static int rrl_clsname(char *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
                       const knot_dname_t *name)
132
{
133 134 135
	if (name == NULL) {
		/* Fallback for errors etc. */
		name = (const knot_dname_t *)"\x00";
136
	}
137

138 139 140 141 142 143
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
144
		/* Use QNAME */
145
		if (req->query) {
146
			name = knot_pkt_qname(req->query);
147
		}
148 149
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
150

151
	/* Write to wire */
152
	return knot_dname_to_wire((uint8_t *)dst, name, maxlen);
153 154
}

155
static int rrl_classify(char *dst, size_t maxlen, const struct sockaddr_storage *a,
156
                        rrl_req_t *p, const knot_dname_t *z, uint32_t seed)
157
{
158
	if (!dst || !p || !a || maxlen == 0) {
159 160
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
161

162 163 164
	/* Class */
	uint8_t cls = rrl_clsid(p);
	*dst = cls;
165
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
166

167
	/* Address (in network byteorder, adjust masks). */
168
	uint64_t nb = 0;
169 170
	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)a;
Jan Včelák's avatar
Jan Včelák committed
171
		nb = *((uint64_t *)(&ipv6->sin6_addr)) & RRL_V6_PREFIX;
172 173 174
	} else {
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)a;
		nb = ((uint32_t)ipv4->sin_addr.s_addr) & RRL_V4_PREFIX;
175
	}
176 177 178
	if (blklen + sizeof(nb) > maxlen) {
		return KNOT_ESPACE;
	}
Jan Včelák's avatar
Jan Včelák committed
179
	memcpy(dst + blklen, (void *)&nb, sizeof(nb));
180 181
	blklen += sizeof(nb);

182
	/* Name */
Jan Včelák's avatar
Jan Včelák committed
183
	uint16_t *nlen = (uint16_t *)(dst + blklen);
184
	blklen += sizeof(uint16_t);
185
	int len = rrl_clsname(dst + blklen, maxlen - blklen, cls, p, z);
186
	if (len < 0) {
187
		return len;
188
	}
189
	*nlen = len;
190
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
191

192
	/* Seed. */
193 194 195
	if (blklen + sizeof(seed) > maxlen) {
		return KNOT_ESPACE;
	}
196 197
	memcpy(dst + blklen, (void *)&seed, sizeof(seed));
	blklen += sizeof(seed);
Jan Včelák's avatar
Jan Včelák committed
198

199 200 201
	return blklen;
}

202 203 204 205
static int bucket_free(rrl_item_t *b, uint32_t now) {
	return b->cls == CLS_NULL || (b->time + 1 < now);
}

206
static int bucket_match(rrl_item_t *b, rrl_item_t *m)
207
{
208 209 210
	return b->cls    == m->cls &&
	       b->netblk == m->netblk &&
	       b->qname  == m->qname;
211
}
212

213
static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
214 215 216 217 218 219 220
{
	rrl_item_t *np = t->arr + t->size;
	rrl_item_t *b = NULL;
	for (b = t->arr + i; b != np; ++b) {
		if (bucket_free(b, now)) {
			return b - (t->arr + i);
		}
221
	}
222 223 224 225
	np = t->arr + i;
	for (b = t->arr; b != np; ++b) {
		if (bucket_free(b, now)) {
			return (b - t->arr) + (t->size - i);
226
		}
227
	}
Jan Včelák's avatar
Jan Včelák committed
228

229 230
	/* this happens if table is full... force vacate current elm */
	return i;
231
}
232

233 234 235 236 237 238 239 240 241 242 243 244 245
static inline unsigned find_match(rrl_table_t *t, uint32_t id, rrl_item_t *m)
{
	unsigned f = 0;
	unsigned d = 0;
	unsigned match = t->arr[id].hop;
	while (match != 0) {
		d = __builtin_ctz(match);
		f = (id + d) % t->size;
		if (bucket_match(t->arr + f, m)) {
			return d;
		} else {
			match &= ~(1<<d); /* clear potential match */
		}
246
	}
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266

	return HOP_LEN + 1;
}

static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
		unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
		unsigned o = __builtin_ctz(t->arr[s].hop); /* offset of first valid bucket */
		if (t->arr[s].hop != 0 && o < rd) {        /* only offsets in <s, f> are interesting */
			unsigned e = (s + o) % t->size;    /* this item will be displaced to [f] */
			unsigned keep_hop = t->arr[*f].hop; /* unpredictable padding */
			memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
			t->arr[*f].hop = keep_hop;
			t->arr[e].cls = CLS_NULL;
			t->arr[s].hop &= ~(1<<o);
			t->arr[s].hop |= 1<<rd;
			*f = e;
			return d - (rd - o);
267
		}
268
		--rd;
269
	}
Jan Včelák's avatar
Jan Včelák committed
270

271 272 273 274
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	return d;
275 276
}

277 278
static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss,
                          uint16_t flags, uint8_t cls)
279
{
280 281 282 283
	if (mod == NULL) {
		return;
	}

284
	char addr_str[SOCKADDR_STRLEN] = {0};
285
	sockaddr_tostr(addr_str, sizeof(addr_str), (struct sockaddr *)ss);
286

287 288 289 290
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
291

292
	knotd_mod_log(mod, LOG_NOTICE, "address %s, class %s, %s limiting",
293
	              addr_str, rrl_clsstr(cls), what);
294 295
}

296 297
rrl_table_t *rrl_create(size_t size)
{
298 299 300
	if (size == 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
301

302 303
	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
	rrl_table_t *t = malloc(tbl_len);
304 305 306
	if (!t) {
		return NULL;
	}
307
	memset(t, 0, sizeof(rrl_table_t));
308
	t->size = size;
309
	rrl_reseed(t);
310

311 312 313 314 315
	return t;
}

uint32_t rrl_setrate(rrl_table_t *rrl, uint32_t rate)
{
316 317 318
	if (!rrl) {
		return 0;
	}
319 320 321 322 323 324 325
	uint32_t old = rrl->rate;
	rrl->rate = rate;
	return old;
}

uint32_t rrl_rate(rrl_table_t *rrl)
{
326
	return rrl ? rrl->rate : 0;
327 328
}

329
int rrl_setlocks(rrl_table_t *rrl, unsigned granularity)
330
{
331 332 333 334
	if (!rrl) {
		return KNOT_EINVAL;
	}

335
	assert(!rrl->lk); /* Cannot change while locks are used. */
336
	assert(granularity <= rrl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
337

338 339 340
	if (pthread_mutex_init(&rrl->ll, NULL) < 0) {
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
341

342
	/* Alloc new locks. */
343
	rrl->lk = malloc(granularity * sizeof(pthread_mutex_t));
344 345 346
	if (!rrl->lk) {
		return KNOT_ENOMEM;
	}
347
	memset(rrl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
348

349 350
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
351 352 353
		if (pthread_mutex_init(rrl->lk + i, NULL) < 0) {
			break;
		}
354 355
		++rrl->lk_count;
	}
356

357 358 359
	/* Incomplete initialization */
	if (rrl->lk_count != granularity) {
		for (size_t i = 0; i < rrl->lk_count; ++i) {
360
			pthread_mutex_destroy(rrl->lk + i);
361 362 363 364 365
		}
		free(rrl->lk);
		rrl->lk_count = 0;
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
366

367 368 369
	return KNOT_EOK;
}

Jan Včelák's avatar
Jan Včelák committed
370
rrl_item_t *rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a, rrl_req_t *p,
371
                     const knot_dname_t *zone, uint32_t stamp, int *lock)
372 373 374 375 376 377
{
	char buf[RRL_CLSBLK_MAXLEN];
	int len = rrl_classify(buf, sizeof(buf), a, p, zone, t->seed);
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
378

379
	uint32_t id = hash(buf, len) % t->size;
Jan Včelák's avatar
Jan Včelák committed
380

381 382 383 384
	/* Lock for lookup. */
	pthread_mutex_lock(&t->ll);

	/* Find an exact match in <id, id + HOP_LEN). */
Jan Včelák's avatar
Jan Včelák committed
385
	uint16_t *qname = (uint16_t *)(buf + sizeof(uint8_t) + sizeof(uint64_t));
386
	rrl_item_t match = {
Daniel Salzman's avatar
Daniel Salzman committed
387 388 389 390 391 392 393
		.hop = 0,
		.netblk = *((uint64_t *)(buf + 1)),
		.ntok = t->rate * RRL_CAPACITY,
		.cls = buf[0],
		.flags = RRL_BF_NULL,
		.qname = hash((char *)(qname + 1), *qname),
		.time = stamp
394 395 396 397 398
	};

	unsigned d = find_match(t, id, &match);
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
		d = find_free(t, id, stamp);
399
	}
Jan Včelák's avatar
Jan Včelák committed
400

401 402
	/* Reduce distance to fit <id, id + HOP_LEN) */
	unsigned f = (id + d) % t->size;
403
	while (d >= HOP_LEN) {
404
		d = reduce_dist(t, id, d, &f);
405
	}
Jan Včelák's avatar
Jan Včelák committed
406

407 408 409 410 411
	/* Assign granular lock and unlock lookup. */
	*lock = f % t->lk_count;
	rrl_lock(t, *lock);
	pthread_mutex_unlock(&t->ll);

412 413
	/* found free elm 'k' which is in <id, id + HOP_LEN) */
	t->arr[id].hop |= (1 << d);
Jan Včelák's avatar
Jan Včelák committed
414
	rrl_item_t *b = t->arr + f;
415 416 417
	assert(f == (id+d) % t->size);

	/* Inspect bucket state. */
418
	unsigned hop = b->hop;
419
	if (b->cls == CLS_NULL) {
420 421
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
422 423
	}
	/* Check for collisions. */
424
	if (!bucket_match(b, &match)) {
425
		if (!(b->flags & RRL_BF_SSTART)) {
426 427
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
428 429 430 431
			b->ntok = t->rate + t->rate / RRL_SSTART;
			b->flags |= RRL_BF_SSTART;
		}
	}
Jan Včelák's avatar
Jan Včelák committed
432

433 434 435
	return b;
}

436
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req,
437
              const knot_dname_t *zone, knotd_mod_t *mod)
438
{
439 440 441
	if (!rrl || !req || !a) {
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
442

443 444
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
445
	int lock = -1;
446
	uint32_t now = time(NULL);
447
	rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
448
	if (!b) {
449 450 451
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
452 453
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
454

455 456 457 458 459
	/* Calculate rate for dT */
	uint32_t dt = now - b->time;
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
460 461
	/* Visit bucket. */
	b->time = now;
462
	if (dt > 0) { /* Window moved. */
463 464 465 466

		/* Check state change. */
		if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
			b->flags &= ~RRL_BF_ELIMIT;
467
			rrl_log_state(mod, a, b->flags, b->cls);
468
		}
Jan Včelák's avatar
Jan Včelák committed
469

470 471
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
472 473
		if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			b->flags &= ~RRL_BF_SSTART;
474
		}
475 476 477
		b->ntok += dn;
		if (b->ntok > RRL_CAPACITY * rrl->rate) {
			b->ntok = RRL_CAPACITY * rrl->rate;
478
		}
479
	}
Jan Včelák's avatar
Jan Včelák committed
480

481 482 483
	/* Last item taken. */
	if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
		b->flags |= RRL_BF_ELIMIT;
484
		rrl_log_state(mod, a, b->flags, b->cls);
485
	}
486 487 488 489 490 491 492

	/* Decay current bucket. */
	if (b->ntok > 0) {
		--b->ntok;
	} else if (b->ntok == 0) {
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
493

494 495 496
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
497 498 499
	return ret;
}

500 501 502 503 504 505
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
506
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
507
	return (roll < threshold);
508 509
}

510 511
int rrl_destroy(rrl_table_t *rrl)
{
512
	if (rrl) {
513 514 515
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
516
		for (size_t i = 0; i < rrl->lk_count; ++i) {
517
			pthread_mutex_destroy(rrl->lk + i);
518 519 520
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
521

522 523 524
	free(rrl);
	return KNOT_EOK;
}
525 526 527 528 529 530 531 532 533 534

int rrl_reseed(rrl_table_t *rrl)
{
	/* Lock entire table. */
	if (rrl->lk_count > 0) {
		pthread_mutex_lock(&rrl->ll);
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_lock(rrl, i);
		}
	}
Jan Včelák's avatar
Jan Včelák committed
535

536
	memset(rrl->arr, 0, rrl->size * sizeof(rrl_item_t));
537
	rrl->seed = dnssec_random_uint32_t();
Jan Včelák's avatar
Jan Včelák committed
538

539 540 541 542 543 544
	if (rrl->lk_count > 0) {
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_unlock(rrl, i);
		}
		pthread_mutex_unlock(&rrl->ll);
	}
Jan Včelák's avatar
Jan Včelák committed
545

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
	return KNOT_EOK;
}

int rrl_lock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_lock(t->lk + lk_id) != 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}

int rrl_unlock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_unlock(t->lk + lk_id)!= 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}