functions.c 13.3 KB
Newer Older
Daniel Salzman's avatar
Daniel Salzman committed
1
/*  Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "contrib/murmurhash3/murmurhash3.h"
21
#include "contrib/sockaddr.h"
22 23 24
#include "dnssec/random.h"
#include "knot/modules/rrl/functions.h"
#include "knot/common/log.h"
25

26 27
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
28 29 30
/* Limits */
#define RRL_CLSBLK_MAXLEN (4 + 8 + 1 + 256)
/* CIDR block prefix lengths for v4/v6 */
31 32
#define RRL_V4_PREFIX ((uint32_t)0x00ffffff)         /* /24 */
#define RRL_V6_PREFIX ((uint64_t)0x00ffffffffffffff) /* /56 */
33
/* Defaults */
34
#define RRL_CAPACITY 4 /* N seconds. */
35
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
36
#define RRL_PSIZE_LARGE 1024
37 38
/* Enable RRL logging. */
#define RRL_ENABLE_LOG
39 40 41 42 43 44 45

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
46
	CLS_EMPTY    = 1 << 3, /* Empty response. */
47
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
48 49 50
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
51 52
};

53
/* Classification string. */
54 55 56 57 58 59
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
60 61 62 63 64 65 66 67 68 69
	{ CLS_NORMAL,   "POSITIVE" },
	{ CLS_ERROR,    "ERROR" },
	{ CLS_NXDOMAIN, "NXDOMAIN"},
	{ CLS_EMPTY,    "EMPTY"},
	{ CLS_LARGE,    "LARGE"},
	{ CLS_WILDCARD, "WILDCARD"},
	{ CLS_ANY,      "ANY"},
	{ CLS_DNSSEC,   "DNSSEC"},
	{ CLS_NULL,     "NULL"},
	{ CLS_NULL,     NULL}
70
};
71

72 73
static inline const char *rrl_clsstr(int code)
{
74 75 76 77 78 79 80
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
81 82
}

83 84 85 86 87 88 89 90 91
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
92 93
	/* Check error code */
	int ret = CLS_NULL;
94
	switch (knot_wire_get_rcode(p->w)) {
95 96 97 98 99 100
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
Daniel Salzman's avatar
Daniel Salzman committed
101
	if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
102 103
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
104

105
	/* Check query type for spec. classes. */
106
	if (p->query) {
107
		switch(knot_pkt_qtype(p->query)) {
108 109 110 111 112 113 114 115 116 117 118 119
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
120

121
	/* Check packet size for threshold. */
122
	if (p->len >= RRL_PSIZE_LARGE) {
123 124 125 126
		return CLS_LARGE;
	}

	/* Check ancount */
127
	if (knot_wire_get_ancount(p->w) == 0) {
128 129 130 131
		return CLS_EMPTY;
	}

	return ret;
132 133
}

134 135
static int rrl_clsname(char *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
                       const knot_dname_t *name)
136
{
137 138 139
	if (name == NULL) {
		/* Fallback for errors etc. */
		name = (const knot_dname_t *)"\x00";
140
	}
141

142 143 144 145 146 147
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
148
		/* Use QNAME */
149
		if (req->query) {
150
			name = knot_pkt_qname(req->query);
151
		}
152 153
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
154

155
	/* Write to wire */
156
	return knot_dname_to_wire((uint8_t *)dst, name, maxlen);
157 158
}

159
static int rrl_classify(char *dst, size_t maxlen, const struct sockaddr_storage *a,
160
                        rrl_req_t *p, const knot_dname_t *z, uint32_t seed)
161
{
162
	if (!dst || !p || !a || maxlen == 0) {
163 164
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
165

166 167 168
	/* Class */
	uint8_t cls = rrl_clsid(p);
	*dst = cls;
169
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
170

171
	/* Address (in network byteorder, adjust masks). */
172
	uint64_t nb = 0;
173 174
	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)a;
Jan Včelák's avatar
Jan Včelák committed
175
		nb = *((uint64_t *)(&ipv6->sin6_addr)) & RRL_V6_PREFIX;
176 177 178
	} else {
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)a;
		nb = ((uint32_t)ipv4->sin_addr.s_addr) & RRL_V4_PREFIX;
179
	}
180 181 182
	if (blklen + sizeof(nb) > maxlen) {
		return KNOT_ESPACE;
	}
Jan Včelák's avatar
Jan Včelák committed
183
	memcpy(dst + blklen, (void *)&nb, sizeof(nb));
184 185
	blklen += sizeof(nb);

186
	/* Name */
Jan Včelák's avatar
Jan Včelák committed
187
	uint16_t *nlen = (uint16_t *)(dst + blklen);
188
	blklen += sizeof(uint16_t);
189
	int len = rrl_clsname(dst + blklen, maxlen - blklen, cls, p, z);
190
	if (len < 0) {
191
		return len;
192
	}
193
	*nlen = len;
194
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
195

196
	/* Seed. */
197 198 199
	if (blklen + sizeof(seed) > maxlen) {
		return KNOT_ESPACE;
	}
200 201
	memcpy(dst + blklen, (void *)&seed, sizeof(seed));
	blklen += sizeof(seed);
Jan Včelák's avatar
Jan Včelák committed
202

203 204 205
	return blklen;
}

206 207 208 209
static int bucket_free(rrl_item_t *b, uint32_t now) {
	return b->cls == CLS_NULL || (b->time + 1 < now);
}

210
static int bucket_match(rrl_item_t *b, rrl_item_t *m)
211
{
212 213 214
	return b->cls    == m->cls &&
	       b->netblk == m->netblk &&
	       b->qname  == m->qname;
215
}
216

217
static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
218 219 220 221 222 223 224
{
	rrl_item_t *np = t->arr + t->size;
	rrl_item_t *b = NULL;
	for (b = t->arr + i; b != np; ++b) {
		if (bucket_free(b, now)) {
			return b - (t->arr + i);
		}
225
	}
226 227 228 229
	np = t->arr + i;
	for (b = t->arr; b != np; ++b) {
		if (bucket_free(b, now)) {
			return (b - t->arr) + (t->size - i);
230
		}
231
	}
Jan Včelák's avatar
Jan Včelák committed
232

233 234
	/* this happens if table is full... force vacate current elm */
	return i;
235
}
236

237 238 239 240 241 242 243 244 245 246 247 248 249
static inline unsigned find_match(rrl_table_t *t, uint32_t id, rrl_item_t *m)
{
	unsigned f = 0;
	unsigned d = 0;
	unsigned match = t->arr[id].hop;
	while (match != 0) {
		d = __builtin_ctz(match);
		f = (id + d) % t->size;
		if (bucket_match(t->arr + f, m)) {
			return d;
		} else {
			match &= ~(1<<d); /* clear potential match */
		}
250
	}
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270

	return HOP_LEN + 1;
}

static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
		unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
		unsigned o = __builtin_ctz(t->arr[s].hop); /* offset of first valid bucket */
		if (t->arr[s].hop != 0 && o < rd) {        /* only offsets in <s, f> are interesting */
			unsigned e = (s + o) % t->size;    /* this item will be displaced to [f] */
			unsigned keep_hop = t->arr[*f].hop; /* unpredictable padding */
			memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
			t->arr[*f].hop = keep_hop;
			t->arr[e].cls = CLS_NULL;
			t->arr[s].hop &= ~(1<<o);
			t->arr[s].hop |= 1<<rd;
			*f = e;
			return d - (rd - o);
271
		}
272
		--rd;
273
	}
Jan Včelák's avatar
Jan Včelák committed
274

275 276 277 278
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	return d;
279 280
}

281
static void rrl_log_state(const struct sockaddr_storage *ss, uint16_t flags, uint8_t cls)
282 283
{
#ifdef RRL_ENABLE_LOG
284
	char addr_str[SOCKADDR_STRLEN] = {0};
285
	sockaddr_tostr(addr_str, sizeof(addr_str), (struct sockaddr *)ss);
286

287 288 289 290
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
291

292
	log_notice("mod-rrl, address '%s' class '%s' %s limiting",
Jan Včelák's avatar
Jan Včelák committed
293
	           addr_str, rrl_clsstr(cls), what);
294 295 296
#endif
}

297 298
rrl_table_t *rrl_create(size_t size)
{
299 300 301
	if (size == 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
302

303 304
	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
	rrl_table_t *t = malloc(tbl_len);
305 306 307
	if (!t) {
		return NULL;
	}
308
	memset(t, 0, sizeof(rrl_table_t));
309
	t->size = size;
310
	rrl_reseed(t);
311

312 313 314 315 316
	return t;
}

uint32_t rrl_setrate(rrl_table_t *rrl, uint32_t rate)
{
317 318 319
	if (!rrl) {
		return 0;
	}
320 321 322 323 324 325 326
	uint32_t old = rrl->rate;
	rrl->rate = rate;
	return old;
}

uint32_t rrl_rate(rrl_table_t *rrl)
{
327
	return rrl ? rrl->rate : 0;
328 329
}

330
int rrl_setlocks(rrl_table_t *rrl, unsigned granularity)
331
{
332 333 334 335
	if (!rrl) {
		return KNOT_EINVAL;
	}

336
	assert(!rrl->lk); /* Cannot change while locks are used. */
337
	assert(granularity <= rrl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
338

339 340 341
	if (pthread_mutex_init(&rrl->ll, NULL) < 0) {
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
342

343
	/* Alloc new locks. */
344
	rrl->lk = malloc(granularity * sizeof(pthread_mutex_t));
345 346 347
	if (!rrl->lk) {
		return KNOT_ENOMEM;
	}
348
	memset(rrl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
349

350 351
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
352 353 354
		if (pthread_mutex_init(rrl->lk + i, NULL) < 0) {
			break;
		}
355 356
		++rrl->lk_count;
	}
357

358 359 360
	/* Incomplete initialization */
	if (rrl->lk_count != granularity) {
		for (size_t i = 0; i < rrl->lk_count; ++i) {
361
			pthread_mutex_destroy(rrl->lk + i);
362 363 364 365 366
		}
		free(rrl->lk);
		rrl->lk_count = 0;
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
367

368 369 370
	return KNOT_EOK;
}

Jan Včelák's avatar
Jan Včelák committed
371
rrl_item_t *rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a, rrl_req_t *p,
372
                     const knot_dname_t *zone, uint32_t stamp, int *lock)
373 374 375 376 377 378
{
	char buf[RRL_CLSBLK_MAXLEN];
	int len = rrl_classify(buf, sizeof(buf), a, p, zone, t->seed);
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
379

380
	uint32_t id = hash(buf, len) % t->size;
Jan Včelák's avatar
Jan Včelák committed
381

382 383 384 385
	/* Lock for lookup. */
	pthread_mutex_lock(&t->ll);

	/* Find an exact match in <id, id + HOP_LEN). */
Jan Včelák's avatar
Jan Včelák committed
386
	uint16_t *qname = (uint16_t *)(buf + sizeof(uint8_t) + sizeof(uint64_t));
387
	rrl_item_t match = {
Daniel Salzman's avatar
Daniel Salzman committed
388 389 390 391 392 393 394
		.hop = 0,
		.netblk = *((uint64_t *)(buf + 1)),
		.ntok = t->rate * RRL_CAPACITY,
		.cls = buf[0],
		.flags = RRL_BF_NULL,
		.qname = hash((char *)(qname + 1), *qname),
		.time = stamp
395 396 397 398 399
	};

	unsigned d = find_match(t, id, &match);
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
		d = find_free(t, id, stamp);
400
	}
Jan Včelák's avatar
Jan Včelák committed
401

402 403
	/* Reduce distance to fit <id, id + HOP_LEN) */
	unsigned f = (id + d) % t->size;
404
	while (d >= HOP_LEN) {
405
		d = reduce_dist(t, id, d, &f);
406
	}
Jan Včelák's avatar
Jan Včelák committed
407

408 409 410 411 412
	/* Assign granular lock and unlock lookup. */
	*lock = f % t->lk_count;
	rrl_lock(t, *lock);
	pthread_mutex_unlock(&t->ll);

413 414
	/* found free elm 'k' which is in <id, id + HOP_LEN) */
	t->arr[id].hop |= (1 << d);
Jan Včelák's avatar
Jan Včelák committed
415
	rrl_item_t *b = t->arr + f;
416 417 418
	assert(f == (id+d) % t->size);

	/* Inspect bucket state. */
419
	unsigned hop = b->hop;
420
	if (b->cls == CLS_NULL) {
421 422
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
423 424
	}
	/* Check for collisions. */
425
	if (!bucket_match(b, &match)) {
426
		if (!(b->flags & RRL_BF_SSTART)) {
427 428
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
429 430 431 432
			b->ntok = t->rate + t->rate / RRL_SSTART;
			b->flags |= RRL_BF_SSTART;
		}
	}
Jan Včelák's avatar
Jan Včelák committed
433

434 435 436
	return b;
}

437
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req,
438
              const knot_dname_t *zone)
439
{
440 441 442
	if (!rrl || !req || !a) {
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
443

444 445
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
446
	int lock = -1;
447
	uint32_t now = time(NULL);
448
	rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
449
	if (!b) {
450 451 452
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
453 454
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
455

456 457 458 459 460
	/* Calculate rate for dT */
	uint32_t dt = now - b->time;
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
461 462
	/* Visit bucket. */
	b->time = now;
463
	if (dt > 0) { /* Window moved. */
464 465 466 467 468 469

		/* Check state change. */
		if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
			b->flags &= ~RRL_BF_ELIMIT;
			rrl_log_state(a, b->flags, b->cls);
		}
Jan Včelák's avatar
Jan Včelák committed
470

471 472
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
473 474
		if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			b->flags &= ~RRL_BF_SSTART;
475
		}
476 477 478
		b->ntok += dn;
		if (b->ntok > RRL_CAPACITY * rrl->rate) {
			b->ntok = RRL_CAPACITY * rrl->rate;
479
		}
480
	}
Jan Včelák's avatar
Jan Včelák committed
481

482 483 484 485
	/* Last item taken. */
	if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
		b->flags |= RRL_BF_ELIMIT;
		rrl_log_state(a, b->flags, b->cls);
486
	}
487 488 489 490 491 492 493

	/* Decay current bucket. */
	if (b->ntok > 0) {
		--b->ntok;
	} else if (b->ntok == 0) {
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
494

495 496 497
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
498 499 500
	return ret;
}

501 502 503 504 505 506
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
507
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
508
	return (roll < threshold);
509 510
}

511 512
int rrl_destroy(rrl_table_t *rrl)
{
513
	if (rrl) {
514 515 516
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
517
		for (size_t i = 0; i < rrl->lk_count; ++i) {
518
			pthread_mutex_destroy(rrl->lk + i);
519 520 521
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
522

523 524 525
	free(rrl);
	return KNOT_EOK;
}
526 527 528 529 530 531 532 533 534 535

int rrl_reseed(rrl_table_t *rrl)
{
	/* Lock entire table. */
	if (rrl->lk_count > 0) {
		pthread_mutex_lock(&rrl->ll);
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_lock(rrl, i);
		}
	}
Jan Včelák's avatar
Jan Včelák committed
536

537
	memset(rrl->arr, 0, rrl->size * sizeof(rrl_item_t));
538
	rrl->seed = dnssec_random_uint32_t();
Jan Včelák's avatar
Jan Včelák committed
539

540 541 542 543 544 545
	if (rrl->lk_count > 0) {
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_unlock(rrl, i);
		}
		pthread_mutex_unlock(&rrl->ll);
	}
Jan Včelák's avatar
Jan Včelák committed
546

547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
	return KNOT_EOK;
}

int rrl_lock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_lock(t->lk + lk_id) != 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}

int rrl_unlock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_unlock(t->lk + lk_id)!= 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}