rrl.c 13.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*  Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "dnssec/random.h"
21
#include "knot/common/log.h"
22
#include "knot/server/rrl.h"
23
#include "knot/zone/zone.h"
Daniel Salzman's avatar
Daniel Salzman committed
24
#include "libknot/libknot.h"
25
#include "contrib/murmurhash3/murmurhash3.h"
26
#include "contrib/sockaddr.h"
27

28 29
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
30 31 32
/* Limits */
#define RRL_CLSBLK_MAXLEN (4 + 8 + 1 + 256)
/* CIDR block prefix lengths for v4/v6 */
33 34
#define RRL_V4_PREFIX ((uint32_t)0x00ffffff)         /* /24 */
#define RRL_V6_PREFIX ((uint64_t)0x00ffffffffffffff) /* /56 */
35 36
/* Defaults */
#define RRL_DEFAULT_RATE 100
37
#define RRL_CAPACITY 4 /* N seconds. */
38
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
39
#define RRL_PSIZE_LARGE 1024
40 41
/* Enable RRL logging. */
#define RRL_ENABLE_LOG
42 43 44 45 46 47 48

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
49
	CLS_EMPTY    = 1 << 3, /* Empty response. */
50
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
51 52 53
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
54 55
};

56
/* Classification string. */
57 58 59 60 61 62
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
63 64 65 66 67 68 69 70 71 72 73
        {CLS_NORMAL,  "POSITIVE" },
        {CLS_ERROR,   "ERROR" },
        {CLS_NXDOMAIN,"NXDOMAIN"},
        {CLS_EMPTY,   "EMPTY"},
        {CLS_LARGE,   "LARGE"},
        {CLS_WILDCARD,"WILDCARD"},
        {CLS_ANY,     "ANY"},
        {CLS_DNSSEC,  "DNSSEC"},
        {CLS_NULL,    "NULL"},
        {CLS_NULL,    NULL}
};
74

75 76
static inline const char *rrl_clsstr(int code)
{
77 78 79 80 81 82 83
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
84 85
}

86 87 88 89 90 91 92 93 94
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
95 96
	/* Check error code */
	int ret = CLS_NULL;
97
	switch (knot_wire_get_rcode(p->w)) {
98 99 100 101 102 103
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
104
	if (ret == CLS_NORMAL && p->flags & RRL_WILDCARD) {
105 106
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
107

108
	/* Check query type for spec. classes. */
109
	if (p->query) {
110
		switch(knot_pkt_qtype(p->query)) {
111 112 113 114 115 116 117 118 119 120 121 122
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
123

124
	/* Check packet size for threshold. */
125
	if (p->len >= RRL_PSIZE_LARGE) {
126 127 128 129
		return CLS_LARGE;
	}

	/* Check ancount */
130
	if (knot_wire_get_ancount(p->w) == 0) {
131 132 133 134
		return CLS_EMPTY;
	}

	return ret;
135 136
}

137
static int rrl_clsname(char *dst, size_t maxlen, uint8_t cls,
138
                       rrl_req_t *req, const zone_t *zone)
139
{
140 141 142 143
	/* Fallback zone (for errors etc.) */
	const knot_dname_t *dn = (const knot_dname_t*)"\x00";

	/* Found associated zone. */
144 145 146
	if (zone != NULL) {
		dn = zone->name;
	}
147

148 149 150 151 152 153
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
154
		/* Use QNAME */
155
		if (req->query) {
156
			dn = knot_pkt_qname(req->query);
157
		}
158 159
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
160

161
	/* Write to wire */
162
	return knot_dname_to_wire((uint8_t *)dst, dn, maxlen);
163 164
}

165
static int rrl_classify(char *dst, size_t maxlen, const struct sockaddr_storage *a,
166
                        rrl_req_t *p, const zone_t *z, uint32_t seed)
167
{
168
	if (!dst || !p || !a || maxlen == 0) {
169 170
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
171

172 173 174
	/* Class */
	uint8_t cls = rrl_clsid(p);
	*dst = cls;
175
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
176

177
	/* Address (in network byteorder, adjust masks). */
178
	uint64_t nb = 0;
179 180 181 182 183 184
	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)a;
		nb = *((uint64_t*)(&ipv6->sin6_addr)) & RRL_V6_PREFIX;
	} else {
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)a;
		nb = ((uint32_t)ipv4->sin_addr.s_addr) & RRL_V4_PREFIX;
185
	}
186 187 188
	if (blklen + sizeof(nb) > maxlen) {
		return KNOT_ESPACE;
	}
189 190 191
	memcpy(dst + blklen, (void*)&nb, sizeof(nb));
	blklen += sizeof(nb);

192
	/* Name */
193 194
	uint16_t *nlen = (uint16_t*)(dst + blklen);
	blklen += sizeof(uint16_t);
195
	int len = rrl_clsname(dst + blklen, maxlen - blklen, cls, p, z);
196
	if (len < 0) {
197
		return len;
198
	}
199
	*nlen = len;
200
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
201

202
	/* Seed. */
203 204 205
	if (blklen + sizeof(seed) > maxlen) {
		return KNOT_ESPACE;
	}
206 207
	memcpy(dst + blklen, (void *)&seed, sizeof(seed));
	blklen += sizeof(seed);
Jan Včelák's avatar
Jan Včelák committed
208

209 210 211
	return blklen;
}

212 213 214 215
static int bucket_free(rrl_item_t *b, uint32_t now) {
	return b->cls == CLS_NULL || (b->time + 1 < now);
}

216
static int bucket_match(rrl_item_t *b, rrl_item_t *m)
217
{
218 219 220
	return b->cls    == m->cls &&
	       b->netblk == m->netblk &&
	       b->qname  == m->qname;
221
}
222

223
static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
224 225 226 227 228 229 230
{
	rrl_item_t *np = t->arr + t->size;
	rrl_item_t *b = NULL;
	for (b = t->arr + i; b != np; ++b) {
		if (bucket_free(b, now)) {
			return b - (t->arr + i);
		}
231
	}
232 233 234 235
	np = t->arr + i;
	for (b = t->arr; b != np; ++b) {
		if (bucket_free(b, now)) {
			return (b - t->arr) + (t->size - i);
236
		}
237
	}
Jan Včelák's avatar
Jan Včelák committed
238

239 240
	/* this happens if table is full... force vacate current elm */
	return i;
241
}
242

243 244 245 246 247 248 249 250 251 252 253 254 255
static inline unsigned find_match(rrl_table_t *t, uint32_t id, rrl_item_t *m)
{
	unsigned f = 0;
	unsigned d = 0;
	unsigned match = t->arr[id].hop;
	while (match != 0) {
		d = __builtin_ctz(match);
		f = (id + d) % t->size;
		if (bucket_match(t->arr + f, m)) {
			return d;
		} else {
			match &= ~(1<<d); /* clear potential match */
		}
256
	}
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276

	return HOP_LEN + 1;
}

static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
		unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
		unsigned o = __builtin_ctz(t->arr[s].hop); /* offset of first valid bucket */
		if (t->arr[s].hop != 0 && o < rd) {        /* only offsets in <s, f> are interesting */
			unsigned e = (s + o) % t->size;    /* this item will be displaced to [f] */
			unsigned keep_hop = t->arr[*f].hop; /* unpredictable padding */
			memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
			t->arr[*f].hop = keep_hop;
			t->arr[e].cls = CLS_NULL;
			t->arr[s].hop &= ~(1<<o);
			t->arr[s].hop |= 1<<rd;
			*f = e;
			return d - (rd - o);
277
		}
278
		--rd;
279
	}
Jan Včelák's avatar
Jan Včelák committed
280

281 282 283 284
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	return d;
285 286
}

287
static void rrl_log_state(const struct sockaddr_storage *ss, uint16_t flags, uint8_t cls)
288 289
{
#ifdef RRL_ENABLE_LOG
290
	char addr_str[SOCKADDR_STRLEN] = {0};
291
	sockaddr_tostr(addr_str, sizeof(addr_str), ss);
292

293 294 295 296
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
297

298
	log_notice("rate limiting, address '%s' class '%s' %s limiting",
Jan Včelák's avatar
Jan Včelák committed
299
	           addr_str, rrl_clsstr(cls), what);
300 301 302
#endif
}

303 304
rrl_table_t *rrl_create(size_t size)
{
305 306 307
	if (size == 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
308

309 310
	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
	rrl_table_t *t = malloc(tbl_len);
311 312 313
	if (!t) {
		return NULL;
	}
314
	memset(t, 0, sizeof(rrl_table_t));
315
	t->size = size;
316
	rrl_reseed(t);
317

318 319 320 321 322
	return t;
}

uint32_t rrl_setrate(rrl_table_t *rrl, uint32_t rate)
{
323 324 325
	if (!rrl) {
		return 0;
	}
326 327 328 329 330 331 332
	uint32_t old = rrl->rate;
	rrl->rate = rate;
	return old;
}

uint32_t rrl_rate(rrl_table_t *rrl)
{
333
	return rrl ? rrl->rate : 0;
334 335
}

336
int rrl_setlocks(rrl_table_t *rrl, unsigned granularity)
337
{
338 339 340 341
	if (!rrl) {
		return KNOT_EINVAL;
	}

342
	assert(!rrl->lk); /* Cannot change while locks are used. */
343
	assert(granularity <= rrl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
344

345 346 347
	if (pthread_mutex_init(&rrl->ll, NULL) < 0) {
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
348

349
	/* Alloc new locks. */
350
	rrl->lk = malloc(granularity * sizeof(pthread_mutex_t));
351 352 353
	if (!rrl->lk) {
		return KNOT_ENOMEM;
	}
354
	memset(rrl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
355

356 357
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
358 359 360
		if (pthread_mutex_init(rrl->lk + i, NULL) < 0) {
			break;
		}
361 362
		++rrl->lk_count;
	}
363

364 365 366
	/* Incomplete initialization */
	if (rrl->lk_count != granularity) {
		for (size_t i = 0; i < rrl->lk_count; ++i) {
367
			pthread_mutex_destroy(rrl->lk + i);
368 369 370 371 372
		}
		free(rrl->lk);
		rrl->lk_count = 0;
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
373

374 375 376
	return KNOT_EOK;
}

377
rrl_item_t* rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a, rrl_req_t *p,
378
                     const zone_t *zone, uint32_t stamp, int *lock)
379 380 381 382 383 384
{
	char buf[RRL_CLSBLK_MAXLEN];
	int len = rrl_classify(buf, sizeof(buf), a, p, zone, t->seed);
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
385

386
	uint32_t id = hash(buf, len) % t->size;
Jan Včelák's avatar
Jan Včelák committed
387

388 389 390 391 392 393 394 395 396 397 398 399 400 401
	/* Lock for lookup. */
	pthread_mutex_lock(&t->ll);

	/* Find an exact match in <id, id + HOP_LEN). */
	uint16_t *qname = (uint16_t*)(buf + sizeof(uint8_t) + sizeof(uint64_t));
	rrl_item_t match = {
	        0, *((uint64_t*)(buf + 1)), t->rate,    /* hop, netblk, ntok */
	        buf[0], RRL_BF_NULL,                    /* cls, flags */
	        hash((char*)(qname + 1), *qname), stamp /* qname, time*/
	};

	unsigned d = find_match(t, id, &match);
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
		d = find_free(t, id, stamp);
402
	}
Jan Včelák's avatar
Jan Včelák committed
403

404 405
	/* Reduce distance to fit <id, id + HOP_LEN) */
	unsigned f = (id + d) % t->size;
406
	while (d >= HOP_LEN) {
407
		d = reduce_dist(t, id, d, &f);
408
	}
Jan Včelák's avatar
Jan Včelák committed
409

410 411 412 413 414
	/* Assign granular lock and unlock lookup. */
	*lock = f % t->lk_count;
	rrl_lock(t, *lock);
	pthread_mutex_unlock(&t->ll);

415 416 417 418 419 420
	/* found free elm 'k' which is in <id, id + HOP_LEN) */
	t->arr[id].hop |= (1 << d);
	rrl_item_t* b = t->arr + f;
	assert(f == (id+d) % t->size);

	/* Inspect bucket state. */
421
	unsigned hop = b->hop;
422
	if (b->cls == CLS_NULL) {
423 424
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
425 426
	}
	/* Check for collisions. */
427
	if (!bucket_match(b, &match)) {
428
		if (!(b->flags & RRL_BF_SSTART)) {
429 430
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
431 432 433 434
			b->ntok = t->rate + t->rate / RRL_SSTART;
			b->flags |= RRL_BF_SSTART;
		}
	}
Jan Včelák's avatar
Jan Včelák committed
435

436 437 438
	return b;
}

439
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req,
440
              const zone_t *zone)
441
{
442 443 444
	if (!rrl || !req || !a) {
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
445

446 447
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
448
	int lock = -1;
449
	uint32_t now = time(NULL);
450
	rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
451
	if (!b) {
452 453 454
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
455 456
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
457

458 459 460 461 462
	/* Calculate rate for dT */
	uint32_t dt = now - b->time;
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
463 464
	/* Visit bucket. */
	b->time = now;
465
	if (dt > 0) { /* Window moved. */
466 467 468 469 470 471

		/* Check state change. */
		if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
			b->flags &= ~RRL_BF_ELIMIT;
			rrl_log_state(a, b->flags, b->cls);
		}
Jan Včelák's avatar
Jan Včelák committed
472

473 474
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
475 476
		if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			b->flags &= ~RRL_BF_SSTART;
477
		}
478 479 480
		b->ntok += dn;
		if (b->ntok > RRL_CAPACITY * rrl->rate) {
			b->ntok = RRL_CAPACITY * rrl->rate;
481
		}
482
	}
Jan Včelák's avatar
Jan Včelák committed
483

484 485 486 487
	/* Last item taken. */
	if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
		b->flags |= RRL_BF_ELIMIT;
		rrl_log_state(a, b->flags, b->cls);
488
	}
489 490 491 492 493 494 495

	/* Decay current bucket. */
	if (b->ntok > 0) {
		--b->ntok;
	} else if (b->ntok == 0) {
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
496

497 498 499
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
500 501 502
	return ret;
}

503 504 505 506 507 508
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
509
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
510
	return (roll < threshold);
511 512
}

513 514
int rrl_destroy(rrl_table_t *rrl)
{
515
	if (rrl) {
516 517 518
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
519
		for (size_t i = 0; i < rrl->lk_count; ++i) {
520
			pthread_mutex_destroy(rrl->lk + i);
521 522 523
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
524

525 526 527
	free(rrl);
	return KNOT_EOK;
}
528 529 530 531 532 533 534 535 536 537

int rrl_reseed(rrl_table_t *rrl)
{
	/* Lock entire table. */
	if (rrl->lk_count > 0) {
		pthread_mutex_lock(&rrl->ll);
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_lock(rrl, i);
		}
	}
Jan Včelák's avatar
Jan Včelák committed
538

539
	memset(rrl->arr, 0, rrl->size * sizeof(rrl_item_t));
540
	rrl->seed = dnssec_random_uint32_t();
Jan Včelák's avatar
Jan Včelák committed
541

542 543 544 545 546 547
	if (rrl->lk_count > 0) {
		for (unsigned i = 0; i < rrl->lk_count; ++i) {
			rrl_unlock(rrl, i);
		}
		pthread_mutex_unlock(&rrl->ll);
	}
Jan Včelák's avatar
Jan Včelák committed
548

549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
	return KNOT_EOK;
}

int rrl_lock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_lock(t->lk + lk_id) != 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}

int rrl_unlock(rrl_table_t *t, int lk_id)
{
	assert(lk_id > -1);
	if (pthread_mutex_unlock(t->lk + lk_id)!= 0) {
		return KNOT_ERROR;
	}
	return KNOT_EOK;
}