functions.c 13.1 KB
Newer Older
1
/*  Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2 3 4 5 6 7 8 9 10 11 12 13

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
14
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
15 16
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "knot/modules/rrl/functions.h"
21
#include "contrib/sockaddr.h"
22
#include "contrib/time.h"
23 24
#include "libdnssec/error.h"
#include "libdnssec/random.h"
25

26 27
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
28 29 30
/* Limits */
#define RRL_CLSBLK_MAXLEN (4 + 8 + 1 + 256)
/* CIDR block prefix lengths for v4/v6 */
31 32
#define RRL_V4_PREFIX ((uint32_t)0x00ffffff)         /* /24 */
#define RRL_V6_PREFIX ((uint64_t)0x00ffffffffffffff) /* /56 */
33
/* Defaults */
34
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
35
#define RRL_PSIZE_LARGE 1024
36 37
#define RRL_CAPACITY 4 /* Window size in seconds */
#define RRL_LOCK_GRANULARITY 32 /* Last digit granularity */
38 39 40 41 42 43 44

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
45
	CLS_EMPTY    = 1 << 3, /* Empty response. */
46
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
47 48 49
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
50 51
};

52
/* Classification string. */
53 54 55 56 57 58
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
59 60 61 62 63 64 65 66 67 68
	{ CLS_NORMAL,   "POSITIVE" },
	{ CLS_ERROR,    "ERROR" },
	{ CLS_NXDOMAIN, "NXDOMAIN"},
	{ CLS_EMPTY,    "EMPTY"},
	{ CLS_LARGE,    "LARGE"},
	{ CLS_WILDCARD, "WILDCARD"},
	{ CLS_ANY,      "ANY"},
	{ CLS_DNSSEC,   "DNSSEC"},
	{ CLS_NULL,     "NULL"},
	{ CLS_NULL,     NULL}
69
};
70

71 72
static inline const char *rrl_clsstr(int code)
{
73 74 75 76 77 78 79
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
80 81
}

82 83 84 85 86 87 88 89 90
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
91 92
	/* Check error code */
	int ret = CLS_NULL;
93
	switch (knot_wire_get_rcode(p->w)) {
94 95 96 97 98 99
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
Daniel Salzman's avatar
Daniel Salzman committed
100
	if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
101 102
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
103

104
	/* Check query type for spec. classes. */
105
	if (p->query) {
106
		switch(knot_pkt_qtype(p->query)) {
107 108 109 110 111 112 113 114 115 116 117 118
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
119

120
	/* Check packet size for threshold. */
121
	if (p->len >= RRL_PSIZE_LARGE) {
122 123 124 125
		return CLS_LARGE;
	}

	/* Check ancount */
126
	if (knot_wire_get_ancount(p->w) == 0) {
127 128 129 130
		return CLS_EMPTY;
	}

	return ret;
131 132
}

133
static int rrl_clsname(uint8_t *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
134
                       const knot_dname_t *name)
135
{
136 137 138
	if (name == NULL) {
		/* Fallback for errors etc. */
		name = (const knot_dname_t *)"\x00";
139
	}
140

141 142 143 144 145 146
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
147
		/* Use QNAME */
148
		if (req->query) {
149
			name = knot_pkt_qname(req->query);
150
		}
151 152
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
153

154
	/* Write to wire */
155
	return knot_dname_to_wire(dst, name, maxlen);
156 157
}

158
static int rrl_classify(uint8_t *dst, size_t maxlen, const struct sockaddr_storage *remote,
159
                        rrl_req_t *req, const knot_dname_t *name)
160
{
161
	/* Class */
162
	uint8_t cls = rrl_clsid(req);
163
	*dst = cls;
164
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
165

166
	/* Address (in network byteorder, adjust masks). */
167 168 169 170
	uint64_t netblk = 0;
	if (remote->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)remote;
		netblk = *((uint64_t *)(&ipv6->sin6_addr)) & RRL_V6_PREFIX;
171
	} else {
172 173
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)remote;
		netblk = ((uint32_t)ipv4->sin_addr.s_addr) & RRL_V4_PREFIX;
174
	}
175
	if (blklen + sizeof(netblk) > maxlen) {
176 177
		return KNOT_ESPACE;
	}
178 179
	memcpy(dst + blklen, (void *)&netblk, sizeof(netblk));
	blklen += sizeof(netblk);
180

181
	/* Name */
182
	uint16_t *len_pos = (uint16_t *)(dst + blklen);
183
	blklen += sizeof(uint16_t);
184
	int ret = rrl_clsname(dst + blklen, maxlen - blklen, cls, req, name);
185 186
	if (ret < 0) {
		return ret;
187
	}
188 189
	uint16_t len = ret;
	memcpy(len_pos, &len, sizeof(len));
190
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
191

192 193 194
	return blklen;
}

195 196
static int bucket_free(rrl_item_t *b, uint32_t now)
{
197 198 199
	return b->cls == CLS_NULL || (b->time + 1 < now);
}

200
static int bucket_match(rrl_item_t *b, rrl_item_t *m)
201
{
202 203 204
	return b->cls    == m->cls &&
	       b->netblk == m->netblk &&
	       b->qname  == m->qname;
205
}
206

207
static int find_free(rrl_table_t *tbl, unsigned i, uint32_t now)
208
{
209
	rrl_item_t *np = tbl->arr + tbl->size;
210
	rrl_item_t *b = NULL;
211
	for (b = tbl->arr + i; b != np; ++b) {
212
		if (bucket_free(b, now)) {
213
			return b - (tbl->arr + i);
214
		}
215
	}
216 217
	np = tbl->arr + i;
	for (b = tbl->arr; b != np; ++b) {
218
		if (bucket_free(b, now)) {
219
			return (b - tbl->arr) + (tbl->size - i);
220
		}
221
	}
Jan Včelák's avatar
Jan Včelák committed
222

223 224
	/* this happens if table is full... force vacate current elm */
	return i;
225
}
226

227
static inline unsigned find_match(rrl_table_t *tbl, uint32_t id, rrl_item_t *m)
228 229 230
{
	unsigned f = 0;
	unsigned d = 0;
231
	unsigned match = tbl->arr[id].hop;
232 233
	while (match != 0) {
		d = __builtin_ctz(match);
234 235
		f = (id + d) % tbl->size;
		if (bucket_match(tbl->arr + f, m)) {
236 237 238 239
			return d;
		} else {
			match &= ~(1<<d); /* clear potential match */
		}
240
	}
241 242 243 244

	return HOP_LEN + 1;
}

245
static inline unsigned reduce_dist(rrl_table_t *tbl, unsigned id, unsigned d, unsigned *f)
246 247 248
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
249 250 251
		unsigned s = (tbl->size + *f - rd) % tbl->size; /* bucket to be vacated */
		if (tbl->arr[s].hop != 0) {
			unsigned o = __builtin_ctz(tbl->arr[s].hop);  /* offset of first valid bucket */
252
			if (o < rd) {                               /* only offsets in <s, f> are interesting */
253 254 255 256 257 258 259
				unsigned e = (s + o) % tbl->size;     /* this item will be displaced to [f] */
				unsigned keep_hop = tbl->arr[*f].hop; /* unpredictable padding */
				memcpy(tbl->arr + *f, tbl->arr + e, sizeof(rrl_item_t));
				tbl->arr[*f].hop = keep_hop;
				tbl->arr[e].cls = CLS_NULL;
				tbl->arr[s].hop &= ~(1<<o);
				tbl->arr[s].hop |= 1<<rd;
260 261 262
				*f = e;
				return d - (rd - o);
			}
263
		}
264
		--rd;
265
	}
Jan Včelák's avatar
Jan Včelák committed
266

267 268 269 270
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	return d;
271 272
}

273 274
static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss,
                          uint16_t flags, uint8_t cls)
275
{
276 277 278 279
	if (mod == NULL) {
		return;
	}

280
	char addr_str[SOCKADDR_STRLEN] = {0};
281
	sockaddr_tostr(addr_str, sizeof(addr_str), (struct sockaddr *)ss);
282

283 284 285 286
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
287

288
	knotd_mod_log(mod, LOG_NOTICE, "address %s, class %s, %s limiting",
289
	              addr_str, rrl_clsstr(cls), what);
290 291
}

292
static void rrl_lock(rrl_table_t *tbl, int lk_id)
293
{
294
	assert(lk_id > -1);
295
	pthread_mutex_lock(tbl->lk + lk_id);
296 297
}

298
static void rrl_unlock(rrl_table_t *tbl, int lk_id)
299
{
300
	assert(lk_id > -1);
301
	pthread_mutex_unlock(tbl->lk + lk_id);
302 303
}

304
static int rrl_setlocks(rrl_table_t *tbl, uint32_t granularity)
305
{
306 307
	assert(!tbl->lk); /* Cannot change while locks are used. */
	assert(granularity <= tbl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
308

309
	if (pthread_mutex_init(&tbl->ll, NULL) < 0) {
310 311
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
312

313
	/* Alloc new locks. */
314 315
	tbl->lk = malloc(granularity * sizeof(pthread_mutex_t));
	if (!tbl->lk) {
316 317
		return KNOT_ENOMEM;
	}
318
	memset(tbl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
319

320 321
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
322
		if (pthread_mutex_init(tbl->lk + i, NULL) < 0) {
323 324
			break;
		}
325
		++tbl->lk_count;
326
	}
327

328
	/* Incomplete initialization */
329 330 331
	if (tbl->lk_count != granularity) {
		for (size_t i = 0; i < tbl->lk_count; ++i) {
			pthread_mutex_destroy(tbl->lk + i);
332
		}
333 334
		free(tbl->lk);
		tbl->lk_count = 0;
335 336
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
337

338 339 340
	return KNOT_EOK;
}

341 342 343 344 345 346 347
rrl_table_t *rrl_create(size_t size, uint32_t rate)
{
	if (size == 0) {
		return NULL;
	}

	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
348 349
	rrl_table_t *tbl = calloc(1, tbl_len);
	if (!tbl) {
350 351
		return NULL;
	}
352 353
	tbl->size = size;
	tbl->rate = rate;
354

355 356
	if (dnssec_random_buffer((uint8_t *)&tbl->key, sizeof(tbl->key)) != DNSSEC_EOK) {
		free(tbl);
357 358 359
		return NULL;
	}

360 361
	if (rrl_setlocks(tbl, RRL_LOCK_GRANULARITY) != KNOT_EOK) {
		free(tbl);
362 363 364
		return NULL;
	}

365
	return tbl;
366 367 368
}

/*! \brief Get bucket for current combination of parameters. */
369
static rrl_item_t *rrl_hash(rrl_table_t *tbl, const struct sockaddr_storage *remote,
370 371
                            rrl_req_t *req, const knot_dname_t *zone, uint32_t stamp,
                            int *lock)
372
{
373
	uint8_t buf[RRL_CLSBLK_MAXLEN];
374
	int len = rrl_classify(buf, sizeof(buf), remote, req, zone);
375 376 377
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
378

379
	uint32_t id = SipHash24(&tbl->key, buf, len) % tbl->size;
Jan Včelák's avatar
Jan Včelák committed
380

381
	/* Lock for lookup. */
382
	pthread_mutex_lock(&tbl->ll);
383 384

	/* Find an exact match in <id, id + HOP_LEN). */
385
	uint8_t *qname = buf + sizeof(uint8_t) + sizeof(uint64_t);
386 387
	uint64_t netblk;
	memcpy(&netblk, buf + sizeof(uint8_t), sizeof(netblk));
388
	rrl_item_t match = {
Daniel Salzman's avatar
Daniel Salzman committed
389
		.hop = 0,
390
		.netblk = netblk,
391
		.ntok = tbl->rate * RRL_CAPACITY,
Daniel Salzman's avatar
Daniel Salzman committed
392 393
		.cls = buf[0],
		.flags = RRL_BF_NULL,
394
		.qname = SipHash24(&tbl->key, qname + 1, qname[0]),
Daniel Salzman's avatar
Daniel Salzman committed
395
		.time = stamp
396 397
	};

398
	unsigned d = find_match(tbl, id, &match);
399
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
400
		d = find_free(tbl, id, stamp);
401
	}
Jan Včelák's avatar
Jan Včelák committed
402

403
	/* Reduce distance to fit <id, id + HOP_LEN) */
404
	unsigned f = (id + d) % tbl->size;
405
	while (d >= HOP_LEN) {
406
		d = reduce_dist(tbl, id, d, &f);
407
	}
Jan Včelák's avatar
Jan Včelák committed
408

409
	/* Assign granular lock and unlock lookup. */
410 411 412
	*lock = f % tbl->lk_count;
	rrl_lock(tbl, *lock);
	pthread_mutex_unlock(&tbl->ll);
413

414
	/* found free elm 'k' which is in <id, id + HOP_LEN) */
415 416 417
	tbl->arr[id].hop |= (1 << d);
	rrl_item_t *b = tbl->arr + f;
	assert(f == (id+d) % tbl->size);
418 419

	/* Inspect bucket state. */
420
	unsigned hop = b->hop;
421
	if (b->cls == CLS_NULL) {
422 423
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
424 425
	}
	/* Check for collisions. */
426
	if (!bucket_match(b, &match)) {
427
		if (!(b->flags & RRL_BF_SSTART)) {
428 429
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
430
			b->ntok = tbl->rate + tbl->rate / RRL_SSTART;
431 432 433
			b->flags |= RRL_BF_SSTART;
		}
	}
Jan Včelák's avatar
Jan Včelák committed
434

435 436 437
	return b;
}

438 439
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *remote,
              rrl_req_t *req, const knot_dname_t *zone, knotd_mod_t *mod)
440
{
441
	if (!rrl || !req || !remote) {
442 443
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
444

445 446
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
447
	int lock = -1;
448
	uint32_t now = time_now().tv_sec;
449 450
	rrl_item_t *bucket = rrl_hash(rrl, remote, req, zone, now, &lock);
	if (!bucket) {
451 452 453
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
454 455
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
456

457
	/* Calculate rate for dT */
458
	uint32_t dt = now - bucket->time;
459 460 461
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
462
	/* Visit bucket. */
463
	bucket->time = now;
464
	if (dt > 0) { /* Window moved. */
465 466

		/* Check state change. */
467 468 469
		if ((bucket->ntok > 0 || dt > 1) && (bucket->flags & RRL_BF_ELIMIT)) {
			bucket->flags &= ~RRL_BF_ELIMIT;
			rrl_log_state(mod, remote, bucket->flags, bucket->cls);
470
		}
Jan Včelák's avatar
Jan Včelák committed
471

472 473
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
474 475
		if (bucket->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			bucket->flags &= ~RRL_BF_SSTART;
476
		}
477 478 479
		bucket->ntok += dn;
		if (bucket->ntok > RRL_CAPACITY * rrl->rate) {
			bucket->ntok = RRL_CAPACITY * rrl->rate;
480
		}
481
	}
Jan Včelák's avatar
Jan Včelák committed
482

483
	/* Last item taken. */
484 485 486
	if (bucket->ntok == 1 && !(bucket->flags & RRL_BF_ELIMIT)) {
		bucket->flags |= RRL_BF_ELIMIT;
		rrl_log_state(mod, remote, bucket->flags, bucket->cls);
487
	}
488 489

	/* Decay current bucket. */
490 491 492
	if (bucket->ntok > 0) {
		--bucket->ntok;
	} else if (bucket->ntok == 0) {
493 494
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
495

496 497 498
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
499 500 501
	return ret;
}

502 503 504 505 506 507
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
508
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
509
	return (roll < threshold);
510 511
}

512
void rrl_destroy(rrl_table_t *rrl)
513
{
514
	if (rrl) {
515 516 517
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
518
		for (size_t i = 0; i < rrl->lk_count; ++i) {
519
			pthread_mutex_destroy(rrl->lk + i);
520 521 522
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
523

524
	free(rrl);
525
}