functions.c 12.8 KB
Newer Older
Daniel Salzman's avatar
Daniel Salzman committed
1
/*  Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

17
#include <assert.h>
18
#include <time.h>
19

20
#include "knot/modules/rrl/functions.h"
21
#include "contrib/sockaddr.h"
22
#include "contrib/time.h"
23 24
#include "libdnssec/error.h"
#include "libdnssec/random.h"
25

26 27
/* Hopscotch defines. */
#define HOP_LEN (sizeof(unsigned)*8)
28 29 30
/* Limits */
#define RRL_CLSBLK_MAXLEN (4 + 8 + 1 + 256)
/* CIDR block prefix lengths for v4/v6 */
31 32
#define RRL_V4_PREFIX ((uint32_t)0x00ffffff)         /* /24 */
#define RRL_V6_PREFIX ((uint64_t)0x00ffffffffffffff) /* /56 */
33
/* Defaults */
34
#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
35
#define RRL_PSIZE_LARGE 1024
36 37
#define RRL_CAPACITY 4 /* Window size in seconds */
#define RRL_LOCK_GRANULARITY 32 /* Last digit granularity */
38 39 40 41 42 43 44

/* Classification */
enum {
	CLS_NULL     = 0 << 0, /* Empty bucket. */
	CLS_NORMAL   = 1 << 0, /* Normal response. */
	CLS_ERROR    = 1 << 1, /* Error response. */
	CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
45
	CLS_EMPTY    = 1 << 3, /* Empty response. */
46
	CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
47 48 49
	CLS_WILDCARD = 1 << 5, /* Wildcard query. */
	CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
	CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
50 51
};

52
/* Classification string. */
53 54 55 56 57 58
struct cls_name {
	int code;
	const char *name;
};

static const struct cls_name rrl_cls_names[] = {
59 60 61 62 63 64 65 66 67 68
	{ CLS_NORMAL,   "POSITIVE" },
	{ CLS_ERROR,    "ERROR" },
	{ CLS_NXDOMAIN, "NXDOMAIN"},
	{ CLS_EMPTY,    "EMPTY"},
	{ CLS_LARGE,    "LARGE"},
	{ CLS_WILDCARD, "WILDCARD"},
	{ CLS_ANY,      "ANY"},
	{ CLS_DNSSEC,   "DNSSEC"},
	{ CLS_NULL,     "NULL"},
	{ CLS_NULL,     NULL}
69
};
70

71 72
static inline const char *rrl_clsstr(int code)
{
73 74 75 76 77 78 79
	for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
		if (c->code == code) {
			return c->name;
		}
	}

	return "unknown class";
80 81
}

82 83 84 85 86 87 88 89 90
/* Bucket flags. */
enum {
	RRL_BF_NULL   = 0 << 0, /* No flags. */
	RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
	RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
};

static uint8_t rrl_clsid(rrl_req_t *p)
{
91 92
	/* Check error code */
	int ret = CLS_NULL;
93
	switch (knot_wire_get_rcode(p->w)) {
94 95 96 97 98 99
	case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
	case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
	default: return CLS_ERROR; break;
	}

	/* Check if answered from a qname */
Daniel Salzman's avatar
Daniel Salzman committed
100
	if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
101 102
		return CLS_WILDCARD;
	}
Jan Včelák's avatar
Jan Včelák committed
103

104
	/* Check query type for spec. classes. */
105
	if (p->query) {
106
		switch(knot_pkt_qtype(p->query)) {
107 108 109 110 111 112 113 114 115 116 117 118
		case KNOT_RRTYPE_ANY:      /* ANY spec. class */
			return CLS_ANY;
			break;
		case KNOT_RRTYPE_DNSKEY:
		case KNOT_RRTYPE_RRSIG:
		case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
			return CLS_DNSSEC;
			break;
		default:
			break;
		}
	}
119

120
	/* Check packet size for threshold. */
121
	if (p->len >= RRL_PSIZE_LARGE) {
122 123 124 125
		return CLS_LARGE;
	}

	/* Check ancount */
126
	if (knot_wire_get_ancount(p->w) == 0) {
127 128 129 130
		return CLS_EMPTY;
	}

	return ret;
131 132
}

133 134
static int rrl_clsname(char *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
                       const knot_dname_t *name)
135
{
136 137 138
	if (name == NULL) {
		/* Fallback for errors etc. */
		name = (const knot_dname_t *)"\x00";
139
	}
140

141 142 143 144 145 146
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		break;
	default:
147
		/* Use QNAME */
148
		if (req->query) {
149
			name = knot_pkt_qname(req->query);
150
		}
151 152
		break;
	}
Jan Včelák's avatar
Jan Včelák committed
153

154
	/* Write to wire */
155
	return knot_dname_to_wire((uint8_t *)dst, name, maxlen);
156 157
}

158
static int rrl_classify(char *dst, size_t maxlen, const struct sockaddr_storage *a,
159
                        rrl_req_t *req, const knot_dname_t *name)
160
{
161
	/* Class */
162
	uint8_t cls = rrl_clsid(req);
163
	*dst = cls;
164
	int blklen = sizeof(cls);
Jan Včelák's avatar
Jan Včelák committed
165

166
	/* Address (in network byteorder, adjust masks). */
167
	uint64_t nb = 0;
168 169
	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)a;
Jan Včelák's avatar
Jan Včelák committed
170
		nb = *((uint64_t *)(&ipv6->sin6_addr)) & RRL_V6_PREFIX;
171 172 173
	} else {
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)a;
		nb = ((uint32_t)ipv4->sin_addr.s_addr) & RRL_V4_PREFIX;
174
	}
175 176 177
	if (blklen + sizeof(nb) > maxlen) {
		return KNOT_ESPACE;
	}
Jan Včelák's avatar
Jan Včelák committed
178
	memcpy(dst + blklen, (void *)&nb, sizeof(nb));
179 180
	blklen += sizeof(nb);

181
	/* Name */
182
	uint16_t *len_pos = (uint16_t *)(dst + blklen);
183
	blklen += sizeof(uint16_t);
184
	int ret = rrl_clsname(dst + blklen, maxlen - blklen, cls, req, name);
185 186
	if (ret < 0) {
		return ret;
187
	}
188 189
	uint16_t len = ret;
	memcpy(len_pos, &len, sizeof(len));
190
	blklen += len;
Jan Včelák's avatar
Jan Včelák committed
191

192 193 194
	return blklen;
}

195 196
static int bucket_free(rrl_item_t *b, uint32_t now)
{
197 198 199
	return b->cls == CLS_NULL || (b->time + 1 < now);
}

200
static int bucket_match(rrl_item_t *b, rrl_item_t *m)
201
{
202 203 204
	return b->cls    == m->cls &&
	       b->netblk == m->netblk &&
	       b->qname  == m->qname;
205
}
206

207
static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
208 209 210 211 212 213 214
{
	rrl_item_t *np = t->arr + t->size;
	rrl_item_t *b = NULL;
	for (b = t->arr + i; b != np; ++b) {
		if (bucket_free(b, now)) {
			return b - (t->arr + i);
		}
215
	}
216 217 218 219
	np = t->arr + i;
	for (b = t->arr; b != np; ++b) {
		if (bucket_free(b, now)) {
			return (b - t->arr) + (t->size - i);
220
		}
221
	}
Jan Včelák's avatar
Jan Včelák committed
222

223 224
	/* this happens if table is full... force vacate current elm */
	return i;
225
}
226

227 228 229 230 231 232 233 234 235 236 237 238 239
static inline unsigned find_match(rrl_table_t *t, uint32_t id, rrl_item_t *m)
{
	unsigned f = 0;
	unsigned d = 0;
	unsigned match = t->arr[id].hop;
	while (match != 0) {
		d = __builtin_ctz(match);
		f = (id + d) % t->size;
		if (bucket_match(t->arr + f, m)) {
			return d;
		} else {
			match &= ~(1<<d); /* clear potential match */
		}
240
	}
241 242 243 244 245 246 247 248 249

	return HOP_LEN + 1;
}

static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
{
	unsigned rd = HOP_LEN - 1;
	while (rd > 0) {
		unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
250 251 252 253 254 255 256 257 258 259 260 261 262
		if (t->arr[s].hop != 0) {
			unsigned o = __builtin_ctz(t->arr[s].hop);  /* offset of first valid bucket */
			if (o < rd) {                               /* only offsets in <s, f> are interesting */
				unsigned e = (s + o) % t->size;     /* this item will be displaced to [f] */
				unsigned keep_hop = t->arr[*f].hop; /* unpredictable padding */
				memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
				t->arr[*f].hop = keep_hop;
				t->arr[e].cls = CLS_NULL;
				t->arr[s].hop &= ~(1<<o);
				t->arr[s].hop |= 1<<rd;
				*f = e;
				return d - (rd - o);
			}
263
		}
264
		--rd;
265
	}
Jan Včelák's avatar
Jan Včelák committed
266

267 268 269 270
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	return d;
271 272
}

273 274
static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss,
                          uint16_t flags, uint8_t cls)
275
{
276 277 278 279
	if (mod == NULL) {
		return;
	}

280
	char addr_str[SOCKADDR_STRLEN] = {0};
281
	sockaddr_tostr(addr_str, sizeof(addr_str), (struct sockaddr *)ss);
282

283 284 285 286
	const char *what = "leaves";
	if (flags & RRL_BF_ELIMIT) {
		what = "enters";
	}
Jan Včelák's avatar
Jan Včelák committed
287

288
	knotd_mod_log(mod, LOG_NOTICE, "address %s, class %s, %s limiting",
289
	              addr_str, rrl_clsstr(cls), what);
290 291
}

292
static void rrl_lock(rrl_table_t *t, int lk_id)
293
{
294 295
	assert(lk_id > -1);
	pthread_mutex_lock(t->lk + lk_id);
296 297
}

298
static void rrl_unlock(rrl_table_t *t, int lk_id)
299
{
300 301
	assert(lk_id > -1);
	pthread_mutex_unlock(t->lk + lk_id);
302 303
}

304
static int rrl_setlocks(rrl_table_t *rrl, uint32_t granularity)
305 306
{
	assert(!rrl->lk); /* Cannot change while locks are used. */
307
	assert(granularity <= rrl->size / 10); /* Due to int. division err. */
Jan Včelák's avatar
Jan Včelák committed
308

309 310 311
	if (pthread_mutex_init(&rrl->ll, NULL) < 0) {
		return KNOT_ENOMEM;
	}
Jan Včelák's avatar
Jan Včelák committed
312

313
	/* Alloc new locks. */
314
	rrl->lk = malloc(granularity * sizeof(pthread_mutex_t));
315 316 317
	if (!rrl->lk) {
		return KNOT_ENOMEM;
	}
318
	memset(rrl->lk, 0, granularity * sizeof(pthread_mutex_t));
Jan Včelák's avatar
Jan Včelák committed
319

320 321
	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
322 323 324
		if (pthread_mutex_init(rrl->lk + i, NULL) < 0) {
			break;
		}
325 326
		++rrl->lk_count;
	}
327

328 329 330
	/* Incomplete initialization */
	if (rrl->lk_count != granularity) {
		for (size_t i = 0; i < rrl->lk_count; ++i) {
331
			pthread_mutex_destroy(rrl->lk + i);
332 333 334 335 336
		}
		free(rrl->lk);
		rrl->lk_count = 0;
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
337

338 339 340
	return KNOT_EOK;
}

341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
rrl_table_t *rrl_create(size_t size, uint32_t rate)
{
	if (size == 0) {
		return NULL;
	}

	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
	rrl_table_t *t = calloc(1, tbl_len);
	if (!t) {
		return NULL;
	}
	t->size = size;
	t->rate = rate;

	if (dnssec_random_buffer((uint8_t *)&t->key, sizeof(t->key)) != DNSSEC_EOK) {
		free(t);
		return NULL;
	}

	if (rrl_setlocks(t, RRL_LOCK_GRANULARITY) != KNOT_EOK) {
		free(t);
		return NULL;
	}

	return t;
}

/*! \brief Get bucket for current combination of parameters. */
static rrl_item_t *rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a,
                            rrl_req_t *req, const knot_dname_t *zone, uint32_t stamp,
                            int *lock)
372 373
{
	char buf[RRL_CLSBLK_MAXLEN];
374
	int len = rrl_classify(buf, sizeof(buf), a, req, zone);
375 376 377
	if (len < 0) {
		return NULL;
	}
Jan Včelák's avatar
Jan Včelák committed
378

379
	uint32_t id = SipHash24(&t->key, buf, len) % t->size;
Jan Včelák's avatar
Jan Včelák committed
380

381 382 383 384
	/* Lock for lookup. */
	pthread_mutex_lock(&t->ll);

	/* Find an exact match in <id, id + HOP_LEN). */
385 386 387
	char *qname = buf + sizeof(uint8_t) + sizeof(uint64_t);
	uint64_t netblk;
	memcpy(&netblk, buf + sizeof(uint8_t), sizeof(netblk));
388
	rrl_item_t match = {
Daniel Salzman's avatar
Daniel Salzman committed
389
		.hop = 0,
390
		.netblk = netblk,
Daniel Salzman's avatar
Daniel Salzman committed
391 392 393
		.ntok = t->rate * RRL_CAPACITY,
		.cls = buf[0],
		.flags = RRL_BF_NULL,
394
		.qname = SipHash24(&t->key, qname + 1, *qname),
Daniel Salzman's avatar
Daniel Salzman committed
395
		.time = stamp
396 397 398 399 400
	};

	unsigned d = find_match(t, id, &match);
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
		d = find_free(t, id, stamp);
401
	}
Jan Včelák's avatar
Jan Včelák committed
402

403 404
	/* Reduce distance to fit <id, id + HOP_LEN) */
	unsigned f = (id + d) % t->size;
405
	while (d >= HOP_LEN) {
406
		d = reduce_dist(t, id, d, &f);
407
	}
Jan Včelák's avatar
Jan Včelák committed
408

409 410 411 412 413
	/* Assign granular lock and unlock lookup. */
	*lock = f % t->lk_count;
	rrl_lock(t, *lock);
	pthread_mutex_unlock(&t->ll);

414 415
	/* found free elm 'k' which is in <id, id + HOP_LEN) */
	t->arr[id].hop |= (1 << d);
Jan Včelák's avatar
Jan Včelák committed
416
	rrl_item_t *b = t->arr + f;
417 418 419
	assert(f == (id+d) % t->size);

	/* Inspect bucket state. */
420
	unsigned hop = b->hop;
421
	if (b->cls == CLS_NULL) {
422 423
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
424 425
	}
	/* Check for collisions. */
426
	if (!bucket_match(b, &match)) {
427
		if (!(b->flags & RRL_BF_SSTART)) {
428 429
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
430 431 432 433
			b->ntok = t->rate + t->rate / RRL_SSTART;
			b->flags |= RRL_BF_SSTART;
		}
	}
Jan Včelák's avatar
Jan Včelák committed
434

435 436 437
	return b;
}

438
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req,
439
              const knot_dname_t *zone, knotd_mod_t *mod)
440
{
441 442 443
	if (!rrl || !req || !a) {
		return KNOT_EINVAL;
	}
Jan Včelák's avatar
Jan Včelák committed
444

445 446
	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
447
	int lock = -1;
448
	uint32_t now = time_now().tv_sec;
449
	rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
450
	if (!b) {
451 452 453
		if (lock > -1) {
			rrl_unlock(rrl, lock);
		}
454 455
		return KNOT_ERROR;
	}
Jan Včelák's avatar
Jan Včelák committed
456

457 458 459 460 461
	/* Calculate rate for dT */
	uint32_t dt = now - b->time;
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}
462 463
	/* Visit bucket. */
	b->time = now;
464
	if (dt > 0) { /* Window moved. */
465 466 467 468

		/* Check state change. */
		if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
			b->flags &= ~RRL_BF_ELIMIT;
469
			rrl_log_state(mod, a, b->flags, b->cls);
470
		}
Jan Včelák's avatar
Jan Včelák committed
471

472 473
		/* Add new tokens. */
		uint32_t dn = rrl->rate * dt;
474 475
		if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			b->flags &= ~RRL_BF_SSTART;
476
		}
477 478 479
		b->ntok += dn;
		if (b->ntok > RRL_CAPACITY * rrl->rate) {
			b->ntok = RRL_CAPACITY * rrl->rate;
480
		}
481
	}
Jan Včelák's avatar
Jan Včelák committed
482

483 484 485
	/* Last item taken. */
	if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
		b->flags |= RRL_BF_ELIMIT;
486
		rrl_log_state(mod, a, b->flags, b->cls);
487
	}
488 489 490 491 492 493 494

	/* Decay current bucket. */
	if (b->ntok > 0) {
		--b->ntok;
	} else if (b->ntok == 0) {
		ret = KNOT_ELIMIT;
	}
Jan Včelák's avatar
Jan Včelák committed
495

496 497 498
	if (lock > -1) {
		rrl_unlock(rrl, lock);
	}
499 500 501
	return ret;
}

502 503 504 505 506 507
bool rrl_slip_roll(int n_slip)
{
	/* Now n_slip means every Nth answer slips.
	 * That represents a chance of 1/N that answer slips.
	 * Therefore, on average, from 100 answers 100/N will slip. */
	int threshold = RRL_SLIP_MAX / n_slip;
508
	int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
509
	return (roll < threshold);
510 511
}

512
void rrl_destroy(rrl_table_t *rrl)
513
{
514
	if (rrl) {
515 516 517
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
518
		for (size_t i = 0; i < rrl->lk_count; ++i) {
519
			pthread_mutex_destroy(rrl->lk + i);
520 521 522
		}
		free(rrl->lk);
	}
Jan Včelák's avatar
Jan Včelák committed
523

524
	free(rrl);
525
}