/*  Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

Marek Vavruša's avatar
Marek Vavruša committed
17
#include <assert.h>
18 19 20
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
Marek Vavruša's avatar
Marek Vavruša committed
21

22
#include "lib/nsrep.h"
23
#include "lib/rplan.h"
24
#include "lib/resolve.h"
Marek Vavruša's avatar
Marek Vavruša committed
25
#include "lib/defines.h"
26
#include "lib/generic/pack.h"
27
#include "contrib/ucw/lib.h"
28

29 30 31
/**
 * Built-in bias in favour of IPv6, in the same units as the nameserver score.
 *
 * eval_addr_set() adds it to the score an IPv6 address has to beat, and
 * eval_nsrep() adds it as a penalty to address-less servers whose reputation
 * carries KR_NS_NOIP6.
 */
#define FAVOUR_IPV6 20 /* 20ms bonus for v6 */

32 33 34 35 36 37 38 39
/**
 * @internal Fill a socket address structure from a raw address.
 *
 * The first argument is a member-name prefix (e.g. `x.ip4.sin` or
 * `x.ip6.sin6`) that gets token-pasted with `_family`, `_port` and `_addr`.
 * The port is always set to KR_DNS_PORT in network byte order.
 */
#define ADDR_SET(dst, af, src, src_len) do { \
	dst ## _family = (af); \
	dst ## _port = htons(KR_DNS_PORT); \
	memcpy(&dst ## _addr, (src), (src_len)); \
} while (0)

/** Update nameserver representation with current name/address pair. */
40
static void update_nsrep(struct kr_nsrep *ns, size_t pos, uint8_t *addr, size_t addr_len)
41 42
{
	if (addr == NULL) {
43
		ns->addr[pos].ip.sa_family = AF_UNSPEC;
44 45 46
		return;
	}

47 48 49
	/* Rotate previous addresses to the right. */
	memmove(ns->addr + pos + 1, ns->addr + pos, (KR_NSREP_MAXADDR - pos - 1) * sizeof(ns->addr[0]));

50
	switch(addr_len) {
51
	case sizeof(struct in_addr):
52
		ADDR_SET(ns->addr[pos].ip4.sin, AF_INET, addr, addr_len); break;
53
	case sizeof(struct in6_addr):
54
		ADDR_SET(ns->addr[pos].ip6.sin6, AF_INET6, addr, addr_len); break;
55 56 57 58
	default: assert(0); break;
	}
}

59 60
/**
 * Make `name` the elected nameserver and load its chosen addresses.
 *
 * @param ns    nameserver representation to update
 * @param name  name of the new leader
 * @param addr  KR_NSREP_MAXADDR packed address objects, best first; the first
 *              NULL entry ends the list
 * @param score score of the new leader
 *
 * A candidate without any address never replaces a leader that has one.
 */
static void update_nsrep_set(struct kr_nsrep *ns, const knot_dname_t *name, uint8_t *addr[], unsigned score)
{
	/* NSLIST is not empty, empty NS cannot be a leader. */
	const bool leader_has_addr = (ns->addr[0].ip.sa_family != AF_UNSPEC);
	if (addr[0] == NULL && leader_has_addr) {
		return;
	}

	/* Set new NS leader */
	ns->score = score;
	ns->name = name;
	for (size_t slot = 0; slot < KR_NSREP_MAXADDR && addr[slot] != NULL; ++slot) {
		uint8_t *obj = addr[slot];
		void *obj_val = pack_obj_val(obj);
		size_t obj_len = pack_obj_len(obj);
		update_nsrep(ns, slot, obj_val, obj_len);
	}
}

79 80
#undef ADDR_SET

81
/**
 * Pick the best-scoring usable addresses out of a nameserver's address pack.
 *
 * @param addr_set pack of raw addresses; entries of sizeof(struct in6_addr)
 *                 bytes are handled as IPv6, all others as IPv4
 * @param rttcache cache of per-address scores keyed by raw address, may be NULL
 * @param score    score that an address has to beat
 * @param addr     [out] KR_NSREP_MAXADDR candidates, most recently accepted first
 * @param opts     options; QUERY_NO_IPV4 / QUERY_NO_IPV6 disable a family
 * @return score of the last accepted address, or `score` if none was accepted
 */
static unsigned eval_addr_set(pack_t *addr_set, kr_nsrep_lru_t *rttcache, unsigned score, uint8_t *addr[], uint32_t opts)
{
	/* Name server is better candidate if it has address record. */
	for (uint8_t *it = pack_head(*addr_set); it != pack_tail(*addr_set); it = pack_obj_next(it)) {
		void *raw = pack_obj_val(it);
		size_t raw_len = pack_obj_len(it);
		const bool is_v6 = (raw_len == sizeof(struct in6_addr));

		/* Skip addresses of a disabled family. */
		if (opts & (is_v6 ? QUERY_NO_IPV6 : QUERY_NO_IPV4)) {
			continue;
		}

		/* Addresses without a cached RTT count as glue. */
		const unsigned favour = is_v6 ? FAVOUR_IPV6 : 0;
		unsigned *cached = rttcache ? lru_get(rttcache, raw, raw_len) : NULL;
		const unsigned addr_score = (cached) ? *cached : KR_NS_GLUED;
		if (addr_score >= score + favour) {
			continue;
		}

		/* New best: push the previous contenders one slot down. */
		for (size_t i = KR_NSREP_MAXADDR - 1; i > 0; --i) {
			addr[i] = addr[i - 1];
		}
		addr[0] = it;
		score = addr_score;
	}
	return score;
}

static int eval_nsrep(const char *k, void *v, void *baton)
{
116 117 118
	struct kr_query *qry = baton;
	struct kr_nsrep *ns = &qry->ns;
	struct kr_context *ctx = ns->ctx;
119
	unsigned score = KR_NS_MAX_SCORE;
120
	unsigned reputation = 0;
121
	uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
122

123 124 125 126 127 128 129 130
	/* Fetch NS reputation */
	if (ctx->cache_rep) {
		unsigned *cached = lru_get(ctx->cache_rep, k, knot_dname_size((const uint8_t *)k));
		if (cached) {
			reputation = *cached;
		}
	}

131 132 133 134
	/* Favour nameservers with unknown addresses to probe them,
	 * otherwise discover the current best address for the NS. */
	pack_t *addr_set = (pack_t *)v;
	if (addr_set->len == 0) {
135
		score = KR_NS_UNKNOWN;
136 137 138 139 140
		/* If the server doesn't have IPv6, give it disadvantage. */
		if (reputation & KR_NS_NOIP6) {
			score += FAVOUR_IPV6;
			/* If the server is unknown but has rep record, treat it as timeouted */
			if (reputation & KR_NS_NOIP4) {
141
				score = KR_NS_UNKNOWN;
142 143
				reputation = 0; /* Start with clean slate */
			}
144
		}
145
	} else {
146
		score = eval_addr_set(addr_set, ctx->cache_rtt, score, addr_choice, ctx->options);
147 148
	}

149 150 151 152
	/* Probabilistic bee foraging strategy (naive).
	 * The fastest NS is preferred by workers until it is depleted (timeouts or degrades),
	 * at the same time long distance scouts probe other sources (low probability).
	 * Servers on TIMEOUT (depleted) can be probed by the dice roll only */
153
	if (score <= ns->score && (qry->flags & QUERY_NO_THROTTLE || score < KR_NS_TIMEOUT)) {
154
		update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score);
155
		ns->reputation = reputation;
156
	} else {
157 158
		/* With 10% chance, probe server with a probability given by its RTT / MAX_RTT */
		if ((kr_rand_uint(100) < 10) && (kr_rand_uint(KR_NS_MAX_SCORE) >= score)) {
159 160 161 162
			/* If this is a low-reliability probe, go with TCP to get ICMP reachability check. */
			if (score >= KR_NS_LONG) {
				qry->flags |= QUERY_TCP;
			}
163
			update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score);
164
			ns->reputation = reputation;
165 166
			return 1; /* Stop evaluation */
		}
167 168 169 170 171
	}

	return kr_ok();
}

172 173 174 175 176 177 178 179 180 181 182 183 184
/**
 * Pin the query to a single, explicitly given nameserver address.
 *
 * The nameserver gets an empty name, score KR_NS_UNKNOWN and a cleared
 * reputation; `addr` is stored in slot 0 and slot 1 is marked unused.
 *
 * @param qry      query to update
 * @param addr     raw address bytes
 * @param addr_len length of `addr`; sizeof(struct in_addr) for IPv4 or
 *                 sizeof(struct in6_addr) for IPv6
 * @return kr_ok() on success, kr_error(EINVAL) on a NULL argument or an
 *         unsupported address length (the query is left untouched then)
 */
int kr_nsrep_set(struct kr_query *qry, uint8_t *addr, size_t addr_len)
{
	if (!qry || !addr) {
		return kr_error(EINVAL);
	}
	/* update_nsrep() only asserts on the length; reject a bad one here so
	 * that builds without assertions do not report success while keeping
	 * a stale address in slot 0. */
	if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) {
		return kr_error(EINVAL);
	}
	qry->ns.name = (const uint8_t *)"";
	qry->ns.score = KR_NS_UNKNOWN;
	qry->ns.reputation = 0;
	update_nsrep(&qry->ns, 0, addr, addr_len);
	update_nsrep(&qry->ns, 1, NULL, 0);
	return kr_ok();
}

185 186
/**
 * @internal Reset the election state of `ns` and bind it to context `ctx_`.
 *
 * Marks address slot 0 as unused, clears the reputation and sets the score to
 * one above KR_NS_MAX_SCORE. The name is left as it was.
 */
#define ELECT_INIT(ns, ctx_) do { \
	(ns)->score = KR_NS_MAX_SCORE + 1; \
	(ns)->reputation = 0; \
	(ns)->addr[0].ip.sa_family = AF_UNSPEC; \
	(ns)->ctx = (ctx_); \
} while (0)

192
int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx)
193
{
194 195 196 197 198
	if (!qry || !ctx) {
		return kr_error(EINVAL);
	}

	struct kr_nsrep *ns = &qry->ns;
199
	ELECT_INIT(ns, ctx);
200
	return map_walk(&qry->zone_cut.nsset, eval_nsrep, qry);
201
}
202

203 204 205 206 207 208 209 210 211 212 213 214 215 216
int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx)
{
	if (!qry || !ctx) {
		return kr_error(EINVAL);
	}

	/* Get address list for this NS */
	struct kr_nsrep *ns = &qry->ns;
	ELECT_INIT(ns, ctx);
	pack_t *addr_set = map_get(&qry->zone_cut.nsset, (const char *)ns->name);
	if (!addr_set) {
		return kr_error(ENOENT);
	}
	/* Evaluate addr list */
217
	uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
218
	unsigned score = eval_addr_set(addr_set, ctx->cache_rtt, ns->score, addr_choice, ctx->options);
219
	update_nsrep_set(ns, ns->name, addr_choice, score);
220 221 222 223 224
	return kr_ok();
}

#undef ELECT_INIT

225 226
/**
 * Record a new score for a nameserver address in the RTT cache.
 *
 * @param ns    nameserver; its first address must be set
 * @param addr  address to update, or NULL to use ns->addr[0]; an address of a
 *              family other than AF_INET / AF_INET6 also falls back to ns->addr[0]
 * @param score new measurement, clamped to (KR_NS_GLUED, KR_NS_MAX_SCORE]
 * @param cache cache keyed by raw address
 * @param umode KR_NS_UPDATE averages with the stored value, KR_NS_RESET
 *              overwrites it, KR_NS_ADD adds to it (capped at
 *              KR_NS_MAX_SCORE - 1); any other mode leaves the entry as is
 * @return kr_ok(), kr_error(EINVAL) on bad arguments, kr_error(ENOMEM) if
 *         the cache entry could not be obtained
 */
int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
			unsigned score, kr_nsrep_lru_t *cache, int umode)
{
	if (ns == NULL || cache == NULL || ns->addr[0].ip.sa_family == AF_UNSPEC) {
		return kr_error(EINVAL);
	}

	/* The cache key defaults to the first address of the nameserver... */
	const char *rtt_key = kr_nsrep_inaddr(ns->addr[0]);
	size_t rtt_key_len = kr_nsrep_inaddr_len(ns->addr[0]);
	/* ...unless the caller provided a specific address. */
	switch (addr ? addr->sa_family : AF_UNSPEC) {
	case AF_INET:
		rtt_key = (const char *)&((const struct sockaddr_in *)addr)->sin_addr;
		rtt_key_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		rtt_key = (const char *)&((const struct sockaddr_in6 *)addr)->sin6_addr;
		rtt_key_len = sizeof(struct in6_addr);
		break;
	default:
		break;
	}

	unsigned *entry = lru_set(cache, rtt_key, rtt_key_len);
	if (entry == NULL) {
		return kr_error(ENOMEM);
	}

	/* Score limits */
	if (score > KR_NS_MAX_SCORE) {
		score = KR_NS_MAX_SCORE;
	}
	if (score <= KR_NS_GLUED) {
		score = KR_NS_GLUED + 1;
	}

	/* A stored value of zero is overwritten regardless of the mode. */
	const int mode = (*entry == 0) ? KR_NS_RESET : umode;

	/* Update score, by default smooth over last two measurements. */
	if (mode == KR_NS_RESET) {
		*entry = score;
	} else if (mode == KR_NS_UPDATE) {
		*entry = (*entry + score) / 2;
	} else if (mode == KR_NS_ADD) {
		*entry = MIN(KR_NS_MAX_SCORE - 1, *entry + score);
	}
	return kr_ok();
}
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282

/**
 * Set the reputation of a nameserver and remember it in the cache.
 *
 * @param ns         nameserver; its name is the cache key
 * @param reputation new reputation value
 * @param cache      reputation cache
 * @return kr_ok(), kr_error(EINVAL) on a NULL argument, kr_error(ENOMEM) if
 *         the cache entry could not be obtained (ns->reputation is updated
 *         even then)
 */
int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache)
{
	if (ns == NULL || cache == NULL) {
		return kr_error(EINVAL);
	}

	/* The in-struct copy is written first, before the cache is touched. */
	ns->reputation = reputation;

	/* Store reputation in the LRU cache */
	unsigned *entry = lru_set(cache, (const char *)ns->name, knot_dname_size(ns->name));
	if (entry == NULL) {
		return kr_error(ENOMEM);
	}
	*entry = reputation;
	return kr_ok();
}