Commit 63c8df64 authored by Grigorii Demidov

daemon, resolve, nsrep: improve transport failures handling when forwarding

parent d51e9287
......@@ -688,7 +688,7 @@ static int session_tls_hs_cb(struct session *session, int status)
if (status) {
kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
worker->engine->resolver.cache_rtt,
KR_NS_UPDATE_NORESET);
KR_NS_RESET);
return ret;
}
......@@ -812,10 +812,13 @@ static void on_connect(uv_connect_t *req, int status)
if (status != 0) {
if (kr_verbose_status) {
const char *peer_str = kr_straddr(peer);
kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s)\n",
kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
peer_str ? peer_str : "", uv_strerror(status));
}
worker_del_tcp_waiting(worker, peer);
kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
worker->engine->resolver.cache_rtt,
KR_NS_RESET);
assert(session_tasklist_is_empty(session));
session_waitinglist_retry(session, false);
session_close(session);
......@@ -890,7 +893,7 @@ static void on_tcp_connect_timeout(uv_timer_t *timer)
kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
worker->engine->resolver.cache_rtt,
KR_NS_UPDATE_NORESET);
KR_NS_RESET);
worker->stats.timeout += session_waitinglist_get_len(session);
session_waitinglist_retry(session, true);
......@@ -917,13 +920,12 @@ static void on_udp_timeout(uv_timer_t *timer)
for (uint16_t i = 0; i < MIN(task->pending_count, task->addrlist_count); ++i) {
struct sockaddr *choice = (struct sockaddr *)(&addrlist[i]);
WITH_VERBOSE(qry) {
char addr_str[INET6_ADDRSTRLEN];
inet_ntop(choice->sa_family, kr_inaddr(choice), addr_str, sizeof(addr_str));
VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str);
char *addr_str = kr_straddr(choice);
VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : "");
}
kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_DEAD,
worker->engine->resolver.cache_rtt,
KR_NS_UPDATE_NORESET);
KR_NS_RESET);
}
}
task->timeouts += 1;
......@@ -1278,11 +1280,20 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr
}
/* Start connection process to upstream. */
if (uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect) != 0) {
ret = uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect);
if (ret != 0) {
session_timer_stop(session);
worker_del_tcp_waiting(ctx->worker, addr);
free(conn);
session_close(session);
kr_nsrep_update_rtt(NULL, addr, KR_NS_DEAD,
worker->engine->resolver.cache_rtt,
KR_NS_RESET);
WITH_VERBOSE (qry) {
const char *peer_str = kr_straddr(peer);
kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
peer_str ? peer_str : "", uv_strerror(ret));
}
return kr_error(EAGAIN);
}
......
......@@ -508,7 +508,7 @@ int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx)
if (sa->sa_family == AF_INET) {
scores[i] += FAVOUR_IPV6;
}
} else if (rtt_cache_entry->score >= KR_NS_TIMEOUT) {
} else if (rtt_cache_entry->score >= KR_NS_FWD_TIMEOUT) {
uint64_t now = kr_now();
uint64_t elapsed = now - rtt_cache_entry->tout_timestamp;
scores[i] = KR_NS_MAX_SCORE + 1;
......
......@@ -31,19 +31,18 @@ struct kr_query;
* @note RTT is measured in milliseconds.
*/
enum kr_ns_score {
KR_NS_MAX_SCORE = KR_CONN_RTT_MAX,
KR_NS_TIMEOUT = (95 * KR_NS_MAX_SCORE) / 100,
KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4,
KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2,
KR_NS_PENALTY = 100,
KR_NS_GLUED = 10,
KR_NS_MAX_SCORE = 20 * KR_CONN_RTT_MAX, /* rtt "invalid value" */
KR_NS_DEAD = KR_NS_MAX_SCORE - 1, /* NS didn't answer via UDP transport,
* TCP connection failed or
* TLS handshake failed */
KR_NS_FWD_TIMEOUT = 10000, /* timeout for upstream recursor */
KR_NS_TIMEOUT = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */
KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4,
KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2,
KR_NS_PENALTY = 100,
KR_NS_GLUED = 10
};
/**
* See kr_nsrep_update_rtt()
*/
#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
/** If once NS was marked as "timeouted", it won't participate in NS elections
* at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one minute). */
#define KR_NS_TIMEOUT_RETRY_INTERVAL 60000
......@@ -144,8 +143,6 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx);
* @param ns updated NS representation
* @param addr chosen address (NULL for first)
* @param score new score (i.e. RTT), see enum kr_ns_score
* after two calls with score = KR_NS_DEAD and umode = KR_NS_UPDATE
* server will be guaranteed to have score >= KR_NS_TIMEOUT
* @param cache RTT LRU cache
* @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD)
* @return 0 on success, error code on failure
......
......@@ -878,12 +878,6 @@ static void update_nslist_score(struct kr_request *request, struct kr_query *qry
} else { /* Penalize SERVFAILs. */
kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
}
/* Penalise resolution failures except validation failures. */
} else if (!(qry->flags.DNSSEC_BOGUS)) {
kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_UPDATE);
WITH_VERBOSE(qry) {
VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", kr_straddr(src));
}
}
}
......@@ -1428,6 +1422,8 @@ int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *t
} else if (qflg.FORWARD || qflg.STUB) {
kr_nsrep_sort(&qry->ns, request->ctx);
if (qry->ns.score > KR_NS_MAX_SCORE) {
/* At the moment all NS have bad reputation.
* But there can be existing connections*/
VERBOSE_MSG(qry, "=> no valid NS left\n");
return KR_STATE_FAIL;
}
......@@ -1468,14 +1464,14 @@ int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *t
return KR_STATE_PRODUCE;
}
/* Randomize query case (if not in safemode or turned off) */
/* Randomize query case (if not in safe mode or turned off) */
qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20)
? 0 : kr_rand_uint(0);
knot_dname_t *qname_raw = knot_pkt_qname(packet);
randomized_qname_case(qname_raw, qry->secret);
/*
* Additional query is going to be finalised when calling
* Additional query is going to be finalized when calling
* kr_resolve_checkout().
*/
qry->timestamp_mono = kr_now();
......@@ -1581,8 +1577,6 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
WITH_VERBOSE(qry) {
char ns_str[INET6_ADDRSTRLEN];
KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet));
KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet));
......@@ -1595,12 +1589,13 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
if (!kr_inaddr_equal(dst, addr)) {
continue;
}
inet_ntop(addr->sa_family, kr_inaddr(&qry->ns.addr[i].ip), ns_str, sizeof(ns_str));
const char *ns_str = kr_straddr(addr);
VERBOSE_MSG(qry,
"=> id: '%05u' querying: '%s' score: %u zone cut: '%s' "
"qname: '%s' qtype: '%s' proto: '%s'\n",
qry->id, ns_str, qry->ns.score, zonecut_str,
qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str,
qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp");
break;
}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment