Commit b5bfac19 authored by Daniel Salzman's avatar Daniel Salzman

knotd: introduce multimaster with failover

parent 6a2c8ff0
......@@ -394,8 +394,8 @@ Definition of remote servers for zone transfers or notifications.
.ft C
remote:
\- id: STR
address: ADDR[@INT]
via: ADDR[@INT]
address: ADDR[@INT] ...
via: ADDR[@INT] ...
key: key_id
.ft P
.fi
......@@ -406,16 +406,17 @@ remote:
A remote identifier.
.SS address
.sp
A destination IP address of the remote server. Optional destination port
specification (default is 53) can be appended to the address using \fB@\fP
separator.
An ordered list of destination IP addresses which are used for communication
with the remote server. The addresses are tried in sequence until the
operation is successful. Optional destination port (default is 53)
can be appended to the address using \fB@\fP separator.
.sp
Default: empty
.SS via
.sp
A source IP address which is used to communicate with the remote server.
Optional source port specification can be appended to the address using
\fB@\fP separator.
An ordered list of source IP addresses. The first address with the same family
as the destination address is used. Optional source port (default is random)
can be appended to the address using \fB@\fP separator.
.sp
Default: empty
.SS key
......@@ -459,6 +460,7 @@ zone:
file: STR
storage: STR
master: remote_id ...
ddns\-master: remote_id
notify: remote_id ...
acl: acl_id ...
semantic\-checks: BOOL
......@@ -507,6 +509,12 @@ Default: \fB${localstatedir}/lib/knot\fP (configured with \fB\-\-with\-storage=p
An ordered list of \fI\%references\fP to zone master servers.
.sp
Default: empty
.SS ddns\-master
.sp
A \fI\%reference\fP to the zone primary master server.
If not specified, the first \fI\%master\fP server is used.
.sp
Default: empty
.SS notify
.sp
An ordered list of \fI\%references\fP to remotes to which notify
......
......@@ -466,8 +466,8 @@ Definition of remote servers for zone transfers or notifications.
remote:
- id: STR
address: ADDR[@INT]
via: ADDR[@INT]
address: ADDR[@INT] ...
via: ADDR[@INT] ...
key: key_id
.. _remote_id:
......@@ -482,9 +482,10 @@ A remote identifier.
address
-------
A destination IP address of the remote server. Optional destination port
specification (default is 53) can be appended to the address using ``@``
separator.
An ordered list of destination IP addresses which are used for communication
with the remote server. The addresses are tried in sequence until the
operation is successful. Optional destination port (default is 53)
can be appended to the address using ``@`` separator.
Default: empty
......@@ -493,9 +494,9 @@ Default: empty
via
---
A source IP address which is used to communicate with the remote server.
Optional source port specification can be appended to the address using
``@`` separator.
An ordered list of source IP addresses. The first address with the same family
as the destination address is used. Optional source port (default is random)
can be appended to the address using ``@`` separator.
Default: empty
......@@ -547,6 +548,7 @@ Definition of zones served by the server.
file: STR
storage: STR
master: remote_id ...
ddns-master: remote_id
notify: remote_id ...
acl: acl_id ...
semantic-checks: BOOL
......@@ -609,6 +611,16 @@ An ordered list of :ref:`references<remote_id>` to zone master servers.
Default: empty
.. _zone_ddns-master:
ddns-master
-----------
A :ref:`reference<remote_id>` to the zone primary master server.
If not specified, the first :ref:`master<zone_master>` server is used.
Default: empty
.. _zone_notify:
notify
......
......@@ -136,6 +136,7 @@ static const yp_item_t desc_remote[] = {
{ C_FILE, YP_TSTR, YP_VNONE }, \
{ C_STORAGE, YP_TSTR, YP_VSTR = { STORAGE_DIR } }, \
{ C_MASTER, YP_TREF, YP_VREF = { C_RMT }, YP_FMULTI, { check_ref } }, \
{ C_DDNS_MASTER, YP_TREF, YP_VREF = { C_RMT }, YP_FNONE, { check_ref } }, \
{ C_NOTIFY, YP_TREF, YP_VREF = { C_RMT }, YP_FMULTI, { check_ref } }, \
{ C_ACL, YP_TREF, YP_VREF = { C_ACL }, YP_FMULTI, { check_ref } }, \
{ C_SEM_CHECKS, YP_TBOOL, YP_VNONE }, \
......
......@@ -36,6 +36,7 @@
#define C_BG_WORKERS "\x12""background-workers"
#define C_COMMENT "\x07""comment"
#define C_CTL "\x07""control"
#define C_DDNS_MASTER "\x0B""ddns-master"
#define C_DENY "\x04""deny"
#define C_DISABLE_ANY "\x0B""disable-any"
#define C_DNSSEC_SIGNING "\x0E""dnssec-signing"
......
......@@ -385,7 +385,7 @@ int axfr_answer_process(knot_pkt_t *pkt, struct answer_data *adata)
if (rcode != KNOT_RCODE_NOERROR) {
lookup_table_t *lut = lookup_by_id(knot_rcode_names, rcode);
if (lut != NULL) {
AXFRIN_LOG(LOG_ERR, "server responded with %s", lut->name);
AXFRIN_LOG(LOG_WARNING, "server responded with %s", lut->name);
}
return KNOT_STATE_FAIL;
}
......@@ -397,7 +397,7 @@ int axfr_answer_process(knot_pkt_t *pkt, struct answer_data *adata)
int ret = axfr_answer_init(adata);
if (ret != KNOT_EOK) {
AXFRIN_LOG(LOG_ERR, "failed (%s)", knot_strerror(ret));
AXFRIN_LOG(LOG_WARNING, "failed (%s)", knot_strerror(ret));
return KNOT_STATE_FAIL;
}
} else {
......
......@@ -888,12 +888,27 @@ static int process_soa_answer(knot_pkt_t *pkt, struct answer_data *data)
if (serial_compare(our_serial, their_serial) >= 0) {
ANSWER_LOG(LOG_INFO, data, "refresh, outgoing", "zone is up-to-date");
zone_events_cancel(zone, ZONE_EVENT_EXPIRE);
/* Clear preferred master. */
pthread_mutex_lock(&zone->preferred_lock);
free(zone->preferred_master);
zone->preferred_master = NULL;
pthread_mutex_unlock(&zone->preferred_lock);
return KNOT_STATE_DONE; /* Our zone is up to date. */
}
/* Our zone is outdated, schedule zone transfer. */
ANSWER_LOG(LOG_INFO, data, "refresh, outgoing", "master has newer serial %u -> %u",
our_serial, their_serial);
/* Update preferred master. */
pthread_mutex_lock(&zone->preferred_lock);
free(zone->preferred_master);
zone->preferred_master = malloc(sizeof(struct sockaddr_storage));
*zone->preferred_master = *data->param->remote;
pthread_mutex_unlock(&zone->preferred_lock);
zone_events_schedule(zone, ZONE_EVENT_XFER, ZONE_EVENT_NOW);
return KNOT_STATE_DONE;
}
......
......@@ -98,6 +98,14 @@ int notify_process_query(knot_pkt_t *pkt, struct query_data *qdata)
/* Incoming NOTIFY expires REFRESH timer and renews EXPIRE timer. */
zone_t *zone = (zone_t *)qdata->zone;
/* Update preferred master. */
pthread_mutex_lock(&zone->preferred_lock);
free(zone->preferred_master);
zone->preferred_master = malloc(sizeof(struct sockaddr_storage));
*zone->preferred_master = *qdata->param->remote;
pthread_mutex_unlock(&zone->preferred_lock);
zone_events_schedule(zone, ZONE_EVENT_REFRESH, ZONE_EVENT_NOW);
int ret = zone_events_write_persistent(zone);
if (ret != KNOT_EOK) {
......
......@@ -341,13 +341,6 @@ static int process_requests(zone_t *zone, list_t *requests)
static int forward_request(zone_t *zone, struct knot_request *request)
{
/* Create requestor instance. */
struct knot_requestor re;
knot_requestor_init(&re, NULL);
/* Fetch primary master. */
const conf_remote_t master = zone_master(zone);
/* Copy request and assign new ID. */
knot_pkt_t *query = knot_pkt_new(NULL, request->query->max_size, NULL);
int ret = knot_pkt_copy(query, request->query);
......@@ -359,6 +352,29 @@ static int forward_request(zone_t *zone, struct knot_request *request)
knot_wire_set_id(query->wire, dnssec_random_uint16_t());
knot_tsig_append(query->wire, &query->size, query->max_size, query->tsig_rr);
/* Read the ddns master or the first master. */
conf_val_t remote = conf_zone_get(conf(), C_DDNS_MASTER, zone->name);
if (remote.code != KNOT_EOK) {
remote = conf_zone_get(conf(), C_MASTER, zone->name);
}
/* Get the number of remote addresses. */
conf_val_t addr = conf_id_get(conf(), C_RMT, C_ADDR, &remote);
size_t addr_count = conf_val_count(&addr);
/* Try all remote addresses to forward the request to. */
for (size_t i = 0; i < addr_count; i++) {
conf_remote_t master = conf_remote(conf(), &remote, i);
/* Create requestor instance. */
struct knot_requestor re;
knot_requestor_init(&re, NULL);
/* Prepare packet capture layer. */
struct capture_param param;
param.sink = request->resp;
knot_requestor_overlay(&re, LAYER_CAPTURE, &param);
/* Create a request. */
const struct sockaddr *dst = (const struct sockaddr *)&master.addr;
const struct sockaddr *src = (const struct sockaddr *)&master.via;
......@@ -368,20 +384,24 @@ static int forward_request(zone_t *zone, struct knot_request *request)
return KNOT_ENOMEM;
}
/* Prepare packet capture layer. */
struct capture_param param;
param.sink = request->resp;
knot_requestor_overlay(&re, LAYER_CAPTURE, &param);
/* Enqueue and execute request. */
/* Enqueue the request. */
ret = knot_requestor_enqueue(&re, req);
if (ret == KNOT_EOK) {
if (ret != KNOT_EOK) {
knot_requestor_clear(&re);
continue;
}
/* Execute the request. */
conf_val_t val = conf_get(conf(), C_SRV, C_TCP_REPLY_TIMEOUT);
struct timeval tv = { conf_int(&val), 0 };
ret = knot_requestor_exec(&re, &tv);
if (ret == KNOT_EOK) {
knot_requestor_clear(&re);
break;
}
knot_requestor_clear(&re);
}
/* Restore message ID and TSIG. */
knot_wire_set_id(request->resp->wire, knot_wire_get_id(request->query->wire));
......
......@@ -174,7 +174,7 @@ static int zone_query_transfer(zone_t *zone, const conf_remote_t *master, uint16
}
/* Log connection errors. */
ZONE_XFER_LOG(LOG_ERR, pkt_type, "failed (%s)", knot_strerror(ret));
ZONE_XFER_LOG(LOG_WARNING, pkt_type, "failed (%s)", knot_strerror(ret));
}
return ret;
......@@ -315,6 +315,20 @@ fail:
return result;
}
/*!
 * \brief Master callback used by the refresh event.
 *
 * Executes a KNOT_QUERY_NORMAL query against the given master and logs a
 * warning when the query does not succeed.
 *
 * \param zone    Zone being refreshed (must not be NULL).
 * \param master  Remote to query (must not be NULL).
 * \param ctx     Not used by this callback.
 *
 * \return Return value of zone_query_execute().
 */
static int try_refresh(zone_t *zone, const conf_remote_t *master, void *ctx)
{
	assert(zone);
	assert(master);

	const int result = zone_query_execute(zone, KNOT_QUERY_NORMAL, master);
	if (result == KNOT_EOK) {
		return result;
	}

	/* The query failed; report it before handing the code back. */
	ZONE_QUERY_LOG(LOG_WARNING, zone, master, "refresh, outgoing",
	               "failed (%s)", knot_strerror(result));

	return result;
}
int event_refresh(zone_t *zone)
{
assert(zone);
......@@ -330,15 +344,11 @@ int event_refresh(zone_t *zone)
return KNOT_EOK;
}
const conf_remote_t master = zone_master(zone);
int ret = zone_query_execute(zone, KNOT_QUERY_NORMAL, &master);
int ret = zone_master_try(zone, try_refresh, NULL);
const knot_rdataset_t *soa = zone_soa(zone);
if (ret != KNOT_EOK) {
/* Log connection errors. */
ZONE_QUERY_LOG(LOG_WARNING, zone, &master, "SOA query, outgoing",
"failed (%s)", knot_strerror(ret));
/* Rotate masters if current failed. */
zone_master_rotate(zone);
log_zone_error(zone->name, "refresh, failed (%s)",
knot_strerror(ret));
/* Schedule next retry. */
zone_events_schedule(zone, ZONE_EVENT_REFRESH, knot_soa_retry(soa));
start_expire_timer(zone, soa);
......@@ -350,6 +360,21 @@ int event_refresh(zone_t *zone)
return zone_events_write_persistent(zone);
}
/*! \brief Context handed to try_xfer() through zone_master_try(). */
struct transfer_data {
	uint16_t pkt_type; /*!< Transfer query type (KNOT_QUERY_AXFR or KNOT_QUERY_IXFR). */
};
/*!
 * \brief Master callback used by the transfer event.
 *
 * \param zone    Zone to be transferred (must not be NULL).
 * \param master  Remote to transfer from (must not be NULL).
 * \param _data   Pointer to a struct transfer_data carrying the packet type
 *                (must not be NULL).
 *
 * \return Return value of zone_query_transfer().
 */
static int try_xfer(zone_t *zone, const conf_remote_t *master, void *_data)
{
	assert(zone);
	assert(master);
	assert(_data);

	const struct transfer_data *xfer = _data;

	return zone_query_transfer(zone, master, xfer->pkt_type);
}
int event_xfer(zone_t *zone)
{
assert(zone);
......@@ -359,20 +384,29 @@ int event_xfer(zone_t *zone)
return KNOT_EOK;
}
struct transfer_data data = { 0 };
/* Determine transfer type. */
bool is_boostrap = zone_contents_is_empty(zone->contents);
uint16_t pkt_type = KNOT_QUERY_IXFR;
if (is_boostrap || zone->flags & ZONE_FORCE_AXFR) {
pkt_type = KNOT_QUERY_AXFR;
bool is_bootstrap = zone_contents_is_empty(zone->contents);
if (is_bootstrap || zone->flags & ZONE_FORCE_AXFR) {
data.pkt_type = KNOT_QUERY_AXFR;
} else {
data.pkt_type = KNOT_QUERY_IXFR;
}
/* Execute zone transfer and reschedule timers. */
const conf_remote_t master = zone_master(zone);
int ret = zone_query_transfer(zone, &master, pkt_type);
/* Execute zone transfer. */
int ret = zone_master_try(zone, try_xfer, &data);
/* Clear preferred master. */
pthread_mutex_lock(&zone->preferred_lock);
free(zone->preferred_master);
zone->preferred_master = NULL;
pthread_mutex_unlock(&zone->preferred_lock);
/* Handle failure during transfer. */
if (ret != KNOT_EOK) {
if (is_boostrap) {
log_zone_error(zone->name, "transfer, failed (%s)",
knot_strerror(ret));
if (is_bootstrap) {
zone->bootstrap_retry = bootstrap_next(zone->bootstrap_retry);
zone_events_schedule(zone, ZONE_EVENT_XFER, zone->bootstrap_retry);
} else {
......@@ -404,7 +438,7 @@ int event_xfer(zone_t *zone)
zone->flags &= ~ZONE_FORCE_AXFR;
/* Trim extra heap. */
if (!is_boostrap) {
if (!is_bootstrap) {
mem_trim();
}
......@@ -485,22 +519,27 @@ int event_notify(zone_t *zone)
}
/* Walk through configured remotes and send messages. */
conf_val_t val = conf_zone_get(conf(), C_NOTIFY, zone->name);
while (val.code == KNOT_EOK) {
conf_remote_t remote = conf_remote(conf(), &val);
int ret = zone_query_execute(zone, KNOT_QUERY_NOTIFY, &remote);
conf_val_t notify = conf_zone_get(conf(), C_NOTIFY, zone->name);
while (notify.code == KNOT_EOK) {
conf_val_t addr = conf_id_get(conf(), C_RMT, C_ADDR, &notify);
size_t addr_count = conf_val_count(&addr);
for (int i = 0; i < addr_count; i++) {
conf_remote_t slave = conf_remote(conf(), &notify, i);
int ret = zone_query_execute(zone, KNOT_QUERY_NOTIFY, &slave);
if (ret == KNOT_EOK) {
ZONE_QUERY_LOG(LOG_INFO, zone, &remote,
ZONE_QUERY_LOG(LOG_INFO, zone, &slave,
"NOTIFY, outgoing", "serial %u",
zone_contents_serial(zone->contents));
break;
} else {
ZONE_QUERY_LOG(LOG_WARNING, zone, &remote,
ZONE_QUERY_LOG(LOG_WARNING, zone, &slave,
"NOTIFY, outgoing", "failed (%s)",
knot_strerror(ret));
}
}
conf_val_next(&val);
conf_val_next(&notify);
}
return KNOT_EOK;
......
......@@ -31,6 +31,7 @@
#include "knot/zone/zone.h"
#include "knot/zone/zonefile.h"
#include "knot/zone/contents.h"
#include "knot/updates/acl.h"
#include "knot/updates/apply.h"
#include "libknot/processing/requestor.h"
#include "knot/nameserver/process_query.h"
......@@ -76,6 +77,9 @@ zone_t* zone_new(const knot_dname_t *name)
// Journal lock
pthread_mutex_init(&zone->journal_lock, NULL);
// Preferred master lock
pthread_mutex_init(&zone->preferred_lock, NULL);
// Initialize events
zone_events_init(zone);
......@@ -98,6 +102,10 @@ void zone_free(zone_t **zone_ptr)
pthread_mutex_destroy(&zone->ddns_lock);
pthread_mutex_destroy(&zone->journal_lock);
/* Free preferred master. */
pthread_mutex_destroy(&zone->preferred_lock);
free(zone->preferred_master);
/* Free zone contents. */
zone_contents_deep_free(&zone->contents);
......@@ -188,29 +196,88 @@ bool zone_is_slave(const zone_t *zone)
return conf_val_count(&val) > 0 ? true : false;
}
conf_remote_t zone_master(const zone_t *zone)
/*!
* \brief Get preferred zone master while checking its existence.
*/
int static preferred_master(zone_t *zone, conf_remote_t *master)
{
conf_val_t val = conf_zone_get(conf(), C_MASTER, zone->name);
pthread_mutex_lock(&zone->preferred_lock);
/* Seek the current master if possible. */
if (zone->master_index < conf_val_count(&val)) {
for (size_t index = 0; index < zone->master_index; index++) {
conf_val_next(&val);
if (zone->preferred_master == NULL) {
pthread_mutex_unlock(&zone->preferred_lock);
return KNOT_EINVAL;
}
conf_val_t masters = conf_zone_get(conf(), C_MASTER, zone->name);
while (masters.code == KNOT_EOK) {
conf_val_t addr = conf_id_get(conf(), C_RMT, C_ADDR, &masters);
size_t addr_count = conf_val_count(&addr);
for (size_t i = 0; i < addr_count; i++) {
conf_remote_t remote = conf_remote(conf(), &masters, i);
if (netblock_match(&remote.addr, zone->preferred_master, -1)) {
*master = remote;
pthread_mutex_unlock(&zone->preferred_lock);
return KNOT_EOK;
}
}
return conf_remote(conf(), &val);
conf_val_next(&masters);
}
pthread_mutex_unlock(&zone->preferred_lock);
return KNOT_ENOENT;
}
void zone_master_rotate(zone_t *zone)
/*!
 * \brief Run a callback against the zone masters until one attempt succeeds.
 *
 * The preferred master (if one is recorded and still configured) is tried
 * first. If that fails, all addresses of all configured masters are tried in
 * configuration order. Iteration stops as soon as the callback returns
 * KNOT_EOK, so the callback is never invoked again after a success.
 *
 * \param zone           Zone whose masters are tried.
 * \param callback       Action to perform with each candidate master.
 * \param callback_data  Opaque pointer passed unchanged to the callback.
 *
 * \retval KNOT_EOK        The callback succeeded for some master.
 * \retval KNOT_EINVAL     \a zone or \a callback is NULL.
 * \retval KNOT_ENOMASTER  The callback failed for every master tried.
 */
int zone_master_try(zone_t *zone, zone_master_cb callback, void *callback_data)
{
	if (zone == NULL || callback == NULL) {
		return KNOT_EINVAL;
	}

	/* Try the preferred server. */
	conf_remote_t preferred = { { AF_UNSPEC } };
	if (preferred_master(zone, &preferred) == KNOT_EOK) {
		int ret = callback(zone, &preferred, callback_data);
		if (ret == KNOT_EOK) {
			return ret;
		}
	}

	/* Try all the other servers. */
	bool success = false;

	conf_val_t masters = conf_zone_get(conf(), C_MASTER, zone->name);
	while (masters.code == KNOT_EOK && !success) {
		conf_val_t addr = conf_id_get(conf(), C_RMT, C_ADDR, &masters);
		size_t addr_count = conf_val_count(&addr);

		for (size_t i = 0; i < addr_count && !success; i++) {
			conf_remote_t master = conf_remote(conf(), &masters, i);

			/* Skip the preferred address once: it has just failed above. */
			if (preferred.addr.ss_family != AF_UNSPEC &&
			    netblock_match(&master.addr, &preferred.addr, -1)) {
				preferred.addr.ss_family = AF_UNSPEC;
				continue;
			}

			success = (callback(zone, &master, callback_data) == KNOT_EOK);
		}

		/* No address of this remote worked; report before moving on. */
		if (!success) {
			log_zone_warning(zone->name, "refresh, remote '%s' "
			                 "not available", conf_str(&masters));
		}

		conf_val_next(&masters);
	}

	return success ? KNOT_EOK : KNOT_ENOMASTER;
}
int zone_flush_journal(zone_t *zone)
......
......@@ -67,12 +67,14 @@ typedef struct zone
/*! \brief Zone events. */
zone_events_t events; /*!< Zone events timers. */
uint32_t bootstrap_retry; /*!< AXFR/IN bootstrap retry. */
time_t zonefile_mtime;
uint32_t bootstrap_retry; /*!< AXFR/IN bootstrap retry. */
uint32_t zonefile_serial;
/*! \brief Config master list index of the current master server. */
size_t master_index;
/*! \brief Preferred master lock. */
pthread_mutex_t preferred_lock;
/*! \brief Preferred master for remote operation. */
struct sockaddr_storage *preferred_master;
/*! \brief Query modules. */
list_t query_modules;
......@@ -109,17 +111,23 @@ int zone_change_store(zone_t *zone, changeset_t *change);
/*!
* \brief Atomically switch the content of the zone.
*/
zone_contents_t *zone_switch_contents(zone_t *zone,
zone_contents_t *new_contents);
zone_contents_t *zone_switch_contents(zone_t *zone, zone_contents_t *new_contents);
/*! \brief Checks if the zone is slave. */
bool zone_is_slave(const zone_t *zone);
/*! \brief Return the current zone master. */
conf_remote_t zone_master(const zone_t *zone);
/*!
 * \brief Callback invoked by zone_master_try() for one candidate master.
 *
 * \param zone    Zone the operation is performed for.
 * \param remote  Master (one concrete address) to try.
 * \param data    Opaque pointer supplied by the caller of zone_master_try().
 *
 * \return KNOT_EOK is treated as success, any other value as a failed attempt.
 */
typedef int (*zone_master_cb)(zone_t *zone, const conf_remote_t *remote, void *data);
/*! \brief Set the next zone master as a current. */
void zone_master_rotate(zone_t *zone);
/*!
 * \brief Perform an action with the first working master server.
 *
 * The function iterates over available masters. For each master, the callback
 * function is called. If the callback function succeeds (\ref KNOT_EOK is
 * returned), the iteration is terminated. The preferred master, if one is
 * recorded for the zone, is tried before the configured list.
 *
 * \param zone           Zone whose masters are tried.
 * \param callback       Action to perform with each candidate master.
 * \param callback_data  Opaque pointer passed unchanged to the callback.
 *
 * \retval KNOT_EOK        The callback succeeded for at least one master.
 * \retval KNOT_EINVAL     \a zone is NULL.
 * \retval KNOT_ENOMASTER  The callback did not succeed for any master.
 */
int zone_master_try(zone_t *zone, zone_master_cb callback, void *callback_data);
/*! \brief Synchronize zone file with journal. */
int zone_flush_journal(zone_t *zone);
......
......@@ -65,6 +65,7 @@ static const struct error errors[] = {
{ KNOT_EZONENOENT, "zone file not found" },
{ KNOT_ENOZONE, "no such zone found" },
{ KNOT_ENONODE, "no such node in zone found" },
{ KNOT_ENOMASTER, "no active master" },
{ KNOT_EDNAMEPTR, "domain name pointer larger than allowed" },
{ KNOT_EPAYLOAD, "invalid EDNS payload size" },
{ KNOT_EPREREQ, "UPDATE prerequisity not met" },
......
......@@ -69,6 +69,7 @@ enum knot_error {
KNOT_EZONENOENT,
KNOT_ENOZONE,
KNOT_ENONODE,
KNOT_ENOMASTER,
KNOT_EDNAMEPTR,
KNOT_EPAYLOAD,
KNOT_EPREREQ,
......
#!/usr/bin/env python3

"""
Multi-master failover tests.

One slave is linked to two masters (master2 is itself fed from master1).
The scenario stops and restarts the servers in turn and waits for the slave
to pick up the expected zone serial.
"""

from dnstest.test import Test

t = Test()

# testing zone
zone = t.zone_rnd(1, dnssec=False, records=1)[0]
zone.update_soa(serial=1, refresh=600, retry=600, expire=3600)

# +---------+       +---------+
# | master1 <-------+ master2 |
# +----^----+       +----^----+
#      |                 |
#      |   +---------+   |
#      +---+  slave  +---+
#          +---------+

master1 = t.server("knot")
master2 = t.server("bind")
slave = t.server("knot")

# flush zones immediately
# (set on each server in the list, not only on the slave)
for server in [master1, master2, slave]:
    server.zonefile_sync = "0"

t.link([zone], master1, master2)
t.link([zone], master1, slave)
t.link([zone], master2, slave)

t.start()

# zone bootstrap
for server in [master1, master2, slave]:
    server.zone_wait(zone)

# transfer with fully working topology
master1.zones[zone.name].zfile.update_soa(serial=10)
master1.reload()
for server in [master1, master2, slave]:
    server.zone_wait(zone, serial=9)

# stop slave, update masters
slave.stop()
master1.zones[zone.name].zfile.update_soa(serial=20)
master1.reload()
for server in [master1, master2]:
    server.zone_wait(zone, serial=19)

# failover to second master
master1.stop()
slave.start()
slave.zone_wait(zone, serial=19)
master1.start()

# stop slave, update masters
slave.stop()
master1.zones[zone.name].zfile.update_soa(serial=30)
master1.reload()
for server in [master1, master2]:
    server.zone_wait(zone, serial=29)

# failover after notify
master1.stop()
master2.stop()
slave.start()
slave.zone_wait(zone, serial=19)
master2.start()
slave.zone_wait(zone, serial=29)

t.end()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment