Commit 7d924855 authored by Daniel Salzman's avatar Daniel Salzman

Merge branch 'journal_fast' into 'master'

journal: added journal-fast option by LMDB flags for many-zones slaves to speed up axfr

See merge request !688
parents 3ddf426c 0e1f388e
......@@ -652,6 +652,7 @@ template:
\- id: STR
timer\-db: STR
journal\-db: STR
journal\-db\-mode: robust | asynchronous
max\-journal\-db\-size: SIZE
global\-module: STR/STR ...
# All zone options (excluding \(aqtemplate\(aq item)
......@@ -688,6 +689,30 @@ This option is only available in the \fIdefault\fP template.
.UNINDENT
.sp
\fIDefault:\fP \fI\%storage\fP/journal
.SS journal\-db\-mode
.sp
Specifies journal LMDB backend configuration, which influences performance
and durability.
.sp
Possible values:
.INDENT 0.0
.IP \(bu 2
\fBrobust\fP – The journal DB disk synchronization ensures DB durability but is
generally slower
.IP \(bu 2
\fBasynchronous\fP – The journal DB disk synchronization is optimized for
better performance at the expense of lower DB durability; this mode is
recommended only on slave nodes with many zones
.UNINDENT
.sp
\fBNOTE:\fP
.INDENT 0.0
.INDENT 3.5
This option is only available in the \fIdefault\fP template.
.UNINDENT
.UNINDENT
.sp
\fIDefault:\fP robust
.SS max\-journal\-db\-size
.sp
Hard limit for the common journal DB. There is no cleanup logic in journal
......
......@@ -774,6 +774,7 @@ if a zone doesn't have another template specified.
- id: STR
timer-db: STR
journal-db: STR
journal-db-mode: robust | asynchronous
max-journal-db-size: SIZE
global-module: STR/STR ...
# All zone options (excluding 'template' item)
......@@ -811,6 +812,27 @@ as a relative path to the *default* template :ref:`storage<zone_storage>`.
*Default:* :ref:`storage<zone_storage>`/journal
.. _template_journal-db-mode:
journal-db-mode
---------------
Specifies journal LMDB backend configuration, which influences performance
and durability.
Possible values:
- ``robust`` – The journal DB disk synchronization ensures DB durability but is
generally slower
- ``asynchronous`` – The journal DB disk synchronization is optimized for
better performance at the expense of lower DB durability; this mode is
recommended only on slave nodes with many zones
.. NOTE::
This option is only available in the *default* template.
*Default:* robust
.. _template_max-journal-db-size:
max-journal-db-size
......
......@@ -107,6 +107,12 @@ static const knot_lookup_t log_severities[] = {
{ 0, NULL }
};
/*!
 * \brief Accepted textual values of the journal-db-mode option.
 *
 * Maps each journal_mode_t constant to the keyword used in the configuration.
 * The table ends with a { 0, NULL } entry.
 */
static const knot_lookup_t journal_modes[] = {
{ JOURNAL_MODE_ROBUST, "robust" },
{ JOURNAL_MODE_ASYNC, "asynchronous" },
{ 0, NULL }
};
static const yp_item_t desc_server[] = {
{ C_IDENT, YP_TSTR, YP_VNONE },
{ C_VERSION, YP_TSTR, YP_VNONE },
......@@ -268,6 +274,8 @@ static const yp_item_t desc_template[] = {
{ C_GLOBAL_MODULE, YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt },
YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } },
{ C_JOURNAL_DB, YP_TSTR, YP_VSTR = { "journal" }, CONF_IO_FRLD_SRV },
{ C_JOURNAL_DB_MODE, YP_TOPT, YP_VOPT = { journal_modes, JOURNAL_MODE_ROBUST },
CONF_IO_FRLD_SRV },
{ C_MAX_JOURNAL_DB_SIZE, YP_TINT, YP_VINT = { JOURNAL_MIN_FSLIMIT, VIRT_MEM_LIMIT(TERA(100)),
VIRT_MEM_LIMIT(GIGA(20)), YP_SSIZE },
CONF_IO_FRLD_SRV },
......
......@@ -55,6 +55,7 @@
#define C_IXFR_DIFF "\x15""ixfr-from-differences"
#define C_JOURNAL "\x07""journal" /* obsolete, old journal compat */
#define C_JOURNAL_DB "\x0A""journal-db"
#define C_JOURNAL_DB_MODE "\x0F""journal-db-mode"
#define C_KASP_DB "\x07""kasp-db"
#define C_KEY "\x03""key"
#define C_KEYSTORE "\x08""keystore"
......
......@@ -507,6 +507,7 @@ int check_template(
CHECK_DFLT(C_TIMER_DB, "timer database");
CHECK_DFLT(C_GLOBAL_MODULE, "global module");
CHECK_DFLT(C_JOURNAL_DB, "journal database path");
CHECK_DFLT(C_JOURNAL_DB_MODE, "journal database mode");
CHECK_DFLT(C_MAX_JOURNAL_DB_SIZE, "journal database maximum size");
return KNOT_EOK;
......
......@@ -1462,6 +1462,8 @@ static int open_journal_db_unsafe(journal_db_t **db)
opts.mapsize = (*db)->fslimit;
opts.maxdbs = 1;
opts.maxreaders = JOURNAL_MAX_READERS;
opts.flags.env = ((*db)->mode == JOURNAL_MODE_ASYNC ?
KNOT_DB_LMDB_WRITEMAP | KNOT_DB_LMDB_MAPASYNC : 0);
int ret = (*db)->db_api->init(&(*db)->db, NULL, &opts);
if (ret != KNOT_EOK) {
......@@ -1525,7 +1527,8 @@ void journal_close(journal_t *j)
j->zone = NULL;
}
int journal_db_init(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit)
int journal_db_init(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit,
journal_mode_t mode)
{
if (*db != NULL) {
return KNOT_EOK;
......@@ -1538,7 +1541,8 @@ int journal_db_init(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fs
.db = NULL,
.db_api = knot_db_lmdb_api(),
.path = strdup(lmdb_dir_path),
.fslimit = ((lmdb_fslimit < JOURNAL_MIN_FSLIMIT) ? JOURNAL_MIN_FSLIMIT : lmdb_fslimit)
.fslimit = ((lmdb_fslimit < JOURNAL_MIN_FSLIMIT) ? JOURNAL_MIN_FSLIMIT : lmdb_fslimit),
.mode = mode,
};
memcpy(*db, &dbinit, sizeof(journal_db_t));
pthread_mutex_init(&(*db)->db_mutex, NULL);
......@@ -1789,7 +1793,7 @@ static void _jch_print(const knot_dname_t *zname, int warn_level, const char *fo
jch_warn("failed transaction: %s (%s)", (comment), knot_strerror(txn->ret)); \
if (fatal) return txn->ret; } } while (0)
int journal_check(journal_t *j, journal_check_level warn_level)
int journal_check(journal_t *j, journal_check_level_t warn_level)
{
int ret, allok = 1;
changeset_t *ch = NULL;
......
......@@ -26,11 +26,17 @@
/*! \brief Minimum journal size. */
#define JOURNAL_MIN_FSLIMIT (1 * 1024 * 1024)
/*!
 * \brief Journal DB disk synchronization mode.
 *
 * The mode is stored in journal_db_t and selects the LMDB environment flags
 * used when the journal DB is opened.
 */
typedef enum {
JOURNAL_MODE_ROBUST = 0, // Robust journal DB disk synchronization (no extra LMDB environment flags).
JOURNAL_MODE_ASYNC = 1, // Asynchronous journal DB disk synchronization (LMDB WRITEMAP | MAPASYNC flags).
} journal_mode_t;
/*!
 * \brief Shared journal DB descriptor, filled in by journal_db_init().
 */
typedef struct {
knot_db_t *db; // DB handle; NULL until the DB is actually opened.
const knot_db_api_t *db_api; // DB backend API (set to the LMDB API on init).
char *path; // Private copy of the path to the directory with the DB.
size_t fslimit; // Maximum DB size, at least JOURNAL_MIN_FSLIMIT; used as the LMDB map size.
journal_mode_t mode; // Disk synchronization mode; selects the LMDB environment flags on open.
pthread_mutex_t db_mutex; // please delete this once you move DB opening from journal_open to db_init
} journal_db_t;
......@@ -44,7 +50,7 @@ typedef enum {
JOURNAL_CHECK_WARN = 1, // Log journal inconsistencies.
JOURNAL_CHECK_INFO = 2, // Log journal state.
JOURNAL_CHECK_STDERR = 3, // Log everything and redirect to stderr.
} journal_check_level;
} journal_check_level_t;
/*!
* \brief Initialize shared journal DB file. The DB will be open on first use.
......@@ -52,10 +58,12 @@ typedef enum {
* \param db Database to be initialized. Must be (*db == NULL) before!
* \param lmdb_dir_path Path to the directory with DB
* \param lmdb_fslimit Maximum size of DB data file
* \param mode Journal DB synchronization mode.
*
* \return KNOT_E*
*/
int journal_db_init(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit);
int journal_db_init(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit,
journal_mode_t mode);
/*!
* \brief Close shared journal DB file.
......@@ -204,6 +212,6 @@ void journal_metadata_info(journal_t *j, bool *is_empty, uint32_t *serial_from,
*
* \return KNOT_E*
*/
int journal_check(journal_t *j, journal_check_level warn_level);
int journal_check(journal_t *j, journal_check_level_t warn_level);
/*! @} */
......@@ -381,8 +381,9 @@ int server_init(server_t *server, int bg_workers)
char *journal_dir = conf_journalfile(conf());
conf_val_t journal_size = conf_default_get(conf(), C_MAX_JOURNAL_DB_SIZE);
conf_val_t journal_mode = conf_default_get(conf(), C_JOURNAL_DB_MODE);
int ret = journal_db_init(&server->journal_db, journal_dir,
conf_int(&journal_size));
conf_int(&journal_size), conf_opt(&journal_mode));
free(journal_dir);
if (ret != KNOT_EOK) {
worker_pool_destroy(server->workers);
......@@ -678,8 +679,10 @@ static int reconfigure_journal_db(conf_t *conf, server_t *server)
{
char *journal_dir = conf_journalfile(conf);
conf_val_t journal_size = conf_default_get(conf, C_MAX_JOURNAL_DB_SIZE);
conf_val_t journal_mode = conf_default_get(conf, C_JOURNAL_DB_MODE);
bool changed_path = (strcmp(journal_dir, server->journal_db->path) != 0);
bool changed_size = (conf_int(&journal_size) != server->journal_db->fslimit);
bool changed_mode = (conf_opt(&journal_mode) != server->journal_db->mode);
int ret = KNOT_EOK;
if (server->journal_db->db != NULL) {
......@@ -689,9 +692,13 @@ static int reconfigure_journal_db(conf_t *conf, server_t *server)
if (changed_size) {
log_warning("journal, ignored reconfiguration of journal DB max size (already open)");
}
} else if (changed_path || changed_size) {
if (changed_mode) {
log_warning("journal, ignored reconfiguration of journal DB mode (already open)");
}
} else if (changed_path || changed_size || changed_mode) {
journal_db_t *newjdb = NULL;
ret = journal_db_init(&newjdb, journal_dir, conf_int(&journal_size));
ret = journal_db_init(&newjdb, journal_dir, conf_int(&journal_size),
conf_opt(&journal_mode));
if (ret == KNOT_EOK) {
journal_db_close(&server->journal_db);
server->journal_db = newjdb;
......
......@@ -36,6 +36,8 @@ _public_ const unsigned KNOT_DB_LMDB_NOTLS = MDB_NOTLS;
_public_ const unsigned KNOT_DB_LMDB_RDONLY = MDB_RDONLY;
_public_ const unsigned KNOT_DB_LMDB_INTEGERKEY = MDB_INTEGERKEY;
_public_ const unsigned KNOT_DB_LMDB_NOSYNC = MDB_NOSYNC;
_public_ const unsigned KNOT_DB_LMDB_WRITEMAP = MDB_WRITEMAP;
_public_ const unsigned KNOT_DB_LMDB_MAPASYNC = MDB_MAPASYNC;
struct lmdb_env
{
......
......@@ -26,6 +26,8 @@ extern const unsigned KNOT_DB_LMDB_NOTLS;
extern const unsigned KNOT_DB_LMDB_RDONLY;
extern const unsigned KNOT_DB_LMDB_INTEGERKEY;
extern const unsigned KNOT_DB_LMDB_NOSYNC;
extern const unsigned KNOT_DB_LMDB_WRITEMAP;
extern const unsigned KNOT_DB_LMDB_MAPASYNC;
/* Native options. */
struct knot_db_lmdb_opts {
......
......@@ -63,7 +63,7 @@ int print_journal(char *path, knot_dname_t *name, uint32_t limit, bool color)
journal_t *j = journal_new();
int ret;
ret = journal_db_init(&jdb, path, 1);
ret = journal_db_init(&jdb, path, 1, JOURNAL_MODE_ROBUST);
if (ret != KNOT_EOK) {
journal_free(&j);
free(buff);
......@@ -151,7 +151,7 @@ pj_finally:
int list_zones(char *path)
{
journal_db_t *jdb = NULL;
int ret = journal_db_init(&jdb, path, 1);
int ret = journal_db_init(&jdb, path, 1, JOURNAL_MODE_ROBUST);
if (ret != KNOT_EOK) {
return ret;
}
......
......@@ -246,7 +246,7 @@ static void test_journal_db(void)
{
int ret, ret2 = KNOT_EOK;
ret = journal_db_init(&db, test_dir_name, 2 * 1024 * 1024);
ret = journal_db_init(&db, test_dir_name, 2 * 1024 * 1024, JOURNAL_MODE_ASYNC);
is_int(KNOT_EOK, ret, "journal: init db (%d)", ret);
ret = open_journal_db(&db);
......@@ -255,12 +255,12 @@ static void test_journal_db(void)
journal_db_close(&db);
ok(db == NULL, "journal: close and destroy db");
ret = journal_db_init(&db, test_dir_name, 4 * 1024 * 1024);
ret = journal_db_init(&db, test_dir_name, 4 * 1024 * 1024, JOURNAL_MODE_ASYNC);
if (ret == KNOT_EOK) ret2 = open_journal_db(&db);
ok(ret == KNOT_EOK && ret2 == KNOT_EOK, "journal: open with bigger mapsize (%d, %d)", ret, ret2);
journal_db_close(&db);
ret = journal_db_init(&db, test_dir_name, 1024 * 1024);
ret = journal_db_init(&db, test_dir_name, 1024 * 1024, JOURNAL_MODE_ASYNC);
if (ret == KNOT_EOK) ret2 = open_journal_db(&db);
ok(ret == KNOT_EOK && ret2 == KNOT_EOK, "journal: open with smaller mapsize (%d, %d)", ret, ret2);
journal_db_close(&db);
......@@ -276,7 +276,7 @@ static void test_store_load(void)
j = journal_new();
ok(j != NULL, "journal: new");
ret = journal_db_init(&db, test_dir_name, 1024 * 1024);
ret = journal_db_init(&db, test_dir_name, 1024 * 1024, JOURNAL_MODE_ASYNC);
if (ret == KNOT_EOK) ret2 = journal_open(j, &db, apex);
is_int(KNOT_EOK, ret, "journal: open (%d, %d)", ret, ret2);
......@@ -658,7 +658,7 @@ static void test_stress_base(journal_t *j, size_t update_size, size_t file_size)
journal_close(j);
journal_db_close(&db);
db = NULL;
ret = journal_db_init(&db, test_dir_name, file_size);
ret = journal_db_init(&db, test_dir_name, file_size, JOURNAL_MODE_ASYNC);
assert(ret == KNOT_EOK);
ret = open_journal_db(&db);
assert(ret == KNOT_EOK);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment