Commit 1c4c5908 authored by Daniel Salzman

yparser: add support for backslashed characters in item value

A backslash character preceding a quotation mark is removed during parsing.

This feature is useful for the GeoIP module.
parent 6e5d220f
......@@ -27,24 +27,25 @@
static const char _yparser_actions[] = {
0, 1, 0, 1, 1, 1, 2, 1,
3, 1, 4, 1, 5, 1, 7, 1,
8, 1, 9, 1, 10, 1, 11, 1,
14, 2, 1, 0, 2, 1, 2, 2,
2, 0, 2, 3, 4, 2, 5, 0,
2, 6, 7, 2, 11, 12, 2, 13,
11, 3, 1, 2, 0, 3, 1, 6,
7, 3, 1, 11, 12, 3, 1, 13,
11, 3, 2, 6, 7, 3, 2, 11,
12, 3, 2, 13, 11, 4, 1, 2,
6, 7, 4, 1, 2, 11, 12, 4,
1, 2, 13, 11
3, 1, 4, 1, 6, 1, 8, 1,
9, 1, 10, 1, 11, 1, 12, 1,
15, 2, 1, 0, 2, 1, 2, 2,
2, 0, 2, 3, 4, 2, 5, 4,
2, 6, 0, 2, 7, 8, 2, 12,
13, 2, 14, 12, 3, 1, 2, 0,
3, 1, 7, 8, 3, 1, 12, 13,
3, 1, 14, 12, 3, 2, 7, 8,
3, 2, 12, 13, 3, 2, 14, 12,
4, 1, 2, 7, 8, 4, 1, 2,
12, 13, 4, 1, 2, 14, 12
};
static const unsigned char _yparser_key_offsets[] = {
0, 0, 13, 15, 16, 25, 36, 38,
39, 49, 60, 71, 73, 76, 88, 91,
94, 98, 100, 110, 118, 121, 124, 127,
130, 141, 154, 167, 180
39, 49, 60, 71, 73, 76, 89, 93,
96, 100, 102, 105, 115, 124, 127, 130,
133, 137, 140, 143, 146, 157, 170, 183,
196
};
static const char _yparser_trans_keys[] = {
......@@ -58,13 +59,15 @@ static const char _yparser_trans_keys[] = {
45, 90, 94, 126, 32, 58, 92, 45,
46, 48, 57, 65, 90, 97, 122, 32,
58, 10, 13, 32, 10, 13, 32, 34,
35, 91, 33, 43, 45, 92, 94, 126,
34, 32, 126, 10, 13, 32, 10, 13,
32, 35, 10, 13, 32, 33, 34, 92,
36, 43, 45, 90, 94, 126, 32, 33,
44, 93, 36, 90, 92, 126, 32, 44,
93, 10, 13, 32, 34, 32, 126, 32,
44, 93, 32, 58, 92, 45, 46, 48,
35, 91, 92, 33, 43, 45, 90, 94,
126, 34, 92, 32, 126, 10, 13, 32,
10, 13, 32, 35, 10, 13, 34, 32,
126, 32, 33, 34, 92, 36, 43, 45,
90, 94, 126, 32, 33, 44, 92, 93,
36, 90, 94, 126, 32, 44, 93, 10,
13, 32, 34, 32, 126, 34, 92, 32,
126, 32, 44, 93, 34, 32, 126, 34,
32, 126, 32, 58, 92, 45, 46, 48,
57, 65, 90, 97, 122, 10, 13, 32,
35, 45, 46, 92, 48, 57, 65, 90,
97, 122, 10, 13, 32, 35, 45, 46,
......@@ -77,23 +80,26 @@ static const char _yparser_trans_keys[] = {
static const char _yparser_single_lengths[] = {
0, 7, 2, 1, 3, 3, 2, 1,
4, 5, 3, 2, 3, 6, 1, 3,
4, 2, 4, 4, 3, 3, 1, 3,
3, 7, 7, 7, 7
4, 5, 3, 2, 3, 7, 2, 3,
4, 2, 1, 4, 5, 3, 3, 1,
2, 3, 1, 1, 3, 7, 7, 7,
7
};
static const char _yparser_range_lengths[] = {
0, 3, 0, 0, 3, 4, 0, 0,
3, 3, 4, 0, 0, 3, 1, 0,
0, 0, 3, 2, 0, 0, 1, 0,
4, 3, 3, 3, 3
0, 0, 1, 3, 2, 0, 0, 1,
1, 0, 1, 1, 4, 3, 3, 3,
3
};
static const unsigned char _yparser_index_offsets[] = {
0, 0, 11, 14, 16, 23, 31, 34,
36, 44, 53, 61, 64, 68, 78, 81,
85, 90, 93, 101, 108, 112, 116, 119,
123, 131, 142, 153, 164
36, 44, 53, 61, 64, 68, 79, 83,
87, 92, 95, 98, 106, 114, 118, 122,
125, 129, 133, 136, 139, 147, 158, 169,
180
};
static const char _yparser_indicies[] = {
......@@ -101,62 +107,67 @@ static const char _yparser_indicies[] = {
6, 6, 0, 1, 2, 4, 7, 0,
7, 8, 8, 8, 8, 8, 0, 9,
11, 10, 10, 10, 10, 10, 0, 12,
13, 0, 14, 0, 14, 15, 16, 15,
15, 15, 15, 0, 17, 18, 19, 20,
20, 20, 20, 20, 0, 21, 23, 22,
22, 22, 22, 22, 0, 24, 25, 0,
26, 27, 28, 0, 26, 27, 28, 16,
29, 30, 15, 15, 15, 0, 32, 31,
0, 17, 18, 19, 0, 26, 27, 33,
29, 0, 26, 27, 29, 30, 34, 35,
34, 34, 34, 34, 0, 36, 37, 38,
39, 37, 37, 0, 40, 30, 41, 0,
26, 27, 33, 0, 43, 42, 0, 36,
38, 39, 0, 44, 46, 45, 45, 45,
45, 45, 0, 1, 2, 3, 4, 5,
47, 47, 47, 47, 47, 0, 48, 49,
50, 51, 52, 53, 53, 53, 53, 53,
0, 54, 55, 56, 57, 58, 59, 59,
59, 59, 59, 0, 60, 61, 62, 63,
64, 65, 65, 65, 65, 65, 0, 0
13, 0, 14, 0, 14, 15, 16, 17,
15, 15, 15, 0, 18, 19, 20, 21,
22, 21, 21, 21, 0, 23, 25, 24,
24, 24, 24, 24, 0, 26, 27, 0,
28, 29, 30, 0, 28, 29, 30, 16,
31, 32, 17, 15, 15, 15, 0, 34,
35, 33, 0, 18, 19, 20, 0, 28,
29, 36, 31, 0, 28, 29, 31, 37,
33, 0, 32, 38, 39, 40, 38, 38,
38, 0, 41, 42, 43, 44, 45, 42,
42, 0, 46, 32, 47, 0, 28, 29,
36, 0, 48, 42, 0, 50, 51, 49,
0, 41, 43, 45, 0, 52, 49, 0,
53, 21, 0, 54, 56, 55, 55, 55,
55, 55, 0, 1, 2, 3, 4, 5,
57, 57, 57, 57, 57, 0, 58, 59,
60, 61, 62, 63, 63, 63, 63, 63,
0, 64, 65, 66, 67, 68, 69, 69,
69, 69, 69, 0, 70, 71, 72, 73,
74, 75, 75, 75, 75, 75, 0, 0
};
static const char _yparser_trans_targs[] = {
0, 26, 27, 1, 2, 3, 24, 4,
0, 30, 31, 1, 2, 3, 28, 4,
5, 6, 5, 7, 6, 7, 8, 9,
14, 28, 25, 16, 9, 11, 10, 12,
11, 12, 28, 25, 13, 17, 18, 14,
15, 16, 19, 22, 20, 19, 18, 21,
20, 21, 22, 23, 11, 24, 12, 10,
26, 27, 1, 2, 3, 10, 26, 27,
1, 2, 3, 10, 26, 27, 1, 2,
3, 10
14, 27, 32, 29, 16, 9, 27, 11,
10, 12, 11, 12, 32, 29, 13, 17,
19, 14, 15, 18, 16, 14, 20, 24,
23, 21, 20, 19, 23, 22, 21, 22,
20, 24, 25, 26, 24, 9, 11, 28,
12, 10, 30, 31, 1, 2, 3, 10,
30, 31, 1, 2, 3, 10, 30, 31,
1, 2, 3, 10
};
static const char _yparser_trans_actions[] = {
23, 1, 0, 43, 0, 46, 40, 21,
40, 19, 13, 19, 0, 0, 0, 34,
7, 37, 11, 11, 9, 15, 13, 15,
0, 0, 1, 0, 0, 0, 0, 9,
0, 0, 34, 7, 11, 9, 11, 11,
0, 0, 9, 0, 17, 13, 17, 40,
49, 28, 82, 28, 87, 77, 31, 5,
69, 5, 73, 65, 25, 3, 57, 3,
61, 53
23, 1, 0, 46, 0, 49, 43, 21,
43, 19, 13, 19, 0, 0, 0, 34,
7, 34, 40, 11, 11, 9, 9, 15,
13, 15, 0, 0, 1, 0, 0, 0,
0, 9, 0, 9, 0, 37, 34, 7,
34, 11, 9, 11, 9, 11, 0, 0,
37, 9, 0, 9, 37, 37, 17, 13,
17, 43, 52, 28, 85, 28, 90, 80,
31, 5, 72, 5, 76, 68, 25, 3,
60, 3, 64, 56
};
static const char _yparser_eof_actions[] = {
0, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23,
23, 0, 28, 5, 3
23, 23, 23, 23, 23, 0, 28, 5,
3
};
int _yp_start_state = 25;
int _yp_start_state = 29;
int _yp_parse(
yp_parser_t *parser)
......@@ -293,6 +304,11 @@ _match:
}
break;
case 5:
{
parser->data_len--;
}
break;
case 6:
{
// Return if a value parsed.
parser->data[parser->data_len] = '\0';
......@@ -301,7 +317,7 @@ _match:
{p++; goto _out; }
}
break;
case 6:
case 7:
{
if (indent > 0 && parser->indent > 0 &&
indent != parser->indent) {
......@@ -313,7 +329,7 @@ _match:
parser->event = YP_ENULL;
}
break;
case 7:
case 8:
{
if (parser->key_len >= sizeof(parser->key) - 1) {
return KNOT_ESPACE;
......@@ -321,7 +337,7 @@ _match:
parser->key[parser->key_len++] = (*p);
}
break;
case 8:
case 9:
{
parser->key[parser->key_len] = '\0';
parser->indent = 0;
......@@ -329,14 +345,14 @@ _match:
parser->event = YP_EKEY0;
}
break;
case 9:
case 10:
{
parser->key[parser->key_len] = '\0';
parser->indent = indent;
parser->event = YP_EKEY1;
}
break;
case 10:
case 11:
{
parser->key[parser->key_len] = '\0';
parser->indent = indent;
......@@ -344,17 +360,17 @@ _match:
parser->event = YP_EID;
}
break;
case 11:
case 12:
{
indent++;
}
break;
case 12:
case 13:
{
id_pos++;
}
break;
case 13:
case 14:
{
if (id_pos > 0 && parser->id_pos > 0 &&
id_pos != parser->id_pos) {
......@@ -363,7 +379,7 @@ _match:
parser->indent = 0;
}
break;
case 14:
case 15:
{
switch ((*p)) {
case '\t':
......@@ -401,7 +417,7 @@ _again:
id_pos = 0;
}
break;
case 14:
case 15:
{
switch ((*p)) {
case '\t':
......
......@@ -66,6 +66,9 @@
}
parser->data[parser->data_len++] = fc;
}
action _item_unbackslash {
parser->data_len--;
}
action _item_data_exit {
// Return if a value parsed.
parser->data[parser->data_len] = '\0';
......@@ -73,16 +76,20 @@
found = true;
fbreak;
}
backslash_char = '\\';
quote_char = '\"';
list_char = [\[,\]];
data_char =
(ascii - space - cntrl - quote_char - sep_char -
comment_char - list_char
( (ascii - space - cntrl - quote_char - sep_char -
comment_char - list_char - backslash_char)
| (backslash_char . (quote_char) >_item_unbackslash)
| (backslash_char . (32..126 - quote_char))
) $_item_data;
data_str_char =
(data_char | sep_char | comment_char | list_char
) $_item_data;
data_str = (quote_char . data_str_char* . quote_char);
( (data_char)
| (sep_char | comment_char | list_char) $_item_data
);
data_str = (quote_char . data_str_char* <: quote_char);
item_data = (data_char+ | data_str) >_item_data_init %_item_data_exit;
item_data_plus = item_data . ((sep? . ',' . sep?) . item_data)*;
item_data_list = '\[' . sep? . item_data_plus . sep? . '\]';
......
......@@ -42,12 +42,14 @@ const char *syntax_ok =
" f: [ f] # comment\n"
" f: [f ]\n"
" f: [ f ]\n"
" f: [ \"f\" ]\n"
"\n"
"c: [a,b]\n"
"c: [a, b]\n"
"c: [a ,b]\n"
"c: [a , b]\n"
"c: [ a , b ]\n"
"c: [ \"a\" , \"b\" ]\n"
"\n"
"- d: d\n"
"- d : d # comment\n"
......@@ -84,22 +86,21 @@ const char *dname_ok =
"dom-ain:\n"
"\\070-\\071.\\072.:";
int main(int argc, char *argv[])
{
plan_lazy();
int ret;
size_t line;
yp_parser_t yparser;
yp_parser_t *yp = &yparser;
yp_init(yp);
/*
 * Input for test_quotes(): one "g" item per line, exercising quoted values
 * and backslash sequences. Going by the expected strings in test_quotes(),
 * a backslash directly before a quotation mark is dropped from the parsed
 * value, while a backslash before any other character is kept as-is.
 */
const char *quotes_ok =
"g: \"\"\n"
"g: a\\ b\n"
"g: \"\\# 1 00\"\n"
"g: \"\\\"\\\"\"\n"
"g: \" a \\\" b \\\" \\\"c\\\" \"\n"
"g: \"\\@ \\[ \\# \\, \\]\"\n";
static void test_syntax_ok(yp_parser_t *yp)
{
// OK input.
ret = yp_set_input_string(yp, syntax_ok, strlen(syntax_ok));
int ret = yp_set_input_string(yp, syntax_ok, strlen(syntax_ok));
is_int(KNOT_EOK, ret, "set input string");
line = 3;
size_t line = 3;
for (int i = 0; i < 3; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. key0", i);
......@@ -108,7 +109,7 @@ int main(int argc, char *argv[])
yp->line_count == line + i, "compare %i. key0", i);
}
line = 7;
line += 4;
for (int i = 0; i < 6; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. key0 with value", i);
......@@ -118,8 +119,8 @@ int main(int argc, char *argv[])
"compare %i. key0 with value", i);
}
line = 14;
for (int i = 0; i < 6; i++) {
line += 7;
for (int i = 0; i < 7; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. key1 with value", i);
ok(yp->key_len == 1 && yp->key[0] == 'f' &&
......@@ -128,8 +129,8 @@ int main(int argc, char *argv[])
"compare %i. key1 with value", i);
}
line = 21;
for (int i = 0; i < 5; i++) {
line += 8;
for (int i = 0; i < 6; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. key0 with first value", i);
ok(yp->key_len == 1 && yp->key[0] == 'c' &&
......@@ -145,7 +146,7 @@ int main(int argc, char *argv[])
"compare %i. key0 with second value", i);
}
line = 27;
line += 7;
for (int i = 0; i < 2; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. id", i);
......@@ -155,7 +156,7 @@ int main(int argc, char *argv[])
"compare %i. id", i);
}
line = 30;
line += 3;
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0 with quoted value");
ok(yp->key_len == 1 && yp->key[0] == 'e' && yp->data_len == 10 &&
......@@ -163,7 +164,7 @@ int main(int argc, char *argv[])
yp->event == YP_EKEY0 && yp->line_count == line,
"compare key0 with quoted value");
line = 32;
line += 2;
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0");
ok(yp->key_len == 4 && strcmp(yp->key, "zone") == 0 &&
......@@ -171,7 +172,7 @@ int main(int argc, char *argv[])
yp->event == YP_EKEY0 && yp->line_count == line,
"compare key0 value");
line = 35;
line += 3;
for (int i = 0; i < 2; i++) {
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse %i. id", i);
......@@ -187,7 +188,7 @@ int main(int argc, char *argv[])
"compare key1");
}
line = 39;
line += 4;
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0");
ok(yp->key_len == 5 && strcmp(yp->key, "zone2") == 0 &&
......@@ -203,37 +204,24 @@ int main(int argc, char *argv[])
ret = yp_parse(yp);
is_int(KNOT_EOF, ret, "parse EOF");
}
// Error input 1.
ret = yp_set_input_string(yp, syntax_error1, strlen(syntax_error1));
is_int(KNOT_EOK, ret, "set error input string 1");
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0");
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key1");
ret = yp_parse(yp);
is_int(KNOT_YP_EINVAL_INDENT, ret, "parse key1 - invalid indentation");
// Error input 2.
ret = yp_set_input_string(yp, syntax_error2, strlen(syntax_error2));
is_int(KNOT_EOK, ret, "set error input string 2");
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0");
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key1");
ret = yp_parse(yp);
is_int(KNOT_YP_EINVAL_INDENT, ret, "parse key1 - invalid indentation");
static void test_syntax_error(yp_parser_t *yp, const char *input)
{
static int count = 1;
// Error input 3.
ret = yp_set_input_string(yp, syntax_error3, strlen(syntax_error3));
is_int(KNOT_EOK, ret, "set error input string 3");
int ret = yp_set_input_string(yp, input, strlen(input));
is_int(KNOT_EOK, ret, "set error input string %i", count++);
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key0");
ret = yp_parse(yp);
is_int(KNOT_EOK, ret, "parse key1");
ret = yp_parse(yp);
is_int(KNOT_YP_EINVAL_INDENT, ret, "parse key1 - invalid indentation");
}
static void test_dname(yp_parser_t *yp)
{
#define CHECK_DNAME(str) \
ret = yp_parse(yp); \
is_int(KNOT_EOK, ret, "parse dname " str); \
......@@ -241,14 +229,52 @@ int main(int argc, char *argv[])
yp->event == YP_EKEY0 && yp->line_count == line++, "compare " str);
// Dname key value.
ret = yp_set_input_string(yp, dname_ok, strlen(dname_ok));
int ret = yp_set_input_string(yp, dname_ok, strlen(dname_ok));
is_int(KNOT_EOK, ret, "set input string");
line = 1;
size_t line = 1;
CHECK_DNAME(".");
CHECK_DNAME("dom-ain");
CHECK_DNAME("\\070-\\071.\\072.");
yp_deinit(yp);
}
/*
 * Checks parsing of quoted values and backslash sequences.
 *
 * Feeds quotes_ok to the parser and, for every input line, verifies that
 * yp_parse() succeeds, that the event is YP_EKEY0 with the key "g", that the
 * parsed data equals the expected string, and that the parser's line counter
 * matches the expected line.
 */
static void test_quotes(yp_parser_t *yp)
{
	/*
	 * The expected string is passed as a "%s" argument instead of being
	 * pasted into the format string, so an expectation containing '%' can
	 * never be interpreted as a conversion specification. The do/while (0)
	 * wrapper makes the macro expand to a single statement.
	 */
#define CHECK_QUOTE(str) \
	do { \
		ret = yp_parse(yp); \
		is_int(KNOT_EOK, ret, "parse quoted %s", str); \
		ok(yp->key_len == 1 && yp->key[0] == 'g' && \
		   yp->data_len == strlen(str) && strcmp(yp->data, str) == 0 && \
		   yp->event == YP_EKEY0 && yp->line_count == line++, \
		   "compare %s", str); \
	} while (0)

	int ret = yp_set_input_string(yp, quotes_ok, strlen(quotes_ok));
	is_int(KNOT_EOK, ret, "set input string");

	/* Line number of the next item expected from the parser. */
	size_t line = 1;

	CHECK_QUOTE("");
	CHECK_QUOTE("a\\ b");
	CHECK_QUOTE("\\# 1 00");
	CHECK_QUOTE("\"\"");
	CHECK_QUOTE(" a \" b \" \"c\" ");
	CHECK_QUOTE("\\@ \\[ \\# \\, \\]");

#undef CHECK_QUOTE
}
/*
 * Test entry point: runs every parser test case against one shared parser
 * instance, which is initialized before the first case and deinitialized
 * after the last one.
 */
int main(int argc, char *argv[])
{
	plan_lazy();

	yp_parser_t parser;
	yp_init(&parser);

	test_syntax_ok(&parser);

	/* Malformed inputs, checked in this order. */
	const char *bad_inputs[] = {
		syntax_error1,
		syntax_error2,
		syntax_error3,
	};
	for (size_t i = 0; i < sizeof(bad_inputs) / sizeof(bad_inputs[0]); i++) {
		test_syntax_error(&parser, bad_inputs[i]);
	}

	test_dname(&parser);
	test_quotes(&parser);

	yp_deinit(&parser);

	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment