Commit ef4a50be authored by Ondřej Zajíček's avatar Ondřej Zajíček

Better packet priority and traffic class handling.

Implements support for IPv6 traffic class, sets higher priority for OSPF
and RIP outgoing packets by default and allows to configure ToS/DS/TClass
IP header field and the local priority of outgoing packets.
parent fad04c75
......@@ -567,6 +567,22 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
<cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all
ethernet interfaces that have address from 192.168.1.0/24.
<tag><label id="dsc-prio">tx class|dscp <m/num/</tag>
This option specifies the value of ToS/DS/Class field in IP
headers of the outgoing protocol packets. This may affect how the
protocol packets are processed by the network relative to the
other network traffic. With <cf/class/ keyword, the value
(0-255) is used for the whole ToS/Class octet (but two bits
reserved for ECN are ignored). With <cf/dscp/ keyword, the
value (0-63) is used just for the DS field in the
octet. Default value is 0xc0 (DSCP 0x30 - CS6).
<tag>tx priority <m/num/</tag>
This option specifies the local packet priority. This may
affect how the protocol packets are processed in the local TX
queues. This option is Linux specific. Default value is 7
(highest priority, privileged traffic).
<tag><label id="dsc-pass">password "<m/password/" [ { id <m/num/; generate from <m/time/; generate to <m/time/; accept from <m/time/; accept to <m/time/; } ]</tag>
Specifies a password that can be used by the protocol. Password option can
be used more times to specify more passwords. If more passwords are
......@@ -2220,6 +2236,11 @@ protocol ospf &lt;name&gt; {
prefix) is propagated. It is possible that some hardware
drivers or platforms do not implement this feature. Default value is no.
<tag>tx class|dscp|priority <m/num/</tag>
These options specify the ToS/DiffServ/Traffic class/Priority
of the outgoing OSPF packets. See <ref id="dsc-prio" name="tx
class"> common option for detailed description.
<tag>ecmp weight <M>num</M></tag>
When ECMP (multipath) routes are allowed, this value specifies
a relative weight used for nexthops going through the iface.
......@@ -2748,13 +2769,26 @@ makes it pretty much obsolete. (It is still usable on very small networks.)
neighbors, that is not configurable. Default: never.
</descrip>
<p>There are two options that can be specified per-interface. First is <cf>metric</cf>, with
default one. Second is <cf>mode multicast|broadcast|quiet|nolisten|version1</cf>, it selects mode for
rip to work in. If nothing is specified, rip runs in multicast mode. <cf>version1</cf> is
currently equivalent to <cf>broadcast</cf>, and it makes RIP talk to a broadcast address even
through multicast mode is possible. <cf>quiet</cf> option means that RIP will not transmit
any periodic messages to this interface and <cf>nolisten</cf> means that RIP will send to this
interface but not listen to it.
<p>There are some options that can be specified per-interface:
<descrip>
<tag>metric <m/num/</tag>
This option specifies the metric of the interface. Valid
<tag>mode multicast|broadcast|quiet|nolisten|version1</tag>
This option selects the mode for RIP to work in. If nothing is
specified, RIP runs in multicast mode. <cf/version1/ is
currently equivalent to <cf/broadcast/, and it makes RIP talk
to a broadcast address even through multicast mode is
possible. <cf/quiet/ option means that RIP will not transmit
any periodic messages to this interface and <cf/nolisten/
means that RIP will send to this interface butnot listen to it.
<tag>tx class|dscp|priority <m/num/</tag>
These options specify the ToS/DiffServ/Traffic class/Priority
of the outgoing RIP packets. See <ref id="dsc-prio" name="tx
class"> common option for detailed description.
</descrip>
<p>The following options generally override behavior specified in RFC. If you use any of these
options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything
......
......@@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a)
return buf+16;
}
/*
* RFC 1883 defines packet precendece, but RFC 2460 replaces it
* by generic Traffic Class ID with no defined semantics. Better
* not use it yet.
*/
#define IP_PREC_INTERNET_CONTROL -1
#define IP_PREC_INTERNET_CONTROL 0xc0
#endif
......@@ -20,7 +20,8 @@ typedef struct birdsock {
void *data; /* User data */
ip_addr saddr, daddr; /* IPA_NONE = unspecified */
unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */
int tos; /* TOS and priority, -1 = default */
int tos; /* TOS / traffic class, -1 = default */
int priority; /* Local socket priority, -1 = default */
int ttl; /* Time To Live, -1 = default */
u32 flags;
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
......@@ -81,6 +82,7 @@ sk_send_buffer_empty(sock *sk)
return sk->tbuf == sk->tpos;
}
extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */
/* Socket flags */
......
......@@ -48,7 +48,7 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE
CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES)
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
......@@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID)
%type <ro> roa_args
%type <rot> roa_table_arg
%type <sd> sym_args
%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted
%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos
%type <ps> proto_patt proto_patt2
%type <g> limit_spec
......@@ -277,6 +277,10 @@ iface_patt:
iface_patt_init iface_patt_list
;
tos:
CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); }
| DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); }
;
/* Direct device route protocol */
......
......@@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC)
CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY)
%type <t> opttext
%type <ld> lsadb_args
......@@ -305,6 +305,8 @@ ospf_iface_item:
| RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; }
| RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; }
| RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); }
| TX tos { OSPF_PATT->tx_tos = $2; }
| TX PRIORITY expr { OSPF_PATT->tx_priority = $3; }
| password_list
;
......@@ -367,6 +369,8 @@ ospf_iface_start:
init_list(&OSPF_PATT->nbma_list);
OSPF_PATT->autype = OSPF_AUTH_NONE;
OSPF_PATT->ptp_netmask = 2; /* not specified */
OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL;
OSPF_PATT->tx_priority = sk_priority_control;
reset_passwords();
}
;
......
......@@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->dport = OSPF_PROTO;
sk->saddr = IPA_NONE;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->tos = ifa->cf->tx_tos;
sk->priority = ifa->cf->tx_priority;
sk->rx_hook = ospf_rx_hook;
sk->tx_hook = ospf_tx_hook;
sk->err_hook = ospf_err_hook;
......@@ -659,7 +660,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
if (ifa->stub != new_stub)
return 0;
if (new->real_bcast != ifa->cf->real_bcast)
/* Change of these options would require to reset the iface socket */
if ((new->real_bcast != ifa->cf->real_bcast) ||
(new->tx_tos != ifa->cf->tx_tos) ||
(new->tx_priority != ifa->cf->tx_priority))
return 0;
ifa->cf = new;
......
......@@ -800,6 +800,8 @@ struct ospf_iface_patt
u32 priority;
u32 voa;
u32 vid;
int tx_tos;
int tx_priority;
u16 rxbuf;
#define OSPF_RXBUF_NORMAL 0
#define OSPF_RXBUF_LARGE 1
......
......@@ -27,7 +27,7 @@ CF_DECLS
CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT,
MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1,
AUTHENTICATION, NONE, PLAINTEXT, MD5,
HONOR, NEVER, NEIGHBOR, ALWAYS,
HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY,
RIP_METRIC, RIP_TAG)
%type <i> rip_mode rip_auth
......@@ -76,6 +76,8 @@ rip_mode:
rip_iface_item:
| METRIC expr { RIP_IPATT->metric = $2; }
| MODE rip_mode { RIP_IPATT->mode |= $2; }
| TX tos { RIP_IPATT->tx_tos = $2; }
| TX PRIORITY expr { RIP_IPATT->tx_priority = $3; }
;
rip_iface_opts:
......@@ -94,6 +96,8 @@ rip_iface_init:
add_tail(&RIP_CFG->iface_list, NODE this_ipatt);
init_list(&this_ipatt->ipn_list);
RIP_IPATT->metric = 1;
RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL;
RIP_IPATT->tx_priority = sk_priority_control;
}
;
......
......@@ -707,7 +707,8 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_
if (new)
{
rif->sock->ttl = 1;
rif->sock->tos = IP_PREC_INTERNET_CONTROL;
rif->sock->tos = PATT->tx_tos;
rif->sock->priority = PATT->tx_priority;
rif->sock->flags = SKF_LADDR_RX;
}
......@@ -1007,7 +1008,9 @@ static int
rip_pat_compare(struct rip_patt *a, struct rip_patt *b)
{
return ((a->metric == b->metric) &&
(a->mode == b->mode));
(a->mode == b->mode) &&
(a->tx_tos == b->tx_tos) &&
(a->tx_priority == b->tx_priority));
}
static int
......
......@@ -128,6 +128,8 @@ struct rip_patt {
#define IM_QUIET 4
#define IM_NOLISTEN 8
#define IM_VERSION1 16
int tx_tos;
int tx_priority;
};
struct rip_proto_config {
......
......@@ -284,3 +284,12 @@ sk_set_min_ttl6(sock *s, int ttl)
#endif
int sk_priority_control = -1;
static int
sk_set_priority(sock *s, int prio UNUSED)
{
log(L_WARN "Socket priority not supported");
return -1;
}
......@@ -310,3 +310,22 @@ sk_set_min_ttl6(sock *s, int ttl)
}
#endif
#ifndef IPV6_TCLASS
#define IPV6_TCLASS 67
#endif
int sk_priority_control = 7;
static int
sk_set_priority(sock *s, int prio)
{
if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0)
{
log(L_WARN "sk_set_priority: setsockopt: %m");
return -1;
}
return 0;
}
......@@ -598,7 +598,7 @@ sock_new(pool *p)
sock *s = ralloc(p, &sk_class);
s->pool = p;
// s->saddr = s->daddr = IPA_NONE;
s->tos = s->ttl = -1;
s->tos = s->priority = s->ttl = -1;
s->fd = -1;
return s;
}
......@@ -783,11 +783,18 @@ sk_setup(sock *s)
ERR("fcntl(O_NONBLOCK)");
if (s->type == SK_UNIX)
return NULL;
#ifndef IPV6
#ifdef IPV6
if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
WARN("IPV6_TCLASS");
#else
if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
WARN("IP_TOS");
#endif
if (s->priority >= 0)
sk_set_priority(s, s->priority);
#ifdef IPV6
int v = 1;
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment