Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
authorDavid S. Miller <davem@davemloft.net>
Mon, 1 Jul 2013 00:35:13 +0000 (17:35 -0700)
committerDavid S. Miller <davem@davemloft.net>
Mon, 1 Jul 2013 00:35:13 +0000 (17:35 -0700)
Pablo Neira Ayuso says:

====================
The following batch contains Netfilter/IPVS updates for net-next,
they are:

* Enforce policy in several nfnetlink subsystems, from Daniel
  Borkmann.

* Use xt_socket to match the third packet (to perform simplistic
  socket-based stateful filtering), from Eric Dumazet.

* Avoid large timeouts for TCP flows picked up from the middle,
  from Florian Westphal.

* Exclude IPVS from struct net if IPVS is disabled, and remove an
  unnecessary included header file, from JunweiZhang.

* Release SCTP connection immediately under load, to mimic current
  TCP behaviour, from Julian Anastasov.

* Replace and enhance SCTP state machine, from Julian Anastasov.

* Add tweak to reduce sync traffic in the presence of persistence,
  also from Julian Anastasov.

* Add tweak for the IPVS SH scheduler not to reject connections
  directed to a server, choose a new one instead, from Alexander
  Frolkin.

* Add support for sloppy TCP and SCTP modes, which create state
  information on any packet, not only initial handshake packets,
  from Alexander Frolkin.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
29 files changed:
Documentation/networking/ipvs-sysctl.txt
include/net/ip_vs.h
include/net/net_namespace.h
include/uapi/linux/ip_vs.h
include/uapi/linux/netfilter/nfnetlink_queue.h
include/uapi/linux/netfilter/xt_socket.h
kernel/sysctl_binary.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_dh.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_lc.c
net/netfilter/ipvs/ip_vs_nq.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_rr.c
net/netfilter/ipvs/ip_vs_sed.c
net/netfilter/ipvs/ip_vs_sh.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_wlc.c
net/netfilter/ipvs/ip_vs_wrr.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nfnetlink_queue_core.c
net/netfilter/xt_socket.c

index 9573d0c..7a3c047 100644 (file)
@@ -181,6 +181,19 @@ snat_reroute - BOOLEAN
        always be the same as the original route so it is an optimisation
        to disable snat_reroute and avoid the recalculation.
 
+sync_persist_mode - INTEGER
+       default 0
+
+       Controls the synchronisation of connections when using persistence
+
+       0: All types of connections are synchronised
+       1: Attempt to reduce the synchronisation traffic depending on
+       the connection type. For persistent services avoid synchronisation
+       for normal connections, do it only for persistence templates.
+       In such case, for TCP and SCTP it may need enabling sloppy_tcp and
+       sloppy_sctp flags on backup servers. For non-persistent services
+       such optimization is not applied, mode 0 is assumed.
+
 sync_version - INTEGER
        default 1
 
index 4405886..f0d70f0 100644 (file)
@@ -197,31 +197,6 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
        }
 }
 
-/* This function is a faster version of ip_vs_fill_iph_skb().
- * Where we only populate {s,d}addr (and avoid calling ipv6_find_hdr()).
- * This is used by the some of the ip_vs_*_schedule() functions.
- * (Mostly done to avoid ABI breakage of external schedulers)
- */
-static inline void
-ip_vs_fill_iph_addr_only(int af, const struct sk_buff *skb,
-                        struct ip_vs_iphdr *iphdr)
-{
-#ifdef CONFIG_IP_VS_IPV6
-       if (af == AF_INET6) {
-               const struct ipv6hdr *iph =
-                       (struct ipv6hdr *)skb_network_header(skb);
-               iphdr->saddr.in6 = iph->saddr;
-               iphdr->daddr.in6 = iph->daddr;
-       } else
-#endif
-       {
-               const struct iphdr *iph =
-                       (struct iphdr *)skb_network_header(skb);
-               iphdr->saddr.ip = iph->saddr;
-               iphdr->daddr.ip = iph->daddr;
-       }
-}
-
 static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
                                   const union nf_inet_addr *src)
 {
@@ -405,17 +380,18 @@ enum {
  */
 enum ip_vs_sctp_states {
        IP_VS_SCTP_S_NONE,
-       IP_VS_SCTP_S_INIT_CLI,
-       IP_VS_SCTP_S_INIT_SER,
-       IP_VS_SCTP_S_INIT_ACK_CLI,
-       IP_VS_SCTP_S_INIT_ACK_SER,
-       IP_VS_SCTP_S_ECHO_CLI,
-       IP_VS_SCTP_S_ECHO_SER,
+       IP_VS_SCTP_S_INIT1,
+       IP_VS_SCTP_S_INIT,
+       IP_VS_SCTP_S_COOKIE_SENT,
+       IP_VS_SCTP_S_COOKIE_REPLIED,
+       IP_VS_SCTP_S_COOKIE_WAIT,
+       IP_VS_SCTP_S_COOKIE,
+       IP_VS_SCTP_S_COOKIE_ECHOED,
        IP_VS_SCTP_S_ESTABLISHED,
-       IP_VS_SCTP_S_SHUT_CLI,
-       IP_VS_SCTP_S_SHUT_SER,
-       IP_VS_SCTP_S_SHUT_ACK_CLI,
-       IP_VS_SCTP_S_SHUT_ACK_SER,
+       IP_VS_SCTP_S_SHUTDOWN_SENT,
+       IP_VS_SCTP_S_SHUTDOWN_RECEIVED,
+       IP_VS_SCTP_S_SHUTDOWN_ACK_SENT,
+       IP_VS_SCTP_S_REJECTED,
        IP_VS_SCTP_S_CLOSED,
        IP_VS_SCTP_S_LAST
 };
@@ -814,7 +790,8 @@ struct ip_vs_scheduler {
 
        /* selecting a server from the given service */
        struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
-                                      const struct sk_buff *skb);
+                                      const struct sk_buff *skb,
+                                      struct ip_vs_iphdr *iph);
 };
 
 /* The persistence engine object */
@@ -998,10 +975,13 @@ struct netns_ipvs {
        int                     sysctl_snat_reroute;
        int                     sysctl_sync_ver;
        int                     sysctl_sync_ports;
+       int                     sysctl_sync_persist_mode;
        unsigned long           sysctl_sync_qlen_max;
        int                     sysctl_sync_sock_size;
        int                     sysctl_cache_bypass;
        int                     sysctl_expire_nodest_conn;
+       int                     sysctl_sloppy_tcp;
+       int                     sysctl_sloppy_sctp;
        int                     sysctl_expire_quiescent_template;
        int                     sysctl_sync_threshold[2];
        unsigned int            sysctl_sync_refresh_period;
@@ -1044,6 +1024,8 @@ struct netns_ipvs {
 #define DEFAULT_SYNC_THRESHOLD 3
 #define DEFAULT_SYNC_PERIOD    50
 #define DEFAULT_SYNC_VER       1
+#define DEFAULT_SLOPPY_TCP     0
+#define DEFAULT_SLOPPY_SCTP    0
 #define DEFAULT_SYNC_REFRESH_PERIOD    (0U * HZ)
 #define DEFAULT_SYNC_RETRIES           0
 #define IPVS_SYNC_WAKEUP_RATE  8
@@ -1080,11 +1062,26 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
        return ipvs->sysctl_sync_ver;
 }
 
+static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_sloppy_tcp;
+}
+
+static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_sloppy_sctp;
+}
+
 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
 {
        return ACCESS_ONCE(ipvs->sysctl_sync_ports);
 }
 
+static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_sync_persist_mode;
+}
+
 static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
 {
        return ipvs->sysctl_sync_qlen_max;
@@ -1133,11 +1130,26 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
        return DEFAULT_SYNC_VER;
 }
 
+static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
+{
+       return DEFAULT_SLOPPY_TCP;
+}
+
+static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
+{
+       return DEFAULT_SLOPPY_SCTP;
+}
+
 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
 {
        return 1;
 }
 
+static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
+{
+       return 0;
+}
+
 static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
 {
        return IPVS_SYNC_QLEN_MAX;
index 495bc57..84e37b1 100644 (file)
@@ -115,7 +115,9 @@ struct net {
 #ifdef CONFIG_XFRM
        struct netns_xfrm       xfrm;
 #endif
+#if IS_ENABLED(CONFIG_IP_VS)
        struct netns_ipvs       *ipvs;
+#endif
        struct sock             *diag_nlsk;
        atomic_t                rt_genid;
        atomic_t                fnhe_genid;
index a245377..2945822 100644 (file)
 #define IP_VS_SVC_F_PERSISTENT 0x0001          /* persistent port */
 #define IP_VS_SVC_F_HASHED     0x0002          /* hashed entry */
 #define IP_VS_SVC_F_ONEPACKET  0x0004          /* one-packet scheduling */
+#define IP_VS_SVC_F_SCHED1     0x0008          /* scheduler flag 1 */
+#define IP_VS_SVC_F_SCHED2     0x0010          /* scheduler flag 2 */
+#define IP_VS_SVC_F_SCHED3     0x0020          /* scheduler flag 3 */
+
+#define IP_VS_SVC_F_SCHED_SH_FALLBACK  IP_VS_SVC_F_SCHED1 /* SH fallback */
+#define IP_VS_SVC_F_SCHED_SH_PORT      IP_VS_SVC_F_SCHED2 /* SH use port */
 
 /*
  *      Destination Server Flags
index a2308ae..3a9b921 100644 (file)
@@ -105,5 +105,7 @@ enum nfqnl_attr_config {
 #define NFQA_SKB_CSUMNOTREADY (1 << 0)
 /* packet is GSO (i.e., exceeds device mtu) */
 #define NFQA_SKB_GSO (1 << 1)
+/* csum not validated (incoming device doesn't support hw checksum, etc.) */
+#define NFQA_SKB_CSUM_NOTVERIFIED (1 << 2)
 
 #endif /* _NFNETLINK_QUEUE_H */
index 26d7217..6315e2a 100644 (file)
@@ -5,10 +5,17 @@
 
 enum {
        XT_SOCKET_TRANSPARENT = 1 << 0,
+       XT_SOCKET_NOWILDCARD = 1 << 1,
 };
 
 struct xt_socket_mtinfo1 {
        __u8 flags;
 };
+#define XT_SOCKET_FLAGS_V1 XT_SOCKET_TRANSPARENT
+
+struct xt_socket_mtinfo2 {
+       __u8 flags;
+};
+#define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD)
 
 #endif /* _XT_SOCKET_H */
index aea4a9e..b609213 100644 (file)
@@ -3,7 +3,6 @@
 #include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
-#include <net/ip_vs.h>
 #include <linux/syscalls.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
index c8c52a9..4c8e5c0 100644 (file)
@@ -1231,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net)
                                default:
                                        continue;
                                }
+                       } else if (cp->protocol == IPPROTO_SCTP) {
+                               switch (cp->state) {
+                               case IP_VS_SCTP_S_INIT1:
+                               case IP_VS_SCTP_S_INIT:
+                                       break;
+                               case IP_VS_SCTP_S_ESTABLISHED:
+                                       if (todrop_entry(cp))
+                                               break;
+                                       continue;
+                               default:
+                                       continue;
+                               }
                        } else {
                                if (!todrop_entry(cp))
                                        continue;
index 05565d2..e9b0330 100644 (file)
@@ -305,7 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * return *ignored=0 i.e. ICMP and NF_DROP
                 */
                sched = rcu_dereference(svc->scheduler);
-               dest = sched->schedule(svc, skb);
+               dest = sched->schedule(svc, skb, iph);
                if (!dest) {
                        IP_VS_DBG(1, "p-schedule: no dest found.\n");
                        kfree(param.pe_data);
@@ -452,7 +452,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        }
 
        sched = rcu_dereference(svc->scheduler);
-       dest = sched->schedule(svc, skb);
+       dest = sched->schedule(svc, skb, iph);
        if (dest == NULL) {
                IP_VS_DBG(1, "Schedule: no dest found.\n");
                return NULL;
index 47e5108..c8148e4 100644 (file)
@@ -1714,6 +1714,12 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = &proc_do_sync_ports,
        },
+       {
+               .procname       = "sync_persist_mode",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "sync_qlen_max",
                .maxlen         = sizeof(unsigned long),
@@ -1738,6 +1744,18 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "sloppy_tcp",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sloppy_sctp",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "expire_quiescent_template",
                .maxlen         = sizeof(int),
@@ -3717,12 +3735,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
        tbl[idx++].data = &ipvs->sysctl_sync_ver;
        ipvs->sysctl_sync_ports = 1;
        tbl[idx++].data = &ipvs->sysctl_sync_ports;
+       tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
        ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
        tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
        ipvs->sysctl_sync_sock_size = 0;
        tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+       tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
+       tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
index ccab120..c3b8454 100644 (file)
@@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
  *      Destination hashing scheduling
  */
 static struct ip_vs_dest *
-ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_dh_state *s;
-       struct ip_vs_iphdr iph;
-
-       ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
 
        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
        s = (struct ip_vs_dh_state *) svc->sched_data;
-       dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
+       dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
        if (!dest
            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
            || atomic_read(&dest->weight) <= 0
@@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        }
 
        IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+                     IP_VS_DBG_ADDR(svc->af, &iph->daddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
                      ntohs(dest->port));
 
index 44595b8..1383b0e 100644 (file)
@@ -487,19 +487,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
  *    Locality-Based (weighted) Least-Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                   struct ip_vs_iphdr *iph)
 {
        struct ip_vs_lblc_table *tbl = svc->sched_data;
-       struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest = NULL;
        struct ip_vs_lblc_entry *en;
 
-       ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
-
        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
        /* First look in our cache */
-       en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
+       en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr);
        if (en) {
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;
@@ -529,12 +527,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        /* If we fail to create a cache entry, we'll just use the valid dest */
        spin_lock_bh(&svc->sched_lock);
        if (!tbl->dead)
-               ip_vs_lblc_new(tbl, &iph.daddr, dest);
+               ip_vs_lblc_new(tbl, &iph->daddr, dest);
        spin_unlock_bh(&svc->sched_lock);
 
 out:
        IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+                     IP_VS_DBG_ADDR(svc->af, &iph->daddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
 
        return dest;
index 876937d..3cd85b2 100644 (file)
@@ -655,19 +655,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
  *    Locality-Based (weighted) Least-Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                    struct ip_vs_iphdr *iph)
 {
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
-       struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest;
        struct ip_vs_lblcr_entry *en;
 
-       ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
-
        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
        /* First look in our cache */
-       en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
+       en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
        if (en) {
                en->lastuse = jiffies;
 
@@ -718,12 +716,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        /* If we fail to create a cache entry, we'll just use the valid dest */
        spin_lock_bh(&svc->sched_lock);
        if (!tbl->dead)
-               ip_vs_lblcr_new(tbl, &iph.daddr, dest);
+               ip_vs_lblcr_new(tbl, &iph->daddr, dest);
        spin_unlock_bh(&svc->sched_lock);
 
 out:
        IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+                     IP_VS_DBG_ADDR(svc->af, &iph->daddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
 
        return dest;
index 5128e33..2bdcb1c 100644 (file)
@@ -26,7 +26,8 @@
  *     Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least = NULL;
        unsigned int loh = 0, doh;
index 646cfd4..d8d9860 100644 (file)
@@ -55,7 +55,8 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
  *     Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least = NULL;
        unsigned int loh = 0, doh;
index 8646488..3c0da87 100644 (file)
@@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 {
        struct net *net;
        struct ip_vs_service *svc;
+       struct netns_ipvs *ipvs;
        sctp_chunkhdr_t _schunkh, *sch;
        sctp_sctphdr_t *sh, _sctph;
 
@@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
        if (sch == NULL)
                return 0;
        net = skb_net(skb);
+       ipvs = net_ipvs(net);
        rcu_read_lock();
-       if ((sch->type == SCTP_CID_INIT) &&
+       if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
            (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
                                      &iph->daddr, sh->dest))) {
                int ignored;
 
-               if (ip_vs_todrop(net_ipvs(net))) {
+               if (ip_vs_todrop(ipvs)) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
@@ -183,710 +185,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
        return 1;
 }
 
-struct ipvs_sctp_nextstate {
-       int next_state;
-};
 enum ipvs_sctp_event_t {
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_DATA_SER,
-       IP_VS_SCTP_EVE_INIT_CLI,
-       IP_VS_SCTP_EVE_INIT_SER,
-       IP_VS_SCTP_EVE_INIT_ACK_CLI,
-       IP_VS_SCTP_EVE_INIT_ACK_SER,
-       IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
-       IP_VS_SCTP_EVE_COOKIE_ECHO_SER,
-       IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
-       IP_VS_SCTP_EVE_COOKIE_ACK_SER,
-       IP_VS_SCTP_EVE_ABORT_CLI,
-       IP_VS_SCTP_EVE__ABORT_SER,
-       IP_VS_SCTP_EVE_SHUT_CLI,
-       IP_VS_SCTP_EVE_SHUT_SER,
-       IP_VS_SCTP_EVE_SHUT_ACK_CLI,
-       IP_VS_SCTP_EVE_SHUT_ACK_SER,
-       IP_VS_SCTP_EVE_SHUT_COM_CLI,
-       IP_VS_SCTP_EVE_SHUT_COM_SER,
-       IP_VS_SCTP_EVE_LAST
+       IP_VS_SCTP_DATA = 0,            /* DATA, SACK, HEARTBEATs */
+       IP_VS_SCTP_INIT,
+       IP_VS_SCTP_INIT_ACK,
+       IP_VS_SCTP_COOKIE_ECHO,
+       IP_VS_SCTP_COOKIE_ACK,
+       IP_VS_SCTP_SHUTDOWN,
+       IP_VS_SCTP_SHUTDOWN_ACK,
+       IP_VS_SCTP_SHUTDOWN_COMPLETE,
+       IP_VS_SCTP_ERROR,
+       IP_VS_SCTP_ABORT,
+       IP_VS_SCTP_EVENT_LAST
 };
 
-static enum ipvs_sctp_event_t sctp_events[256] = {
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_INIT_CLI,
-       IP_VS_SCTP_EVE_INIT_ACK_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_ABORT_CLI,
-       IP_VS_SCTP_EVE_SHUT_CLI,
-       IP_VS_SCTP_EVE_SHUT_ACK_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
-       IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_DATA_CLI,
-       IP_VS_SCTP_EVE_SHUT_COM_CLI,
+/* RFC 2960, 3.2 Chunk Field Descriptions */
+static __u8 sctp_events[] = {
+       [SCTP_CID_DATA]                 = IP_VS_SCTP_DATA,
+       [SCTP_CID_INIT]                 = IP_VS_SCTP_INIT,
+       [SCTP_CID_INIT_ACK]             = IP_VS_SCTP_INIT_ACK,
+       [SCTP_CID_SACK]                 = IP_VS_SCTP_DATA,
+       [SCTP_CID_HEARTBEAT]            = IP_VS_SCTP_DATA,
+       [SCTP_CID_HEARTBEAT_ACK]        = IP_VS_SCTP_DATA,
+       [SCTP_CID_ABORT]                = IP_VS_SCTP_ABORT,
+       [SCTP_CID_SHUTDOWN]             = IP_VS_SCTP_SHUTDOWN,
+       [SCTP_CID_SHUTDOWN_ACK]         = IP_VS_SCTP_SHUTDOWN_ACK,
+       [SCTP_CID_ERROR]                = IP_VS_SCTP_ERROR,
+       [SCTP_CID_COOKIE_ECHO]          = IP_VS_SCTP_COOKIE_ECHO,
+       [SCTP_CID_COOKIE_ACK]           = IP_VS_SCTP_COOKIE_ACK,
+       [SCTP_CID_ECN_ECNE]             = IP_VS_SCTP_DATA,
+       [SCTP_CID_ECN_CWR]              = IP_VS_SCTP_DATA,
+       [SCTP_CID_SHUTDOWN_COMPLETE]    = IP_VS_SCTP_SHUTDOWN_COMPLETE,
 };
 
-static struct ipvs_sctp_nextstate
- sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
-       /*
-        * STATE : IP_VS_SCTP_S_NONE
-        */
-       /*next state *//*event */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
-        },
-       /*
-        * STATE : IP_VS_SCTP_S_INIT_CLI
-        * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT)
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_INIT_SER
-        * Server sent INIT and waiting for INIT ACK from the client
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_INIT_ACK_CLI
-        * Client sent INIT ACK and waiting for ECHO from the server
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK has been resent by the client, let us stay is in
-         * the same state
-         */
-        {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        /*
-         * INIT_ACK sent by the server, close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * ECHO by client, it should not happen, close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        /*
-         * ECHO by server, this is what we are expecting, move to ECHO_SER
-         */
-        {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, it should not happen, close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        /*
-         * Unexpected COOKIE ACK from server, staty in the same state
-         */
-        {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_INIT_ACK_SER
-        * Server sent INIT ACK and waiting for ECHO from the client
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * Unexpected INIT_ACK by the client, let us close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        /*
-         * INIT_ACK resent by the server, let us move to same state
-         */
-        {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client send the ECHO, this is what we are expecting,
-         * move to ECHO_CLI
-         */
-        {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        /*
-         * ECHO received from the server, Not sure what to do,
-         * let us close it
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, let us stay in the same state
-         */
-        {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        /*
-         * COOKIE ACK from server, hmm... this should not happen, lets close
-         * the connection.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_ECHO_CLI
-        * Cient  sent ECHO and waiting COOKEI ACK from the Server
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK has been by the client, let us close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client resent the ECHO, let us stay in the same state
-         */
-        {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        /*
-         * ECHO received from the server, Not sure what to do,
-         * let us close it
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, this shoud not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        /*
-         * COOKIE ACK from server, this is what we are awaiting,lets move to
-         * ESTABLISHED.
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_ECHO_SER
-        * Server sent ECHO and waiting COOKEI ACK from the client
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        /*
-         * INIT_ACK has been by the server, let us close the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent the ECHO, not sure what to do, let's close the
-         * connection.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        /*
-         * ECHO resent by the server, stay in the same state
-         */
-        {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, this is what we are expecting, let's move
-         * to ESTABLISHED.
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        /*
-         * COOKIE ACK from server, this should not happen, lets close the
-         * connection.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_ESTABLISHED
-        * Association established
-        */
-       {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-         * peer and peer shall move to the ESTABISHED. if it doesn't handle
-         * it will send ERROR chunk. So, stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, not sure what to do stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        /*
-         * SHUTDOWN from the client, move to SHUDDOWN_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        /*
-         * SHUTDOWN from the server, move to SHUTDOWN_SER
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        /*
-         * client sent SHUDTDOWN_ACK, this should not happen, let's close
-         * the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_SHUT_CLI
-        * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
-        */
-       /*
-        * We received the data chuck, keep the state unchanged. I assume
-        * that still data chuncks  can be received by both the peers in
-        * SHUDOWN state
-        */
-
-       {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-         * peer and peer shall move to the ESTABISHED. if it doesn't handle
-         * it will send ERROR chunk. So, stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, not sure what to do stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        /*
-         * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        /*
-         * SHUTDOWN from the server, move to SHUTDOWN_SER
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        /*
-         * client sent SHUDTDOWN_ACK, this should not happen, let's close
-         * the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        /*
-         * Server sent SHUTDOWN ACK, this is what we are expecting, let's move
-         * to SHUDOWN_ACK_SER
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        /*
-         * SHUTDOWN COM from client, this should not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_SHUT_SER
-        * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
-        */
-       /*
-        * We received the data chuck, keep the state unchanged. I assume
-        * that still data chuncks  can be received by both the peers in
-        * SHUDOWN state
-        */
-
-       {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-         * peer and peer shall move to the ESTABISHED. if it doesn't handle
-         * it will send ERROR chunk. So, stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, not sure what to do stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        /*
-         * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        /*
-         * SHUTDOWN resent from the server, move to SHUTDOWN_SER
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        /*
-         * client sent SHUDTDOWN_ACK, this is what we are expecting, let's
-         * move to SHUT_ACK_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        /*
-         * Server sent SHUTDOWN ACK, this should not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        /*
-         * SHUTDOWN COM from client, this should not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-
-       /*
-        * State : IP_VS_SCTP_S_SHUT_ACK_CLI
-        * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
-        */
-       /*
-        * We received the data chuck, keep the state unchanged. I assume
-        * that still data chuncks  can be received by both the peers in
-        * SHUDOWN state
-        */
-
-       {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-         * peer and peer shall move to the ESTABISHED. if it doesn't handle
-         * it will send ERROR chunk. So, stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, not sure what to do stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        /*
-         * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        /*
-         * SHUTDOWN sent from the server, move to SHUTDOWN_SER
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        /*
-         * client resent SHUDTDOWN_ACK, let's stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        /*
-         * Server sent SHUTDOWN ACK, this should not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        /*
-         * SHUTDOWN COM from client, this should not happen, let's close the
-         * connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        /*
-         * SHUTDOWN COMPLETE from server this is what we are expecting.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-
-       /*
-        * State : IP_VS_SCTP_S_SHUT_ACK_SER
-        * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
-        */
-       /*
-        * We received the data chuck, keep the state unchanged. I assume
-        * that still data chuncks  can be received by both the peers in
-        * SHUDOWN state
-        */
+/* SCTP States:
+ * See RFC 2960, 4. SCTP Association State Diagram
+ *
+ * New states (not in diagram):
+ * - INIT1 state: use shorter timeout for dropped INIT packets
+ * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
+ * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
+ *
+ * The states are as seen on the real server. In the RFC diagram, the INIT1,
+ * INIT, COOKIE_SENT and COOKIE_REPLIED processing happens in the CLOSED state.
+ *
+ * States as per packets from client (C) and server (S):
+ *
+ * Setup of client connection:
+ * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
+ *
+ * Setup of server connection:
+ * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
+ */
 
-       {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
-        /*
-         * We have got an INIT from client. From the spec.“Upon receipt of
-         * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-         * an INIT ACK using the same parameters it sent in its  original
-         * INIT chunk (including its Initiate Tag, unchanged”).
-         */
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        /*
-         * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-         * “If an INIT ACK is received by an endpoint in any state other
-         * than the COOKIE-WAIT state, the endpoint should discard the
-         * INIT ACK chunk”. Stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        /*
-         * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-         * peer and peer shall move to the ESTABISHED. if it doesn't handle
-         * it will send ERROR chunk. So, stay in the same state
-         */
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        /*
-         * COOKIE ACK from client, not sure what to do stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        /*
-         * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
-         */
-        {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        /*
-         * SHUTDOWN sent from the server, move to SHUTDOWN_SER
-         */
-        {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        /*
-         * client sent SHUDTDOWN_ACK, this should not happen let's close
-         * the connection.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        /*
-         * Server resent SHUTDOWN ACK, stay in the same state
-         */
-        {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        /*
-         * SHUTDOWN COM from client, this what we are expecting, let's close
-         * the connection
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        /*
-         * SHUTDOWN COMPLETE from server this should not happen.
-         */
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        },
-       /*
-        * State : IP_VS_SCTP_S_CLOSED
-        */
-       {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-        {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-        {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-        {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-        }
+#define sNO IP_VS_SCTP_S_NONE
+#define sI1 IP_VS_SCTP_S_INIT1
+#define sIN IP_VS_SCTP_S_INIT
+#define sCS IP_VS_SCTP_S_COOKIE_SENT
+#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
+#define sCW IP_VS_SCTP_S_COOKIE_WAIT
+#define sCO IP_VS_SCTP_S_COOKIE
+#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
+#define sES IP_VS_SCTP_S_ESTABLISHED
+#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
+#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
+#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
+#define sRJ IP_VS_SCTP_S_REJECTED
+#define sCL IP_VS_SCTP_S_CLOSED
+
+static const __u8 sctp_states
+       [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
+       { /* INPUT */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+       },
+       { /* OUTPUT */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
+/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
+/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+       },
+       { /* INPUT-ONLY */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+       },
 };
 
-/*
- *      Timeout table[state]
- */
+#define IP_VS_SCTP_MAX_RTO     ((60 + 1) * HZ)
+
+/* Timeout table[state], in jiffies */
 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
-       [IP_VS_SCTP_S_NONE]         =     2 * HZ,
-       [IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_INIT_ACK_CLI] =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_INIT_ACK_SER] =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_ECHO_CLI]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_ECHO_SER]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_ESTABLISHED]  =    15 * 60 * HZ,
-       [IP_VS_SCTP_S_SHUT_CLI]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_SHUT_SER]     =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_SHUT_ACK_CLI] =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_SHUT_ACK_SER] =     1 * 60 * HZ,
-       [IP_VS_SCTP_S_CLOSED]       =    10 * HZ,
-       [IP_VS_SCTP_S_LAST]         =     2 * HZ,
+       [IP_VS_SCTP_S_NONE]                     = 2 * HZ,
+       [IP_VS_SCTP_S_INIT1]                    = (0 + 3 + 1) * HZ,
+       [IP_VS_SCTP_S_INIT]                     = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_COOKIE_SENT]              = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_COOKIE_REPLIED]           = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_COOKIE_WAIT]              = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_COOKIE]                   = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_COOKIE_ECHOED]            = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_ESTABLISHED]              = 15 * 60 * HZ,
+       [IP_VS_SCTP_S_SHUTDOWN_SENT]            = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_REJECTED]                 = (0 + 3 + 1) * HZ,
+       [IP_VS_SCTP_S_CLOSED]                   = IP_VS_SCTP_MAX_RTO,
+       [IP_VS_SCTP_S_LAST]                     = 2 * HZ,
 };
 
 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
-       [IP_VS_SCTP_S_NONE]         =    "NONE",
-       [IP_VS_SCTP_S_INIT_CLI]     =    "INIT_CLI",
-       [IP_VS_SCTP_S_INIT_SER]     =    "INIT_SER",
-       [IP_VS_SCTP_S_INIT_ACK_CLI] =    "INIT_ACK_CLI",
-       [IP_VS_SCTP_S_INIT_ACK_SER] =    "INIT_ACK_SER",
-       [IP_VS_SCTP_S_ECHO_CLI]     =    "COOKIE_ECHO_CLI",
-       [IP_VS_SCTP_S_ECHO_SER]     =    "COOKIE_ECHO_SER",
-       [IP_VS_SCTP_S_ESTABLISHED]  =    "ESTABISHED",
-       [IP_VS_SCTP_S_SHUT_CLI]     =    "SHUTDOWN_CLI",
-       [IP_VS_SCTP_S_SHUT_SER]     =    "SHUTDOWN_SER",
-       [IP_VS_SCTP_S_SHUT_ACK_CLI] =    "SHUTDOWN_ACK_CLI",
-       [IP_VS_SCTP_S_SHUT_ACK_SER] =    "SHUTDOWN_ACK_SER",
-       [IP_VS_SCTP_S_CLOSED]       =    "CLOSED",
-       [IP_VS_SCTP_S_LAST]         =    "BUG!"
+       [IP_VS_SCTP_S_NONE]                     = "NONE",
+       [IP_VS_SCTP_S_INIT1]                    = "INIT1",
+       [IP_VS_SCTP_S_INIT]                     = "INIT",
+       [IP_VS_SCTP_S_COOKIE_SENT]              = "C-SENT",
+       [IP_VS_SCTP_S_COOKIE_REPLIED]           = "C-REPLIED",
+       [IP_VS_SCTP_S_COOKIE_WAIT]              = "C-WAIT",
+       [IP_VS_SCTP_S_COOKIE]                   = "COOKIE",
+       [IP_VS_SCTP_S_COOKIE_ECHOED]            = "C-ECHOED",
+       [IP_VS_SCTP_S_ESTABLISHED]              = "ESTABLISHED",
+       [IP_VS_SCTP_S_SHUTDOWN_SENT]            = "S-SENT",
+       [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = "S-RECEIVED",
+       [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = "S-ACK-SENT",
+       [IP_VS_SCTP_S_REJECTED]                 = "REJECTED",
+       [IP_VS_SCTP_S_CLOSED]                   = "CLOSED",
+       [IP_VS_SCTP_S_LAST]                     = "BUG!",
 };
 
 
@@ -943,17 +394,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                }
        }
 
-       event = sctp_events[chunk_type];
+       event = (chunk_type < sizeof(sctp_events)) ?
+               sctp_events[chunk_type] : IP_VS_SCTP_DATA;
 
-       /*
-        *  If the direction is IP_VS_DIR_OUTPUT, this event is from server
-        */
-       if (direction == IP_VS_DIR_OUTPUT)
-               event++;
-       /*
-        * get next state
+       /* While NOOUTPUT is set, treat a non-output packet as INPUT_ONLY;
+        * clear the flag as soon as an output packet is seen
         */
-       next_state = sctp_states_table[cp->state][event].next_state;
+       if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+               if (direction == IP_VS_DIR_OUTPUT)
+                       cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+               else
+                       direction = IP_VS_DIR_INPUT_ONLY;
+       }
+
+       next_state = sctp_states[direction][event][cp->state];
 
        if (next_state != cp->state) {
                struct ip_vs_dest *dest = cp->dest;
index 50a1594..e3a6972 100644 (file)
@@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
        struct net *net;
        struct ip_vs_service *svc;
        struct tcphdr _tcph, *th;
+       struct netns_ipvs *ipvs;
 
        th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
        if (th == NULL) {
@@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                return 0;
        }
        net = skb_net(skb);
+       ipvs = net_ipvs(net);
        /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
        rcu_read_lock();
-       if (th->syn &&
+       if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
            (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
                                      &iph->daddr, th->dest))) {
                int ignored;
 
-               if (ip_vs_todrop(net_ipvs(net))) {
+               if (ip_vs_todrop(ipvs)) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
@@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
 
 /*     OUTPUT */
@@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
@@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
-/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 
 /*     OUTPUT */
@@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
index c35986c..176b87c 100644 (file)
@@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
  * Round-Robin Scheduling
  */
 static struct ip_vs_dest *
-ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
 {
        struct list_head *p;
        struct ip_vs_dest *dest, *last;
index f320592..a5284cc 100644 (file)
@@ -59,7 +59,8 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
  *     Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                  struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least;
        unsigned int loh, doh;
index a65edfe..f16c027 100644 (file)
 
 #include <net/ip_vs.h>
 
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
 
 /*
  *      IPVS SH bucket
@@ -71,10 +75,19 @@ struct ip_vs_sh_state {
        struct ip_vs_sh_bucket          buckets[IP_VS_SH_TAB_SIZE];
 };
 
+/* Report whether @dest has to be skipped by the SH scheduler: true when
+ * its weight is zero or negative, or when IP_VS_DEST_F_OVERLOAD is set
+ * in its flags.
+ */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+       if (atomic_read(&dest->weight) <= 0)
+               return true;
+
+       return (dest->flags & IP_VS_DEST_F_OVERLOAD) != 0;
+}
+
 /*
  *     Returns hash value for IPVS SH entry
  */
-static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int
+ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
+                __be16 port, unsigned int offset)
 {
        __be32 addr_fold = addr->ip;
 
@@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
-       return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
+       return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+               IP_VS_SH_TAB_MASK;
 }
 
 
@@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
  *      Get ip_vs_dest associated with supplied parameters.
  */
 static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
+ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+            const union nf_inet_addr *addr, __be16 port)
 {
-       return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
+       unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); /* offset 0: single lookup, no re-probing */
+       struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
+
+       return (!dest || is_unavailable(dest)) ? NULL : dest; /* empty bucket or unavailable server yields NULL */
 }
 
 
+/* Variant of ip_vs_sh_get that keeps probing when the hashed server is
+ * unavailable: the lookup is repeated with offset 0, 1, 2, ... until an
+ * available server turns up, an empty bucket is hit, or
+ * IP_VS_SH_TAB_SIZE offsets have been tried. NULL is returned in the
+ * last two cases.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+                     const union nf_inet_addr *addr, __be16 port)
+{
+       unsigned int offset = 0;
+
+       while (offset < IP_VS_SH_TAB_SIZE) {
+               unsigned int hash;
+               struct ip_vs_dest *dest;
+
+               hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
+               dest = rcu_dereference(s->buckets[hash].dest);
+
+               /* An empty bucket ends the search. */
+               if (!dest)
+                       return NULL;
+
+               if (!is_unavailable(dest))
+                       return dest;
+
+               IP_VS_DBG_BUF(6, "SH: selected unavailable server "
+                             "%s:%d (offset %d)",
+                             IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                             ntohs(dest->port), offset);
+               offset++;
+       }
+
+       return NULL;
+}
+
 /*
  *      Assign all the hash buckets of the specified table with the service.
  */
@@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
 }
 
 
-/*
- *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
+/* Helper function to get the source port of the packet.
+ *
+ * Reads the TCP, UDP or SCTP header found iph->len bytes into @skb and
+ * returns its source port, still in network byte order. Returns 0 for
+ * any other protocol, and also when skb_header_pointer() returns NULL
+ * because the transport header could not be obtained.
+ */
+static inline __be16
+ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
 {
-       return dest->flags & IP_VS_DEST_F_OVERLOAD;
+       __be16 port;
+       struct tcphdr _tcph, *th;
+       struct udphdr _udph, *uh;
+       sctp_sctphdr_t _sctph, *sh;
+
+       switch (iph->protocol) {
+       case IPPROTO_TCP:
+               th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+               /* Header not available: hash on the address only */
+               if (th == NULL)
+                       return 0;
+               port = th->source;
+               break;
+       case IPPROTO_UDP:
+               uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+               if (uh == NULL)
+                       return 0;
+               port = uh->source;
+               break;
+       case IPPROTO_SCTP:
+               sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+               if (sh == NULL)
+                       return 0;
+               port = sh->source;
+               break;
+       default:
+               port = 0;
+       }
+
+       return port;
 }
 
 
@@ -227,28 +291,32 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
  *      Source Hashing scheduling
  */
 static struct ip_vs_dest *
-ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_sh_state *s;
-       struct ip_vs_iphdr iph;
-
-       ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
+       __be16 port = 0;
 
        IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
+       if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
+               port = ip_vs_sh_get_port(skb, iph);
+
        s = (struct ip_vs_sh_state *) svc->sched_data;
-       dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
-       if (!dest
-           || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-           || atomic_read(&dest->weight) <= 0
-           || is_overloaded(dest)) {
+
+       if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
+               dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+       else
+               dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+
+       if (!dest) {
                ip_vs_scheduler_err(svc, "no destination available");
                return NULL;
        }
 
        IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.saddr),
+                     IP_VS_DBG_ADDR(svc->af, &iph->saddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
                      ntohs(dest->port));
 
index f6046d9..f448471 100644 (file)
@@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
        return sb;
 }
 
+/* Tell whether @cp is controlled by persistence: walk the ->control
+ * chain starting at cp->control and report true as soon as a connection
+ * flagged IP_VS_CONN_F_TEMPLATE is found. @cp itself is not tested.
+ */
+static inline bool in_persistence(struct ip_vs_conn *cp)
+{
+       struct ip_vs_conn *ctl = cp->control;
+
+       while (ctl) {
+               if (ctl->flags & IP_VS_CONN_F_TEMPLATE)
+                       return true;
+               ctl = ctl->control;
+       }
+
+       return false;
+}
+
 /* Check if conn should be synced.
  * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
  * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
@@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
        /* Check if we sync in current state */
        if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
                force = 0;
+       else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
+               return 0;
        else if (likely(cp->protocol == IPPROTO_TCP)) {
                if (!((1 << cp->state) &
                      ((1 << IP_VS_TCP_S_ESTABLISHED) |
@@ -461,9 +473,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
        } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
                if (!((1 << cp->state) &
                      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
-                      (1 << IP_VS_SCTP_S_CLOSED) |
-                      (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
-                      (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
+                      (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
+                      (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
+                      (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
+                      (1 << IP_VS_SCTP_S_CLOSED))))
                        return 0;
                force = cp->state != cp->old_state;
                if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
index c60a81c..6dc1fa1 100644 (file)
@@ -31,7 +31,8 @@
  *     Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                  struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least;
        unsigned int loh, doh;
index 0e68555..0546cd5 100644 (file)
@@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
  *    Weighted Round-Robin Scheduling
  */
 static struct ip_vs_dest *
-ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                  struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *last, *stop = NULL;
        struct ip_vs_wrr_mark *mark = svc->sched_data;
index 6d0f8a1..f83a522 100644 (file)
@@ -828,7 +828,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
        struct nf_conntrack_l3proto *l3proto;
        int ret = 0;
 
-       nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+       ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+       if (ret < 0)
+               return ret;
 
        rcu_read_lock();
        l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -895,7 +897,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 
        memset(tuple, 0, sizeof(*tuple));
 
-       nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+       err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+       if (err < 0)
+               return err;
 
        if (!tb[CTA_TUPLE_IP])
                return -EINVAL;
@@ -946,9 +950,12 @@ static inline int
 ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
                     struct nlattr **helpinfo)
 {
+       int err;
        struct nlattr *tb[CTA_HELP_MAX+1];
 
-       nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+       err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+       if (err < 0)
+               return err;
 
        if (!tb[CTA_HELP_NAME])
                return -EINVAL;
@@ -1431,7 +1438,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
        struct nf_conntrack_l4proto *l4proto;
        int err = 0;
 
-       nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+       err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+       if (err < 0)
+               return err;
 
        rcu_read_lock();
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -1452,9 +1461,12 @@ static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
 static inline int
 change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
 {
+       int err;
        struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
 
-       nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+       err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+       if (err < 0)
+               return err;
 
        if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
                return -EINVAL;
@@ -2115,7 +2127,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
        struct nlattr *cda[CTA_MAX+1];
        int ret;
 
-       nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+       ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+       if (ret < 0)
+               return ret;
 
        spin_lock_bh(&nf_conntrack_lock);
        ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
@@ -2710,7 +2724,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
        struct nf_conntrack_tuple nat_tuple = {};
        int err;
 
-       nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+       err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+       if (err < 0)
+               return err;
 
        if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE])
                return -EINVAL;
index 4d4d8f1..7dcc376 100644 (file)
@@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct,
                        nf_ct_kill_acct(ct, ctinfo, skb);
                        return NF_ACCEPT;
                }
+               /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+                * pickup with loose=1. Avoid large ESTABLISHED timeout.
+                */
+               if (new_state == TCP_CONNTRACK_ESTABLISHED &&
+                   timeout > timeouts[TCP_CONNTRACK_UNACK])
+                       timeout = timeouts[TCP_CONNTRACK_UNACK];
        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
                   && (old_state == TCP_CONNTRACK_SYN_RECV
                       || old_state == TCP_CONNTRACK_ESTABLISHED)
index a191b6d..9e287cb 100644 (file)
@@ -67,9 +67,12 @@ static int
 nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
                          const struct nlattr *attr)
 {
+       int err;
        struct nlattr *tb[NFCTH_TUPLE_MAX+1];
 
-       nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+       err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+       if (err < 0)
+               return err;
 
        if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
                return -EINVAL;
@@ -121,9 +124,12 @@ static int
 nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
                            const struct nlattr *attr)
 {
+       int err;
        struct nlattr *tb[NFCTH_POLICY_MAX+1];
 
-       nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+       err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+       if (err < 0)
+               return err;
 
        if (!tb[NFCTH_POLICY_NAME] ||
            !tb[NFCTH_POLICY_EXPECT_MAX] ||
@@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
        struct nf_conntrack_expect_policy *expect_policy;
        struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
 
-       nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
-                                       nfnl_cthelper_expect_policy_set);
+       ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+                              nfnl_cthelper_expect_policy_set);
+       if (ret < 0)
+               return ret;
 
        if (!tb[NFCTH_POLICY_SET_NUM])
                return -EINVAL;
index 65074df..5058049 100644 (file)
@@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
        if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
                struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
 
-               nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-                                attr, l4proto->ctnl_timeout.nla_policy);
+               ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
+                                      attr, l4proto->ctnl_timeout.nla_policy);
+               if (ret < 0)
+                       return ret;
 
                ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net,
                                                          &timeout->data);
index 299a48a..971ea14 100644 (file)
@@ -280,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
        skb_shinfo(to)->nr_frags = j;
 }
 
-static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
+static int
+nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
+                     bool csum_verify)
 {
        __u32 flags = 0;
 
        if (packet->ip_summed == CHECKSUM_PARTIAL)
                flags = NFQA_SKB_CSUMNOTREADY;
+       else if (csum_verify)
+               flags = NFQA_SKB_CSUM_NOTVERIFIED;
+
        if (skb_is_gso(packet))
                flags |= NFQA_SKB_GSO;
 
@@ -310,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
        struct net_device *outdev;
        struct nf_conn *ct = NULL;
        enum ip_conntrack_info uninitialized_var(ctinfo);
+       bool csum_verify;
 
        size =    nlmsg_total_size(sizeof(struct nfgenmsg))
                + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -327,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
        if (entskb->tstamp.tv64)
                size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
+       if (entry->hook <= NF_INET_FORWARD ||
+          (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+               csum_verify = !skb_csum_unnecessary(entskb);
+       else
+               csum_verify = false;
+
        outdev = entry->outdev;
 
        switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
@@ -476,7 +488,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
            nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
                goto nla_put_failure;
 
-       if (nfqnl_put_packet_info(skb, entskb))
+       if (nfqnl_put_packet_info(skb, entskb, csum_verify))
                goto nla_put_failure;
 
        if (data_len) {
index 0270424..f8b7191 100644 (file)
@@ -163,8 +163,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
                bool wildcard;
                bool transparent = true;
 
-               /* Ignore sockets listening on INADDR_ANY */
-               wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+               /* Ignore sockets listening on INADDR_ANY,
+                * unless XT_SOCKET_NOWILDCARD is set
+                */
+               wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+                           sk->sk_state != TCP_TIME_WAIT &&
                            inet_sk(sk)->inet_rcv_saddr == 0);
 
                /* Ignore non-transparent sockets,
@@ -197,7 +200,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 }
 
 static bool
-socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
        /* Thin wrapper: hands par->matchinfo to socket_match() unchanged. */
        return socket_match(skb, par, par->matchinfo);
 }
@@ -259,7 +262,7 @@ extract_icmp6_fields(const struct sk_buff *skb,
 }
 
 static bool
-socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
        struct ipv6hdr *iph = ipv6_hdr(skb);
        struct udphdr _hdr, *hp = NULL;
@@ -302,8 +305,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
                bool wildcard;
                bool transparent = true;
 
-               /* Ignore sockets listening on INADDR_ANY */
-               wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+               /* Ignore sockets listening on INADDR_ANY
+                * unless XT_SOCKET_NOWILDCARD is set
+                */
+               wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+                           sk->sk_state != TCP_TIME_WAIT &&
                            ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
 
                /* Ignore non-transparent sockets,
@@ -331,6 +337,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
 }
 #endif
 
+/* checkentry hook for revision 1: accept the rule only when no flag bit
+ * outside XT_SOCKET_FLAGS_V1 is set; otherwise log the unknown bits and
+ * return -EINVAL.
+ */
+static int socket_mt_v1_check(const struct xt_mtchk_param *par)
+{
+       const struct xt_socket_mtinfo1 *info;
+
+       info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+       if (!(info->flags & ~XT_SOCKET_FLAGS_V1))
+               return 0;
+
+       pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
+       return -EINVAL;
+}
+
+/* checkentry hook for revision 2: accept the rule only when no flag bit
+ * outside XT_SOCKET_FLAGS_V2 is set; otherwise log the unknown bits and
+ * return -EINVAL.
+ *
+ * NOTE(review): the revision 2 entries in socket_mt_reg register
+ * sizeof(struct xt_socket_mtinfo1) as matchsize while this hook reads
+ * the data as struct xt_socket_mtinfo2 - confirm both structures have
+ * the same layout.
+ */
+static int socket_mt_v2_check(const struct xt_mtchk_param *par)
+{
+       const struct xt_socket_mtinfo2 *info;
+
+       info = (struct xt_socket_mtinfo2 *) par->matchinfo;
+       if (!(info->flags & ~XT_SOCKET_FLAGS_V2))
+               return 0;
+
+       pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
+       return -EINVAL;
+}
+
 static struct xt_match socket_mt_reg[] __read_mostly = {
        {
                .name           = "socket",
@@ -345,7 +373,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                .name           = "socket",
                .revision       = 1,
                .family         = NFPROTO_IPV4,
-               .match          = socket_mt4_v1,
+               .match          = socket_mt4_v1_v2,
+               .checkentry     = socket_mt_v1_check,
                .matchsize      = sizeof(struct xt_socket_mtinfo1),
                .hooks          = (1 << NF_INET_PRE_ROUTING) |
                                  (1 << NF_INET_LOCAL_IN),
@@ -356,7 +385,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                .name           = "socket",
                .revision       = 1,
                .family         = NFPROTO_IPV6,
-               .match          = socket_mt6_v1,
+               .match          = socket_mt6_v1_v2,
+               .checkentry     = socket_mt_v1_check,
+               .matchsize      = sizeof(struct xt_socket_mtinfo1),
+               .hooks          = (1 << NF_INET_PRE_ROUTING) |
+                                 (1 << NF_INET_LOCAL_IN),
+               .me             = THIS_MODULE,
+       },
+#endif
+       {
+               .name           = "socket",
+               .revision       = 2,
+               .family         = NFPROTO_IPV4,
+               .match          = socket_mt4_v1_v2,
+               .checkentry     = socket_mt_v2_check,
+               .matchsize      = sizeof(struct xt_socket_mtinfo1),
+               .hooks          = (1 << NF_INET_PRE_ROUTING) |
+                                 (1 << NF_INET_LOCAL_IN),
+               .me             = THIS_MODULE,
+       },
+#ifdef XT_SOCKET_HAVE_IPV6
+       {
+               .name           = "socket",
+               .revision       = 2,
+               .family         = NFPROTO_IPV6,
+               .match          = socket_mt6_v1_v2,
+               .checkentry     = socket_mt_v2_check,
                .matchsize      = sizeof(struct xt_socket_mtinfo1),
                .hooks          = (1 << NF_INET_PRE_ROUTING) |
                                  (1 << NF_INET_LOCAL_IN),