Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Thu, 15 Jan 2015 06:50:25 +0000 (01:50 -0500)
committer David S. Miller <davem@davemloft.net>
Thu, 15 Jan 2015 06:50:25 +0000 (01:50 -0500)
Pablo Neira Ayuso says:

====================
netfilter updates for net-next

The following patchset contains netfilter updates for net-next: just a
bunch of cleanups and a small enhancement to selectively flush conntracks
in ctnetlink. More specifically, the patches are:

1) Raise the default number of buckets in conntrack from 16384 to 65536 on
   systems with >= 4GBytes, patch from Marcelo Leitner.

2) Small refactor to save one level of indentation in xt_osf, from
   Joe Perches.

3) Remove unnecessary sizeof(char) in nf_log, from Fabian Frederick.

4) Another small cleanup to remove redundant variable in nfnetlink,
   from Duan Jiong.

5) Fix compilation warning in nfnetlink_cthelper on parisc, from
   Chen Gang.

6) Fix wrong format in debugging for ctseqadj, from Gao feng.

7) Selective conntrack flushing through the mark for ctnetlink, patch
   from Kristian Evensen.

8) Remove the nf_conntrack_flush_report() exported symbol now that it is
   no longer required after the selective flushing patch, again from
   Kristian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/nf_conntrack-sysctl.txt
include/net/netfilter/nf_conntrack.h
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_seqadj.c
net/netfilter/nf_log.c
net/netfilter/nfnetlink.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/xt_osf.c

index 70da508..f55599c 100644 (file)
@@ -11,7 +11,8 @@ nf_conntrack_buckets - INTEGER (read-only)
        Size of hash table. If not specified as parameter during module
        loading, the default size is calculated by dividing total memory
        by 16384 to determine the number of buckets but the hash table will
-       never have fewer than 32 or more than 16384 buckets.
+       never have fewer than 32 and limited to 16384 buckets. For systems
+       with more than 4GB of memory it will be 65536 buckets.
 
 nf_conntrack_checksum - BOOLEAN
        0 - disabled
index f0daed2..74f271a 100644 (file)
@@ -191,8 +191,6 @@ __nf_conntrack_find(struct net *net, u16 zone,
 int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
 
-void nf_conntrack_flush_report(struct net *net, u32 portid, int report);
-
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
                       u_int16_t l3num, struct nf_conntrack_tuple *tuple);
 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
index 46d1b26..13fad86 100644 (file)
@@ -1424,12 +1424,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
-{
-       nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
-
 static int untrack_refs(void)
 {
        int cnt = 0, cpu;
@@ -1622,13 +1616,18 @@ int nf_conntrack_init_start(void)
        for (i = 0; i < CONNTRACK_LOCKS; i++)
                spin_lock_init(&nf_conntrack_locks[i]);
 
-       /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
-        * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
        if (!nf_conntrack_htable_size) {
+               /* Idea from tcp.c: use 1/16384 of memory.
+                * On i386: 32MB machine has 512 buckets.
+                * >= 1GB machines have 16384 buckets.
+                * >= 4GB machines have 65536 buckets.
+                */
                nf_conntrack_htable_size
                        = (((totalram_pages << PAGE_SHIFT) / 16384)
                           / sizeof(struct hlist_head));
-               if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
+               if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+                       nf_conntrack_htable_size = 65536;
+               else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        nf_conntrack_htable_size = 16384;
                if (nf_conntrack_htable_size < 32)
                        nf_conntrack_htable_size = 32;
index 1bd9ed9..d1c2394 100644 (file)
@@ -749,13 +749,47 @@ static int ctnetlink_done(struct netlink_callback *cb)
        return 0;
 }
 
-struct ctnetlink_dump_filter {
+struct ctnetlink_filter {
        struct {
                u_int32_t val;
                u_int32_t mask;
        } mark;
 };
 
+static struct ctnetlink_filter *
+ctnetlink_alloc_filter(const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       struct ctnetlink_filter *filter;
+
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (filter == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
+       filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+       return filter;
+#else
+       return ERR_PTR(-EOPNOTSUPP);
+#endif
+}
+
+static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
+{
+       struct ctnetlink_filter *filter = data;
+
+       if (filter == NULL)
+               return 1;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       if ((ct->mark & filter->mark.mask) == filter->mark.val)
+               return 1;
+#endif
+
+       return 0;
+}
+
 static int
 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -768,10 +802,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        int res;
        spinlock_t *lockp;
 
-#ifdef CONFIG_NF_CONNTRACK_MARK
-       const struct ctnetlink_dump_filter *filter = cb->data;
-#endif
-
        last = (struct nf_conn *)cb->args[1];
 
        local_bh_disable();
@@ -798,12 +828,9 @@ restart:
                                        continue;
                                cb->args[1] = 0;
                        }
-#ifdef CONFIG_NF_CONNTRACK_MARK
-                       if (filter && !((ct->mark & filter->mark.mask) ==
-                                       filter->mark.val)) {
+                       if (!ctnetlink_filter_match(ct, cb->data))
                                continue;
-                       }
-#endif
+
                        rcu_read_lock();
                        res =
                        ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
@@ -1001,6 +1028,25 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
                                    .len = NF_CT_LABELS_MAX_SIZE },
 };
 
+static int ctnetlink_flush_conntrack(struct net *net,
+                                    const struct nlattr * const cda[],
+                                    u32 portid, int report)
+{
+       struct ctnetlink_filter *filter = NULL;
+
+       if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+               filter = ctnetlink_alloc_filter(cda);
+               if (IS_ERR(filter))
+                       return PTR_ERR(filter);
+       }
+
+       nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter,
+                             portid, report);
+       kfree(filter);
+
+       return 0;
+}
+
 static int
 ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
@@ -1024,11 +1070,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
        else if (cda[CTA_TUPLE_REPLY])
                err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
        else {
-               /* Flush the whole table */
-               nf_conntrack_flush_report(net,
-                                        NETLINK_CB(skb).portid,
-                                        nlmsg_report(nlh));
-               return 0;
+               return ctnetlink_flush_conntrack(net, cda,
+                                                NETLINK_CB(skb).portid,
+                                                nlmsg_report(nlh));
        }
 
        if (err < 0)
@@ -1076,21 +1120,16 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
                        .dump = ctnetlink_dump_table,
                        .done = ctnetlink_done,
                };
-#ifdef CONFIG_NF_CONNTRACK_MARK
+
                if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
-                       struct ctnetlink_dump_filter *filter;
+                       struct ctnetlink_filter *filter;
 
-                       filter = kzalloc(sizeof(struct ctnetlink_dump_filter),
-                                        GFP_ATOMIC);
-                       if (filter == NULL)
-                               return -ENOMEM;
+                       filter = ctnetlink_alloc_filter(cda);
+                       if (IS_ERR(filter))
+                               return PTR_ERR(filter);
 
-                       filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-                       filter->mark.mask =
-                               ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
                        c.data = filter;
                }
-#endif
                return netlink_dump_start(ctnl, skb, nlh, &c);
        }
 
index f6e2ae9..ce3e840 100644 (file)
@@ -98,9 +98,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb,
                        new_end_seq = htonl(ntohl(sack->end_seq) -
                                      seq->offset_before);
 
-               pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-                        ntohl(sack->start_seq), new_start_seq,
-                        ntohl(sack->end_seq), new_end_seq);
+               pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n",
+                        ntohl(sack->start_seq), ntohl(new_start_seq),
+                        ntohl(sack->end_seq), ntohl(new_end_seq));
 
                inet_proto_csum_replace4(&tcph->check, skb,
                                         sack->start_seq, new_start_seq, 0);
index 43c926c..0d8448f 100644 (file)
@@ -425,8 +425,7 @@ static int netfilter_log_sysctl_init(struct net *net)
                        nf_log_sysctl_table[i].procname =
                                nf_log_sysctl_fnames[i];
                        nf_log_sysctl_table[i].data = NULL;
-                       nf_log_sysctl_table[i].maxlen =
-                               NFLOGGER_NAME_LEN * sizeof(char);
+                       nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
                        nf_log_sysctl_table[i].mode = 0644;
                        nf_log_sysctl_table[i].proc_handler =
                                nf_log_proc_dostring;
index c421d94..8b117c9 100644 (file)
@@ -272,7 +272,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
                                u_int16_t subsys_id)
 {
-       struct sk_buff *nskb, *oskb = skb;
+       struct sk_buff *oskb = skb;
        struct net *net = sock_net(skb->sk);
        const struct nfnetlink_subsystem *ss;
        const struct nfnl_callback *nc;
@@ -283,12 +283,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (subsys_id >= NFNL_SUBSYS_COUNT)
                return netlink_ack(skb, nlh, -EINVAL);
 replay:
-       nskb = netlink_skb_clone(oskb, GFP_KERNEL);
-       if (!nskb)
+       skb = netlink_skb_clone(oskb, GFP_KERNEL);
+       if (!skb)
                return netlink_ack(oskb, nlh, -ENOMEM);
 
-       nskb->sk = oskb->sk;
-       skb = nskb;
+       skb->sk = oskb->sk;
 
        nfnl_lock(subsys_id);
        ss = rcu_dereference_protected(table[subsys_id].subsys,
@@ -305,7 +304,7 @@ replay:
                {
                        nfnl_unlock(subsys_id);
                        netlink_ack(skb, nlh, -EOPNOTSUPP);
-                       return kfree_skb(nskb);
+                       return kfree_skb(skb);
                }
        }
 
@@ -386,7 +385,7 @@ replay:
                                nfnl_err_reset(&err_list);
                                ss->abort(oskb);
                                nfnl_unlock(subsys_id);
-                               kfree_skb(nskb);
+                               kfree_skb(skb);
                                goto replay;
                        }
                }
@@ -427,7 +426,7 @@ done:
 
        nfnl_err_deliver(&err_list, oskb);
        nfnl_unlock(subsys_id);
-       kfree_skb(nskb);
+       kfree_skb(skb);
 }
 
 static void nfnetlink_rcv(struct sk_buff *skb)
index 9e287cb..a5599fc 100644 (file)
@@ -86,7 +86,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
 static int
 nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
 {
-       const struct nf_conn_help *help = nfct_help(ct);
+       struct nf_conn_help *help = nfct_help(ct);
 
        if (attr == NULL)
                return -EINVAL;
@@ -94,7 +94,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
        if (help->helper->data_len == 0)
                return -EINVAL;
 
-       memcpy(&help->data, nla_data(attr), help->helper->data_len);
+       memcpy(help->data, nla_data(attr), help->helper->data_len);
        return 0;
 }
 
index c529161..0778855 100644 (file)
@@ -225,6 +225,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 
        rcu_read_lock();
        list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
+               int foptsize, optnum;
+
                f = &kf->finger;
 
                if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
@@ -233,110 +235,109 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
                optp = _optp;
                fmatch = FMATCH_WRONG;
 
-               if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) {
-                       int foptsize, optnum;
+               if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl))
+                       continue;
 
-                       /*
-                        * Should not happen if userspace parser was written correctly.
-                        */
-                       if (f->wss.wc >= OSF_WSS_MAX)
-                               continue;
+               /*
+                * Should not happen if userspace parser was written correctly.
+                */
+               if (f->wss.wc >= OSF_WSS_MAX)
+                       continue;
 
-                       /* Check options */
+               /* Check options */
 
-                       foptsize = 0;
-                       for (optnum = 0; optnum < f->opt_num; ++optnum)
-                               foptsize += f->opt[optnum].length;
+               foptsize = 0;
+               for (optnum = 0; optnum < f->opt_num; ++optnum)
+                       foptsize += f->opt[optnum].length;
 
-                       if (foptsize > MAX_IPOPTLEN ||
-                               optsize > MAX_IPOPTLEN ||
-                               optsize != foptsize)
-                               continue;
+               if (foptsize > MAX_IPOPTLEN ||
+                   optsize > MAX_IPOPTLEN ||
+                   optsize != foptsize)
+                       continue;
 
-                       check_WSS = f->wss.wc;
+               check_WSS = f->wss.wc;
 
-                       for (optnum = 0; optnum < f->opt_num; ++optnum) {
-                               if (f->opt[optnum].kind == (*optp)) {
-                                       __u32 len = f->opt[optnum].length;
-                                       const __u8 *optend = optp + len;
-                                       int loop_cont = 0;
+               for (optnum = 0; optnum < f->opt_num; ++optnum) {
+                       if (f->opt[optnum].kind == (*optp)) {
+                               __u32 len = f->opt[optnum].length;
+                               const __u8 *optend = optp + len;
+                               int loop_cont = 0;
 
-                                       fmatch = FMATCH_OK;
+                               fmatch = FMATCH_OK;
 
-                                       switch (*optp) {
-                                       case OSFOPT_MSS:
-                                               mss = optp[3];
-                                               mss <<= 8;
-                                               mss |= optp[2];
+                               switch (*optp) {
+                               case OSFOPT_MSS:
+                                       mss = optp[3];
+                                       mss <<= 8;
+                                       mss |= optp[2];
 
-                                               mss = ntohs((__force __be16)mss);
-                                               break;
-                                       case OSFOPT_TS:
-                                               loop_cont = 1;
-                                               break;
-                                       }
+                                       mss = ntohs((__force __be16)mss);
+                                       break;
+                               case OSFOPT_TS:
+                                       loop_cont = 1;
+                                       break;
+                               }
 
-                                       optp = optend;
-                               } else
-                                       fmatch = FMATCH_OPT_WRONG;
+                               optp = optend;
+                       } else
+                               fmatch = FMATCH_OPT_WRONG;
 
-                               if (fmatch != FMATCH_OK)
-                                       break;
-                       }
+                       if (fmatch != FMATCH_OK)
+                               break;
+               }
 
-                       if (fmatch != FMATCH_OPT_WRONG) {
-                               fmatch = FMATCH_WRONG;
+               if (fmatch != FMATCH_OPT_WRONG) {
+                       fmatch = FMATCH_WRONG;
 
-                               switch (check_WSS) {
-                               case OSF_WSS_PLAIN:
-                                       if (f->wss.val == 0 || window == f->wss.val)
-                                               fmatch = FMATCH_OK;
-                                       break;
-                               case OSF_WSS_MSS:
-                                       /*
-                                        * Some smart modems decrease mangle MSS to 
-                                        * SMART_MSS_2, so we check standard, decreased
-                                        * and the one provided in the fingerprint MSS
-                                        * values.
-                                        */
+                       switch (check_WSS) {
+                       case OSF_WSS_PLAIN:
+                               if (f->wss.val == 0 || window == f->wss.val)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MSS:
+                               /*
+                                * Some smart modems decrease mangle MSS to
+                                * SMART_MSS_2, so we check standard, decreased
+                                * and the one provided in the fingerprint MSS
+                                * values.
+                                */
 #define SMART_MSS_1    1460
 #define SMART_MSS_2    1448
-                                       if (window == f->wss.val * mss ||
-                                           window == f->wss.val * SMART_MSS_1 ||
-                                           window == f->wss.val * SMART_MSS_2)
-                                               fmatch = FMATCH_OK;
-                                       break;
-                               case OSF_WSS_MTU:
-                                       if (window == f->wss.val * (mss + 40) ||
-                                           window == f->wss.val * (SMART_MSS_1 + 40) ||
-                                           window == f->wss.val * (SMART_MSS_2 + 40))
-                                               fmatch = FMATCH_OK;
-                                       break;
-                               case OSF_WSS_MODULO:
-                                       if ((window % f->wss.val) == 0)
-                                               fmatch = FMATCH_OK;
-                                       break;
-                               }
+                               if (window == f->wss.val * mss ||
+                                   window == f->wss.val * SMART_MSS_1 ||
+                                   window == f->wss.val * SMART_MSS_2)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MTU:
+                               if (window == f->wss.val * (mss + 40) ||
+                                   window == f->wss.val * (SMART_MSS_1 + 40) ||
+                                   window == f->wss.val * (SMART_MSS_2 + 40))
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MODULO:
+                               if ((window % f->wss.val) == 0)
+                                       fmatch = FMATCH_OK;
+                               break;
                        }
+               }
 
-                       if (fmatch != FMATCH_OK)
-                               continue;
+               if (fmatch != FMATCH_OK)
+                       continue;
 
-                       fcount++;
+               fcount++;
 
-                       if (info->flags & XT_OSF_LOG)
-                               nf_log_packet(net, p->family, p->hooknum, skb,
-                                       p->in, p->out, NULL,
-                                       "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
-                                       f->genre, f->version, f->subtype,
-                                       &ip->saddr, ntohs(tcp->source),
-                                       &ip->daddr, ntohs(tcp->dest),
-                                       f->ttl - ip->ttl);
+               if (info->flags & XT_OSF_LOG)
+                       nf_log_packet(net, p->family, p->hooknum, skb,
+                                     p->in, p->out, NULL,
+                                     "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
+                                     f->genre, f->version, f->subtype,
+                                     &ip->saddr, ntohs(tcp->source),
+                                     &ip->daddr, ntohs(tcp->dest),
+                                     f->ttl - ip->ttl);
 
-                       if ((info->flags & XT_OSF_LOG) &&
-                           info->loglevel == XT_OSF_LOGLEVEL_FIRST)
-                               break;
-               }
+               if ((info->flags & XT_OSF_LOG) &&
+                   info->loglevel == XT_OSF_LOGLEVEL_FIRST)
+                       break;
        }
        rcu_read_unlock();