Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
authorDavid S. Miller <davem@davemloft.net>
Mon, 6 Jan 2014 18:29:30 +0000 (13:29 -0500)
committerDavid S. Miller <davem@davemloft.net>
Mon, 6 Jan 2014 18:29:30 +0000 (13:29 -0500)
Pablo Neira Ayuso <pablo@netfilter.org> says:

====================
nftables updates for net-next

The following patchset contains nftables updates for your net-next tree,
they are:

* Add set operation to the meta expression by means of the select_ops()
  infrastructure, this allows us to set the packet mark among other things.
  From Arturo Borrero Gonzalez.

* Fix wrong format in sscanf in nf_tables_set_alloc_name(), from Daniel
  Borkmann.

* Add a new queue expression to nf_tables. This comes with two preparatory
  patches: one to add a mask in nf_tables_core to evaluate the queue verdict
  appropriately, and another to refactor common code with xt_NFQUEUE, from
  Eric Leblond.

* Do not hide nftables from Kconfig if nfnetlink is not enabled, also from
  Eric Leblond.

* Add the reject expression to nf_tables, this adds the missing TCP RST
  support. It comes with an initial patch to refactor common code with
  ipt_REJECT and ip6t_REJECT, again from Eric Leblond.

* Remove an unused variable assignment in nf_tables_dump_set(), from Michal
  Nazarewicz.

* Remove the nft_meta_target code, now that Arturo added the set operation
  to the meta expression, from me.

* Add help information for nf_tables to Kconfig, also from me.

* Allow dumping all sets by specifying NFPROTO_UNSPEC; a similar feature is
  already available for other nf_tables objects. Requested by Arturo, from me.

* Expose the table usage counter, so we can know how many chains are using
  this table without dumping the list of chains, from Tomasz Bursztyka.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
19 files changed:
include/net/netfilter/ipv4/nf_reject.h [new file with mode: 0644]
include/net/netfilter/ipv6/nf_reject.h [new file with mode: 0644]
include/net/netfilter/nf_queue.h
include/uapi/linux/netfilter/nf_tables.h
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/nft_reject_ipv4.c [deleted file]
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/ip6t_REJECT.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nft_meta.c
net/netfilter/nft_meta_target.c [deleted file]
net/netfilter/nft_queue.c [new file with mode: 0644]
net/netfilter/nft_reject.c [new file with mode: 0644]
net/netfilter/xt_NFQUEUE.c

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
new file mode 100644 (file)
index 0000000..931fbf8
--- /dev/null
@@ -0,0 +1,128 @@
+#ifndef _IPV4_NF_REJECT_H
+#define _IPV4_NF_REJECT_H
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+
+/* Send an ICMP destination-unreachable message with the given ICMP code in
+ * response to skb_in.  Thin wrapper around icmp_send().
+ */
+static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
+{
+       icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+
+/* Send RST reply
+ *
+ * Build a bare TCP RST (IPv4 header + TCP header, no payload) answering
+ * oldskb and transmit it.  hook is the netfilter hook number; it is only
+ * handed to nf_ip_checksum() when validating the original segment.
+ *
+ * Nothing is sent when the original packet has a non-zero fragment offset,
+ * already carries RST, was routed as broadcast/multicast, or fails TCP
+ * checksum validation.  Allocation and routing failures are silent.
+ */
+static void nf_send_reset(struct sk_buff *oldskb, int hook)
+{
+       struct sk_buff *nskb;
+       const struct iphdr *oiph;
+       struct iphdr *niph;
+       const struct tcphdr *oth;
+       struct tcphdr _otcph, *tcph;
+
+       /* IP header checks: fragment. */
+       if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
+               return;
+
+       oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
+                                sizeof(_otcph), &_otcph);
+       if (oth == NULL)
+               return;
+
+       /* No RST for RST. */
+       if (oth->rst)
+               return;
+
+       if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+               return;
+
+       /* Check checksum */
+       if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
+               return;
+       oiph = ip_hdr(oldskb);
+
+       /* The reply has no payload: link-layer headroom plus the two
+        * fixed-size headers is all that is needed.
+        */
+       nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+                        LL_MAX_HEADER, GFP_ATOMIC);
+       if (!nskb)
+               return;
+
+       skb_reserve(nskb, LL_MAX_HEADER);
+
+       /* IPv4 header: addresses are the original ones swapped. */
+       skb_reset_network_header(nskb);
+       niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+       niph->version   = 4;
+       niph->ihl       = sizeof(struct iphdr) / 4;
+       niph->tos       = 0;
+       niph->id        = 0;
+       niph->frag_off  = htons(IP_DF);
+       niph->protocol  = IPPROTO_TCP;
+       niph->check     = 0;
+       niph->saddr     = oiph->daddr;
+       niph->daddr     = oiph->saddr;
+
+       /* TCP header: zeroed first, ports are the original ones swapped. */
+       skb_reset_transport_header(nskb);
+       tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+       memset(tcph, 0, sizeof(*tcph));
+       tcph->source    = oth->dest;
+       tcph->dest      = oth->source;
+       tcph->doff      = sizeof(struct tcphdr) / 4;
+
+       /* If the original segment carried an ACK, reuse its ack_seq as our
+        * sequence number.  Otherwise acknowledge everything the original
+        * segment occupied: its sequence number plus one each for SYN and
+        * FIN plus its payload length.
+        */
+       if (oth->ack)
+               tcph->seq = oth->ack_seq;
+       else {
+               tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+                                     oldskb->len - ip_hdrlen(oldskb) -
+                                     (oth->doff << 2));
+               tcph->ack = 1;
+       }
+
+       tcph->rst       = 1;
+       /* Only the pseudo-header sum is stored here; the skb is marked
+        * CHECKSUM_PARTIAL with csum_start/csum_offset pointing at the TCP
+        * checksum field.
+        */
+       tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
+                                   niph->daddr, 0);
+       nskb->ip_summed = CHECKSUM_PARTIAL;
+       nskb->csum_start = (unsigned char *)tcph - nskb->head;
+       nskb->csum_offset = offsetof(struct tcphdr, check);
+
+       /* ip_route_me_harder expects skb->dst to be set */
+       skb_dst_set_noref(nskb, skb_dst(oldskb));
+
+       nskb->protocol = htons(ETH_P_IP);
+       if (ip_route_me_harder(nskb, RTN_UNSPEC))
+               goto free_nskb;
+
+       niph->ttl       = ip4_dst_hoplimit(skb_dst(nskb));
+
+       /* "Never happens" */
+       if (nskb->len > dst_mtu(skb_dst(nskb)))
+               goto free_nskb;
+
+       nf_ct_attach(nskb, oldskb);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+       /* If we use ip_local_out for bridged traffic, the MAC source on
+        * the RST will be ours, instead of the destination's.  This confuses
+        * some routers/firewalls, and they drop the packet.  So we need to
+        * build the eth header using the original destination's MAC as the
+        * source, and send the RST packet directly.
+        *
+        * NOTE(review): tot_len and the IP header checksum are filled in
+        * only on this path; presumably ip_local_out() takes care of them
+        * on the other one -- confirm.
+        */
+       if (oldskb->nf_bridge) {
+               struct ethhdr *oeth = eth_hdr(oldskb);
+               nskb->dev = oldskb->nf_bridge->physindev;
+               niph->tot_len = htons(nskb->len);
+               ip_send_check(niph);
+               if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+                                   oeth->h_source, oeth->h_dest, nskb->len) < 0)
+                       goto free_nskb;
+               dev_queue_xmit(nskb);
+       } else
+#endif
+               ip_local_out(nskb);
+
+       return;
+
+ free_nskb:
+       kfree_skb(nskb);
+}
+
+
+#endif /* _IPV4_NF_REJECT_H */
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
new file mode 100644 (file)
index 0000000..710d17e
--- /dev/null
@@ -0,0 +1,171 @@
+#ifndef _IPV6_NF_REJECT_H
+#define _IPV6_NF_REJECT_H
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter_ipv6.h>
+
+/* Send an ICMPv6 destination-unreachable message with the given code in
+ * response to skb_in.  For a packet seen at NF_INET_LOCAL_OUT that has no
+ * device set, skb_in->dev is first pointed at the loopback device of net.
+ */
+static inline void
+nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
+            unsigned int hooknum)
+{
+       if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+               skb_in->dev = net->loopback_dev;
+
+       icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+}
+
+/* Send RST reply
+ *
+ * Build a bare TCP RST (IPv6 header + TCP header, no payload) answering
+ * oldskb and transmit it.  net is the namespace used for the route and xfrm
+ * lookups; hook is the netfilter hook number and is only handed to
+ * nf_ip6_checksum() when validating the original segment.
+ *
+ * Nothing is sent when either address of the original packet is not
+ * unicast, when no complete TCP header can be located behind the extension
+ * headers, when the original segment already carries RST, or when its
+ * checksum does not verify.  Routing and allocation failures are silent.
+ */
+static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
+{
+       struct sk_buff *nskb;
+       struct tcphdr otcph, *tcph;
+       unsigned int otcplen, hh_len;
+       int tcphoff, needs_ack;
+       const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
+       struct ipv6hdr *ip6h;
+#define DEFAULT_TOS_VALUE      0x0U
+       const __u8 tclass = DEFAULT_TOS_VALUE;
+       struct dst_entry *dst = NULL;
+       u8 proto;
+       __be16 frag_off;
+       struct flowi6 fl6;
+
+       if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
+           (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
+               pr_debug("addr is not unicast.\n");
+               return;
+       }
+
+       /* Walk the extension headers to find the transport header. */
+       proto = oip6h->nexthdr;
+       tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
+
+       if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
+               pr_debug("Cannot get TCP header.\n");
+               return;
+       }
+
+       otcplen = oldskb->len - tcphoff;
+
+       /* IP header checks: fragment, too short. */
+       if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
+               pr_debug("proto(%d) != IPPROTO_TCP, "
+                        "or too short. otcplen = %d\n",
+                        proto, otcplen);
+               return;
+       }
+
+       /* The length check above guarantees a full TCP header is present. */
+       if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
+               BUG();
+
+       /* No RST for RST. */
+       if (otcph.rst) {
+               pr_debug("RST is set\n");
+               return;
+       }
+
+       /* Check checksum. */
+       if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
+               pr_debug("TCP checksum is invalid\n");
+               return;
+       }
+
+       /* Route the reply: addresses and ports are the original ones swapped. */
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_proto = IPPROTO_TCP;
+       fl6.saddr = oip6h->daddr;
+       fl6.daddr = oip6h->saddr;
+       fl6.fl6_sport = otcph.dest;
+       fl6.fl6_dport = otcph.source;
+       security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
+       dst = ip6_route_output(net, NULL, &fl6);
+       if (dst == NULL || dst->error) {
+               dst_release(dst);
+               return;
+       }
+       dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+       if (IS_ERR(dst))
+               return;
+
+       hh_len = (dst->dev->hard_header_len + 15)&~15;
+       nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+                        + sizeof(struct tcphdr) + dst->trailer_len,
+                        GFP_ATOMIC);
+
+       if (!nskb) {
+               net_dbg_ratelimited("cannot alloc skb\n");
+               dst_release(dst);
+               return;
+       }
+
+       /* From here on the dst reference belongs to nskb. */
+       skb_dst_set(nskb, dst);
+
+       skb_reserve(nskb, hh_len + dst->header_len);
+
+       skb_put(nskb, sizeof(struct ipv6hdr));
+       skb_reset_network_header(nskb);
+       ip6h = ipv6_hdr(nskb);
+       ip6_flow_hdr(ip6h, tclass, 0);
+       ip6h->hop_limit = ip6_dst_hoplimit(dst);
+       ip6h->nexthdr = IPPROTO_TCP;
+       ip6h->saddr = oip6h->daddr;
+       ip6h->daddr = oip6h->saddr;
+
+       skb_reset_transport_header(nskb);
+       tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+       /* Truncate to length (no data) */
+       tcph->doff = sizeof(struct tcphdr)/4;
+       tcph->source = otcph.dest;
+       tcph->dest = otcph.source;
+
+       /* If the original segment carried an ACK, reuse its ack_seq as our
+        * sequence number.  Otherwise acknowledge everything the original
+        * segment occupied: its sequence number plus one each for SYN and
+        * FIN plus its payload length.
+        */
+       if (otcph.ack) {
+               needs_ack = 0;
+               tcph->seq = otcph.ack_seq;
+               tcph->ack_seq = 0;
+       } else {
+               needs_ack = 1;
+               tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+                                     + otcplen - (otcph.doff<<2));
+               tcph->seq = 0;
+       }
+
+       /* Reset flags */
+       ((u_int8_t *)tcph)[13] = 0;
+       tcph->rst = 1;
+       tcph->ack = needs_ack;
+       tcph->window = 0;
+       tcph->urg_ptr = 0;
+       tcph->check = 0;
+
+       /* Adjust TCP checksum */
+       tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+                                     &ipv6_hdr(nskb)->daddr,
+                                     sizeof(struct tcphdr), IPPROTO_TCP,
+                                     csum_partial(tcph,
+                                                  sizeof(struct tcphdr), 0));
+
+       nf_ct_attach(nskb, oldskb);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+       /* If we use ip6_local_out for bridged traffic, the MAC source on
+        * the RST will be ours, instead of the destination's.  This confuses
+        * some routers/firewalls, and they drop the packet.  So we need to
+        * build the eth header using the original destination's MAC as the
+        * source, and send the RST packet directly.
+        */
+       if (oldskb->nf_bridge) {
+               struct ethhdr *oeth = eth_hdr(oldskb);
+               nskb->dev = oldskb->nf_bridge->physindev;
+               nskb->protocol = htons(ETH_P_IPV6);
+               ip6h->payload_len = htons(sizeof(struct tcphdr));
+               if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+                                   oeth->h_source, oeth->h_dest, nskb->len) < 0) {
+                       /* nskb was never handed off, so it must be freed
+                        * here, as the IPv4 variant does; this also drops
+                        * the dst reference attached above.
+                        */
+                       kfree_skb(nskb);
+                       return;
+               }
+               dev_queue_xmit(nskb);
+       } else
+#endif
+               ip6_local_out(nskb);
+}
+
+#endif /* _IPV6_NF_REJECT_H */
index c1d5b3e..84a53d7 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef _NF_QUEUE_H
 #define _NF_QUEUE_H
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
 /* Each queued (to userspace) skbuff has one of these. */
 struct nf_queue_entry {
        struct list_head        list;
@@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
 bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
 void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
 
+/* Seed *jhash_initval with a random non-zero value if it is still zero.
+ * A value that is already non-zero is left untouched.
+ */
+static inline void init_hashrandom(u32 *jhash_initval)
+{
+       if (*jhash_initval != 0)
+               return;
+
+       /* Redraw until the value is non-zero: zero means "not seeded yet". */
+       do {
+               *jhash_initval = prandom_u32();
+       } while (*jhash_initval == 0);
+}
+
+/* Hash the IPv4 address pair and protocol of skb, seeded by jhash_initval.
+ * The numerically smaller address always goes first, so packets in either
+ * direction go into the same queue.
+ */
+static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
+{
+       const struct iphdr *hdr = ip_hdr(skb);
+       const u32 src = (__force u32)hdr->saddr;
+       const u32 dst = (__force u32)hdr->daddr;
+       const u32 lo = src < dst ? src : dst;
+       const u32 hi = src < dst ? dst : src;
+
+       return jhash_3words(lo, hi, hdr->protocol, jhash_initval);
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+/* Hash the IPv6 address pair of skb, seeded by jhash_initval.  Only 32-bit
+ * words 3 and 1 of each address are used: the smaller and larger of the two
+ * word-3 values, then the smaller of the two word-1 values.  The result is
+ * therefore the same for both directions of a flow.
+ */
+static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
+{
+       const struct ipv6hdr *hdr = ipv6_hdr(skb);
+       const u32 src_w3 = (__force u32)hdr->saddr.s6_addr32[3];
+       const u32 dst_w3 = (__force u32)hdr->daddr.s6_addr32[3];
+       const u32 src_w1 = (__force u32)hdr->saddr.s6_addr32[1];
+       const u32 dst_w1 = (__force u32)hdr->daddr.s6_addr32[1];
+       const u32 lo_w3 = src_w3 < dst_w3 ? src_w3 : dst_w3;
+       const u32 hi_w3 = src_w3 < dst_w3 ? dst_w3 : src_w3;
+       const u32 lo_w1 = src_w1 < dst_w1 ? src_w1 : dst_w1;
+
+       return jhash_3words(lo_w3, hi_w3, lo_w1, jhash_initval);
+}
+#endif
+
+/* Pick a queue number for skb in the range starting at queue.
+ *
+ * For IPv4 (and IPv6 when CONFIG_IP6_NF_IPTABLES is enabled) the 32-bit flow
+ * hash is scaled into [0, queues_total) and added to queue.  For any other
+ * family queue is returned unchanged.
+ */
+static inline u32
+nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
+            u32 jhash_initval)
+{
+       switch (family) {
+       case NFPROTO_IPV4:
+               queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
+               break;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+       case NFPROTO_IPV6:
+               queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
+               break;
+#endif
+       default:
+               break;
+       }
+
+       return queue;
+}
+
 #endif /* _NF_QUEUE_H */
index fbfd229..aa86a15 100644 (file)
@@ -110,11 +110,13 @@ enum nft_table_flags {
  *
  * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
  * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
+ * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
  */
 enum nft_table_attributes {
        NFTA_TABLE_UNSPEC,
        NFTA_TABLE_NAME,
        NFTA_TABLE_FLAGS,
+       NFTA_TABLE_USE,
        __NFTA_TABLE_MAX
 };
 #define NFTA_TABLE_MAX         (__NFTA_TABLE_MAX - 1)
@@ -553,11 +555,13 @@ enum nft_meta_keys {
  *
  * @NFTA_META_DREG: destination register (NLA_U32)
  * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
+ * @NFTA_META_SREG: source register (NLA_U32)
  */
 enum nft_meta_attributes {
        NFTA_META_UNSPEC,
        NFTA_META_DREG,
        NFTA_META_KEY,
+       NFTA_META_SREG,
        __NFTA_META_MAX
 };
 #define NFTA_META_MAX          (__NFTA_META_MAX - 1)
@@ -657,6 +661,26 @@ enum nft_log_attributes {
 };
 #define NFTA_LOG_MAX           (__NFTA_LOG_MAX - 1)
 
+/**
+ * enum nft_queue_attributes - nf_tables queue expression netlink attributes
+ *
+ * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
+ * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
+ * @NFTA_QUEUE_FLAGS: flags for the queue expression, presumably a bitmask of
+ *     the NFT_QUEUE_FLAG_* values (NLA_U16)
+ */
+enum nft_queue_attributes {
+       NFTA_QUEUE_UNSPEC,
+       NFTA_QUEUE_NUM,
+       NFTA_QUEUE_TOTAL,
+       NFTA_QUEUE_FLAGS,
+       __NFTA_QUEUE_MAX
+};
+#define NFTA_QUEUE_MAX         (__NFTA_QUEUE_MAX - 1)
+
+#define NFT_QUEUE_FLAG_BYPASS          0x01 /* for compatibility with v2 */
+#define NFT_QUEUE_FLAG_CPU_FANOUT      0x02 /* use current CPU (no hashing) */
+#define NFT_QUEUE_FLAG_MASK            0x03
+
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
  *
index 40d5607..81c6910 100644 (file)
@@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT
 config NF_TABLES_IPV4
        depends on NF_TABLES
        tristate "IPv4 nf_tables support"
-
-config NFT_REJECT_IPV4
-       depends on NF_TABLES_IPV4
-       tristate "nf_tables IPv4 reject support"
+       help
+         This option enables the IPv4 support for nf_tables.
 
 config NFT_CHAIN_ROUTE_IPV4
        depends on NF_TABLES_IPV4
        tristate "IPv4 nf_tables route chain support"
+       help
+         This option enables the "route" chain for IPv4 in nf_tables. This
+         chain type is used to force packet re-routing after mangling header
+         fields such as the source, destination, type of service and
+         the packet mark.
 
 config NFT_CHAIN_NAT_IPV4
        depends on NF_TABLES_IPV4
        depends on NF_NAT_IPV4 && NFT_NAT
        tristate "IPv4 nf_tables nat chain support"
+       help
+         This option enables the "nat" chain for IPv4 in nf_tables. This
+         chain type is used to perform Network Address Translation (NAT)
+         packet transformations such as the source, destination address and
+         source and destination ports.
 
 config NF_TABLES_ARP
        depends on NF_TABLES
        tristate "ARP nf_tables support"
+       help
+         This option enables the ARP support for nf_tables.
 
 config IP_NF_IPTABLES
        tristate "IP tables support (required for filtering/masq/NAT)"
index 19df72b..c16be9d 100644 (file)
@@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
-obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
index b969131..5b6e0df 100644 (file)
 #include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
 #include <linux/netfilter_bridge.h>
 #endif
 
+#include <net/netfilter/ipv4/nf_reject.h>
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
 
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
-{
-       struct sk_buff *nskb;
-       const struct iphdr *oiph;
-       struct iphdr *niph;
-       const struct tcphdr *oth;
-       struct tcphdr _otcph, *tcph;
-
-       /* IP header checks: fragment. */
-       if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
-               return;
-
-       oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
-                                sizeof(_otcph), &_otcph);
-       if (oth == NULL)
-               return;
-
-       /* No RST for RST. */
-       if (oth->rst)
-               return;
-
-       if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
-               return;
-
-       /* Check checksum */
-       if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
-               return;
-       oiph = ip_hdr(oldskb);
-
-       nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
-                        LL_MAX_HEADER, GFP_ATOMIC);
-       if (!nskb)
-               return;
-
-       skb_reserve(nskb, LL_MAX_HEADER);
-
-       skb_reset_network_header(nskb);
-       niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
-       niph->version   = 4;
-       niph->ihl       = sizeof(struct iphdr) / 4;
-       niph->tos       = 0;
-       niph->id        = 0;
-       niph->frag_off  = htons(IP_DF);
-       niph->protocol  = IPPROTO_TCP;
-       niph->check     = 0;
-       niph->saddr     = oiph->daddr;
-       niph->daddr     = oiph->saddr;
-
-       skb_reset_transport_header(nskb);
-       tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-       memset(tcph, 0, sizeof(*tcph));
-       tcph->source    = oth->dest;
-       tcph->dest      = oth->source;
-       tcph->doff      = sizeof(struct tcphdr) / 4;
-
-       if (oth->ack)
-               tcph->seq = oth->ack_seq;
-       else {
-               tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
-                                     oldskb->len - ip_hdrlen(oldskb) -
-                                     (oth->doff << 2));
-               tcph->ack = 1;
-       }
-
-       tcph->rst       = 1;
-       tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
-                                   niph->daddr, 0);
-       nskb->ip_summed = CHECKSUM_PARTIAL;
-       nskb->csum_start = (unsigned char *)tcph - nskb->head;
-       nskb->csum_offset = offsetof(struct tcphdr, check);
-
-       /* ip_route_me_harder expects skb->dst to be set */
-       skb_dst_set_noref(nskb, skb_dst(oldskb));
-
-       nskb->protocol = htons(ETH_P_IP);
-       if (ip_route_me_harder(nskb, RTN_UNSPEC))
-               goto free_nskb;
-
-       niph->ttl       = ip4_dst_hoplimit(skb_dst(nskb));
-
-       /* "Never happens" */
-       if (nskb->len > dst_mtu(skb_dst(nskb)))
-               goto free_nskb;
-
-       nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-       /* If we use ip_local_out for bridged traffic, the MAC source on
-        * the RST will be ours, instead of the destination's.  This confuses
-        * some routers/firewalls, and they drop the packet.  So we need to
-        * build the eth header using the original destination's MAC as the
-        * source, and send the RST packet directly.
-        */
-       if (oldskb->nf_bridge) {
-               struct ethhdr *oeth = eth_hdr(oldskb);
-               nskb->dev = oldskb->nf_bridge->physindev;
-               niph->tot_len = htons(nskb->len);
-               ip_send_check(niph);
-               if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-                                   oeth->h_source, oeth->h_dest, nskb->len) < 0)
-                       goto free_nskb;
-               dev_queue_xmit(nskb);
-       } else
-#endif
-               ip_local_out(nskb);
-
-       return;
-
- free_nskb:
-       kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
-       icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
 static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
        switch (reject->with) {
        case IPT_ICMP_NET_UNREACHABLE:
-               send_unreach(skb, ICMP_NET_UNREACH);
+               nf_send_unreach(skb, ICMP_NET_UNREACH);
                break;
        case IPT_ICMP_HOST_UNREACHABLE:
-               send_unreach(skb, ICMP_HOST_UNREACH);
+               nf_send_unreach(skb, ICMP_HOST_UNREACH);
                break;
        case IPT_ICMP_PROT_UNREACHABLE:
-               send_unreach(skb, ICMP_PROT_UNREACH);
+               nf_send_unreach(skb, ICMP_PROT_UNREACH);
                break;
        case IPT_ICMP_PORT_UNREACHABLE:
-               send_unreach(skb, ICMP_PORT_UNREACH);
+               nf_send_unreach(skb, ICMP_PORT_UNREACH);
                break;
        case IPT_ICMP_NET_PROHIBITED:
-               send_unreach(skb, ICMP_NET_ANO);
+               nf_send_unreach(skb, ICMP_NET_ANO);
                break;
        case IPT_ICMP_HOST_PROHIBITED:
-               send_unreach(skb, ICMP_HOST_ANO);
+               nf_send_unreach(skb, ICMP_HOST_ANO);
                break;
        case IPT_ICMP_ADMIN_PROHIBITED:
-               send_unreach(skb, ICMP_PKT_FILTERED);
+               nf_send_unreach(skb, ICMP_PKT_FILTERED);
                break;
        case IPT_TCP_RESET:
-               send_reset(skb, par->hooknum);
+               nf_send_reset(skb, par->hooknum);
        case IPT_ICMP_ECHOREPLY:
                /* Doesn't happen. */
                break;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
deleted file mode 100644 (file)
index 4a5e94a..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
-
-struct nft_reject {
-       enum nft_reject_types   type:8;
-       u8                      icmp_code;
-};
-
-static void nft_reject_eval(const struct nft_expr *expr,
-                             struct nft_data data[NFT_REG_MAX + 1],
-                             const struct nft_pktinfo *pkt)
-{
-       struct nft_reject *priv = nft_expr_priv(expr);
-
-       switch (priv->type) {
-       case NFT_REJECT_ICMP_UNREACH:
-               icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
-               break;
-       case NFT_REJECT_TCP_RST:
-               break;
-       }
-
-       data[NFT_REG_VERDICT].verdict = NF_DROP;
-}
-
-static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
-       [NFTA_REJECT_TYPE]              = { .type = NLA_U32 },
-       [NFTA_REJECT_ICMP_CODE]         = { .type = NLA_U8 },
-};
-
-static int nft_reject_init(const struct nft_ctx *ctx,
-                          const struct nft_expr *expr,
-                          const struct nlattr * const tb[])
-{
-       struct nft_reject *priv = nft_expr_priv(expr);
-
-       if (tb[NFTA_REJECT_TYPE] == NULL)
-               return -EINVAL;
-
-       priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
-       switch (priv->type) {
-       case NFT_REJECT_ICMP_UNREACH:
-               if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
-                       return -EINVAL;
-               priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
-       case NFT_REJECT_TCP_RST:
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
-{
-       const struct nft_reject *priv = nft_expr_priv(expr);
-
-       if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
-               goto nla_put_failure;
-
-       switch (priv->type) {
-       case NFT_REJECT_ICMP_UNREACH:
-               if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
-                       goto nla_put_failure;
-               break;
-       }
-
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static struct nft_expr_type nft_reject_type;
-static const struct nft_expr_ops nft_reject_ops = {
-       .type           = &nft_reject_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
-       .eval           = nft_reject_eval,
-       .init           = nft_reject_init,
-       .dump           = nft_reject_dump,
-};
-
-static struct nft_expr_type nft_reject_type __read_mostly = {
-       .name           = "reject",
-       .ops            = &nft_reject_ops,
-       .policy         = nft_reject_policy,
-       .maxattr        = NFTA_REJECT_MAX,
-       .owner          = THIS_MODULE,
-};
-
-static int __init nft_reject_module_init(void)
-{
-       return nft_register_expr(&nft_reject_type);
-}
-
-static void __exit nft_reject_module_exit(void)
-{
-       nft_unregister_expr(&nft_reject_type);
-}
-
-module_init(nft_reject_module_init);
-module_exit(nft_reject_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("reject");
index 7702f9e..35750df 100644 (file)
@@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6
 config NF_TABLES_IPV6
        depends on NF_TABLES
        tristate "IPv6 nf_tables support"
+       help
+         This option enables the IPv6 support for nf_tables.
 
 config NFT_CHAIN_ROUTE_IPV6
        depends on NF_TABLES_IPV6
        tristate "IPv6 nf_tables route chain support"
+       help
+         This option enables the "route" chain for IPv6 in nf_tables. This
+         chain type is used to force packet re-routing after mangling header
+         fields such as the source, destination, flowlabel, hop-limit and
+         the packet mark.
 
 config NFT_CHAIN_NAT_IPV6
        depends on NF_TABLES_IPV6
        depends on NF_NAT_IPV6 && NFT_NAT
        tristate "IPv6 nf_tables nat chain support"
+       help
+         This option enables the "nat" chain for IPv6 in nf_tables. This
+         chain type is used to perform Network Address Translation (NAT)
+         packet transformations such as rewriting the source and destination
+         addresses and the source and destination ports.
 
 config IP6_NF_IPTABLES
        tristate "IP6 tables support (required for filtering)"
index da00a2e..544b0a9 100644 (file)
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
 #include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/tcp.h>
 #include <net/icmp.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
 #include <net/flow.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
 
+#include <net/netfilter/ipv6/nf_reject.h>
+
 MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
-/* Send RST reply */
-static void send_reset(struct net *net, struct sk_buff *oldskb, int hook)
-{
-       struct sk_buff *nskb;
-       struct tcphdr otcph, *tcph;
-       unsigned int otcplen, hh_len;
-       int tcphoff, needs_ack;
-       const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
-       struct ipv6hdr *ip6h;
-#define DEFAULT_TOS_VALUE      0x0U
-       const __u8 tclass = DEFAULT_TOS_VALUE;
-       struct dst_entry *dst = NULL;
-       u8 proto;
-       __be16 frag_off;
-       struct flowi6 fl6;
-
-       if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
-           (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
-               pr_debug("addr is not unicast.\n");
-               return;
-       }
-
-       proto = oip6h->nexthdr;
-       tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
-
-       if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
-               pr_debug("Cannot get TCP header.\n");
-               return;
-       }
-
-       otcplen = oldskb->len - tcphoff;
-
-       /* IP header checks: fragment, too short. */
-       if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
-               pr_debug("proto(%d) != IPPROTO_TCP, "
-                        "or too short. otcplen = %d\n",
-                        proto, otcplen);
-               return;
-       }
-
-       if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
-               BUG();
-
-       /* No RST for RST. */
-       if (otcph.rst) {
-               pr_debug("RST is set\n");
-               return;
-       }
-
-       /* Check checksum. */
-       if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
-               pr_debug("TCP checksum is invalid\n");
-               return;
-       }
-
-       memset(&fl6, 0, sizeof(fl6));
-       fl6.flowi6_proto = IPPROTO_TCP;
-       fl6.saddr = oip6h->daddr;
-       fl6.daddr = oip6h->saddr;
-       fl6.fl6_sport = otcph.dest;
-       fl6.fl6_dport = otcph.source;
-       security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
-       dst = ip6_route_output(net, NULL, &fl6);
-       if (dst == NULL || dst->error) {
-               dst_release(dst);
-               return;
-       }
-       dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
-       if (IS_ERR(dst))
-               return;
-
-       hh_len = (dst->dev->hard_header_len + 15)&~15;
-       nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
-                        + sizeof(struct tcphdr) + dst->trailer_len,
-                        GFP_ATOMIC);
-
-       if (!nskb) {
-               net_dbg_ratelimited("cannot alloc skb\n");
-               dst_release(dst);
-               return;
-       }
-
-       skb_dst_set(nskb, dst);
-
-       skb_reserve(nskb, hh_len + dst->header_len);
-
-       skb_put(nskb, sizeof(struct ipv6hdr));
-       skb_reset_network_header(nskb);
-       ip6h = ipv6_hdr(nskb);
-       ip6_flow_hdr(ip6h, tclass, 0);
-       ip6h->hop_limit = ip6_dst_hoplimit(dst);
-       ip6h->nexthdr = IPPROTO_TCP;
-       ip6h->saddr = oip6h->daddr;
-       ip6h->daddr = oip6h->saddr;
-
-       skb_reset_transport_header(nskb);
-       tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-       /* Truncate to length (no data) */
-       tcph->doff = sizeof(struct tcphdr)/4;
-       tcph->source = otcph.dest;
-       tcph->dest = otcph.source;
-
-       if (otcph.ack) {
-               needs_ack = 0;
-               tcph->seq = otcph.ack_seq;
-               tcph->ack_seq = 0;
-       } else {
-               needs_ack = 1;
-               tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
-                                     + otcplen - (otcph.doff<<2));
-               tcph->seq = 0;
-       }
-
-       /* Reset flags */
-       ((u_int8_t *)tcph)[13] = 0;
-       tcph->rst = 1;
-       tcph->ack = needs_ack;
-       tcph->window = 0;
-       tcph->urg_ptr = 0;
-       tcph->check = 0;
-
-       /* Adjust TCP checksum */
-       tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
-                                     &ipv6_hdr(nskb)->daddr,
-                                     sizeof(struct tcphdr), IPPROTO_TCP,
-                                     csum_partial(tcph,
-                                                  sizeof(struct tcphdr), 0));
-
-       nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-       /* If we use ip6_local_out for bridged traffic, the MAC source on
-        * the RST will be ours, instead of the destination's.  This confuses
-        * some routers/firewalls, and they drop the packet.  So we need to
-        * build the eth header using the original destination's MAC as the
-        * source, and send the RST packet directly.
-        */
-       if (oldskb->nf_bridge) {
-               struct ethhdr *oeth = eth_hdr(oldskb);
-               nskb->dev = oldskb->nf_bridge->physindev;
-               nskb->protocol = htons(ETH_P_IPV6);
-               ip6h->payload_len = htons(sizeof(struct tcphdr));
-               if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-                                   oeth->h_source, oeth->h_dest, nskb->len) < 0)
-                       return;
-               dev_queue_xmit(nskb);
-       } else
-#endif
-               ip6_local_out(nskb);
-}
-
-static inline void
-send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
-            unsigned int hooknum)
-{
-       if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
-               skb_in->dev = net->loopback_dev;
-
-       icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
 
 static unsigned int
 reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
        pr_debug("%s: medium point\n", __func__);
        switch (reject->with) {
        case IP6T_ICMP6_NO_ROUTE:
-               send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
                break;
        case IP6T_ICMP6_ADM_PROHIBITED:
-               send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
                break;
        case IP6T_ICMP6_NOT_NEIGHBOUR:
-               send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
                break;
        case IP6T_ICMP6_ADDR_UNREACH:
-               send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
                break;
        case IP6T_ICMP6_PORT_UNREACH:
-               send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
                break;
        case IP6T_ICMP6_ECHOREPLY:
                /* Do nothing */
                break;
        case IP6T_TCP_RESET:
-               send_reset(net, skb, par->hooknum);
+               nf_send_reset6(net, skb, par->hooknum);
                break;
        default:
                net_info_ratelimited("case %u not handled yet\n", reject->with);
index c17902c..c3b3b26 100644 (file)
@@ -414,47 +414,104 @@ config NETFILTER_SYNPROXY
 endif # NF_CONNTRACK
 
 config NF_TABLES
-       depends on NETFILTER_NETLINK
+       select NETFILTER_NETLINK
        tristate "Netfilter nf_tables support"
+       help
+         nftables is the new packet classification framework that intends to
+         replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
+         provides a pseudo-state machine with an extensible instruction-set
+         (also known as expressions) that the userspace 'nft' utility
+         (http://www.netfilter.org/projects/nftables) uses to build the
+         rule-set. It also comes with the generic set infrastructure that
+         allows you to construct mappings between matchings and actions
+         for performance lookups.
+
+         To compile it as a module, choose M here.
 
 config NFT_EXTHDR
        depends on NF_TABLES
        tristate "Netfilter nf_tables IPv6 exthdr module"
+       help
+         This option adds the "exthdr" expression that you can use to match
+         IPv6 extension headers.
 
 config NFT_META
        depends on NF_TABLES
        tristate "Netfilter nf_tables meta module"
+       help
+         This option adds the "meta" expression that you can use to match and
+         to set packet metainformation such as the packet mark.
 
 config NFT_CT
        depends on NF_TABLES
        depends on NF_CONNTRACK
        tristate "Netfilter nf_tables conntrack module"
+       help
+         This option adds the "ct" expression that you can use to match
+         connection tracking information such as the flow state.
 
 config NFT_RBTREE
        depends on NF_TABLES
        tristate "Netfilter nf_tables rbtree set module"
+       help
+         This option adds the "rbtree" set type (Red Black tree) that is used
+         to build interval-based sets.
 
 config NFT_HASH
        depends on NF_TABLES
        tristate "Netfilter nf_tables hash set module"
+       help
+         This option adds the "hash" set type that is used to build one-way
+         mappings between matchings and actions.
 
 config NFT_COUNTER
        depends on NF_TABLES
        tristate "Netfilter nf_tables counter module"
+       help
+         This option adds the "counter" expression that you can use to
+         include packet and byte counters in a rule.
 
 config NFT_LOG
        depends on NF_TABLES
        tristate "Netfilter nf_tables log module"
+       help
+         This option adds the "log" expression that you can use to log
+         packets matching some criteria.
 
 config NFT_LIMIT
        depends on NF_TABLES
        tristate "Netfilter nf_tables limit module"
+       help
+         This option adds the "limit" expression that you can use to
+         ratelimit rule matchings.
 
 config NFT_NAT
        depends on NF_TABLES
        depends on NF_CONNTRACK
        depends on NF_NAT
        tristate "Netfilter nf_tables nat module"
+       help
+         This option adds the "nat" expression that you can use to perform
+         typical Network Address Translation (NAT) packet transformations.
+
+config NFT_QUEUE
+       depends on NF_TABLES
+       depends on NETFILTER_XTABLES
+       depends on NETFILTER_NETLINK_QUEUE
+       tristate "Netfilter nf_tables queue module"
+       help
+         This is required if you intend to use the userspace queueing
+         infrastructure (also known as NFQUEUE) from nftables.
+
+config NFT_REJECT
+       depends on NF_TABLES
+       depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
+       default m if NETFILTER_ADVANCED=n
+       tristate "Netfilter nf_tables reject support"
+       help
+         This option adds the "reject" expression that you can use to
+         explicitly deny unallowed traffic and notify the sender via TCP
+         reset/ICMP informational errors.
 
 config NFT_COMPAT
        depends on NF_TABLES
index 407fc23..78b4e1c 100644 (file)
@@ -76,7 +76,8 @@ obj-$(CONFIG_NFT_META)                += nft_meta.o
 obj-$(CONFIG_NFT_CT)           += nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)                += nft_limit.o
 obj-$(CONFIG_NFT_NAT)          += nft_nat.o
-#nf_tables-objs                        += nft_meta_target.o
+obj-$(CONFIG_NFT_QUEUE)                += nft_queue.o
+obj-$(CONFIG_NFT_REJECT)       += nft_reject.o
 obj-$(CONFIG_NFT_RBTREE)       += nft_rbtree.o
 obj-$(CONFIG_NFT_HASH)         += nft_hash.o
 obj-$(CONFIG_NFT_COUNTER)      += nft_counter.o
index f93b7d0..629b6da 100644 (file)
@@ -180,7 +180,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
        nfmsg->res_id           = 0;
 
        if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
-           nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
+           nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+           nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
                goto nla_put_failure;
 
        return nlmsg_end(skb, nlh);
@@ -1923,12 +1924,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
 {
        struct net *net = sock_net(skb->sk);
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-       const struct nft_af_info *afi;
+       const struct nft_af_info *afi = NULL;
        const struct nft_table *table = NULL;
 
-       afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
-       if (IS_ERR(afi))
-               return PTR_ERR(afi);
+       if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
+               afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+               if (IS_ERR(afi))
+                       return PTR_ERR(afi);
+       }
 
        if (nla[NFTA_SET_TABLE] != NULL) {
                table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
@@ -1973,11 +1976,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
                        return -ENOMEM;
 
                list_for_each_entry(i, &ctx->table->sets, list) {
-                       if (!sscanf(i->name, name, &n))
+                       int tmp;
+
+                       if (!sscanf(i->name, name, &tmp))
                                continue;
-                       if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
+                       if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
                                continue;
-                       set_bit(n, inuse);
+
+                       set_bit(tmp, inuse);
                }
 
                n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
@@ -2094,8 +2100,8 @@ done:
        return skb->len;
 }
 
-static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
-                                  struct netlink_callback *cb)
+static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
+                                     struct netlink_callback *cb)
 {
        const struct nft_set *set;
        unsigned int idx = 0, s_idx = cb->args[0];
@@ -2127,6 +2133,61 @@ done:
        return skb->len;
 }
 
+static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       const struct nft_set *set;
+       unsigned int idx, s_idx = cb->args[0];
+       const struct nft_af_info *afi;
+       struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+       struct net *net = sock_net(skb->sk);
+       int cur_family = cb->args[3];
+
+       if (cb->args[1])
+               return skb->len;
+
+       list_for_each_entry(afi, &net->nft.af_info, list) {
+               if (cur_family) {
+                       if (afi->family != cur_family)
+                               continue;
+
+                       cur_family = 0;
+               }
+
+               list_for_each_entry(table, &afi->tables, list) {
+                       if (cur_table) {
+                               if (cur_table != table)
+                                       continue;
+
+                               cur_table = NULL;
+                       }
+
+                       ctx->table = table;
+                       ctx->afi = afi;
+                       idx = 0;
+                       list_for_each_entry(set, &ctx->table->sets, list) {
+                               if (idx < s_idx)
+                                       goto cont;
+                               if (nf_tables_fill_set(skb, ctx, set,
+                                                      NFT_MSG_NEWSET,
+                                                      NLM_F_MULTI) < 0) {
+                                       cb->args[0] = idx;
+                                       cb->args[2] = (unsigned long) table;
+                                       cb->args[3] = afi->family;
+                                       goto done;
+                               }
+cont:
+                               idx++;
+                       }
+                       if (s_idx)
+                               s_idx = 0;
+               }
+       }
+       cb->args[1] = 1;
+done:
+       return skb->len;
+}
+
 static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
@@ -2143,9 +2204,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
        if (err < 0)
                return err;
 
-       if (ctx.table == NULL)
-               ret = nf_tables_dump_sets_all(&ctx, skb, cb);
-       else
+       if (ctx.table == NULL) {
+               if (ctx.afi == NULL)
+                       ret = nf_tables_dump_sets_all(&ctx, skb, cb);
+               else
+                       ret = nf_tables_dump_sets_family(&ctx, skb, cb);
+       } else
                ret = nf_tables_dump_sets_table(&ctx, skb, cb);
 
        return ret;
@@ -2158,6 +2222,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
        const struct nft_set *set;
        struct nft_ctx ctx;
        struct sk_buff *skb2;
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        int err;
 
        /* Verify existance before starting dump */
@@ -2172,6 +2237,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
                return netlink_dump_start(nlsk, skb, nlh, &c);
        }
 
+       /* Only accept unspec with dump */
+       if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+               return -EAFNOSUPPORT;
+
        set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
        if (IS_ERR(set))
                return PTR_ERR(set);
@@ -2341,6 +2410,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
                            const struct nlmsghdr *nlh,
                            const struct nlattr * const nla[])
 {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        struct nft_set *set;
        struct nft_ctx ctx;
        int err;
@@ -2352,6 +2422,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
        if (err < 0)
                return err;
 
+       if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+               return -EAFNOSUPPORT;
+
        set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
        if (IS_ERR(set))
                return PTR_ERR(set);
@@ -2521,9 +2594,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
        u32 portid, seq;
        int event, err;
 
-       nfmsg = nlmsg_data(cb->nlh);
-       err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
-                         nft_set_elem_list_policy);
+       err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
+                         NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
        if (err < 0)
                return err;
 
index cb9e685..e8fcc34 100644 (file)
@@ -164,7 +164,7 @@ next_rule:
                break;
        }
 
-       switch (data[NFT_REG_VERDICT].verdict) {
+       switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
        case NF_ACCEPT:
        case NF_DROP:
        case NF_QUEUE:
@@ -172,6 +172,9 @@ next_rule:
                        nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 
                return data[NFT_REG_VERDICT].verdict;
+       }
+
+       switch (data[NFT_REG_VERDICT].verdict) {
        case NFT_JUMP:
                if (unlikely(pkt->skb->nf_trace))
                        nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
index 8c28220..1ceaaa6 100644 (file)
 
 struct nft_meta {
        enum nft_meta_keys      key:8;
-       enum nft_registers      dreg:8;
+       union {
+               enum nft_registers      dreg:8;
+               enum nft_registers      sreg:8;
+       };
 };
 
-static void nft_meta_eval(const struct nft_expr *expr,
-                         struct nft_data data[NFT_REG_MAX + 1],
-                         const struct nft_pktinfo *pkt)
+static void nft_meta_get_eval(const struct nft_expr *expr,
+                             struct nft_data data[NFT_REG_MAX + 1],
+                             const struct nft_pktinfo *pkt)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
        const struct sk_buff *skb = pkt->skb;
@@ -132,23 +135,50 @@ err:
        data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
 
+static void nft_meta_set_eval(const struct nft_expr *expr,
+                             struct nft_data data[NFT_REG_MAX + 1],
+                             const struct nft_pktinfo *pkt)
+{
+       const struct nft_meta *meta = nft_expr_priv(expr);
+       struct sk_buff *skb = pkt->skb;
+       u32 value = data[meta->sreg].data[0];
+
+       switch (meta->key) {
+       case NFT_META_MARK:
+               skb->mark = value;
+               break;
+       case NFT_META_PRIORITY:
+               skb->priority = value;
+               break;
+       case NFT_META_NFTRACE:
+               skb->nf_trace = 1;
+               break;
+       default:
+               WARN_ON(1);
+       }
+}
+
 static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
        [NFTA_META_DREG]        = { .type = NLA_U32 },
        [NFTA_META_KEY]         = { .type = NLA_U32 },
+       [NFTA_META_SREG]        = { .type = NLA_U32 },
 };
 
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-                        const struct nlattr * const tb[])
+static int nft_meta_init_validate_set(uint32_t key)
 {
-       struct nft_meta *priv = nft_expr_priv(expr);
-       int err;
-
-       if (tb[NFTA_META_DREG] == NULL ||
-           tb[NFTA_META_KEY] == NULL)
-               return -EINVAL;
+       switch (key) {
+       case NFT_META_MARK:
+       case NFT_META_PRIORITY:
+       case NFT_META_NFTRACE:
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
 
-       priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-       switch (priv->key) {
+static int nft_meta_init_validate_get(uint32_t key)
+{
+       switch (key) {
        case NFT_META_LEN:
        case NFT_META_PROTOCOL:
        case NFT_META_PRIORITY:
@@ -167,26 +197,69 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 #ifdef CONFIG_NETWORK_SECMARK
        case NFT_META_SECMARK:
 #endif
-               break;
+               return 0;
        default:
                return -EOPNOTSUPP;
        }
 
-       priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
-       err = nft_validate_output_register(priv->dreg);
+}
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                        const struct nlattr * const tb[])
+{
+       struct nft_meta *priv = nft_expr_priv(expr);
+       int err;
+
+       priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+
+       if (tb[NFTA_META_DREG]) {
+               err = nft_meta_init_validate_get(priv->key);
+               if (err < 0)
+                       return err;
+
+               priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+               err = nft_validate_output_register(priv->dreg);
+               if (err < 0)
+                       return err;
+
+               return nft_validate_data_load(ctx, priv->dreg, NULL,
+                                             NFT_DATA_VALUE);
+       }
+
+       err = nft_meta_init_validate_set(priv->key);
        if (err < 0)
                return err;
-       return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+
+       priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
+
+       return 0;
 }
 
-static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_meta_get_dump(struct sk_buff *skb,
+                            const struct nft_expr *expr)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
 
+       if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+               goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
                goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static int nft_meta_set_dump(struct sk_buff *skb,
+                            const struct nft_expr *expr)
+{
+       const struct nft_meta *priv = nft_expr_priv(expr);
+
        if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
                goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg)))
+               goto nla_put_failure;
+
        return 0;
 
 nla_put_failure:
@@ -194,17 +267,44 @@ nla_put_failure:
 }
 
 static struct nft_expr_type nft_meta_type;
-static const struct nft_expr_ops nft_meta_ops = {
+static const struct nft_expr_ops nft_meta_get_ops = {
        .type           = &nft_meta_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
-       .eval           = nft_meta_eval,
+       .eval           = nft_meta_get_eval,
        .init           = nft_meta_init,
-       .dump           = nft_meta_dump,
+       .dump           = nft_meta_get_dump,
 };
 
+static const struct nft_expr_ops nft_meta_set_ops = {
+       .type           = &nft_meta_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+       .eval           = nft_meta_set_eval,
+       .init           = nft_meta_init,
+       .dump           = nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_select_ops(const struct nft_ctx *ctx,
+                   const struct nlattr * const tb[])
+{
+       if (tb[NFTA_META_KEY] == NULL)
+               return ERR_PTR(-EINVAL);
+
+       if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+               return ERR_PTR(-EINVAL);
+
+       if (tb[NFTA_META_DREG])
+               return &nft_meta_get_ops;
+
+       if (tb[NFTA_META_SREG])
+               return &nft_meta_set_ops;
+
+       return ERR_PTR(-EINVAL);
+}
+
 static struct nft_expr_type nft_meta_type __read_mostly = {
        .name           = "meta",
-       .ops            = &nft_meta_ops,
+       .select_ops     = &nft_meta_select_ops,
        .policy         = nft_meta_policy,
        .maxattr        = NFTA_META_MAX,
        .owner          = THIS_MODULE,
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
deleted file mode 100644 (file)
index 71177df..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-
-struct nft_meta {
-       enum nft_meta_keys      key;
-};
-
-static void nft_meta_eval(const struct nft_expr *expr,
-                         struct nft_data *nfres,
-                         struct nft_data *data,
-                         const struct nft_pktinfo *pkt)
-{
-       const struct nft_meta *meta = nft_expr_priv(expr);
-       struct sk_buff *skb = pkt->skb;
-       u32 val = data->data[0];
-
-       switch (meta->key) {
-       case NFT_META_MARK:
-               skb->mark = val;
-               break;
-       case NFT_META_PRIORITY:
-               skb->priority = val;
-               break;
-       case NFT_META_NFTRACE:
-               skb->nf_trace = val;
-               break;
-#ifdef CONFIG_NETWORK_SECMARK
-       case NFT_META_SECMARK:
-               skb->secmark = val;
-               break;
-#endif
-       default:
-               WARN_ON(1);
-       }
-}
-
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
-       [NFTA_META_KEY]         = { .type = NLA_U32 },
-};
-
-static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
-{
-       struct nft_meta *meta = nft_expr_priv(expr);
-
-       if (tb[NFTA_META_KEY] == NULL)
-               return -EINVAL;
-
-       meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-       switch (meta->key) {
-       case NFT_META_MARK:
-       case NFT_META_PRIORITY:
-       case NFT_META_NFTRACE:
-#ifdef CONFIG_NETWORK_SECMARK
-       case NFT_META_SECMARK:
-#endif
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
-{
-       struct nft_meta *meta = nft_expr_priv(expr);
-
-       NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static struct nft_expr_ops meta_target __read_mostly = {
-       .name           = "meta",
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
-       .owner          = THIS_MODULE,
-       .eval           = nft_meta_eval,
-       .init           = nft_meta_init,
-       .dump           = nft_meta_dump,
-       .policy         = nft_meta_policy,
-       .maxattr        = NFTA_META_MAX,
-};
-
-static int __init nft_meta_target_init(void)
-{
-       return nft_register_expr(&meta_target);
-}
-
-static void __exit nft_meta_target_exit(void)
-{
-       nft_unregister_expr(&meta_target);
-}
-
-module_init(nft_meta_target_init);
-module_exit(nft_meta_target_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
new file mode 100644 (file)
index 0000000..cbea473
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code partly funded by OISF
+ * (http://www.openinfosecfoundation.org/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_queue.h>
+
+static u32 jhash_initval __read_mostly;
+
+struct nft_queue {     /* private data of one "queue" expression */
+       u16     queuenum;       /* first (or only) queue number, from NFTA_QUEUE_NUM */
+       u16     queues_total;   /* number of queues in the range, from NFTA_QUEUE_TOTAL */
+       u16     flags;          /* NFT_QUEUE_FLAG_* bits, from NFTA_QUEUE_FLAGS */
+       u8      family;         /* ctx->afi->family captured in nft_queue_init() */
+};
+
+/*
+ * Choose the queue for this packet and write the matching NF_QUEUE verdict
+ * into the verdict register.
+ *
+ * With a single queue the configured number is used as is.  With several
+ * queues the packet is spread either by the current CPU id (CPU fanout flag)
+ * or by nfqueue_hash() over the packet.  The bypass flag is folded into the
+ * verdict when requested.
+ */
+static void nft_queue_eval(const struct nft_expr *expr,
+                          struct nft_data data[NFT_REG_MAX + 1],
+                          const struct nft_pktinfo *pkt)
+{
+       struct nft_queue *priv = nft_expr_priv(expr);
+       u32 qnum = priv->queuenum;
+       u32 verdict;
+
+       /* Load balancing only applies when a range of queues is configured. */
+       if (priv->queues_total > 1) {
+               if (!(priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)) {
+                       qnum = nfqueue_hash(pkt->skb, qnum,
+                                           priv->queues_total, priv->family,
+                                           jhash_initval);
+               } else {
+                       int cpu = smp_processor_id();
+
+                       qnum = priv->queuenum + cpu % priv->queues_total;
+               }
+       }
+
+       verdict = NF_QUEUE_NR(qnum);
+       if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+               verdict |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+       data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
+       [NFTA_QUEUE_NUM]        = { .type = NLA_U16 }, /* mandatory; read as be16 in nft_queue_init() */
+       [NFTA_QUEUE_TOTAL]      = { .type = NLA_U16 }, /* optional; read as be16 */
+       [NFTA_QUEUE_FLAGS]      = { .type = NLA_U16 }, /* optional; checked against NFT_QUEUE_FLAG_MASK */
+};
+
+/*
+ * Parse the netlink attributes of a "queue" expression into its private data.
+ *
+ * NFTA_QUEUE_NUM is mandatory.  NFTA_QUEUE_TOTAL and NFTA_QUEUE_FLAGS are
+ * optional; when absent the corresponding field is left as it was
+ * (NOTE(review): presumably zeroed by the allocator -- confirm).
+ *
+ * Returns 0 on success, -EINVAL when the queue number is missing or unknown
+ * flag bits are set, and -ERANGE when the configured queue range would run
+ * past the largest 16-bit queue number.
+ */
+static int nft_queue_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_queue *priv = nft_expr_priv(expr);
+       u32 maxid;
+
+       if (tb[NFTA_QUEUE_NUM] == NULL)
+               return -EINVAL;
+
+       init_hashrandom(&jhash_initval);
+       priv->family = ctx->afi->family;
+       priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
+
+       if (tb[NFTA_QUEUE_TOTAL] != NULL)
+               priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
+
+       /*
+        * nft_queue_eval() may pick any queue in
+        * [queuenum, queuenum + queues_total - 1].  Queue numbers are carried
+        * as 16-bit values, so refuse a range whose upper end does not fit.
+        */
+       if (priv->queues_total > 1) {
+               maxid = (u32)priv->queuenum + priv->queues_total - 1;
+               if (maxid > 0xffff)
+                       return -ERANGE;
+       }
+
+       if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+               priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+               if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Serialize the expression's queue number, queue count and flags into @skb
+ * as big-endian 16-bit netlink attributes.
+ *
+ * Returns 0 on success, -1 as soon as one attribute cannot be added.
+ */
+static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_queue *priv = nft_expr_priv(expr);
+
+       if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)))
+               return -1;
+
+       if (nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)))
+               return -1;
+
+       if (nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+               return -1;
+
+       return 0;
+}
+
+static struct nft_expr_type nft_queue_type;    /* forward declaration: the ops table points back at its type */
+static const struct nft_expr_ops nft_queue_ops = {
+       .type           = &nft_queue_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_queue)), /* room for the private struct nft_queue */
+       .eval           = nft_queue_eval,       /* sets the NF_QUEUE verdict for a packet */
+       .init           = nft_queue_init,       /* parses NFTA_QUEUE_* attributes */
+       .dump           = nft_queue_dump,       /* emits NFTA_QUEUE_* attributes */
+};
+
+static struct nft_expr_type nft_queue_type __read_mostly = {
+       .name           = "queue",              /* same string as MODULE_ALIAS_NFT_EXPR() below */
+       .ops            = &nft_queue_ops,
+       .policy         = nft_queue_policy,     /* attribute types for NFTA_QUEUE_* */
+       .maxattr        = NFTA_QUEUE_MAX,       /* highest index covered by the policy array */
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_queue_module_init(void)
+{
+       return nft_register_expr(&nft_queue_type);      /* registration result is the module_init() result */
+}
+
+static void __exit nft_queue_module_exit(void)
+{
+       nft_unregister_expr(&nft_queue_type);   /* undo the registration done at module init */
+}
+
+module_init(nft_queue_module_init);
+module_exit(nft_queue_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Leblond <eric@regit.org>");
+MODULE_ALIAS_NFT_EXPR("queue");
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
new file mode 100644 (file)
index 0000000..0d690d4
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+#include <net/netfilter/ipv6/nf_reject.h>
+#endif
+
+struct nft_reject {    /* private data of one "reject" expression */
+       enum nft_reject_types   type:8;         /* NFT_REJECT_* selector, kept in 8 bits */
+       u8                      icmp_code;      /* from NFTA_REJECT_ICMP_CODE; set only for NFT_REJECT_ICMP_UNREACH */
+       u8                      family;         /* ctx->afi->family captured in nft_reject_init() */
+};
+
+static void nft_reject_eval(const struct nft_expr *expr,
+                             struct nft_data data[NFT_REG_MAX + 1],
+                             const struct nft_pktinfo *pkt)
+{
+       struct nft_reject *priv = nft_expr_priv(expr);
+       struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+       switch (priv->type) {
+       case NFT_REJECT_ICMP_UNREACH:
+               if (priv->family == NFPROTO_IPV4)
+                       nf_send_unreach(pkt->skb, priv->icmp_code);
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+               else if (priv->family == NFPROTO_IPV6)
+                       nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+                                     pkt->hooknum);
+#endif
+               break;
+       case NFT_REJECT_TCP_RST:
+               if (priv->family == NFPROTO_IPV4)
+                       nf_send_reset(pkt->skb, pkt->hooknum);
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+               else if (priv->family == NFPROTO_IPV6)
+                       nf_send_reset6(net, pkt->skb, pkt->hooknum);
+#endif
+               break;
+       }
+
+       data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+       [NFTA_REJECT_TYPE]              = { .type = NLA_U32 }, /* mandatory; read as be32 in nft_reject_init() */
+       [NFTA_REJECT_ICMP_CODE]         = { .type = NLA_U8 }, /* required only for NFT_REJECT_ICMP_UNREACH */
+};
+
+/*
+ * Parse the netlink attributes of a "reject" expression into its private data.
+ *
+ * NFTA_REJECT_TYPE is mandatory and must be NFT_REJECT_ICMP_UNREACH or
+ * NFT_REJECT_TCP_RST.  NFTA_REJECT_ICMP_CODE is mandatory for the ICMP type
+ * and is not read for TCP reset.
+ *
+ * Returns 0 on success, -EINVAL on a missing attribute or unknown type.
+ */
+static int nft_reject_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_reject *priv = nft_expr_priv(expr);
+       u32 type;
+
+       if (tb[NFTA_REJECT_TYPE] == NULL)
+               return -EINVAL;
+
+       priv->family = ctx->afi->family;
+
+       /*
+        * Validate the full 32-bit value before narrowing it: priv->type is
+        * an 8-bit field, so storing first would let e.g. 0x100 truncate to
+        * a valid type and slip through the switch below.
+        */
+       type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+       switch (type) {
+       case NFT_REJECT_ICMP_UNREACH:
+               if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+                       return -EINVAL;
+               priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+               break;
+       case NFT_REJECT_TCP_RST:
+               break;
+       default:
+               return -EINVAL;
+       }
+       priv->type = type;
+
+       return 0;
+}
+
+/*
+ * Serialize the expression into @skb: the reject type always, and the ICMP
+ * code only when the type is NFT_REJECT_ICMP_UNREACH.
+ *
+ * Returns 0 on success, -1 as soon as one attribute cannot be added.
+ */
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_reject *priv = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+               return -1;
+
+       /* The ICMP code is meaningful for the unreachable type only. */
+       if (priv->type == NFT_REJECT_ICMP_UNREACH &&
+           nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+               return -1;
+
+       return 0;
+}
+
+static struct nft_expr_type nft_reject_type;   /* forward declaration: the ops table points back at its type */
+static const struct nft_expr_ops nft_reject_ops = {
+       .type           = &nft_reject_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_reject)), /* room for the private struct nft_reject */
+       .eval           = nft_reject_eval,      /* sends the rejection and sets NF_DROP */
+       .init           = nft_reject_init,      /* parses NFTA_REJECT_* attributes */
+       .dump           = nft_reject_dump,      /* emits NFTA_REJECT_* attributes */
+};
+
+static struct nft_expr_type nft_reject_type __read_mostly = {
+       .name           = "reject",             /* same string as MODULE_ALIAS_NFT_EXPR() below */
+       .ops            = &nft_reject_ops,
+       .policy         = nft_reject_policy,    /* attribute types for NFTA_REJECT_* */
+       .maxattr        = NFTA_REJECT_MAX,      /* highest index covered by the policy array */
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_reject_module_init(void)
+{
+       return nft_register_expr(&nft_reject_type);     /* registration result is the module_init() result */
+}
+
+static void __exit nft_reject_module_exit(void)
+{
+       nft_unregister_expr(&nft_reject_type);  /* undo the registration done at module init */
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
index ed00fef..8f1779f 100644 (file)
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-
 #include <linux/netfilter.h>
 #include <linux/netfilter_arp.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_NFQUEUE.h>
 
+#include <net/netfilter/nf_queue.h>
+
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
 MODULE_LICENSE("GPL");
@@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static u32 jhash_initval __read_mostly;
-static bool rnd_inited __read_mostly;
 
 static unsigned int
 nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
        return NF_QUEUE_NR(tinfo->queuenum);
 }
 
-static u32 hash_v4(const struct sk_buff *skb)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-
-       /* packets in either direction go into same queue */
-       if ((__force u32)iph->saddr < (__force u32)iph->daddr)
-               return jhash_3words((__force u32)iph->saddr,
-                       (__force u32)iph->daddr, iph->protocol, jhash_initval);
-
-       return jhash_3words((__force u32)iph->daddr,
-                       (__force u32)iph->saddr, iph->protocol, jhash_initval);
-}
-
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static u32 hash_v6(const struct sk_buff *skb)
-{
-       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       u32 a, b, c;
-
-       if ((__force u32)ip6h->saddr.s6_addr32[3] <
-           (__force u32)ip6h->daddr.s6_addr32[3]) {
-               a = (__force u32) ip6h->saddr.s6_addr32[3];
-               b = (__force u32) ip6h->daddr.s6_addr32[3];
-       } else {
-               b = (__force u32) ip6h->saddr.s6_addr32[3];
-               a = (__force u32) ip6h->daddr.s6_addr32[3];
-       }
-
-       if ((__force u32)ip6h->saddr.s6_addr32[1] <
-           (__force u32)ip6h->daddr.s6_addr32[1])
-               c = (__force u32) ip6h->saddr.s6_addr32[1];
-       else
-               c = (__force u32) ip6h->daddr.s6_addr32[1];
-
-       return jhash_3words(a, b, c, jhash_initval);
-}
-#endif
-
-static u32
-nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
-{
-       const struct xt_NFQ_info_v1 *info = par->targinfo;
-       u32 queue = info->queuenum;
-
-       if (par->family == NFPROTO_IPV4)
-               queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-       else if (par->family == NFPROTO_IPV6)
-               queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
-#endif
-
-       return queue;
-}
-
 static unsigned int
 nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_NFQ_info_v1 *info = par->targinfo;
        u32 queue = info->queuenum;
 
-       if (info->queues_total > 1)
-               queue = nfqueue_hash(skb, par);
-
+       if (info->queues_total > 1) {
+               queue = nfqueue_hash(skb, queue, info->queues_total,
+                                    par->family, jhash_initval);
+       }
        return NF_QUEUE_NR(queue);
 }
 
@@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
        const struct xt_NFQ_info_v3 *info = par->targinfo;
        u32 maxid;
 
-       if (unlikely(!rnd_inited)) {
-               get_random_bytes(&jhash_initval, sizeof(jhash_initval));
-               rnd_inited = true;
-       }
+       init_hashrandom(&jhash_initval);
+
        if (info->queues_total == 0) {
                pr_err("NFQUEUE: number of total queues is 0\n");
                return -EINVAL;
@@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
                        int cpu = smp_processor_id();
 
                        queue = info->queuenum + cpu % info->queues_total;
-               } else
-                       queue = nfqueue_hash(skb, par);
+               } else {
+                       queue = nfqueue_hash(skb, queue, info->queues_total,
+                                            par->family, jhash_initval);
+               }
        }
 
        ret = NF_QUEUE_NR(queue);