datapath: Revert "datapath: Constify netlink structs."
datapath/datapath.c
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vlan.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP
};
/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family, struct genl_multicast_group *grp,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, genl_info_net(info),
                    info->snd_portid, GROUP_ID(grp), info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and writes to other state (flow table modifications,
 * setting miscellaneous datapath parameters, etc.) are protected by
 * ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

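/* Illustrative write-side usage (a minimal sketch; any state covered by the
 * DOC above is modified the same way):
 *
 *        ovs_lock();
 *        ...add/remove a vport, update the flow table, etc...
 *        ovs_unlock();
 */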
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif

static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

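/* Returns the ifindex of the local (internal) port's net_device, or 0 if
 * the datapath has no local port.  Takes rcu_read_lock itself, so callers
 * need no additional locking.
 */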
static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

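/* DP_VPORT_HASH_BUCKETS is assumed to be a power of two here, so masking
 * with (DP_VPORT_HASH_BUCKETS - 1) selects the hash bucket for 'port_no'.
 */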
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                         &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                error = ovs_dp_upcall(dp, skb, key, &upcall);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

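/* Queue 'skb' to userspace via a Netlink upcall.  GSO packets are
 * software-segmented first so that each segment fits in its own Netlink
 * message; any failure is accounted in the per-CPU n_lost counter.
 */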
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

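/* Segment a GSO skb and queue each resulting segment to userspace as a
 * separate upcall.  The OVS_CB() control block is saved and restored around
 * __skb_gso_segment(), which may clobber it.
 */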
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        struct ovs_skb_cb ovs_cb;
        int err;

        ovs_cb = *OVS_CB(skb);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        *OVS_CB(skb) = ovs_cb;
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for the first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                *OVS_CB(skb) = ovs_cb;
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

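/* Upper bound on the Netlink message size needed for an upcall carrying
 * 'hdrlen' bytes of packet data plus the optional attributes below.
 */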
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(upcall_info->actions_len);

        return size;
}

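/* Build a Netlink message holding one packet and unicast it to
 * 'upcall_info->portid'.  The payload is attached with skb_zerocopy() when
 * userspace advertised OVS_DP_F_UNALIGNED; otherwise it is copied linearly
 * and padded to NLA_ALIGNTO.
 */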
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        struct genl_info info = {
#ifdef HAVE_GENLMSG_NEW_UNICAST
                .dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif
                .snd_portid = upcall_info->portid,
        };
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                vlan_set_tci(nskb, 0);

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen);
        user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                /* 'user_skb' was sized by upcall_msg_size(), so this should
                 * not happen; treat it as an internal error if it does.
                 */
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_egress_tunnel_key(user_skb,
                                                    upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy()
         */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len);

        err = skb_zerocopy(user_skb, skb, skb->len, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;

                if (plen > 0)
                        memset(skb_put(user_skb, plen), 0, plen);
        }

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

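/* Handler for OVS_PACKET_CMD_EXECUTE: reconstruct a packet from Netlink
 * attributes, build a temporary flow from the supplied key and actions, and
 * execute those actions as if the packet had arrived on 'in_port'.
 */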
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct ethhdr *eth;
        struct vport *input_vport;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have.
         */
        if (eth_proto_is_802_3(eth->h_proto))
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
                                             &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        OVS_CB(packet)->egress_tun_info = NULL;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

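/* Aggregate the per-CPU datapath counters into 'stats' and 'mega_stats'.
 * The u64_stats fetch/retry loop re-reads a CPU's counters until it gets a
 * consistent snapshot, which matters where 64-bit counters cannot be read
 * atomically.
 */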
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

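/* Worst-case reply size for a flow command.  Attributes suppressed via the
 * OVS_UFID_F_OMIT_* flags are left out of the estimate.
 */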
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->actions_len);

        return len
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
                                        GROUP_ID(&ovs_dp_flow_multicast_group)))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

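/* Handler for OVS_FLOW_CMD_NEW: validate the key, mask, UFID, and actions
 * before taking ovs_lock, then either insert a brand-new flow or, if an
 * identical flow already exists and NLM_F_CREATE/NLM_F_EXCL is not set,
 * swap in the new actions in place.
 */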
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_key key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        ovs_flow_mask_key(&new_flow->key, &key, &mask);

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &key, log);
        if (error)
                goto err_kfree_flow;

        /* Validate actions. */
        error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
                                     &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "-Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, mask);
        error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attribute not present in set flow.");
                goto error;
        }

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
                                        log);
                if (IS_ERR(acts)) {
                        error = PTR_ERR(acts);
                        goto error;
                }

                /* Can allocate before locking if have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false,
                                                ufid_flags);

                if (unlikely(IS_ERR(reply))) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
                                        &flow->id, info, false, ufid_flags);

        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);
                        ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
                } else {
                        genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
                                     GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));

                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

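/* Flow dump callback.  cb->args[0] and cb->args[1] hold the (bucket, object)
 * cursor into the flow table so the walk can resume across successive dump
 * calls.
 */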
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
                            OVS_FLOW_ATTR_MAX, flow_policy);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_flow_genl_ops,
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
};

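/* Size of a datapath reply: header plus the name, stats, megaflow-stats,
 * and user-features attributes.
 */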
static size_t ovs_dp_cmd_msg_size(void)
{
        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

        msgsize += nla_total_size(IFNAMSIZ);
        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
        msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */

        return msgsize;
}

/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        struct ovs_dp_megaflow_stats dp_megaflow_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
                        &dp_stats))
                goto nla_put_failure;

        if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
                        sizeof(struct ovs_dp_megaflow_stats),
                        &dp_megaflow_stats))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
                goto nla_put_failure;

        genlmsg_end(skb, ovs_header);
        return 0;

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
        return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                                        const struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return;

        WARN(dp->user_features, "Dropping previously announced user features\n");
        dp->user_features = 0;
}

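/* Apply the userspace-requested feature flags (OVS_DP_ATTR_USER_FEATURES)
 * to 'dp'.
 */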
static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
        if (a[OVS_DP_ATTR_USER_FEATURES])
                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

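/* Handler for OVS_DP_CMD_NEW: allocate the datapath, its flow table,
 * per-CPU stats, and vport hash table, then create the local (internal)
 * port under ovs_lock and notify listeners of the new datapath.
 */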
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
        int err, i;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_free_reply;

        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

        /* Allocate table. */
        err = ovs_flow_tbl_init(&dp->table);
        if (err)
                goto err_free_dp;

        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_table;
        }

        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                            GFP_KERNEL);
        if (!dp->ports) {
                err = -ENOMEM;
                goto err_destroy_percpu;
        }

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
                INIT_HLIST_HEAD(&dp->ports[i]);

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

        ovs_dp_change(dp, a);

        /* So far only local changes have been made, now need the lock. */
        ovs_lock();

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                if (err == -EEXIST) {
                        /* An outdated user space instance that does not understand
                         * the concept of user_features has attempted to create a new
                         * datapath and is likely to reuse it. Drop all user features.
                         */
                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
                                ovs_dp_reset_user_features(skb, info);
                }

                goto err_destroy_ports_array;
        }

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);

        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
        return 0;

err_destroy_ports_array:
        ovs_unlock();
        kfree(dp->ports);
err_destroy_percpu:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
        release_net(ovs_dp_get_net(dp));
        kfree(dp);
err_free_reply:
        kfree_skb(reply);
err:
        return err;
}

/* Called with ovs_mutex. */
1613 static void __dp_destroy(struct datapath *dp)
1614 {
1615         int i;
1616
1617         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1618                 struct vport *vport;
1619                 struct hlist_node *n;
1620
1621                 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1622                         if (vport->port_no != OVSP_LOCAL)
1623                                 ovs_dp_detach_port(vport);
1624         }
1625
1626         list_del_rcu(&dp->list_node);
1627
        /* OVSP_LOCAL is the datapath's internal port. All other ports in the
         * datapath must have been destroyed before the datapath itself is
         * freed.
         */
        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

        /* Free the datapath, including its flow table, after an RCU grace
         * period.
         */
        call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
        BUG_ON(err < 0);

        __dp_destroy(dp);
        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        ovs_dp_change(dp, info->attrs);

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);

        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp)) {
                err = PTR_ERR(dp);
                goto err_unlock_free;
        }
        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);
        ovs_unlock();

        return genlmsg_reply(reply, info);

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

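/* Dumps may span multiple netlink messages; cb->args[0] records how many
 * datapaths have already been emitted so that the next invocation can
 * resume after them.
 */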
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        ovs_lock();
        list_for_each_entry(dp, &ovs_net->dps, list_node) {
                if (i >= skip &&
                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         OVS_DP_CMD_NEW) < 0)
                        break;
                i++;
        }
        ovs_unlock();

        cb->args[0] = i;

        return skb->len;
}

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};

static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = OVS_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_new
        },
        { .cmd = OVS_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_del
        },
        { .cmd = OVS_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_get,
          .dumpit = ovs_dp_cmd_dump
        },
        { .cmd = OVS_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_set,
        },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_datapath_genl_ops,
        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
        .mcgrps = &ovs_dp_datapath_multicast_group,
        .n_mcgrps = 1,
};

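/* In the vport fill routine below, only -EMSGSIZE from
 * ovs_vport_get_options() aborts message construction; other errors from
 * it are ignored and the message is still completed.
 */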
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_vport_stats vport_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(vport->dp);

        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)))
                goto nla_put_failure;

        ovs_vport_get_stats(vport, &vport_stats);
        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
                    &vport_stats))
                goto nla_put_failure;

        if (ovs_vport_get_upcall_portids(vport, skb))
                goto nla_put_failure;

        err = ovs_vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        genlmsg_end(skb, ovs_header);
        return 0;

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
                                         u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
        BUG_ON(retval < 0);

        return skb;
}

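/* A vport can be looked up either by name (global within the namespace) or
 * by (dp_ifindex, port_no). When both a name and a dp_ifindex are given,
 * they must agree.
 */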
/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
                                  const struct ovs_header *ovs_header,
                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[OVS_VPORT_ATTR_NAME]) {
                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                if (ovs_header->dp_ifindex &&
                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(net, ovs_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = ovs_vport_ovsl_rcu(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else {
                return ERR_PTR(-EINVAL);
        }
}

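/* If no OVS_VPORT_ATTR_PORT_NO is given, the lowest free port number is
 * assigned, starting from 1 (port 0 is OVSP_LOCAL). new_vport() returning
 * -EAGAIN means the lookup should be retried, typically after a vport
 * module has been loaded on demand; hence the restart label.
 */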
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
            !a[OVS_VPORT_ATTR_UPCALL_PID])
                return -EINVAL;

        port_no = a[OVS_VPORT_ATTR_PORT_NO]
                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
        if (port_no >= DP_MAX_PORTS)
                return -EFBIG;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
restart:
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock_free;

        if (port_no) {
                vport = ovs_vport_ovsl(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock_free;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock_free;
                        }
                        vport = ovs_vport_ovsl(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;
        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport)) {
                if (err == -EAGAIN)
                        goto restart;
                goto exit_unlock_free;
        }

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);
        ovs_unlock();

        ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;

        if (a[OVS_VPORT_ATTR_TYPE] &&
            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
                err = -EINVAL;
                goto exit_unlock_free;
        }

        if (a[OVS_VPORT_ATTR_OPTIONS]) {
                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
                if (err)
                        goto exit_unlock_free;
        }

        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

                err = ovs_vport_set_upcall_portids(vport, ids);
                if (err)
                        goto exit_unlock_free;
        }

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);
        ovs_unlock();

        ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;

        if (vport->port_no == OVSP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock_free;
        }

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
        BUG_ON(err < 0);
        ovs_dp_detach_port(vport);
        ovs_unlock();

        ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        rcu_read_lock();
        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;
        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);
        rcu_read_unlock();

        return genlmsg_reply(reply, info);

exit_unlock_free:
        rcu_read_unlock();
        kfree_skb(reply);
        return err;
}

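/* The vport dump cursor is two-level: cb->args[0] is the hash bucket to
 * resume from and cb->args[1] is the number of vports already emitted
 * within that bucket.
 */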
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        int bucket = cb->args[0], skip = cb->args[1];
        int i, j = 0;

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }
        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;

                j = 0;
                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
                        if (j >= skip &&
                            ovs_vport_cmd_fill_info(vport, skb,
                                                    NETLINK_CB(cb->skb).portid,
                                                    cb->nlh->nlmsg_seq,
                                                    NLM_F_MULTI,
                                                    OVS_VPORT_CMD_NEW) < 0)
                                goto out;

                        j++;
                }
                skip = 0;
        }
out:
        rcu_read_unlock();

        cb->args[0] = i;
        cb->args[1] = j;

        return skb->len;
}

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = OVS_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_new
        },
        { .cmd = OVS_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_del
        },
        { .cmd = OVS_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_get,
          .dumpit = ovs_vport_cmd_dump
        },
        { .cmd = OVS_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_set,
        },
};

struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_VPORT_FAMILY,
        .version = OVS_VPORT_VERSION,
        .maxattr = OVS_VPORT_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_vport_genl_ops,
        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
        .mcgrps = &ovs_dp_vport_multicast_group,
        .n_mcgrps = 1,
};

static struct genl_family *dp_genl_families[] = {
        &dp_datapath_genl_family,
        &dp_vport_genl_family,
        &dp_flow_genl_family,
        &dp_packet_genl_family,
};

static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i]);
}

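/* On failure, only the i families that were successfully registered are
 * unregistered again via dp_unregister_genl() above.
 */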
static int dp_register_genl(void)
{
        int err;
        int i;

        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                err = genl_register_family(dp_genl_families[i]);
                if (err)
                        goto error;
        }

        return 0;

error:
        dp_unregister_genl(i);
        return err;
}

static int __net_init ovs_init_net(struct net *net)
{
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

        INIT_LIST_HEAD(&ovs_net->dps);
        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
        return 0;
}

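/* Collect, from the datapaths of namespace 'net', the internal-device
 * vports whose backing netdev lives in the exiting namespace 'dnet'.
 */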
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
                                            struct list_head *head)
{
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
        struct datapath *dp;

        list_for_each_entry(dp, &ovs_net->dps, list_node) {
                int i;

                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                        struct vport *vport;

                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
                                struct netdev_vport *netdev_vport;

                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
                                        continue;

                                netdev_vport = netdev_vport_priv(vport);
                                if (dev_net(netdev_vport->dev) == dnet)
                                        list_add(&vport->detach_list, head);
                        }
                }
        }
}

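/* Tear down all datapaths in the exiting namespace, then detach internal
 * vports belonging to datapaths in other namespaces whose device lived
 * here.
 */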
static void __net_exit ovs_exit_net(struct net *dnet)
{
        struct datapath *dp, *dp_next;
        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
        struct vport *vport, *vport_next;
        struct net *net;
        LIST_HEAD(head);

        ovs_lock();
        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
                __dp_destroy(dp);

        rtnl_lock();
        for_each_net(net)
                list_vports_from_net(net, dnet, &head);
        rtnl_unlock();

        /* Detach all vports from the given namespace. */
        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
                list_del(&vport->detach_list);
                ovs_dp_detach_port(vport);
        }

        ovs_unlock();

        cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
        .init = ovs_init_net,
        .exit = ovs_exit_net,
        .id   = &ovs_net_id,
        .size = sizeof(struct ovs_net),
};

DEFINE_COMPAT_PNET_REG_FUNC(device);

static int __init dp_init(void)
{
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

        pr_info("Open vSwitch switching datapath %s\n", VERSION);

        err = action_fifos_init();
        if (err)
                goto error;

        err = ovs_internal_dev_rtnl_link_register();
        if (err)
                goto error_action_fifos_exit;

        err = ovs_flow_init();
        if (err)
                goto error_unreg_rtnl_link;

        err = ovs_vport_init();
        if (err)
                goto error_flow_exit;

        err = register_pernet_device(&ovs_net_ops);
        if (err)
                goto error_vport_exit;

        err = register_netdevice_notifier(&ovs_dp_device_notifier);
        if (err)
                goto error_netns_exit;

        err = ovs_netdev_init();
        if (err)
                goto error_unreg_notifier;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_netdev;

        return 0;

error_unreg_netdev:
        ovs_netdev_exit();
error_unreg_notifier:
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
        unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
        ovs_vport_exit();
error_flow_exit:
        ovs_flow_exit();
error_unreg_rtnl_link:
        ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
        action_fifos_exit();
error:
        return err;
}

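/* Cleanup mirrors dp_init() in reverse; rcu_barrier() waits for outstanding
 * call_rcu() callbacks (such as destroy_dp_rcu) to finish before the flow
 * and vport subsystems are torn down.
 */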
static void dp_cleanup(void)
{
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        ovs_netdev_exit();
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
        unregister_pernet_device(&ovs_net_ops);
        rcu_barrier();
        ovs_vport_exit();
        ovs_flow_exit();
        ovs_internal_dev_rtnl_link_unregister();
        action_fifos_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
MODULE_VERSION(VERSION);