ovsdb-idlc: Use column name for "*_set_*" smap arguments.
[cascardo/ovs.git] / datapath-windows / ovsext / Vxlan.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Switch.h"
20 #include "Vport.h"
21 #include "Flow.h"
22 #include "Vxlan.h"
23 #include "IpHelper.h"
24 #include "Checksum.h"
25 #include "User.h"
26 #include "PacketIO.h"
27 #include "Flow.h"
28 #include "PacketParser.h"
29
30 #pragma warning( push )
31 #pragma warning( disable:4127 )
32
33
34 #ifdef OVS_DBG_MOD
35 #undef OVS_DBG_MOD
36 #endif
37 #define OVS_DBG_MOD OVS_DBG_VXLAN
38 #include "Debug.h"
39
/*
 * Helper macro to check if a VXLAN ID is valid, i.e. non-zero and no larger
 * than 0xffffff. Note that the argument is evaluated twice.
 */
#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff)
/*
 * Conversions between the 64-bit tunnel ID and the VNI: the VNI is the tunnel
 * ID shifted right by 40 bits and truncated to 32 bits, and vice versa. Both
 * expansions are fully parenthesized so they can be used safely inside larger
 * expressions.
 */
#define VXLAN_TUNNELID_TO_VNI(_tID)   ((UINT32)(((UINT64)(_tID)) >> 40))
#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40)
/*
 * Value written as-is into the outer IP header's 'frag_off' field (going by
 * the name: the "don't fragment" flag, already in network byte order).
 */
#define IP_DF_NBO 0x0040
/* TTL used for the outer IP header when the tunnel key does not supply one. */
#define VXLAN_DEFAULT_TTL 64
#define VXLAN_MULTICAST_TTL 64
/* Value of the 'instanceID' field in generated VXLAN headers. */
#define VXLAN_DEFAULT_INSTANCE_ID 1
48
49 /* Move to a header file */
50 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
51
52 /*
53  * udpDestPort: the vxlan is set as payload to a udp frame. If the destination
54  * port of an udp frame is udpDestPort, we understand it to be vxlan.
55  */
56 NTSTATUS
57 OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
58                    UINT16 udpDestPort)
59 {
60     POVS_VXLAN_VPORT vxlanPort;
61
62     vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort));
63     if (vxlanPort == NULL) {
64         return STATUS_INSUFFICIENT_RESOURCES;
65     }
66
67     RtlZeroMemory(vxlanPort, sizeof(*vxlanPort));
68     vxlanPort->dstPort = udpDestPort;
69     /*
70      * since we are installing the WFP filter before the port is created
71      * We need to check if it is the same number
72      * XXX should be removed later
73      */
74     ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT);
75     vport->priv = (PVOID)vxlanPort;
76
77     return STATUS_SUCCESS;
78 }
79
80
81 VOID
82 OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport)
83 {
84     if (vport->ovsType != OVS_VPORT_TYPE_VXLAN ||
85         vport->priv == NULL) {
86         return;
87     }
88
89     OvsFreeMemory(vport->priv);
90     vport->priv = NULL;
91 }
92
93
94 /*
95  *----------------------------------------------------------------------------
96  * OvsDoEncapVxlan
97  *     Encapsulates the packet.
98  *----------------------------------------------------------------------------
99  */
100 static __inline NDIS_STATUS
101 OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
102                 OvsIPv4TunnelKey *tunKey,
103                 POVS_FWD_INFO fwdInfo,
104                 POVS_PACKET_HDR_INFO layers,
105                 POVS_SWITCH_CONTEXT switchContext,
106                 PNET_BUFFER_LIST *newNbl)
107 {
108     NDIS_STATUS status;
109     PNET_BUFFER curNb;
110     PMDL curMdl;
111     PUINT8 bufferStart;
112     EthHdr *ethHdr;
113     IPHdr *ipHdr;
114     UDPHdr *udpHdr;
115     VXLANHdr *vxlanHdr;
116     UINT32 headRoom = OvsGetVxlanTunHdrSize();
117     UINT32 packetLength;
118
119     /*
120      * XXX: the assumption currently is that the NBL is owned by OVS, and
121      * headroom has already been allocated as part of allocating the NBL and
122      * MDL.
123      */
124     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
125     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
126     if (layers->isTcp) {
127         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
128
129         tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
130                 TcpLargeSendNetBufferListInfo);
131         OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
132         if (tsoInfo.LsoV1Transmit.MSS) {
133             OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
134             *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
135                         tsoInfo.LsoV1Transmit.MSS, headRoom);
136             if (*newNbl == NULL) {
137                 OVS_LOG_ERROR("Unable to segment NBL");
138                 return NDIS_STATUS_FAILURE;
139             }
140         }
141     }
142     /* If we didn't split the packet above, make a copy now */
143     if (*newNbl == NULL) {
144         *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
145                                     FALSE /*NBL info*/);
146         if (*newNbl == NULL) {
147             OVS_LOG_ERROR("Unable to copy NBL");
148             return NDIS_STATUS_FAILURE;
149         }
150     }
151
152     curNbl = *newNbl;
153     for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
154             curNb = curNb->Next) {
155         status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
156         if (status != NDIS_STATUS_SUCCESS) {
157             goto ret_error;
158         }
159
160         curMdl = NET_BUFFER_CURRENT_MDL(curNb);
161         bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
162         if (!bufferStart) {
163             status = NDIS_STATUS_RESOURCES;
164             goto ret_error;
165         }
166
167         bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
168         if (NET_BUFFER_NEXT_NB(curNb)) {
169             OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb),
170                           NET_BUFFER_DATA_LENGTH(curNb->Next));
171         }
172
173         /* L2 header */
174         ethHdr = (EthHdr *)bufferStart;
175         NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
176                        sizeof ethHdr->Destination + sizeof ethHdr->Source);
177         ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
178                (PCHAR)&fwdInfo->srcMacAddr);
179         ethHdr->Type = htons(ETH_TYPE_IPV4);
180
181         // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
182         // should we use those values instead? or will they end up being
183         // uninitialized;
184         /* IP header */
185         ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
186
187         ipHdr->ihl = sizeof *ipHdr / 4;
188         ipHdr->version = IPV4;
189         ipHdr->tos = 0;
190         ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
191         ipHdr->id = 0;
192         ipHdr->frag_off = IP_DF_NBO;
193         ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
194         ipHdr->protocol = IPPROTO_UDP;
195         ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
196         ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
197         ipHdr->saddr = fwdInfo->srcIpAddr;
198         ipHdr->daddr = fwdInfo->dstIpAddr;
199         ipHdr->check = 0;
200         ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
201
202         /* UDP header */
203         udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
204         udpHdr->source = htons(tunKey->flow_hash | 32768);
205         udpHdr->dest = VXLAN_UDP_PORT_NBO;
206         udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
207                             sizeof *udpHdr + sizeof *vxlanHdr);
208         udpHdr->check = 0;
209
210         /* VXLAN header */
211         vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
212         vxlanHdr->flags1 = 0;
213         vxlanHdr->locallyReplicate = 0;
214         vxlanHdr->flags2 = 0;
215         vxlanHdr->reserved1 = 0;
216         if (tunKey->flags | OVS_TNL_F_KEY) {
217             vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId);
218             vxlanHdr->instanceID = 1;
219         }
220         vxlanHdr->reserved2 = 0;
221     }
222     return STATUS_SUCCESS;
223
224 ret_error:
225     OvsCompleteNBL(switchContext, *newNbl, TRUE);
226     *newNbl = NULL;
227     return status;
228 }
229
230
231 /*
232  *----------------------------------------------------------------------------
233  * OvsEncapVxlan --
234  *     Encapsulates the packet if L2/L3 for destination resolves. Otherwise,
235  *     enqueues a callback that does encapsulatation after resolution.
236  *----------------------------------------------------------------------------
237  */
238 NDIS_STATUS
239 OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
240               OvsIPv4TunnelKey *tunKey,
241               POVS_SWITCH_CONTEXT switchContext,
242               VOID *completionList,
243               POVS_PACKET_HDR_INFO layers,
244               PNET_BUFFER_LIST *newNbl)
245 {
246     NTSTATUS status;
247     OVS_FWD_INFO fwdInfo;
248     UNREFERENCED_PARAMETER(completionList);
249
250     status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
251     if (status != STATUS_SUCCESS) {
252         OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
253         // return NDIS_STATUS_PENDING;
254         /*
255          * XXX: Don't know if the completionList will make any sense when
256          * accessed in the callback. Make sure the caveats are known.
257          *
258          * XXX: This code will work once we are able to grab locks in the
259          * callback.
260          */
261         return NDIS_STATUS_FAILURE;
262     }
263
264     return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers,
265                            switchContext, newNbl);
266 }
267
268
269 /*
270  *----------------------------------------------------------------------------
271  * OvsIpHlprCbVxlan --
272  *     Callback function for IP helper.
273  *     XXX: not used currently
274  *----------------------------------------------------------------------------
275  */
276 static VOID
277 OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
278                  UINT32 inPort,
279                  OvsIPv4TunnelKey *tunKey,
280                  PVOID cbData1,
281                  PVOID cbData2,
282                  NTSTATUS result,
283                  POVS_FWD_INFO fwdInfo)
284 {
285     OVS_PACKET_HDR_INFO layers;
286     OvsFlowKey key;
287     NDIS_STATUS status;
288     UNREFERENCED_PARAMETER(inPort);
289
290     status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
291     if (result == STATUS_SUCCESS) {
292         status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
293                 (POVS_SWITCH_CONTEXT)cbData1, NULL);
294     } else {
295         status = NDIS_STATUS_FAILURE;
296     }
297
298     if (status != NDIS_STATUS_SUCCESS) {
299         // XXX: Free up the NBL;
300         return;
301     }
302
303     OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl);
304 }
305
306 /*
307  *----------------------------------------------------------------------------
308  * OvsCalculateUDPChecksum
309  *     Calculate UDP checksum
310  *----------------------------------------------------------------------------
311  */
312 static __inline NDIS_STATUS
313 OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl,
314                         PNET_BUFFER curNb,
315                         IPHdr *ipHdr,
316                         UDPHdr *udpHdr,
317                         UINT32 packetLength)
318 {
319     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
320     UINT16 checkSum;
321
322     csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
323
324     /* Next check if UDP checksum has been calculated. */
325     if (!csumInfo.Receive.UdpChecksumSucceeded) {
326         UINT32 l4Payload;
327
328         checkSum = udpHdr->check;
329
330         l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4;
331         udpHdr->check = 0;
332         udpHdr->check =
333             IPPseudoChecksum((UINT32 *)&ipHdr->saddr,
334                              (UINT32 *)&ipHdr->daddr,
335                              IPPROTO_UDP, (UINT16)l4Payload);
336         udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload,
337             sizeof(EthHdr) + ipHdr->ihl * 4);
338         if (checkSum != udpHdr->check) {
339             OVS_LOG_TRACE("UDP checksum incorrect.");
340             return NDIS_STATUS_INVALID_PACKET;
341         }
342     }
343
344     csumInfo.Receive.UdpChecksumSucceeded = 1;
345     NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
346     return NDIS_STATUS_SUCCESS;
347 }
348
349 /*
350  *----------------------------------------------------------------------------
351  * OvsDoDecapVxlan
352  *     Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'.
353  *----------------------------------------------------------------------------
354  */
355 NDIS_STATUS
356 OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
357                 PNET_BUFFER_LIST curNbl,
358                 OvsIPv4TunnelKey *tunKey,
359                 PNET_BUFFER_LIST *newNbl)
360 {
361     PNET_BUFFER curNb;
362     PMDL curMdl;
363     EthHdr *ethHdr;
364     IPHdr *ipHdr;
365     UDPHdr *udpHdr;
366     VXLANHdr *vxlanHdr;
367     UINT32 tunnelSize = 0, packetLength = 0;
368     PUINT8 bufferStart;
369     NDIS_STATUS status;
370
371     /* Check the the length of the UDP payload */
372     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
373     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
374     tunnelSize = OvsGetVxlanTunHdrSize();
375     if (packetLength <= tunnelSize) {
376         return NDIS_STATUS_INVALID_LENGTH;
377     }
378
379     /*
380      * Create a copy of the NBL so that we have all the headers in one MDL.
381      */
382     *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
383                                 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
384                                 TRUE /*copy NBL info */);
385
386     if (*newNbl == NULL) {
387         return NDIS_STATUS_RESOURCES;
388     }
389
390     /* XXX: Handle VLAN header. */
391     curNbl = *newNbl;
392     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
393     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
394     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
395                   NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
396     if (!bufferStart) {
397         status = NDIS_STATUS_RESOURCES;
398         goto dropNbl;
399     }
400
401     ethHdr = (EthHdr *)bufferStart;
402     /* XXX: Handle IP options. */
403     ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
404     tunKey->src = ipHdr->saddr;
405     tunKey->dst = ipHdr->daddr;
406     tunKey->tos = ipHdr->tos;
407     tunKey->ttl = ipHdr->ttl;
408     tunKey->pad = 0;
409     udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
410
411     /* Validate if NIC has indicated checksum failure. */
412     status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
413     if (status != NDIS_STATUS_SUCCESS) {
414         goto dropNbl;
415     }
416
417     /* Calculate and verify UDP checksum if NIC didn't do it. */
418     if (udpHdr->check != 0) {
419         status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength);
420         if (status != NDIS_STATUS_SUCCESS) {
421             goto dropNbl;
422         }
423     }
424
425     vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
426     if (vxlanHdr->instanceID) {
427         tunKey->flags = OVS_TNL_F_KEY;
428         tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID);
429     } else {
430         tunKey->flags = 0;
431         tunKey->tunnelId = 0;
432     }
433
434     /* Clear out the receive flag for the inner packet. */
435     NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
436     NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
437     return NDIS_STATUS_SUCCESS;
438
439 dropNbl:
440     OvsCompleteNBL(switchContext, *newNbl, TRUE);
441     *newNbl = NULL;
442     return status;
443 }
444
445
446 NDIS_STATUS
447 OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
448                    OvsIPv4TunnelKey *tunnelKey)
449 {
450     NDIS_STATUS status = NDIS_STATUS_FAILURE;
451     UDPHdr udpStorage;
452     const UDPHdr *udp;
453     VXLANHdr *VxlanHeader;
454     VXLANHdr  VxlanHeaderBuffer;
455     struct IPHdr ip_storage;
456     const struct IPHdr *nh;
457     OVS_PACKET_HDR_INFO layers;
458
459     layers.value = 0;
460
461     do {
462         nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
463         if (nh) {
464             layers.l4Offset = layers.l3Offset + nh->ihl * 4;
465         } else {
466             break;
467         }
468
469         /* make sure it's a VXLAN packet */
470         udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
471         if (udp) {
472             layers.l7Offset = layers.l4Offset + sizeof *udp;
473         } else {
474             break;
475         }
476
477         /* XXX Should be tested against the dynamic port # in the VXLAN vport */
478         ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));
479
480         VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
481                                                     sizeof(*VxlanHeader),
482                                                     layers.l7Offset,
483                                                     &VxlanHeaderBuffer);
484
485         if (VxlanHeader) {
486             tunnelKey->src = nh->saddr;
487             tunnelKey->dst = nh->daddr;
488             tunnelKey->ttl = nh->ttl;
489             tunnelKey->tos = nh->tos;
490             if (VxlanHeader->instanceID) {
491                 tunnelKey->flags = OVS_TNL_F_KEY;
492                 tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID);
493             } else {
494                 tunnelKey->flags = 0;
495                 tunnelKey->tunnelId = 0;
496             }
497         } else {
498             break;
499         }
500         status = NDIS_STATUS_SUCCESS;
501
502     } while(FALSE);
503
504     return status;
505 }
506
507 #pragma warning( pop )