diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 8d69e43..77037fd 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -54,6 +55,7 @@
 #include <linux/of_net.h>
 #include <linux/regulator/consumer.h>
 #include <linux/if_vlan.h>
+#include <linux/pinctrl/consumer.h>
 
 #include <asm/cacheflush.h>
 
@@ -172,10 +174,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #endif
 #endif /* CONFIG_M5272 */
 
-#if (((RX_RING_SIZE + TX_RING_SIZE) * 32) > PAGE_SIZE)
-#error "FEC: descriptor ring size constants too large"
-#endif
-
 /* Interrupt events/masks. */
 #define FEC_ENET_HBERR ((uint)0x80000000)      /* Heartbeat error */
 #define FEC_ENET_BABR  ((uint)0x40000000)      /* Babbling receiver */
@@ -231,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG 0x1
 #define FEC_PAUSE_FLAG_ENABLE  0x2
 
+#define TSO_HEADER_SIZE                128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS       100
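+/* Worst case per TSO skb, cf. tso_count_descs(): one header BD plus at
+ * least one data BD for each segment, and up to one extra BD per page
+ * fragment crossed.
+ */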
+#define FEC_MAX_SKB_DESCS      (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(fep, addr) \
+       (((addr) >= (fep)->tso_hdrs_dma) && \
+       ((addr) < (fep)->tso_hdrs_dma + (fep)->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -286,6 +293,22 @@ struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, struct fec_enet_priva
                return (new_bd < base) ? (new_bd + ring_size) : new_bd;
 }
 
+static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
+                               struct fec_enet_private *fep)
+{
+       return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
+}
+
+static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
+{
+       int entries;
+
+       entries = ((const char *)fep->dirty_tx -
+                       (const char *)fep->cur_tx) / fep->bufdesc_size - 1;
+
+       return entries > 0 ? entries : entries + fep->tx_ring_size;
+}
+
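The helper above is pure pointer arithmetic over the descriptor array: the
byte distance from cur_tx (the next slot to fill) back to dirty_tx (the
oldest slot not yet reclaimed), divided by the per-descriptor stride, minus
one slot that is always kept unused; a non-positive result means the
pointers have wrapped, so the ring size is added back. A minimal sketch of
the same arithmetic with plain indices (the ring size of 8 is hypothetical):

	/* Illustrative only -- mirrors fec_enet_get_free_txdesc_num(). */
	static int free_txdescs(int dirty_tx, int cur_tx, int ring_size)
	{
		int entries = dirty_tx - cur_tx - 1;	/* one slot reserved */

		return entries > 0 ? entries : entries + ring_size;
	}

	/* free_txdescs(0, 0, 8) == 7: empty ring, one slot kept unused.
	 * free_txdescs(0, 7, 8) == 0: ring full.
	 */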
 static void *swap_buffer(void *bufaddr, int len)
 {
        int i;
@@ -297,6 +320,11 @@ static void *swap_buffer(void *bufaddr, int len)
        return bufaddr;
 }
 
+static inline bool is_ipv4_pkt(struct sk_buff *skb)
+{
+       return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
+}
+
 static int
 fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
 {
@@ -307,33 +335,134 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
        if (unlikely(skb_cow_head(skb, 0)))
                return -1;
 
+       if (is_ipv4_pkt(skb))
+               ip_hdr(skb)->check = 0;
        *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = 0;
 
        return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static void
+fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
+{
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       struct bufdesc *bdp_pre;
+
+       bdp_pre = fec_enet_get_prevdesc(bdp, fep);
+       if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
+           !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
+               fep->delay_work.trig_tx = true;
+               schedule_delayed_work(&(fep->delay_work.delay_work),
+                                       msecs_to_jiffies(1));
+       }
+}
+
+static int
+fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
        const struct platform_device_id *id_entry =
                                platform_get_device_id(fep->pdev);
-       struct bufdesc *bdp, *bdp_pre;
-       void *bufaddr;
-       unsigned short  status;
+       struct bufdesc *bdp = fep->cur_tx;
+       struct bufdesc_ex *ebdp;
+       int nr_frags = skb_shinfo(skb)->nr_frags;
+       int frag, frag_len;
+       unsigned short status;
+       unsigned int estatus = 0;
+       skb_frag_t *this_frag;
        unsigned int index;
+       void *bufaddr;
+       int i;
 
-       /* Fill in a Tx ring entry */
+       for (frag = 0; frag < nr_frags; frag++) {
+               this_frag = &skb_shinfo(skb)->frags[frag];
+               bdp = fec_enet_get_nextdesc(bdp, fep);
+               ebdp = (struct bufdesc_ex *)bdp;
+
+               status = bdp->cbd_sc;
+               status &= ~BD_ENET_TX_STATS;
+               status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+               frag_len = skb_frag_size(this_frag);
+
+               /* Handle the last BD specially */
+               if (frag == nr_frags - 1) {
+                       status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+                       if (fep->bufdesc_ex) {
+                               estatus |= BD_ENET_TX_INT;
+                               if (unlikely(skb_shinfo(skb)->tx_flags &
+                                       SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+                                       estatus |= BD_ENET_TX_TS;
+                       }
+               }
+
+               if (fep->bufdesc_ex) {
+                       if (skb->ip_summed == CHECKSUM_PARTIAL)
+                               estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+                       ebdp->cbd_bdu = 0;
+                       ebdp->cbd_esc = estatus;
+               }
+
+               bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+
+               index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+               if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+                       id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+                       memcpy(fep->tx_bounce[index], bufaddr, frag_len);
+                       bufaddr = fep->tx_bounce[index];
+
+                       if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                               swap_buffer(bufaddr, frag_len);
+               }
+
+               bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+                                               frag_len, DMA_TO_DEVICE);
+               if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+                       dev_kfree_skb_any(skb);
+                       if (net_ratelimit())
+                               netdev_err(ndev, "Tx DMA memory map failed\n");
+                       goto dma_mapping_error;
+               }
+
+               bdp->cbd_datlen = frag_len;
+               bdp->cbd_sc = status;
+       }
+
+       fep->cur_tx = bdp;
+
+       return 0;
+
+dma_mapping_error:
        bdp = fep->cur_tx;
+       for (i = 0; i < frag; i++) {
+               bdp = fec_enet_get_nextdesc(bdp, fep);
+               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                               bdp->cbd_datlen, DMA_TO_DEVICE);
+       }
+       /* The skb was already freed above; return an error so the caller
+        * backs out instead of touching it again.
+        */
+       return -ENOMEM;
+}
 
-       status = bdp->cbd_sc;
+static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       int nr_frags = skb_shinfo(skb)->nr_frags;
+       struct bufdesc *bdp, *last_bdp;
+       void *bufaddr;
+       unsigned short status;
+       unsigned short buflen;
+       unsigned int estatus = 0;
+       unsigned int index;
+       int entries_free;
+       int ret;
 
-       if (status & BD_ENET_TX_READY) {
-               /* Ooops.  All transmit buffers are full.  Bail out.
-                * This should not happen, since ndev->tbusy should be set.
-                */
-               netdev_err(ndev, "tx queue full!\n");
-               return NETDEV_TX_BUSY;
+       entries_free = fec_enet_get_free_txdesc_num(fep);
+       if (entries_free < MAX_SKB_FRAGS + 1) {
+               dev_kfree_skb_any(skb);
+               if (net_ratelimit())
+                       netdev_err(ndev, "NOT enough BD for SG!\n");
+               return NETDEV_TX_OK;
        }
 
        /* Protocol checksum off-load for TCP and UDP. */
@@ -342,102 +471,300 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                return NETDEV_TX_OK;
        }
 
-       /* Clear all of the status flags */
+       /* Fill in a Tx ring entry */
+       bdp = fep->cur_tx;
+       status = bdp->cbd_sc;
        status &= ~BD_ENET_TX_STATS;
 
        /* Set buffer length and buffer pointer */
        bufaddr = skb->data;
-       bdp->cbd_datlen = skb->len;
+       buflen = skb_headlen(skb);
 
-       /*
-        * On some FEC implementations data must be aligned on
-        * 4-byte boundaries. Use bounce buffers to copy data
-        * and get it aligned. Ugh.
-        */
-       if (fep->bufdesc_ex)
-               index = (struct bufdesc_ex *)bdp -
-                       (struct bufdesc_ex *)fep->tx_bd_base;
-       else
-               index = bdp - fep->tx_bd_base;
-
-       if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
-               memcpy(fep->tx_bounce[index], skb->data, skb->len);
+       index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+       if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+               id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+               memcpy(fep->tx_bounce[index], skb->data, buflen);
                bufaddr = fep->tx_bounce[index];
-       }
 
-       /*
-        * Some design made an incorrect assumption on endian mode of
-        * the system that it's running on. As the result, driver has to
-        * swap every frame going to and coming from the controller.
-        */
-       if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
-               swap_buffer(bufaddr, skb->len);
-
-       /* Save skb pointer */
-       fep->tx_skbuff[index] = skb;
+               if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                       swap_buffer(bufaddr, buflen);
+       }
 
        /* Push the data cache so the CPM does not get stale memory
         * data.
         */
        bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
-                       skb->len, DMA_TO_DEVICE);
+                                       buflen, DMA_TO_DEVICE);
        if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
-               bdp->cbd_bufaddr = 0;
-               fep->tx_skbuff[index] = NULL;
                dev_kfree_skb_any(skb);
                if (net_ratelimit())
                        netdev_err(ndev, "Tx DMA memory map failed\n");
                return NETDEV_TX_OK;
        }
 
+       if (nr_frags) {
+               ret = fec_enet_txq_submit_frag_skb(skb, ndev);
+               if (ret) {
+                       /* the frag path freed the skb; drop the head mapping too */
+                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                                       buflen, DMA_TO_DEVICE);
+                       return ret;
+               }
+       } else {
+               status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+               if (fep->bufdesc_ex) {
+                       estatus = BD_ENET_TX_INT;
+                       if (unlikely(skb_shinfo(skb)->tx_flags &
+                               SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+                               estatus |= BD_ENET_TX_TS;
+               }
+       }
+
        if (fep->bufdesc_ex) {
 
                struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-               ebdp->cbd_bdu = 0;
+
                if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-                       fep->hwts_tx_en)) {
-                       ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
+                       fep->hwts_tx_en))
                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-               } else {
-                       ebdp->cbd_esc = BD_ENET_TX_INT;
 
-                       /* Enable protocol checksum flags
-                        * We do not bother with the IP Checksum bits as they
-                        * are done by the kernel
-                        */
-                       if (skb->ip_summed == CHECKSUM_PARTIAL)
-                               ebdp->cbd_esc |= BD_ENET_TX_PINS;
-               }
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
+               ebdp->cbd_bdu = 0;
+               ebdp->cbd_esc = estatus;
        }
 
+       last_bdp = fep->cur_tx;
+       index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
+       /* Save skb pointer */
+       fep->tx_skbuff[index] = skb;
+
+       bdp->cbd_datlen = buflen;
+
        /* Send it on its way.  Tell FEC it's ready, interrupt when done,
         * it's the last BD of the frame, and to put the CRC on the end.
         */
-       status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
-                       | BD_ENET_TX_LAST | BD_ENET_TX_TC);
+       status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
        bdp->cbd_sc = status;
 
-       bdp_pre = fec_enet_get_prevdesc(bdp, fep);
-       if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
-           !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
-               fep->delay_work.trig_tx = true;
-               schedule_delayed_work(&(fep->delay_work.delay_work),
-                                       msecs_to_jiffies(1));
-       }
+       fec_enet_submit_work(bdp, fep);
 
        /* If this was the last BD in the ring, start at the beginning again. */
-       bdp = fec_enet_get_nextdesc(bdp, fep);
+       bdp = fec_enet_get_nextdesc(last_bdp, fep);
 
        skb_tx_timestamp(skb);
 
        fep->cur_tx = bdp;
 
-       if (fep->cur_tx == fep->dirty_tx)
-               netif_stop_queue(ndev);
+       /* Trigger transmission start */
+       writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+       return 0;
+}
+
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+                       struct bufdesc *bdp, int index, char *data,
+                       int size, bool last_tcp, bool is_last)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+       unsigned short status;
+       unsigned int estatus = 0;
+
+       status = bdp->cbd_sc;
+       status &= ~BD_ENET_TX_STATS;
+
+       status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+       bdp->cbd_datlen = size;
+
+       if (((unsigned long) data) & FEC_ALIGNMENT ||
+               id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+               memcpy(fep->tx_bounce[index], data, size);
+               data = fep->tx_bounce[index];
+
+               if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                       swap_buffer(data, size);
+       }
+
+       bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+                                       size, DMA_TO_DEVICE);
+       if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+               dev_kfree_skb_any(skb);
+               if (net_ratelimit())
+                       netdev_err(ndev, "Tx DMA memory map failed\n");
+               /* skb is already freed: returning NETDEV_TX_BUSY here would
+                * make the core requeue it, so return a plain error instead.
+                */
+               return -ENOMEM;
+       }
+
+       if (fep->bufdesc_ex) {
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+               ebdp->cbd_bdu = 0;
+               ebdp->cbd_esc = estatus;
+       }
+
+       /* Handle the last BD specially */
+       if (last_tcp)
+               status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+       if (is_last) {
+               status |= BD_ENET_TX_INTR;
+               if (fep->bufdesc_ex)
+                       ebdp->cbd_esc |= BD_ENET_TX_INT;
+       }
+
+       bdp->cbd_sc = status;
+
+       return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+                       struct bufdesc *bdp, int index)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+       void *bufaddr;
+       unsigned long dmabuf;
+       unsigned short status;
+       unsigned int estatus = 0;
+
+       status = bdp->cbd_sc;
+       status &= ~BD_ENET_TX_STATS;
+       status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+       bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+       dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+       if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+               id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+               memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+               bufaddr = fep->tx_bounce[index];
+
+               if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                       swap_buffer(bufaddr, hdr_len);
+
+               dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+                                       hdr_len, DMA_TO_DEVICE);
+               if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+                       dev_kfree_skb_any(skb);
+                       if (net_ratelimit())
+                               netdev_err(ndev, "Tx DMA memory map failed\n");
+                       /* as in the data path: skb is freed, don't report BUSY */
+                       return -ENOMEM;
+               }
+       }
+
+       bdp->cbd_bufaddr = dmabuf;
+       bdp->cbd_datlen = hdr_len;
+
+       if (fep->bufdesc_ex) {
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+               ebdp->cbd_bdu = 0;
+               ebdp->cbd_esc = estatus;
+       }
+
+       bdp->cbd_sc = status;
+
+       return 0;
+}
+
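Note on the header slots above: fep->tso_hdrs is a single coherent DMA
region with a fixed 128-byte slot per descriptor index, so in the common
case the header needs no per-packet dma_map_single() at all and
cbd_bufaddr simply points into tso_hdrs_dma. That is what the
IS_TSO_HEADER() check in the reclaim path below keys on: addresses inside
that window must not be unmapped. A header bounced for alignment or
frame-swap quirks is mapped for real, lands outside the window, and is
unmapped normally.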
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       int total_len, data_left;
+       struct bufdesc *bdp = fep->cur_tx;
+       struct tso_t tso;
+       unsigned int index = 0;
+       int ret;
+
+       if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+               dev_kfree_skb_any(skb);
+               if (net_ratelimit())
+                       netdev_err(ndev, "NOT enough BD for TSO!\n");
+               return NETDEV_TX_OK;
+       }
+
+       /* Protocol checksum off-load for TCP and UDP. */
+       if (fec_enet_clear_csum(skb, ndev)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* Initialize the TSO handler, and prepare the first payload */
+       tso_start(skb, &tso);
+
+       total_len = skb->len - hdr_len;
+       while (total_len > 0) {
+               char *hdr;
+
+               index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+               data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+               total_len -= data_left;
+
+               /* prepare packet headers: MAC + IP + TCP */
+               hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+               tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+               ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+               if (ret)
+                       goto err_release;
+
+               while (data_left > 0) {
+                       int size;
+
+                       size = min_t(int, tso.size, data_left);
+                       bdp = fec_enet_get_nextdesc(bdp, fep);
+                       index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+                       ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+                                                       size, size == data_left,
+                                                       total_len == 0);
+                       if (ret)
+                               goto err_release;
+
+                       data_left -= size;
+                       tso_build_data(skb, &tso, size);
+               }
+
+               bdp = fec_enet_get_nextdesc(bdp, fep);
+       }
+
+       /* Save skb pointer */
+       fep->tx_skbuff[index] = skb;
+
+       fec_enet_submit_work(bdp, fep);
+
+       skb_tx_timestamp(skb);
+       fep->cur_tx = bdp;
 
        /* Trigger transmission start */
        writel(0, fep->hwp + FEC_X_DES_ACTIVE);
 
+       return 0;
+
+err_release:
+       /* TODO: Release all used data descriptors for TSO */
+       return ret;
+}
+
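To make the loop in fec_enet_txq_submit_tso() concrete, here is the
descriptor sequence for one hypothetical GSO skb, assuming a linear skb so
each segment needs a single data BD (all numbers invented for
illustration):

	/* skb->len = 3054, hdr_len = 54, gso_size = 1448:
	 *
	 *   segment 1 (1448 bytes): BD n+0 header slot, BD n+1 data
	 *   segment 2 (1448 bytes): BD n+2 header slot, BD n+3 data
	 *   segment 3 ( 104 bytes): BD n+4 header slot, BD n+5 data
	 *
	 * tso_build_hdr() patches the IP total length, IP id, TCP sequence
	 * number and FIN/PSH flags per segment; only the very last data BD
	 * gets BD_ENET_TX_INTR, so a single interrupt reclaims the burst.
	 */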
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       int entries_free;
+       int ret;
+
+       if (skb_is_gso(skb))
+               ret = fec_enet_txq_submit_tso(skb, ndev);
+       else
+               ret = fec_enet_txq_submit_skb(skb, ndev);
+       if (ret)
+               return ret;
+
+       entries_free = fec_enet_get_free_txdesc_num(fep);
+       if (entries_free <= fep->tx_stop_threshold)
+               netif_stop_queue(ndev);
+
        return NETDEV_TX_OK;
 }
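Flow control also changes model here: instead of stopping only when cur_tx
catches dirty_tx, the queue is stopped as soon as entries_free falls to
tx_stop_threshold, which fec_enet_init() below sets to FEC_MAX_SKB_DESCS so
that the next worst-case skb always still fits, and fec_enet_tx() wakes it
again only once tx_wake_threshold entries are free, giving the stop/wake
decision hysteresis.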
 
@@ -756,6 +1083,7 @@ fec_enet_tx(struct net_device *ndev)
        unsigned short status;
        struct  sk_buff *skb;
        int     index = 0;
+       int     entries_free;
 
        fep = netdev_priv(ndev);
        bdp = fep->dirty_tx;
@@ -769,16 +1097,17 @@ fec_enet_tx(struct net_device *ndev)
                if (bdp == fep->cur_tx)
                        break;
 
-               if (fep->bufdesc_ex)
-                       index = (struct bufdesc_ex *)bdp -
-                               (struct bufdesc_ex *)fep->tx_bd_base;
-               else
-                       index = bdp - fep->tx_bd_base;
+               index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
                skb = fep->tx_skbuff[index];
-               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
-                               DMA_TO_DEVICE);
+               if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                                       bdp->cbd_datlen, DMA_TO_DEVICE);
                bdp->cbd_bufaddr = 0;
+               if (!skb) {
+                       bdp = fec_enet_get_nextdesc(bdp, fep);
+                       continue;
+               }
 
                /* Check for errors. */
                if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -797,7 +1126,7 @@ fec_enet_tx(struct net_device *ndev)
                                ndev->stats.tx_carrier_errors++;
                } else {
                        ndev->stats.tx_packets++;
-                       ndev->stats.tx_bytes += bdp->cbd_datlen;
+                       ndev->stats.tx_bytes += skb->len;
                }
 
                if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
@@ -834,15 +1163,15 @@ fec_enet_tx(struct net_device *ndev)
 
                /* Since we have freed up a buffer, the ring is no longer full
                 */
-               if (fep->dirty_tx != fep->cur_tx) {
-                       if (netif_queue_stopped(ndev))
+               if (netif_queue_stopped(ndev)) {
+                       entries_free = fec_enet_get_free_txdesc_num(fep);
+                       if (entries_free >= fep->tx_wake_threshold)
                                netif_wake_queue(ndev);
                }
        }
        return;
 }
 
-
 /* During a receive, the cur_rx points to the current incoming buffer.
  * When we update through the ring, if the next incoming buffer has
  * not been given to the system, we just set the empty indicator,
@@ -920,11 +1249,7 @@ fec_enet_rx(struct net_device *ndev, int budget)
                pkt_len = bdp->cbd_datlen;
                ndev->stats.rx_bytes += pkt_len;
 
-               if (fep->bufdesc_ex)
-                       index = (struct bufdesc_ex *)bdp -
-                               (struct bufdesc_ex *)fep->rx_bd_base;
-               else
-                       index = bdp - fep->rx_bd_base;
+               index = fec_enet_get_bd_index(fep->rx_bd_base, bdp, fep);
                data = fep->rx_skbuff[index]->data;
                dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
                                        FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);
@@ -1255,6 +1580,49 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
        return 0;
 }
 
+static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       int ret;
+
+       if (enable) {
+               ret = clk_prepare_enable(fep->clk_ahb);
+               if (ret)
+                       return ret;
+               ret = clk_prepare_enable(fep->clk_ipg);
+               if (ret)
+                       goto failed_clk_ipg;
+               if (fep->clk_enet_out) {
+                       ret = clk_prepare_enable(fep->clk_enet_out);
+                       if (ret)
+                               goto failed_clk_enet_out;
+               }
+               if (fep->clk_ptp) {
+                       ret = clk_prepare_enable(fep->clk_ptp);
+                       if (ret)
+                               goto failed_clk_ptp;
+               }
+       } else {
+               clk_disable_unprepare(fep->clk_ahb);
+               clk_disable_unprepare(fep->clk_ipg);
+               if (fep->clk_enet_out)
+                       clk_disable_unprepare(fep->clk_enet_out);
+               if (fep->clk_ptp)
+                       clk_disable_unprepare(fep->clk_ptp);
+       }
+
+       return 0;
+failed_clk_ptp:
+       if (fep->clk_enet_out)
+               clk_disable_unprepare(fep->clk_enet_out);
+failed_clk_enet_out:
+       clk_disable_unprepare(fep->clk_ipg);
+failed_clk_ipg:
+       clk_disable_unprepare(fep->clk_ahb);
+
+       return ret;
+}
+
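The helper above centralizes the four-clock bring-up and teardown that was
previously open-coded at each call site; the remaining hunks of this patch
delete those copies in probe, remove, suspend and resume and replace them
with a single fec_enet_clk_enable(ndev, true/false) call, usually paired
with a pinctrl default/sleep state transition.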
 static int fec_enet_mii_probe(struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1364,7 +1732,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
         * Reference Manual has an error on this, and gets fixed on i.MX6Q
         * document.
         */
-       fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000);
+       fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
        if (id_entry->driver_data & FEC_QUIRK_ENET_MAC)
                fep->phy_speed--;
        fep->phy_speed <<= 1;
@@ -1773,6 +2141,11 @@ fec_enet_open(struct net_device *ndev)
        struct fec_enet_private *fep = netdev_priv(ndev);
        int ret;
 
+       pinctrl_pm_select_default_state(&fep->pdev->dev);
+       ret = fec_enet_clk_enable(ndev, true);
+       if (ret)
+               return ret;
+
        /* I should reset the ring buffers here, but I don't yet know
         * a simple way to do that.
         */
@@ -1811,6 +2184,8 @@ fec_enet_close(struct net_device *ndev)
                phy_disconnect(fep->phy_dev);
        }
 
+       fec_enet_clk_enable(ndev, false);
+       pinctrl_pm_select_sleep_state(&fep->pdev->dev);
        fec_enet_free_buffers(ndev);
 
        return 0;
@@ -1988,13 +2363,35 @@ static int fec_enet_init(struct net_device *ndev)
        const struct platform_device_id *id_entry =
                                platform_get_device_id(fep->pdev);
        struct bufdesc *cbd_base;
+       int bd_size;
+
+       /* init the tx & rx ring size */
+       fep->tx_ring_size = TX_RING_SIZE;
+       fep->rx_ring_size = RX_RING_SIZE;
+
+       fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+       fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
+       if (fep->bufdesc_ex)
+               fep->bufdesc_size = sizeof(struct bufdesc_ex);
+       else
+               fep->bufdesc_size = sizeof(struct bufdesc);
+       bd_size = (fep->tx_ring_size + fep->rx_ring_size) *
+                       fep->bufdesc_size;
 
        /* Allocate memory for buffer descriptors. */
-       cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma,
+       cbd_base = dma_alloc_coherent(NULL, bd_size, &fep->bd_dma,
                                      GFP_KERNEL);
        if (!cbd_base)
                return -ENOMEM;
 
+       fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+                                               &fep->tso_hdrs_dma, GFP_KERNEL);
+       if (!fep->tso_hdrs) {
+               dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+               return -ENOMEM;
+       }
+
-       memset(cbd_base, 0, PAGE_SIZE);
+       memset(cbd_base, 0, bd_size);
 
        fep->netdev = ndev;
@@ -2004,10 +2401,6 @@ static int fec_enet_init(struct net_device *ndev)
        /* make sure MAC we just acquired is programmed into the hw */
        fec_set_mac_address(ndev, NULL);
 
-       /* init the tx & rx ring size */
-       fep->tx_ring_size = TX_RING_SIZE;
-       fep->rx_ring_size = RX_RING_SIZE;
-
        /* Set receive and transmit descriptor base. */
        fep->rx_bd_base = cbd_base;
        if (fep->bufdesc_ex)
@@ -2024,21 +2417,21 @@ static int fec_enet_init(struct net_device *ndev)
        writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
        netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);
 
-       if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) {
+       if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN)
                /* enable hw VLAN support */
                ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
-               ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-       }
 
        if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+               ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
                /* enable hw accelerator */
                ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                               | NETIF_F_RXCSUM);
-               ndev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                               | NETIF_F_RXCSUM);
+                               | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
                fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
        }
 
+       ndev->hw_features = ndev->features;
+
        fec_restart(ndev, 0);
 
        return 0;
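Sizing note for the hunk above: the descriptor area is now allocated by an
explicit bd_size instead of one page, which is also why the compile-time
PAGE_SIZE assertion was deleted in the first hunk of this patch. A worked
example with hypothetical ring sizes (the removed assertion hard-coded the
same 32-byte extended-descriptor size):

	/* e.g. 512 TX + 256 RX descriptors, sizeof(struct bufdesc_ex) == 32:
	 *   bd_size = (512 + 256) * 32 = 24576 bytes = six 4 KiB pages,
	 * well past the single page the old allocation assumed.
	 */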
@@ -2114,6 +2507,9 @@ fec_probe(struct platform_device *pdev)
                fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG;
 #endif
 
+       /* Select default pin state */
+       pinctrl_pm_select_default_state(&pdev->dev);
+
        r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        fep->hwp = devm_ioremap_resource(&pdev->dev, r);
        if (IS_ERR(fep->hwp)) {
@@ -2164,26 +2560,10 @@ fec_probe(struct platform_device *pdev)
                fep->bufdesc_ex = 0;
        }
 
-       ret = clk_prepare_enable(fep->clk_ahb);
+       ret = fec_enet_clk_enable(ndev, true);
        if (ret)
                goto failed_clk;
 
-       ret = clk_prepare_enable(fep->clk_ipg);
-       if (ret)
-               goto failed_clk_ipg;
-
-       if (fep->clk_enet_out) {
-               ret = clk_prepare_enable(fep->clk_enet_out);
-               if (ret)
-                       goto failed_clk_enet_out;
-       }
-
-       if (fep->clk_ptp) {
-               ret = clk_prepare_enable(fep->clk_ptp);
-               if (ret)
-                       goto failed_clk_ptp;
-       }
-
        fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
        if (!IS_ERR(fep->reg_phy)) {
                ret = regulator_enable(fep->reg_phy);
@@ -2225,6 +2605,8 @@ fec_probe(struct platform_device *pdev)
 
        /* Carrier starts down, phylib will bring it up */
        netif_carrier_off(ndev);
+       fec_enet_clk_enable(ndev, false);
+       pinctrl_pm_select_sleep_state(&pdev->dev);
 
        ret = register_netdev(ndev);
        if (ret)
@@ -2244,15 +2626,7 @@ failed_init:
        if (fep->reg_phy)
                regulator_disable(fep->reg_phy);
 failed_regulator:
-       if (fep->clk_ptp)
-               clk_disable_unprepare(fep->clk_ptp);
-failed_clk_ptp:
-       if (fep->clk_enet_out)
-               clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
-       clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
-       clk_disable_unprepare(fep->clk_ahb);
+       fec_enet_clk_enable(ndev, false);
 failed_clk:
 failed_ioremap:
        free_netdev(ndev);
@@ -2272,14 +2646,9 @@ fec_drv_remove(struct platform_device *pdev)
        del_timer_sync(&fep->time_keep);
        if (fep->reg_phy)
                regulator_disable(fep->reg_phy);
-       if (fep->clk_ptp)
-               clk_disable_unprepare(fep->clk_ptp);
        if (fep->ptp_clock)
                ptp_clock_unregister(fep->ptp_clock);
-       if (fep->clk_enet_out)
-               clk_disable_unprepare(fep->clk_enet_out);
-       clk_disable_unprepare(fep->clk_ipg);
-       clk_disable_unprepare(fep->clk_ahb);
+       fec_enet_clk_enable(ndev, false);
        free_netdev(ndev);
 
        return 0;
@@ -2296,12 +2665,8 @@ fec_suspend(struct device *dev)
                fec_stop(ndev);
                netif_device_detach(ndev);
        }
-       if (fep->clk_ptp)
-               clk_disable_unprepare(fep->clk_ptp);
-       if (fep->clk_enet_out)
-               clk_disable_unprepare(fep->clk_enet_out);
-       clk_disable_unprepare(fep->clk_ipg);
-       clk_disable_unprepare(fep->clk_ahb);
+       fec_enet_clk_enable(ndev, false);
+       pinctrl_pm_select_sleep_state(&fep->pdev->dev);
 
        if (fep->reg_phy)
                regulator_disable(fep->reg_phy);
@@ -2322,25 +2687,10 @@ fec_resume(struct device *dev)
                        return ret;
        }
 
-       ret = clk_prepare_enable(fep->clk_ahb);
+       pinctrl_pm_select_default_state(&fep->pdev->dev);
+       ret = fec_enet_clk_enable(ndev, true);
        if (ret)
-               goto failed_clk_ahb;
-
-       ret = clk_prepare_enable(fep->clk_ipg);
-       if (ret)
-               goto failed_clk_ipg;
-
-       if (fep->clk_enet_out) {
-               ret = clk_prepare_enable(fep->clk_enet_out);
-               if (ret)
-                       goto failed_clk_enet_out;
-       }
-
-       if (fep->clk_ptp) {
-               ret = clk_prepare_enable(fep->clk_ptp);
-               if (ret)
-                       goto failed_clk_ptp;
-       }
+               goto failed_clk;
 
        if (netif_running(ndev)) {
                fec_restart(ndev, fep->full_duplex);
@@ -2349,14 +2699,7 @@ fec_resume(struct device *dev)
 
        return 0;
 
-failed_clk_ptp:
-       if (fep->clk_enet_out)
-               clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
-       clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
-       clk_disable_unprepare(fep->clk_ahb);
-failed_clk_ahb:
+failed_clk:
        if (fep->reg_phy)
                regulator_disable(fep->reg_phy);
        return ret;