Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author David S. Miller <davem@davemloft.net>
Mon, 3 Oct 2016 01:17:07 +0000 (21:17 -0400)
committer David S. Miller <davem@davemloft.net>
Mon, 3 Oct 2016 02:20:41 +0000 (22:20 -0400)
Three sets of overlapping changes.  Nothing serious.

Signed-off-by: David S. Miller <davem@davemloft.net>
16 files changed:
MAINTAINERS
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/freescale/fec_main.c
include/net/sctp/structs.h
kernel/events/core.c
net/ipv4/route.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv6/ip6_gre.c
net/ipv6/route.c
net/sched/act_ife.c
net/sctp/chunk.c
net/sctp/outqueue.c
net/sctp/sctp_diag.c
net/sctp/sm_make_chunk.c
net/sctp/socket.c

diff --combined MAINTAINERS
@@@ -636,15 -636,6 +636,15 @@@ F:       drivers/tty/serial/altera_jtaguart.
  F:    include/linux/altera_uart.h
  F:    include/linux/altera_jtaguart.h
  
 +AMAZON ETHERNET DRIVERS
 +M:    Netanel Belgazal <netanel@annapurnalabs.com>
 +R:    Saeed Bishara <saeed@annapurnalabs.com>
 +R:    Zorik Machulsky <zorik@annapurnalabs.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    Documentation/networking/ena.txt
 +F:    drivers/net/ethernet/amazon/
 +
  AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
  M:    Tom Lendacky <thomas.lendacky@amd.com>
  M:    Gary Hook <gary.hook@amd.com>
@@@ -5593,9 -5584,10 +5593,9 @@@ F:     Documentation/devicetree/bindings/sc
  
  HOST AP DRIVER
  M:    Jouni Malinen <j@w1.fi>
 -L:    hostap@shmoo.com (subscribers-only)
  L:    linux-wireless@vger.kernel.org
 -W:    http://hostap.epitest.fi/
 -S:    Maintained
 +W:    http://w1.fi/hostap-driver.html
 +S:    Obsolete
  F:    drivers/net/wireless/intersil/hostap/
  
  HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
@@@ -8753,7 -8745,7 +8753,7 @@@ F:      drivers/oprofile
  F:    include/linux/oprofile.h
  
  ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
- M:    Mark Fasheh <mfasheh@suse.com>
+ M:    Mark Fasheh <mfasheh@versity.com>
  M:    Joel Becker <jlbec@evilplan.org>
  L:    ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
  W:    http://ocfs2.wiki.kernel.org
@@@ -9708,12 -9700,6 +9708,12 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/net/wireless/ath/ath10k/
  
 +QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 +M:    Timur Tabi <timur@codeaurora.org>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/ethernet/qualcomm/emac/
 +
  QUALCOMM HEXAGON ARCHITECTURE
  M:    Richard Kuo <rkuo@codeaurora.org>
  L:    linux-hexagon@vger.kernel.org
@@@ -9969,7 -9955,6 +9969,7 @@@ F:      net/rfkill
  
  RHASHTABLE
  M:    Thomas Graf <tgraf@suug.ch>
 +M:    Herbert Xu <herbert@gondor.apana.org.au>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    lib/rhashtable.c
@@@ -11641,7 -11626,7 +11641,7 @@@ F:   Documentation/devicetree/bindings/th
  THERMAL/CPU_COOLING
  M:    Amit Daniel Kachhap <amit.kachhap@gmail.com>
  M:    Viresh Kumar <viresh.kumar@linaro.org>
- M:    Javi Merino <javi.merino@arm.com>
+ M:    Javi Merino <javi.merino@kernel.org>
  L:    linux-pm@vger.kernel.org
  S:    Supported
  F:    Documentation/thermal/cpu-cooling-api.txt
@@@ -12305,7 -12290,6 +12305,7 @@@ F:   drivers/net/usb/smsc75xx.
  
  USB SMSC95XX ETHERNET DRIVER
  M:    Steve Glendinning <steve.glendinning@shawell.net>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/usb/smsc95xx.*
diff --combined drivers/net/ethernet/broadcom/tg3.c
@@@ -12079,107 -12079,95 +12079,107 @@@ static int tg3_set_eeprom(struct net_de
        return ret;
  }
  
 -static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 +static int tg3_get_link_ksettings(struct net_device *dev,
 +                                struct ethtool_link_ksettings *cmd)
  {
        struct tg3 *tp = netdev_priv(dev);
 +      u32 supported, advertising;
  
        if (tg3_flag(tp, USE_PHYLIB)) {
                struct phy_device *phydev;
                if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
                        return -EAGAIN;
                phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
 -              return phy_ethtool_gset(phydev, cmd);
 +              return phy_ethtool_ksettings_get(phydev, cmd);
        }
  
 -      cmd->supported = (SUPPORTED_Autoneg);
 +      supported = (SUPPORTED_Autoneg);
  
        if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY))
 -              cmd->supported |= (SUPPORTED_1000baseT_Half |
 -                                 SUPPORTED_1000baseT_Full);
 +              supported |= (SUPPORTED_1000baseT_Half |
 +                            SUPPORTED_1000baseT_Full);
  
        if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
 -              cmd->supported |= (SUPPORTED_100baseT_Half |
 -                                SUPPORTED_100baseT_Full |
 -                                SUPPORTED_10baseT_Half |
 -                                SUPPORTED_10baseT_Full |
 -                                SUPPORTED_TP);
 -              cmd->port = PORT_TP;
 +              supported |= (SUPPORTED_100baseT_Half |
 +                            SUPPORTED_100baseT_Full |
 +                            SUPPORTED_10baseT_Half |
 +                            SUPPORTED_10baseT_Full |
 +                            SUPPORTED_TP);
 +              cmd->base.port = PORT_TP;
        } else {
 -              cmd->supported |= SUPPORTED_FIBRE;
 -              cmd->port = PORT_FIBRE;
 +              supported |= SUPPORTED_FIBRE;
 +              cmd->base.port = PORT_FIBRE;
        }
 +      ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
 +                                              supported);
  
 -      cmd->advertising = tp->link_config.advertising;
 +      advertising = tp->link_config.advertising;
        if (tg3_flag(tp, PAUSE_AUTONEG)) {
                if (tp->link_config.flowctrl & FLOW_CTRL_RX) {
                        if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
 -                              cmd->advertising |= ADVERTISED_Pause;
 +                              advertising |= ADVERTISED_Pause;
                        } else {
 -                              cmd->advertising |= ADVERTISED_Pause |
 -                                                  ADVERTISED_Asym_Pause;
 +                              advertising |= ADVERTISED_Pause |
 +                                      ADVERTISED_Asym_Pause;
                        }
                } else if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
 -                      cmd->advertising |= ADVERTISED_Asym_Pause;
 +                      advertising |= ADVERTISED_Asym_Pause;
                }
        }
 +      ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
 +                                              advertising);
 +
        if (netif_running(dev) && tp->link_up) {
 -              ethtool_cmd_speed_set(cmd, tp->link_config.active_speed);
 -              cmd->duplex = tp->link_config.active_duplex;
 -              cmd->lp_advertising = tp->link_config.rmt_adv;
 +              cmd->base.speed = tp->link_config.active_speed;
 +              cmd->base.duplex = tp->link_config.active_duplex;
 +              ethtool_convert_legacy_u32_to_link_mode(
 +                      cmd->link_modes.lp_advertising,
 +                      tp->link_config.rmt_adv);
 +
                if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
                        if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE)
 -                              cmd->eth_tp_mdix = ETH_TP_MDI_X;
 +                              cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
                        else
 -                              cmd->eth_tp_mdix = ETH_TP_MDI;
 +                              cmd->base.eth_tp_mdix = ETH_TP_MDI;
                }
        } else {
 -              ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
 -              cmd->duplex = DUPLEX_UNKNOWN;
 -              cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
 -      }
 -      cmd->phy_address = tp->phy_addr;
 -      cmd->transceiver = XCVR_INTERNAL;
 -      cmd->autoneg = tp->link_config.autoneg;
 -      cmd->maxtxpkt = 0;
 -      cmd->maxrxpkt = 0;
 +              cmd->base.speed = SPEED_UNKNOWN;
 +              cmd->base.duplex = DUPLEX_UNKNOWN;
 +              cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 +      }
 +      cmd->base.phy_address = tp->phy_addr;
 +      cmd->base.autoneg = tp->link_config.autoneg;
        return 0;
  }
  
 -static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 +static int tg3_set_link_ksettings(struct net_device *dev,
 +                                const struct ethtool_link_ksettings *cmd)
  {
        struct tg3 *tp = netdev_priv(dev);
 -      u32 speed = ethtool_cmd_speed(cmd);
 +      u32 speed = cmd->base.speed;
 +      u32 advertising;
  
        if (tg3_flag(tp, USE_PHYLIB)) {
                struct phy_device *phydev;
                if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
                        return -EAGAIN;
                phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
 -              return phy_ethtool_sset(phydev, cmd);
 +              return phy_ethtool_ksettings_set(phydev, cmd);
        }
  
 -      if (cmd->autoneg != AUTONEG_ENABLE &&
 -          cmd->autoneg != AUTONEG_DISABLE)
 +      if (cmd->base.autoneg != AUTONEG_ENABLE &&
 +          cmd->base.autoneg != AUTONEG_DISABLE)
                return -EINVAL;
  
 -      if (cmd->autoneg == AUTONEG_DISABLE &&
 -          cmd->duplex != DUPLEX_FULL &&
 -          cmd->duplex != DUPLEX_HALF)
 +      if (cmd->base.autoneg == AUTONEG_DISABLE &&
 +          cmd->base.duplex != DUPLEX_FULL &&
 +          cmd->base.duplex != DUPLEX_HALF)
                return -EINVAL;
  
 -      if (cmd->autoneg == AUTONEG_ENABLE) {
 +      ethtool_convert_link_mode_to_legacy_u32(&advertising,
 +                                              cmd->link_modes.advertising);
 +
 +      if (cmd->base.autoneg == AUTONEG_ENABLE) {
                u32 mask = ADVERTISED_Autoneg |
                           ADVERTISED_Pause |
                           ADVERTISED_Asym_Pause;
                else
                        mask |= ADVERTISED_FIBRE;
  
 -              if (cmd->advertising & ~mask)
 +              if (advertising & ~mask)
                        return -EINVAL;
  
                mask &= (ADVERTISED_1000baseT_Half |
                         ADVERTISED_10baseT_Half |
                         ADVERTISED_10baseT_Full);
  
 -              cmd->advertising &= mask;
 +              advertising &= mask;
        } else {
                if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) {
                        if (speed != SPEED_1000)
                                return -EINVAL;
  
 -                      if (cmd->duplex != DUPLEX_FULL)
 +                      if (cmd->base.duplex != DUPLEX_FULL)
                                return -EINVAL;
                } else {
                        if (speed != SPEED_100 &&
  
        tg3_full_lock(tp, 0);
  
 -      tp->link_config.autoneg = cmd->autoneg;
 -      if (cmd->autoneg == AUTONEG_ENABLE) {
 -              tp->link_config.advertising = (cmd->advertising |
 +      tp->link_config.autoneg = cmd->base.autoneg;
 +      if (cmd->base.autoneg == AUTONEG_ENABLE) {
 +              tp->link_config.advertising = (advertising |
                                              ADVERTISED_Autoneg);
                tp->link_config.speed = SPEED_UNKNOWN;
                tp->link_config.duplex = DUPLEX_UNKNOWN;
        } else {
                tp->link_config.advertising = 0;
                tp->link_config.speed = speed;
 -              tp->link_config.duplex = cmd->duplex;
 +              tp->link_config.duplex = cmd->base.duplex;
        }
  
        tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED;
@@@ -14106,6 -14094,8 +14106,6 @@@ static int tg3_get_eee(struct net_devic
  }
  
  static const struct ethtool_ops tg3_ethtool_ops = {
 -      .get_settings           = tg3_get_settings,
 -      .set_settings           = tg3_set_settings,
        .get_drvinfo            = tg3_get_drvinfo,
        .get_regs_len           = tg3_get_regs_len,
        .get_regs               = tg3_get_regs,
        .get_ts_info            = tg3_get_ts_info,
        .get_eee                = tg3_get_eee,
        .set_eee                = tg3_set_eee,
 +      .get_link_ksettings     = tg3_get_link_ksettings,
 +      .set_link_ksettings     = tg3_set_link_ksettings,
  };
  
  static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
@@@ -18134,14 -18122,14 +18134,14 @@@ static pci_ers_result_t tg3_io_error_de
  
        rtnl_lock();
  
-       /* We needn't recover from permanent error */
-       if (state == pci_channel_io_frozen)
-               tp->pcierr_recovery = true;
        /* We probably don't have netdev yet */
        if (!netdev || !netif_running(netdev))
                goto done;
  
+       /* We needn't recover from permanent error */
+       if (state == pci_channel_io_frozen)
+               tp->pcierr_recovery = true;
        tg3_phy_stop(tp);
  
        tg3_netif_stop(tp);
@@@ -18238,7 -18226,7 +18238,7 @@@ static void tg3_io_resume(struct pci_de
  
        rtnl_lock();
  
-       if (!netif_running(netdev))
+       if (!netdev || !netif_running(netdev))
                goto done;
  
        tg3_full_lock(tp, 0);
diff --combined drivers/net/ethernet/freescale/fec_main.c
@@@ -89,10 -89,10 +89,10 @@@ static struct platform_device_id fec_de
                .driver_data = 0,
        }, {
                .name = "imx25-fec",
-               .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
+               .driver_data = FEC_QUIRK_USE_GASKET,
        }, {
                .name = "imx27-fec",
-               .driver_data = FEC_QUIRK_HAS_RACC,
+               .driver_data = 0,
        }, {
                .name = "imx28-fec",
                .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
@@@ -180,6 -180,7 +180,7 @@@ MODULE_PARM_DESC(macaddr, "FEC Etherne
  /* FEC receive acceleration */
  #define FEC_RACC_IPDIS                (1 << 1)
  #define FEC_RACC_PRODIS               (1 << 2)
+ #define FEC_RACC_SHIFT16      BIT(7)
  #define FEC_RACC_OPTIONS      (FEC_RACC_IPDIS | FEC_RACC_PRODIS)
  
  /*
@@@ -945,9 -946,11 +946,11 @@@ fec_restart(struct net_device *ndev
  
  #if !defined(CONFIG_M5272)
        if (fep->quirks & FEC_QUIRK_HAS_RACC) {
-               /* set RX checksum */
                val = readl(fep->hwp + FEC_RACC);
+               /* align IP header */
+               val |= FEC_RACC_SHIFT16;
                if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+                       /* set RX checksum */
                        val |= FEC_RACC_OPTIONS;
                else
                        val &= ~FEC_RACC_OPTIONS;
@@@ -1428,6 -1431,12 +1431,12 @@@ fec_enet_rx_queue(struct net_device *nd
                prefetch(skb->data - NET_IP_ALIGN);
                skb_put(skb, pkt_len - 4);
                data = skb->data;
+ #if !defined(CONFIG_M5272)
+               if (fep->quirks & FEC_QUIRK_HAS_RACC)
+                       data = skb_pull_inline(skb, 2);
+ #endif
                if (!is_copybreak && need_swap)
                        swap_buffer(data, pkt_len);
  
@@@ -2887,7 -2896,7 +2896,7 @@@ fec_enet_close(struct net_device *ndev
   * this kind of feature?).
   */
  
 -#define HASH_BITS     6               /* #bits in hash */
 +#define FEC_HASH_BITS 6               /* #bits in hash */
  #define CRC32_POLY    0xEDB88320
  
  static void set_multicast_list(struct net_device *ndev)
                        }
                }
  
 -              /* only upper 6 bits (HASH_BITS) are used
 +              /* only upper 6 bits (FEC_HASH_BITS) are used
                 * which point to a specific bit in the hash registers
                 */
 -              hash = (crc >> (32 - HASH_BITS)) & 0x3f;
 +              hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
  
                if (hash > 31) {
                        tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
diff --combined include/net/sctp/structs.h
@@@ -537,7 -537,6 +537,7 @@@ struct sctp_datamsg 
  struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
                                            struct sctp_sndrcvinfo *,
                                            struct iov_iter *);
 +void sctp_datamsg_free(struct sctp_datamsg *);
  void sctp_datamsg_put(struct sctp_datamsg *);
  void sctp_chunk_fail(struct sctp_chunk *, int error);
  int sctp_chunk_abandoned(struct sctp_chunk *);
@@@ -555,6 -554,9 +555,9 @@@ struct sctp_chunk 
  
        atomic_t refcnt;
  
+       /* How many times this chunk have been sent, for prsctp RTX policy */
+       int sent_count;
        /* This is our link to the per-transport transmitted list.  */
        struct list_head transmitted_list;
  
        /* This needs to be recoverable for SCTP_SEND_FAILED events. */
        struct sctp_sndrcvinfo sinfo;
  
-       /* We use this field to record param for prsctp policies,
-        * for TTL policy, it is the time_to_drop of this chunk,
-        * for RTX policy, it is the max_sent_count of this chunk,
-        * for PRIO policy, it is the priority of this chunk.
-        */
-       unsigned long prsctp_param;
-       /* How many times this chunk have been sent, for prsctp RTX policy */
-       int sent_count;
        /* Which association does this belong to?  */
        struct sctp_association *asoc;
  
@@@ -1077,7 -1069,7 +1070,7 @@@ struct sctp_outq 
  void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
  void sctp_outq_teardown(struct sctp_outq *);
  void sctp_outq_free(struct sctp_outq*);
 -int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
 +void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
  int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
  int sctp_outq_is_empty(const struct sctp_outq *);
  void sctp_outq_restart(struct sctp_outq *);
  void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
                     sctp_retransmit_reason_t);
  void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
 -int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
 +void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
  void sctp_prsctp_prune(struct sctp_association *asoc,
                       struct sctp_sndrcvinfo *sinfo, int msg_len);
  /* Uncork and flush an outqueue.  */
diff --combined kernel/events/core.c
@@@ -3929,7 -3929,7 +3929,7 @@@ static void exclusive_event_destroy(str
  
  static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
  {
-       if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+       if ((e1->pmu == e2->pmu) &&
            (e1->cpu == e2->cpu ||
             e1->cpu == -1 ||
             e2->cpu == -1))
@@@ -7049,7 -7049,7 +7049,7 @@@ static int __perf_event_overflow(struc
                irq_work_queue(&event->pending);
        }
  
 -      event->overflow_handler(event, data, regs);
 +      READ_ONCE(event->overflow_handler)(event, data, regs);
  
        if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
@@@ -7664,83 -7664,11 +7664,83 @@@ static void perf_event_free_filter(stru
        ftrace_profile_free_filter(event);
  }
  
 +#ifdef CONFIG_BPF_SYSCALL
 +static void bpf_overflow_handler(struct perf_event *event,
 +                               struct perf_sample_data *data,
 +                               struct pt_regs *regs)
 +{
 +      struct bpf_perf_event_data_kern ctx = {
 +              .data = data,
 +              .regs = regs,
 +      };
 +      int ret = 0;
 +
 +      preempt_disable();
 +      if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
 +              goto out;
 +      rcu_read_lock();
 +      ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
 +      rcu_read_unlock();
 +out:
 +      __this_cpu_dec(bpf_prog_active);
 +      preempt_enable();
 +      if (!ret)
 +              return;
 +
 +      event->orig_overflow_handler(event, data, regs);
 +}
 +
 +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 +{
 +      struct bpf_prog *prog;
 +
 +      if (event->overflow_handler_context)
 +              /* hw breakpoint or kernel counter */
 +              return -EINVAL;
 +
 +      if (event->prog)
 +              return -EEXIST;
 +
 +      prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
 +      if (IS_ERR(prog))
 +              return PTR_ERR(prog);
 +
 +      event->prog = prog;
 +      event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
 +      WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
 +      return 0;
 +}
 +
 +static void perf_event_free_bpf_handler(struct perf_event *event)
 +{
 +      struct bpf_prog *prog = event->prog;
 +
 +      if (!prog)
 +              return;
 +
 +      WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
 +      event->prog = NULL;
 +      bpf_prog_put(prog);
 +}
 +#else
 +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 +{
 +      return -EOPNOTSUPP;
 +}
 +static void perf_event_free_bpf_handler(struct perf_event *event)
 +{
 +}
 +#endif
 +
  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
  {
        bool is_kprobe, is_tracepoint;
        struct bpf_prog *prog;
  
 +      if (event->attr.type == PERF_TYPE_HARDWARE ||
 +          event->attr.type == PERF_TYPE_SOFTWARE)
 +              return perf_event_set_bpf_handler(event, prog_fd);
 +
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -EINVAL;
  
@@@ -7781,8 -7709,6 +7781,8 @@@ static void perf_event_free_bpf_prog(st
  {
        struct bpf_prog *prog;
  
 +      perf_event_free_bpf_handler(event);
 +
        if (!event->tp_event)
                return;
  
@@@ -9099,19 -9025,6 +9099,19 @@@ perf_event_alloc(struct perf_event_att
        if (!overflow_handler && parent_event) {
                overflow_handler = parent_event->overflow_handler;
                context = parent_event->overflow_handler_context;
 +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
 +              if (overflow_handler == bpf_overflow_handler) {
 +                      struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
 +
 +                      if (IS_ERR(prog)) {
 +                              err = PTR_ERR(prog);
 +                              goto err_ns;
 +                      }
 +                      event->prog = prog;
 +                      event->orig_overflow_handler =
 +                              parent_event->orig_overflow_handler;
 +              }
 +#endif
        }
  
        if (overflow_handler) {
diff --combined net/ipv4/route.c
@@@ -1252,9 -1252,7 +1252,9 @@@ static unsigned int ipv4_mtu(const stru
                        mtu = 576;
        }
  
 -      return min_t(unsigned int, mtu, IP_MAX_MTU);
 +      mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
 +
 +      return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
  }
  
  static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@@ -1837,7 -1835,7 +1837,7 @@@ static int ip_route_input_slow(struct s
         *      Now we are ready to route packet.
         */
        fl4.flowi4_oif = 0;
 -      fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
 +      fl4.flowi4_iif = dev->ifindex;
        fl4.flowi4_mark = skb->mark;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@@ -2024,9 -2022,7 +2024,9 @@@ static struct rtable *__mkroute_output(
                return ERR_PTR(-EINVAL);
  
        if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
 -              if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
 +              if (ipv4_is_loopback(fl4->saddr) &&
 +                  !(dev_out->flags & IFF_LOOPBACK) &&
 +                  !netif_is_l3_master(dev_out))
                        return ERR_PTR(-EINVAL);
  
        if (ipv4_is_lbcast(fl4->daddr))
@@@ -2156,6 -2152,7 +2156,6 @@@ struct rtable *__ip_route_output_key_ha
        unsigned int flags = 0;
        struct fib_result res;
        struct rtable *rth;
 -      int master_idx;
        int orig_oif;
        int err = -ENETUNREACH;
  
  
        orig_oif = fl4->flowi4_oif;
  
 -      master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
 -      if (master_idx)
 -              fl4->flowi4_oif = master_idx;
        fl4->flowi4_iif = LOOPBACK_IFINDEX;
        fl4->flowi4_tos = tos & IPTOS_RT_MASK;
        fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
                                fl4->saddr = inet_select_addr(dev_out, 0,
                                                              RT_SCOPE_HOST);
                }
 -
 -              rth = l3mdev_get_rtable(dev_out, fl4);
 -              if (rth)
 -                      goto out;
        }
  
        if (!fl4->daddr) {
        if (err) {
                res.fi = NULL;
                res.table = NULL;
 -              if (fl4->flowi4_oif &&
 -                  !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
 +              if (fl4->flowi4_oif) {
                        /* Apparently, routing tables are wrong. Assume,
                           that the destination is on link.
  
                        else
                                fl4->saddr = fl4->daddr;
                }
 -              dev_out = net->loopback_dev;
 +
 +              /* L3 master device is the loopback for that domain */
 +              dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
                fl4->flowi4_oif = dev_out->ifindex;
                flags |= RTCF_LOCAL;
                goto make_route;
@@@ -2500,7 -2503,8 +2500,8 @@@ static int rt_fill_info(struct net *net
                    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
                        int err = ipmr_get_route(net, skb,
                                                 fl4->saddr, fl4->daddr,
-                                                r, nowait);
+                                                r, nowait, portid);
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)
@@@ -2578,6 -2582,9 +2579,6 @@@ static int inet_rtm_getroute(struct sk_
        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
        fl4.flowi4_mark = mark;
  
 -      if (netif_index_is_l3_master(net, fl4.flowi4_oif))
 -              fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
 -
        if (iif) {
                struct net_device *dev;
  
diff --combined net/ipv4/tcp_input.c
@@@ -289,7 -289,6 +289,7 @@@ static bool tcp_ecn_rcv_ecn_echo(const 
  static void tcp_sndbuf_expand(struct sock *sk)
  {
        const struct tcp_sock *tp = tcp_sk(sk);
 +      const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
        int sndmem, per_mss;
        u32 nr_segs;
  
         * Cubic needs 1.7 factor, rounded to 2 to include
         * extra cushion (application might react slowly to POLLOUT)
         */
 -      sndmem = 2 * nr_segs * per_mss;
 +      sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
 +      sndmem *= nr_segs * per_mss;
  
        if (sk->sk_sndbuf < sndmem)
                sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
@@@ -901,29 -899,12 +901,29 @@@ static void tcp_verify_retransmit_hint(
                tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
  }
  
 +/* Sum the number of packets on the wire we have marked as lost.
 + * There are two cases we care about here:
 + * a) Packet hasn't been marked lost (nor retransmitted),
 + *    and this is the first loss.
 + * b) Packet has been marked both lost and retransmitted,
 + *    and this means we think it was lost again.
 + */
 +static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
 +{
 +      __u8 sacked = TCP_SKB_CB(skb)->sacked;
 +
 +      if (!(sacked & TCPCB_LOST) ||
 +          ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
 +              tp->lost += tcp_skb_pcount(skb);
 +}
 +
  static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
  {
        if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                tcp_verify_retransmit_hint(tp, skb);
  
                tp->lost_out += tcp_skb_pcount(skb);
 +              tcp_sum_lost(tp, skb);
                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
        }
  }
@@@ -932,7 -913,6 +932,7 @@@ void tcp_skb_mark_lost_uncond_verify(st
  {
        tcp_verify_retransmit_hint(tp, skb);
  
 +      tcp_sum_lost(tp, skb);
        if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                tp->lost_out += tcp_skb_pcount(skb);
                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@@ -1114,7 -1094,6 +1114,7 @@@ struct tcp_sacktag_state 
         */
        struct skb_mstamp first_sackt;
        struct skb_mstamp last_sackt;
 +      struct rate_sample *rate;
        int     flag;
  };
  
@@@ -1282,7 -1261,6 +1282,7 @@@ static bool tcp_shifted_skb(struct soc
        tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
                        start_seq, end_seq, dup_sack, pcount,
                        &skb->skb_mstamp);
 +      tcp_rate_skb_delivered(sk, skb, state->rate);
  
        if (skb == tp->lost_skb_hint)
                tp->lost_cnt_hint += pcount;
                tcp_advance_highest_sack(sk, skb);
  
        tcp_skb_collapse_tstamp(prev, skb);
 +      if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
 +              TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
 +
        tcp_unlink_write_queue(skb, sk);
        sk_wmem_free_skb(sk, skb);
  
@@@ -1565,7 -1540,6 +1565,7 @@@ static struct sk_buff *tcp_sacktag_walk
                                                dup_sack,
                                                tcp_skb_pcount(skb),
                                                &skb->skb_mstamp);
 +                      tcp_rate_skb_delivered(sk, skb, state->rate);
  
                        if (!before(TCP_SKB_CB(skb)->seq,
                                    tcp_highest_sack_seq(tp)))
@@@ -1648,10 -1622,8 +1648,10 @@@ tcp_sacktag_write_queue(struct sock *sk
  
        found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
                                         num_sacks, prior_snd_una);
 -      if (found_dup_sack)
 +      if (found_dup_sack) {
                state->flag |= FLAG_DSACKING_ACK;
 +              tp->delivered++; /* A spurious retransmission is delivered */
 +      }
  
        /* Eliminate too old ACKs, but take into
         * account more or less fresh ones, they can
@@@ -1918,7 -1890,6 +1918,7 @@@ void tcp_enter_loss(struct sock *sk
        struct sk_buff *skb;
        bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
        bool is_reneg;                  /* is receiver reneging on SACKs? */
 +      bool mark_lost;
  
        /* Reduce ssthresh if it has not yet been made inside this window. */
        if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
                if (skb == tcp_send_head(sk))
                        break;
  
 +              mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 +                           is_reneg);
 +              if (mark_lost)
 +                      tcp_sum_lost(tp, skb);
                TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 -              if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
 +              if (mark_lost) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
@@@ -2362,10 -2329,9 +2362,9 @@@ static void DBGUNDO(struct sock *sk, co
        }
  #if IS_ENABLED(CONFIG_IPV6)
        else if (sk->sk_family == AF_INET6) {
-               struct ipv6_pinfo *np = inet6_sk(sk);
                pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
                         msg,
-                        &np->daddr, ntohs(inet->inet_dport),
+                        &sk->sk_v6_daddr, ntohs(inet->inet_dport),
                         tp->snd_cwnd, tcp_left_out(tp),
                         tp->snd_ssthresh, tp->prior_ssthresh,
                         tp->packets_out);
@@@ -2536,9 -2502,6 +2535,9 @@@ static inline void tcp_end_cwnd_reducti
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
 +      if (inet_csk(sk)->icsk_ca_ops->cong_control)
 +              return;
 +
        /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
        if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
            (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
@@@ -2915,13 -2878,67 +2914,13 @@@ static void tcp_fastretrans_alert(struc
        *rexmit = REXMIT_LOST;
  }
  
 -/* Kathleen Nichols' algorithm for tracking the minimum value of
 - * a data stream over some fixed time interval. (E.g., the minimum
 - * RTT over the past five minutes.) It uses constant space and constant
 - * time per update yet almost always delivers the same minimum as an
 - * implementation that has to keep all the data in the window.
 - *
 - * The algorithm keeps track of the best, 2nd best & 3rd best min
 - * values, maintaining an invariant that the measurement time of the
 - * n'th best >= n-1'th best. It also makes sure that the three values
 - * are widely separated in the time window since that bounds the worse
 - * case error when that data is monotonically increasing over the window.
 - *
 - * Upon getting a new min, we can forget everything earlier because it
 - * has no value - the new min is <= everything else in the window by
 - * definition and it's the most recent. So we restart fresh on every new min
 - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
 - * best.
 - */
  static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
  {
 -      const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
 -      struct rtt_meas *m = tcp_sk(sk)->rtt_min;
 -      struct rtt_meas rttm = {
 -              .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
 -              .ts = now,
 -      };
 -      u32 elapsed;
 -
 -      /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
 -      if (unlikely(rttm.rtt <= m[0].rtt))
 -              m[0] = m[1] = m[2] = rttm;
 -      else if (rttm.rtt <= m[1].rtt)
 -              m[1] = m[2] = rttm;
 -      else if (rttm.rtt <= m[2].rtt)
 -              m[2] = rttm;
 -
 -      elapsed = now - m[0].ts;
 -      if (unlikely(elapsed > wlen)) {
 -              /* Passed entire window without a new min so make 2nd choice
 -               * the new min & 3rd choice the new 2nd. So forth and so on.
 -               */
 -              m[0] = m[1];
 -              m[1] = m[2];
 -              m[2] = rttm;
 -              if (now - m[0].ts > wlen) {
 -                      m[0] = m[1];
 -                      m[1] = rttm;
 -                      if (now - m[0].ts > wlen)
 -                              m[0] = rttm;
 -              }
 -      } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
 -              /* Passed a quarter of the window without a new min so
 -               * take 2nd choice from the 2nd quarter of the window.
 -               */
 -              m[2] = m[1] = rttm;
 -      } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
 -              /* Passed half the window without a new min so take the 3rd
 -               * choice from the last half of the window.
 -               */
 -              m[2] = rttm;
 -      }
 +      struct tcp_sock *tp = tcp_sk(sk);
 +      u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
 +
 +      minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
 +                         rtt_us ? : jiffies_to_usecs(1));
  }
  
  static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
@@@ -3084,11 -3101,10 +3083,11 @@@ static void tcp_ack_tstamp(struct sock 
   */
  static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                               u32 prior_snd_una, int *acked,
 -                             struct tcp_sacktag_state *sack)
 +                             struct tcp_sacktag_state *sack,
 +                             struct skb_mstamp *now)
  {
        const struct inet_connection_sock *icsk = inet_csk(sk);
 -      struct skb_mstamp first_ackt, last_ackt, now;
 +      struct skb_mstamp first_ackt, last_ackt;
        struct tcp_sock *tp = tcp_sk(sk);
        u32 prior_sacked = tp->sacked_out;
        u32 reord = tp->packets_out;
                        acked_pcount = tcp_tso_acked(sk, skb);
                        if (!acked_pcount)
                                break;
 -
                        fully_acked = false;
                } else {
                        /* Speedup tcp_unlink_write_queue() and next loop */
  
                tp->packets_out -= acked_pcount;
                pkts_acked += acked_pcount;
 +              tcp_rate_skb_delivered(sk, skb, sack->rate);
  
                /* Initial outgoing SYN's get put onto the write_queue
                 * just like anything else we transmit.  It is not
        if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                flag |= FLAG_SACK_RENEGING;
  
 -      skb_mstamp_get(&now);
        if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
 -              seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
 -              ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
 +              seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
 +              ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
        }
        if (sack->first_sackt.v64) {
 -              sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
 -              ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
 +              sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
 +              ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
        }
 -
 +      sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
        rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
                                        ca_rtt_us);
  
                tp->fackets_out -= min(pkts_acked, tp->fackets_out);
  
        } else if (skb && rtt_update && sack_rtt_us >= 0 &&
 -                 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 +                 sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
                /* Do not re-arm RTO if the sack RTT is measured from data sent
                 * after when the head was last (re)transmitted. Otherwise the
                 * timeout may continue to extend in loss recovery.
@@@ -3315,15 -3332,8 +3314,15 @@@ static inline bool tcp_may_raise_cwnd(c
   * information. All transmission or retransmission are delayed afterwards.
   */
  static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
 -                           int flag)
 +                           int flag, const struct rate_sample *rs)
  {
 +      const struct inet_connection_sock *icsk = inet_csk(sk);
 +
 +      if (icsk->icsk_ca_ops->cong_control) {
 +              icsk->icsk_ca_ops->cong_control(sk, rs);
 +              return;
 +      }
 +
        if (tcp_in_cwnd_reduction(sk)) {
                /* Reduce cwnd if state mandates */
                tcp_cwnd_reduction(sk, acked_sacked, flag);
@@@ -3568,21 -3578,17 +3567,21 @@@ static int tcp_ack(struct sock *sk, con
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_sacktag_state sack_state;
 +      struct rate_sample rs = { .prior_delivered = 0 };
        u32 prior_snd_una = tp->snd_una;
        u32 ack_seq = TCP_SKB_CB(skb)->seq;
        u32 ack = TCP_SKB_CB(skb)->ack_seq;
        bool is_dupack = false;
        u32 prior_fackets;
        int prior_packets = tp->packets_out;
 -      u32 prior_delivered = tp->delivered;
 +      u32 delivered = tp->delivered;
 +      u32 lost = tp->lost;
        int acked = 0; /* Number of packets newly acked */
        int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
 +      struct skb_mstamp now;
  
        sack_state.first_sackt.v64 = 0;
 +      sack_state.rate = &rs;
  
        /* We very likely will need to access write queue head. */
        prefetchw(sk->sk_write_queue.next);
        if (after(ack, tp->snd_nxt))
                goto invalid_ack;
  
 +      skb_mstamp_get(&now);
 +
        if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                tcp_rearm_rto(sk);
        }
  
        prior_fackets = tp->fackets_out;
 +      rs.prior_in_flight = tcp_packets_in_flight(tp);
  
        /* ts_recent update must be made after we are sure that the packet
         * is in window.
  
        /* See if we can take anything off of the retransmit queue. */
        flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
 -                                  &sack_state);
 +                                  &sack_state, &now);
  
        if (tcp_ack_is_dubious(sk, flag)) {
                is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
  
        if (icsk->icsk_pending == ICSK_TIME_RETRANS)
                tcp_schedule_loss_probe(sk);
 -      tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
 +      delivered = tp->delivered - delivered;  /* freshly ACKed or SACKed */
 +      lost = tp->lost - lost;                 /* freshly marked lost */
 +      tcp_rate_gen(sk, delivered, lost, &now, &rs);
 +      tcp_cong_control(sk, ack, delivered, flag, &rs);
        tcp_xmit_recovery(sk, rexmit);
        return 1;
  
@@@ -4107,7 -4107,7 +4106,7 @@@ void tcp_fin(struct sock *sk
        /* It _is_ possible, that we have something out-of-order _after_ FIN.
         * Probably, we should reset in this case. For now drop them.
         */
 -      __skb_queue_purge(&tp->out_of_order_queue);
 +      skb_rbtree_purge(&tp->out_of_order_queue);
        if (tcp_is_sack(tp))
                tcp_sack_reset(&tp->rx_opt);
        sk_mem_reclaim(sk);
@@@ -4267,7 -4267,7 +4266,7 @@@ static void tcp_sack_remove(struct tcp_
        int this_sack;
  
        /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
 -      if (skb_queue_empty(&tp->out_of_order_queue)) {
 +      if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                tp->rx_opt.num_sacks = 0;
                return;
        }
@@@ -4343,13 -4343,10 +4342,13 @@@ static void tcp_ofo_queue(struct sock *
  {
        struct tcp_sock *tp = tcp_sk(sk);
        __u32 dsack_high = tp->rcv_nxt;
 +      bool fin, fragstolen, eaten;
        struct sk_buff *skb, *tail;
 -      bool fragstolen, eaten;
 +      struct rb_node *p;
  
 -      while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
 +      p = rb_first(&tp->out_of_order_queue);
 +      while (p) {
 +              skb = rb_entry(p, struct sk_buff, rbnode);
                if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                        break;
  
                                dsack_high = TCP_SKB_CB(skb)->end_seq;
                        tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
                }
 +              p = rb_next(p);
 +              rb_erase(&skb->rbnode, &tp->out_of_order_queue);
  
 -              __skb_unlink(skb, &tp->out_of_order_queue);
 -              if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 +              if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
                        SOCK_DEBUG(sk, "ofo packet was already received\n");
                        tcp_drop(sk, skb);
                        continue;
                tail = skb_peek_tail(&sk->sk_receive_queue);
                eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
                tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 +              fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                if (!eaten)
                        __skb_queue_tail(&sk->sk_receive_queue, skb);
 -              if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 -                      tcp_fin(sk);
 -              if (eaten)
 +              else
                        kfree_skb_partial(skb, fragstolen);
 +
 +              if (unlikely(fin)) {
 +                      tcp_fin(sk);
 +                      /* tcp_fin() purges tp->out_of_order_queue,
 +                       * so we must end this loop right now.
 +                       */
 +                      break;
 +              }
        }
  }
  
@@@ -4402,9 -4391,12 +4401,9 @@@ static int tcp_try_rmem_schedule(struc
                if (tcp_prune_queue(sk) < 0)
                        return -1;
  
 -              if (!sk_rmem_schedule(sk, skb, size)) {
 +              while (!sk_rmem_schedule(sk, skb, size)) {
                        if (!tcp_prune_ofo_queue(sk))
                                return -1;
 -
 -                      if (!sk_rmem_schedule(sk, skb, size))
 -                              return -1;
                }
        }
        return 0;
  static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
  {
        struct tcp_sock *tp = tcp_sk(sk);
 +      struct rb_node **p, *q, *parent;
        struct sk_buff *skb1;
        u32 seq, end_seq;
 +      bool fragstolen;
  
        tcp_ecn_check_ce(tp, skb);
  
        inet_csk_schedule_ack(sk);
  
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 +      seq = TCP_SKB_CB(skb)->seq;
 +      end_seq = TCP_SKB_CB(skb)->end_seq;
        SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
 -                 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 +                 tp->rcv_nxt, seq, end_seq);
  
 -      skb1 = skb_peek_tail(&tp->out_of_order_queue);
 -      if (!skb1) {
 +      p = &tp->out_of_order_queue.rb_node;
 +      if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                /* Initial out of order segment, build 1 SACK. */
                if (tcp_is_sack(tp)) {
                        tp->rx_opt.num_sacks = 1;
 -                      tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
 -                      tp->selective_acks[0].end_seq =
 -                                              TCP_SKB_CB(skb)->end_seq;
 +                      tp->selective_acks[0].start_seq = seq;
 +                      tp->selective_acks[0].end_seq = end_seq;
                }
 -              __skb_queue_head(&tp->out_of_order_queue, skb);
 +              rb_link_node(&skb->rbnode, NULL, p);
 +              rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
 +              tp->ooo_last_skb = skb;
                goto end;
        }
  
 -      seq = TCP_SKB_CB(skb)->seq;
 -      end_seq = TCP_SKB_CB(skb)->end_seq;
 -
 -      if (seq == TCP_SKB_CB(skb1)->end_seq) {
 -              bool fragstolen;
 -
 -              if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
 -                      __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 -              } else {
 -                      tcp_grow_window(sk, skb);
 -                      kfree_skb_partial(skb, fragstolen);
 -                      skb = NULL;
 -              }
 -
 -              if (!tp->rx_opt.num_sacks ||
 -                  tp->selective_acks[0].end_seq != seq)
 -                      goto add_sack;
 -
 -              /* Common case: data arrive in order after hole. */
 -              tp->selective_acks[0].end_seq = end_seq;
 -              goto end;
 -      }
 -
 -      /* Find place to insert this segment. */
 -      while (1) {
 -              if (!after(TCP_SKB_CB(skb1)->seq, seq))
 -                      break;
 -              if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
 -                      skb1 = NULL;
 -                      break;
 -              }
 -              skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
 -      }
 -
 -      /* Do skb overlap to previous one? */
 -      if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 -              if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 -                      /* All the bits are present. Drop. */
 -                      NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
 -                      tcp_drop(sk, skb);
 -                      skb = NULL;
 -                      tcp_dsack_set(sk, seq, end_seq);
 -                      goto add_sack;
 +      /* In the typical case, we are adding an skb to the end of the list.
 +       * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 +       */
 +      if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
 +coalesce_done:
 +              tcp_grow_window(sk, skb);
 +              kfree_skb_partial(skb, fragstolen);
 +              skb = NULL;
 +              goto add_sack;
 +      }
 +      /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
 +      if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
 +              parent = &tp->ooo_last_skb->rbnode;
 +              p = &parent->rb_right;
 +              goto insert;
 +      }
 +
 +      /* Find place to insert this segment. Handle overlaps on the way. */
 +      parent = NULL;
 +      while (*p) {
 +              parent = *p;
 +              skb1 = rb_entry(parent, struct sk_buff, rbnode);
 +              if (before(seq, TCP_SKB_CB(skb1)->seq)) {
 +                      p = &parent->rb_left;
 +                      continue;
                }
 -              if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 -                      /* Partial overlap. */
 -                      tcp_dsack_set(sk, seq,
 -                                    TCP_SKB_CB(skb1)->end_seq);
 -              } else {
 -                      if (skb_queue_is_first(&tp->out_of_order_queue,
 -                                             skb1))
 -                              skb1 = NULL;
 -                      else
 -                              skb1 = skb_queue_prev(
 -                                      &tp->out_of_order_queue,
 -                                      skb1);
 +              if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 +                      if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 +                              /* All the bits are present. Drop. */
 +                              NET_INC_STATS(sock_net(sk),
 +                                            LINUX_MIB_TCPOFOMERGE);
 +                              __kfree_skb(skb);
 +                              skb = NULL;
 +                              tcp_dsack_set(sk, seq, end_seq);
 +                              goto add_sack;
 +                      }
 +                      if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 +                              /* Partial overlap. */
 +                              tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
 +                      } else {
 +                              /* skb's seq == skb1's seq and skb covers skb1.
 +                               * Replace skb1 with skb.
 +                               */
 +                              rb_replace_node(&skb1->rbnode, &skb->rbnode,
 +                                              &tp->out_of_order_queue);
 +                              tcp_dsack_extend(sk,
 +                                               TCP_SKB_CB(skb1)->seq,
 +                                               TCP_SKB_CB(skb1)->end_seq);
 +                              NET_INC_STATS(sock_net(sk),
 +                                            LINUX_MIB_TCPOFOMERGE);
 +                              __kfree_skb(skb1);
 +                              goto merge_right;
 +                      }
 +              } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
 +                      goto coalesce_done;
                }
 +              p = &parent->rb_right;
        }
 -      if (!skb1)
 -              __skb_queue_head(&tp->out_of_order_queue, skb);
 -      else
 -              __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 +insert:
 +      /* Insert segment into RB tree. */
 +      rb_link_node(&skb->rbnode, parent, p);
 +      rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
  
 -      /* And clean segments covered by new one as whole. */
 -      while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
 -              skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
 +merge_right:
 +      /* Remove other segments covered by skb. */
 +      while ((q = rb_next(&skb->rbnode)) != NULL) {
 +              skb1 = rb_entry(q, struct sk_buff, rbnode);
  
                if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
                        break;
                                         end_seq);
                        break;
                }
 -              __skb_unlink(skb1, &tp->out_of_order_queue);
 +              rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
                tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
                                 TCP_SKB_CB(skb1)->end_seq);
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
                tcp_drop(sk, skb1);
        }
 +      /* If there is no skb after us, we are the last_skb ! */
 +      if (!q)
 +              tp->ooo_last_skb = skb;
  
  add_sack:
        if (tcp_is_sack(tp))
@@@ -4670,13 -4653,13 +4669,13 @@@ queue_and_out
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                        tcp_fin(sk);
  
 -              if (!skb_queue_empty(&tp->out_of_order_queue)) {
 +              if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                        tcp_ofo_queue(sk);
  
                        /* RFC2581. 4.2. SHOULD send immediate ACK, when
                         * gap in queue is filled.
                         */
 -                      if (skb_queue_empty(&tp->out_of_order_queue))
 +                      if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                                inet_csk(sk)->icsk_ack.pingpong = 0;
                }
  
@@@ -4730,76 -4713,48 +4729,76 @@@ drop
        tcp_data_queue_ofo(sk, skb);
  }
  
 +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
 +{
 +      if (list)
 +              return !skb_queue_is_last(list, skb) ? skb->next : NULL;
 +
 +      return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
 +}
 +
  static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 -                                      struct sk_buff_head *list)
 +                                      struct sk_buff_head *list,
 +                                      struct rb_root *root)
  {
 -      struct sk_buff *next = NULL;
 +      struct sk_buff *next = tcp_skb_next(skb, list);
  
 -      if (!skb_queue_is_last(list, skb))
 -              next = skb_queue_next(list, skb);
 +      if (list)
 +              __skb_unlink(skb, list);
 +      else
 +              rb_erase(&skb->rbnode, root);
  
 -      __skb_unlink(skb, list);
        __kfree_skb(skb);
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
  
        return next;
  }
  
 +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
 +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 +{
 +      struct rb_node **p = &root->rb_node;
 +      struct rb_node *parent = NULL;
 +      struct sk_buff *skb1;
 +
 +      while (*p) {
 +              parent = *p;
 +              skb1 = rb_entry(parent, struct sk_buff, rbnode);
 +              if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
 +                      p = &parent->rb_left;
 +              else
 +                      p = &parent->rb_right;
 +      }
 +      rb_link_node(&skb->rbnode, parent, p);
 +      rb_insert_color(&skb->rbnode, root);
 +}
 +
  /* Collapse contiguous sequence of skbs head..tail with
   * sequence numbers start..end.
   *
 - * If tail is NULL, this means until the end of the list.
 + * If tail is NULL, this means until the end of the queue.
   *
   * Segments with FIN/SYN are not collapsed (only because this
   * simplifies code)
   */
  static void
 -tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 -           struct sk_buff *head, struct sk_buff *tail,
 -           u32 start, u32 end)
 +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 +           struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
  {
 -      struct sk_buff *skb, *n;
 +      struct sk_buff *skb = head, *n;
 +      struct sk_buff_head tmp;
        bool end_of_skbs;
  
        /* First, check that queue is collapsible and find
 -       * the point where collapsing can be useful. */
 -      skb = head;
 +       * the point where collapsing can be useful.
 +       */
  restart:
 -      end_of_skbs = true;
 -      skb_queue_walk_from_safe(list, skb, n) {
 -              if (skb == tail)
 -                      break;
 +      for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
 +              n = tcp_skb_next(skb, list);
 +
                /* No new bits? It is possible on ofo queue. */
                if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 -                      skb = tcp_collapse_one(sk, skb, list);
 +                      skb = tcp_collapse_one(sk, skb, list, root);
                        if (!skb)
                                break;
                        goto restart;
                        break;
                }
  
 -              if (!skb_queue_is_last(list, skb)) {
 -                      struct sk_buff *next = skb_queue_next(list, skb);
 -                      if (next != tail &&
 -                          TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
 -                              end_of_skbs = false;
 -                              break;
 -                      }
 +              if (n && n != tail &&
 +                  TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
 +                      end_of_skbs = false;
 +                      break;
                }
  
                /* Decided to skip this, advance start seq. */
            (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
                return;
  
 +      __skb_queue_head_init(&tmp);
 +
        while (before(start, end)) {
                int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
                struct sk_buff *nskb;
  
                nskb = alloc_skb(copy, GFP_ATOMIC);
                if (!nskb)
 -                      return;
 +                      break;
  
                memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
                TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 -              __skb_queue_before(list, skb, nskb);
 +              if (list)
 +                      __skb_queue_before(list, skb, nskb);
 +              else
 +                      __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
                skb_set_owner_r(nskb, sk);
  
                /* Copy data, releasing collapsed skbs. */
                                start += size;
                        }
                        if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 -                              skb = tcp_collapse_one(sk, skb, list);
 +                              skb = tcp_collapse_one(sk, skb, list, root);
                                if (!skb ||
                                    skb == tail ||
                                    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 -                                      return;
 +                                      goto end;
                        }
                }
        }
 +end:
 +      skb_queue_walk_safe(&tmp, skb, n)
 +              tcp_rbtree_insert(root, skb);
  }
  
  /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
  static void tcp_collapse_ofo_queue(struct sock *sk)
  {
        struct tcp_sock *tp = tcp_sk(sk);
 -      struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
 -      struct sk_buff *head;
 +      struct sk_buff *skb, *head;
 +      struct rb_node *p;
        u32 start, end;
  
 -      if (!skb)
 +      p = rb_first(&tp->out_of_order_queue);
 +      skb = rb_entry_safe(p, struct sk_buff, rbnode);
 +new_range:
 +      if (!skb) {
 +              p = rb_last(&tp->out_of_order_queue);
 +              /* Note: it is possible that p is NULL here. We do not
 +               * use rb_entry_safe(), as ooo_last_skb is valid only
 +               * if the rbtree is not empty.
 +               */
 +              tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
                return;
 -
 +      }
        start = TCP_SKB_CB(skb)->seq;
        end = TCP_SKB_CB(skb)->end_seq;
 -      head = skb;
 -
 -      for (;;) {
 -              struct sk_buff *next = NULL;
  
 -              if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
 -                      next = skb_queue_next(&tp->out_of_order_queue, skb);
 -              skb = next;
 +      for (head = skb;;) {
 +              skb = tcp_skb_next(skb, NULL);
  
 -              /* Segment is terminated when we see gap or when
 -               * we are at the end of all the queue. */
 +              /* Range is terminated when we see a gap or when
 +               * we are at the queue end.
 +               */
                if (!skb ||
                    after(TCP_SKB_CB(skb)->seq, end) ||
                    before(TCP_SKB_CB(skb)->end_seq, start)) {
 -                      tcp_collapse(sk, &tp->out_of_order_queue,
 +                      tcp_collapse(sk, NULL, &tp->out_of_order_queue,
                                     head, skb, start, end);
 -                      head = skb;
 -                      if (!skb)
 -                              break;
 -                      /* Start new segment */
 +                      goto new_range;
 +              }
 +
 +              if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                        start = TCP_SKB_CB(skb)->seq;
 +              if (after(TCP_SKB_CB(skb)->end_seq, end))
                        end = TCP_SKB_CB(skb)->end_seq;
 -              } else {
 -                      if (before(TCP_SKB_CB(skb)->seq, start))
 -                              start = TCP_SKB_CB(skb)->seq;
 -                      if (after(TCP_SKB_CB(skb)->end_seq, end))
 -                              end = TCP_SKB_CB(skb)->end_seq;
 -              }
        }
  }
  
  /*
 - * Purge the out-of-order queue.
 - * Return true if queue was pruned.
 + * Clean the out-of-order queue to make room.
 +       * We drop packets with the highest sequence numbers in order to:
 +       * 1) give holes a chance to be filled,
 +       * 2) avoid adding too much latency if thousands of packets sit there
 +       *    (but if the application shrinks SO_RCVBUF, we could still end
 +       *     up freeing the whole queue here).
 + *
 + * Return true if queue has shrunk.
   */
  static bool tcp_prune_ofo_queue(struct sock *sk)
  {
        struct tcp_sock *tp = tcp_sk(sk);
 -      bool res = false;
 +      struct rb_node *node, *prev;
  
 -      if (!skb_queue_empty(&tp->out_of_order_queue)) {
 -              NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
 -              __skb_queue_purge(&tp->out_of_order_queue);
 +      if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 +              return false;
  
 -              /* Reset SACK state.  A conforming SACK implementation will
 -               * do the same at a timeout based retransmit.  When a connection
 -               * is in a sad state like this, we care only about integrity
 -               * of the connection not performance.
 -               */
 -              if (tp->rx_opt.sack_ok)
 -                      tcp_sack_reset(&tp->rx_opt);
 +      NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
 +      node = &tp->ooo_last_skb->rbnode;
 +      do {
 +              prev = rb_prev(node);
 +              rb_erase(node, &tp->out_of_order_queue);
 +              tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
                sk_mem_reclaim(sk);
 -              res = true;
 -      }
 -      return res;
 +              if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 +                  !tcp_under_memory_pressure(sk))
 +                      break;
 +              node = prev;
 +      } while (node);
 +      tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
 +
 +      /* Reset SACK state.  A conforming SACK implementation will
 +       * do the same at a timeout based retransmit.  When a connection
 +       * is in a sad state like this, we care only about integrity
 +       * of the connection not performance.
 +       */
 +      if (tp->rx_opt.sack_ok)
 +              tcp_sack_reset(&tp->rx_opt);
 +      return true;
  }
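
/*
 * Illustration of the pruning policy above, outside the kernel: given
 * segments ordered by sequence number and a receive-buffer budget, drop
 * from the highest-sequence end until usage fits again.  A plain array
 * stands in for the rbtree; the names and numbers are made up for the
 * sketch, not taken from the kernel.
 */
#include <stdio.h>

int main(void)
{
	unsigned int truesize[] = { 1500, 1500, 3000, 1500, 4500 }; /* by seq */
	int n = sizeof(truesize) / sizeof(truesize[0]);
	unsigned int rmem_alloc = 0, rcvbuf = 6000;
	int i;

	for (i = 0; i < n; i++)
		rmem_alloc += truesize[i];

	/* Walk backwards from the last (highest-seq) segment, like the
	 * loop above walks rb_prev() from ooo_last_skb. */
	for (i = n - 1; i >= 0 && rmem_alloc > rcvbuf; i--) {
		rmem_alloc -= truesize[i];
		printf("dropped segment %d, rmem now %u\n", i, rmem_alloc);
	}
	printf("kept %d lowest-sequence segments\n", i + 1);
	return 0;
}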
  
  /* Reduce allocated memory if we can, trying to get
@@@ -4986,7 -4920,7 +4985,7 @@@ static int tcp_prune_queue(struct sock 
  
        tcp_collapse_ofo_queue(sk);
        if (!skb_queue_empty(&sk->sk_receive_queue))
 -              tcp_collapse(sk, &sk->sk_receive_queue,
 +              tcp_collapse(sk, &sk->sk_receive_queue, NULL,
                             skb_peek(&sk->sk_receive_queue),
                             NULL,
                             tp->copied_seq, tp->rcv_nxt);
@@@ -5091,7 -5025,7 +5090,7 @@@ static void __tcp_ack_snd_check(struct 
            /* We ACK each frame or... */
            tcp_in_quickack_mode(sk) ||
            /* We have out of order data. */
 -          (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
 +          (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
                /* Then ack it now */
                tcp_send_ack(sk);
        } else {
@@@ -5992,8 -5926,7 +5991,8 @@@ int tcp_rcv_state_process(struct sock *
                } else
                        tcp_init_metrics(sk);
  
 -              tcp_update_pacing_rate(sk);
 +              if (!inet_csk(sk)->icsk_ca_ops->cong_control)
 +                      tcp_update_pacing_rate(sk);
  
                /* Prevent spurious tcp_cwnd_restart() on first data packet */
                tp->lsndtime = tcp_time_stamp;
@@@ -6326,7 -6259,6 +6325,7 @@@ int tcp_conn_request(struct request_soc
  
        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb, sk);
 +      inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
  
        /* Note: tcp_v6_init_req() might override ir_iif for link locals */
        inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --combined net/ipv4/tcp_output.c
@@@ -734,16 -734,9 +734,16 @@@ static void tcp_tsq_handler(struct soc
  {
        if ((1 << sk->sk_state) &
            (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
 -           TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
 -              tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
 +           TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
 +              struct tcp_sock *tp = tcp_sk(sk);
 +
 +              if (tp->lost_out > tp->retrans_out &&
 +                  tp->snd_cwnd > tcp_packets_in_flight(tp))
 +                      tcp_xmit_retransmit_queue(sk);
 +
 +              tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
                               0, GFP_ATOMIC);
 +      }
  }
  /*
   * One tasklet per cpu tries to send more skbs.
@@@ -925,7 -918,6 +925,7 @@@ static int tcp_transmit_skb(struct soc
                skb_mstamp_get(&skb->skb_mstamp);
                TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
                        - tp->snd_una;
 +              tcp_rate_skb_sent(sk, skb);
  
                if (unlikely(skb_cloned(skb)))
                        skb = pskb_copy(skb, gfp_mask);
@@@ -1221,9 -1213,6 +1221,9 @@@ int tcp_fragment(struct sock *sk, struc
        tcp_set_skb_tso_segs(skb, mss_now);
        tcp_set_skb_tso_segs(buff, mss_now);
  
 +      /* Update delivered info for the new segment */
 +      TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
 +
        /* If this packet has been sent out already, we must
         * adjust the various packet counters.
         */
@@@ -1369,7 -1358,6 +1369,7 @@@ int tcp_mss_to_mtu(struct sock *sk, in
        }
        return mtu;
  }
 +EXPORT_SYMBOL(tcp_mss_to_mtu);
  
  /* MTU probing init per socket */
  void tcp_mtup_init(struct sock *sk)
@@@ -1557,8 -1545,7 +1557,8 @@@ static bool tcp_nagle_check(bool partia
  /* Return how many segs we'd like on a TSO packet,
   * to send one TSO packet per ms
   */
 -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
 +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 +                   int min_tso_segs)
  {
        u32 bytes, segs;
  
         * This preserves ACK clocking and is consistent
         * with tcp_tso_should_defer() heuristic.
         */
 -      segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
 +      segs = max_t(u32, bytes / mss_now, min_tso_segs);
  
        return min_t(u32, segs, sk->sk_gso_max_segs);
  }
 +EXPORT_SYMBOL(tcp_tso_autosize);
 +
 +/* Return the number of segments we want in the skb we are transmitting.
 + * See if congestion control module wants to decide; otherwise, autosize.
 + */
 +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 +{
 +      const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
 +      u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
 +
 +      return tso_segs ? :
 +              tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
 +}
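
/*
 * User-space sketch of the autosizing above: size a TSO burst to roughly
 * one millisecond of data at the current pacing rate, never below a
 * floor and never above the device GSO limit.  The shift by 10 is used
 * here, as in the TSQ code below, as a cheap approximation of dividing
 * the per-second rate by 1000; the numbers in main() are made up.
 */
#include <stdio.h>

static unsigned int tso_autosize(unsigned long long pacing_rate, /* bytes/sec */
				 unsigned int mss,
				 unsigned int min_segs,
				 unsigned int gso_max_segs)
{
	unsigned long long bytes_per_ms = pacing_rate >> 10;
	unsigned long long segs = bytes_per_ms / mss;

	if (segs < min_segs)
		segs = min_segs;
	if (segs > gso_max_segs)
		segs = gso_max_segs;
	return (unsigned int)segs;
}

int main(void)
{
	/* ~10 Gbit/s pacing, 1448-byte MSS -> prints "843 segs" */
	printf("%u segs\n", tso_autosize(1250000000ULL, 1448, 2, 65535));
	return 0;
}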
  
  /* Returns the portion of skb which can be sent right away */
  static unsigned int tcp_mss_split_point(const struct sock *sk,
@@@ -1992,12 -1966,14 +1992,14 @@@ static int tcp_mtu_probe(struct sock *s
        len = 0;
        tcp_for_write_queue_from_safe(skb, next, sk) {
                copy = min_t(int, skb->len, probe_size - len);
-               if (nskb->ip_summed)
+               if (nskb->ip_summed) {
                        skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-               else
-                       nskb->csum = skb_copy_and_csum_bits(skb, 0,
-                                                           skb_put(nskb, copy),
-                                                           copy, nskb->csum);
+               } else {
+                       __wsum csum = skb_copy_and_csum_bits(skb, 0,
+                                                            skb_put(nskb, copy),
+                                                            copy, 0);
+                       nskb->csum = csum_block_add(nskb->csum, csum, len);
+               }
  
                if (skb->len <= copy) {
                        /* We've eaten all the data from this skb.
        return -1;
  }
  
 +/* TCP Small Queues :
 + * Control the number of packets in qdisc/devices to two packets or ~1 ms
 + * worth of data (these limits are doubled for retransmits).
 + * This allows for:
 + *  - better RTT estimation and ACK scheduling
 + *  - faster recovery
 + *  - high rates
 + * Alas, some drivers / subsystems require a fair amount
 + * of queued bytes to ensure line rate.
 + * One example is wifi aggregation (802.11 AMPDU)
 + */
 +static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 +                                unsigned int factor)
 +{
 +      unsigned int limit;
 +
 +      limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
 +      limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 +      limit <<= factor;
 +
 +      if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 +              set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
 +              /* It is possible TX completion already happened
 +               * before we set TSQ_THROTTLED, so we must
 +               * test the condition again.
 +               */
 +              smp_mb__after_atomic();
 +              if (atomic_read(&sk->sk_wmem_alloc) > limit)
 +                      return true;
 +      }
 +      return false;
 +}
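
/*
 * User-space sketch of the TSQ limit computed above: roughly 1 ms of
 * data at the pacing rate, but at least two skbs worth, clamped by the
 * sysctl, and doubled (factor = 1) on the retransmit path.  The values
 * in main() are illustrative only.
 */
#include <stdio.h>

static unsigned long tsq_limit(unsigned long truesize,
			       unsigned long pacing_rate,	/* bytes/sec */
			       unsigned long sysctl_limit_bytes,
			       unsigned int factor)		/* 0 xmit, 1 rtx */
{
	unsigned long limit = pacing_rate >> 10;	/* ~1 ms of data */

	if (limit < 2 * truesize)
		limit = 2 * truesize;
	if (limit > sysctl_limit_bytes)
		limit = sysctl_limit_bytes;
	return limit << factor;
}

int main(void)
{
	printf("xmit limit: %lu bytes\n",
	       tsq_limit(2048, 1250000000UL, 262144, 0));
	printf("rtx  limit: %lu bytes\n",
	       tsq_limit(2048, 1250000000UL, 262144, 1));
	return 0;
}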
 +
  /* This routine writes packets to the network.  It advances the
   * send_head.  This happens as incoming acks open up the remote
   * window for us.
@@@ -2116,7 -2059,7 +2118,7 @@@ static bool tcp_write_xmit(struct sock 
                }
        }
  
 -      max_segs = tcp_tso_autosize(sk, mss_now);
 +      max_segs = tcp_tso_segs(sk, mss_now);
        while ((skb = tcp_send_head(sk))) {
                unsigned int limit;
  
                    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
                        break;
  
 -              /* TCP Small Queues :
 -               * Control number of packets in qdisc/devices to two packets / or ~1 ms.
 -               * This allows for :
 -               *  - better RTT estimation and ACK scheduling
 -               *  - faster recovery
 -               *  - high rates
 -               * Alas, some drivers / subsystems require a fair amount
 -               * of queued bytes to ensure line rate.
 -               * One example is wifi aggregation (802.11 AMPDU)
 -               */
 -              limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
 -              limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 -
 -              if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 -                      set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 -                      /* It is possible TX completion already happened
 -                       * before we set TSQ_THROTTLED, so we must
 -                       * test again the condition.
 -                       */
 -                      smp_mb__after_atomic();
 -                      if (atomic_read(&sk->sk_wmem_alloc) > limit)
 -                              break;
 -              }
 +              if (tcp_small_queue_check(sk, skb, 0))
 +                      break;
  
                if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
                        break;
@@@ -2813,9 -2777,9 +2815,9 @@@ void tcp_xmit_retransmit_queue(struct s
                last_lost = tp->snd_una;
        }
  
 -      max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
 +      max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
        tcp_for_write_queue_from(skb, sk) {
 -              __u8 sacked = TCP_SKB_CB(skb)->sacked;
 +              __u8 sacked;
                int segs;
  
                if (skb == tcp_send_head(sk))
                segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
                if (segs <= 0)
                        return;
 +              sacked = TCP_SKB_CB(skb)->sacked;
                /* In case tcp_shift_skb_data() has aggregated large skbs,
                 * we need to make sure we are not sending too big TSO packets
                 */
@@@ -2867,9 -2830,6 +2869,9 @@@ begin_fwd
                if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
                        continue;
  
 +              if (tcp_small_queue_check(sk, skb, 1))
 +                      return;
 +
                if (tcp_retransmit_skb(sk, skb, segs))
                        return;
  
diff --combined net/ipv6/ip6_gre.c
@@@ -61,12 -61,12 +61,12 @@@ static bool log_ecn_error = true
  module_param(log_ecn_error, bool, 0644);
  MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  
 -#define HASH_SIZE_SHIFT  5
 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
 +#define IP6_GRE_HASH_SIZE_SHIFT  5
 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
  
  static int ip6gre_net_id __read_mostly;
  struct ip6gre_net {
 -      struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
 +      struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
  
        struct net_device *fb_tunnel_dev;
  };
@@@ -96,12 -96,12 +96,12 @@@ static void ip6gre_tnl_link_config(stru
     will match fallback tunnel.
   */
  
 -#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
 +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
  static u32 HASH_ADDR(const struct in6_addr *addr)
  {
        u32 hash = ipv6_addr_hash(addr);
  
 -      return hash_32(hash, HASH_SIZE_SHIFT);
 +      return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
  }
  
  #define tunnels_r_l   tunnels[3]
@@@ -648,7 -648,6 +648,6 @@@ static int ip6gre_xmit_other(struct sk_
                encap_limit = t->parms.encap_limit;
  
        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-       fl6.flowi6_proto = skb->protocol;
  
        err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
        if (err)
@@@ -1087,7 -1086,7 +1086,7 @@@ static void ip6gre_destroy_tunnels(stru
  
        for (prio = 0; prio < 4; prio++) {
                int h;
 -              for (h = 0; h < HASH_SIZE; h++) {
 +              for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
                        struct ip6_tnl *t;
  
                        t = rtnl_dereference(ign->tunnels[prio][h]);
@@@ -1239,7 -1238,7 +1238,7 @@@ static void ip6gre_netlink_parms(struc
                parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
  
        if (data[IFLA_GRE_FLOWINFO])
 -              parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
 +              parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
  
        if (data[IFLA_GRE_FLAGS])
                parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
diff --combined net/ipv6/route.c
@@@ -1147,16 -1147,15 +1147,16 @@@ static struct rt6_info *ip6_pol_route_i
        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
  }
  
 -static struct dst_entry *ip6_route_input_lookup(struct net *net,
 -                                              struct net_device *dev,
 -                                              struct flowi6 *fl6, int flags)
 +struct dst_entry *ip6_route_input_lookup(struct net *net,
 +                                       struct net_device *dev,
 +                                       struct flowi6 *fl6, int flags)
  {
        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
  
        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
  }
 +EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
  
  void ip6_route_input(struct sk_buff *skb)
  {
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip_tunnel_info *tun_info;
        struct flowi6 fl6 = {
 -              .flowi6_iif = l3mdev_fib_oif(skb->dev),
 +              .flowi6_iif = skb->dev->ifindex,
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = ip6_flowinfo(iph),
@@@ -1189,15 -1188,12 +1189,15 @@@ static struct rt6_info *ip6_pol_route_o
  struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags)
  {
 -      struct dst_entry *dst;
        bool any_src;
  
 -      dst = l3mdev_get_rt6_dst(net, fl6);
 -      if (dst)
 -              return dst;
 +      if (rt6_need_strict(&fl6->daddr)) {
 +              struct dst_entry *dst;
 +
 +              dst = l3mdev_link_scope_lookup(net, fl6);
 +              if (dst)
 +                      return dst;
 +      }
  
        fl6->flowi6_iif = LOOPBACK_IFINDEX;
  
@@@ -1608,9 -1604,7 +1608,9 @@@ static unsigned int ip6_mtu(const struc
        rcu_read_unlock();
  
  out:
 -      return min_t(unsigned int, mtu, IP6_MAX_MTU);
 +      mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
 +
 +      return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
  }
  
  static struct dst_entry *icmp6_dst_gc_list;
@@@ -2571,16 -2565,8 +2571,16 @@@ struct rt6_info *addrconf_dst_alloc(str
  {
        u32 tb_id;
        struct net *net = dev_net(idev->dev);
 -      struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
 -                                          DST_NOCOUNT);
 +      struct net_device *dev = net->loopback_dev;
 +      struct rt6_info *rt;
 +
 +      /* use L3 Master device as loopback for host routes if device
 +       * is enslaved and address is not link local or multicast
 +       */
 +      if (!rt6_need_strict(addr))
 +              dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
 +
 +      rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
        if (!rt)
                return ERR_PTR(-ENOMEM);
  
@@@ -3216,7 -3202,9 +3216,9 @@@ static int rt6_fill_node(struct net *ne
        if (iif) {
  #ifdef CONFIG_IPV6_MROUTE
                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
-                       int err = ip6mr_get_route(net, skb, rtm, nowait);
+                       int err = ip6mr_get_route(net, skb, rtm, nowait,
+                                                 portid);
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)
@@@ -3359,6 -3347,11 +3361,6 @@@ static int inet6_rtm_getroute(struct sk
        } else {
                fl6.flowi6_oif = oif;
  
 -              if (netif_index_is_l3_master(net, oif)) {
 -                      fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
 -                                         FLOWI_FLAG_SKIP_NH_OIF;
 -              }
 -
                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        }
  
diff --combined net/sched/act_ife.c
@@@ -53,7 -53,7 +53,7 @@@ int ife_tlv_meta_encode(void *skbdata, 
        u32 *tlv = (u32 *)(skbdata);
        u16 totlen = nla_total_size(dlen);      /* alignment + hdr */
        char *dptr = (char *)tlv + NLA_HDRLEN;
-       u32 htlv = attrtype << 16 | dlen;
+       u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
  
        *tlv = htonl(htlv);
        memset(dptr, 0, totlen - NLA_HDRLEN);
  }
  EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
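
/*
 * Sketch of the 32-bit TLV header built above, outside the kernel: the
 * attribute type sits in the upper 16 bits and the length in the lower
 * 16 bits, and (as the fix shows) the length covers the 4-byte header
 * plus the payload, netlink-style.  The 4-byte header length and the
 * type value used in main() are assumptions made for the sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_NLA_HDRLEN 4u

static uint32_t tlv_header(uint16_t attrtype, uint16_t dlen)
{
	return ((uint32_t)attrtype << 16) | (dlen + SKETCH_NLA_HDRLEN);
}

int main(void)
{
	uint32_t h = tlv_header(2 /* arbitrary type id */, 4 /* u32 payload */);

	/* prints "type=2 len=8": length includes the 4-byte header */
	printf("type=%u len=%u\n", (unsigned)(h >> 16), (unsigned)(h & 0xffff));
	return 0;
}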
  
 +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
 +{
 +      u16 edata = 0;
 +
 +      if (mi->metaval)
 +              edata = *(u16 *)mi->metaval;
 +      else if (metaval)
 +              edata = metaval;
 +
 +      if (!edata) /* will not encode */
 +              return 0;
 +
 +      edata = htons(edata);
 +      return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata);
 +}
 +EXPORT_SYMBOL_GPL(ife_encode_meta_u16);
 +
  int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
  {
        if (mi->metaval)
@@@ -98,15 -81,6 +98,15 @@@ int ife_check_meta_u32(u32 metaval, str
  }
  EXPORT_SYMBOL_GPL(ife_check_meta_u32);
  
 +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi)
 +{
 +      if (metaval || mi->metaval)
 +              return 8; /* T+L+(V) == 2+2+(2+2bytepad) */
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(ife_check_meta_u16);
 +
  int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
  {
        u32 edata = metaval;
@@@ -653,7 -627,7 +653,7 @@@ static int tcf_ife_decode(struct sk_buf
        struct tcf_ife_info *ife = to_ife(a);
        int action = ife->tcf_action;
        struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
-       u16 ifehdrln = ifehdr->metalen;
+       int ifehdrln = (int)ifehdr->metalen;
        struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
  
        spin_lock(&ife->tcf_lock);
@@@ -766,8 -740,6 +766,6 @@@ static int tcf_ife_encode(struct sk_buf
                return TC_ACT_SHOT;
        }
  
-       iethh = eth_hdr(skb);
        err = skb_cow_head(skb, hdrm);
        if (unlikely(err)) {
                ife->tcf_qstats.drops++;
        if (!(at & AT_EGRESS))
                skb_push(skb, skb->dev->hard_header_len);
  
+       iethh = (struct ethhdr *)skb->data;
        __skb_push(skb, hdrm);
        memcpy(skb->data, iethh, skb->mac_len);
        skb_reset_mac_header(skb);
diff --combined net/sctp/chunk.c
@@@ -70,19 -70,6 +70,19 @@@ static struct sctp_datamsg *sctp_datams
        return msg;
  }
  
 +void sctp_datamsg_free(struct sctp_datamsg *msg)
 +{
 +      struct sctp_chunk *chunk;
 +
 +      /* This doesn't have to be a _safe variant because
 +       * sctp_chunk_free() only drops the refs.
 +       */
 +      list_for_each_entry(chunk, &msg->chunks, frag_list)
 +              sctp_chunk_free(chunk);
 +
 +      sctp_datamsg_put(msg);
 +}
 +
   /* Final destruction of datamsg memory. */
  static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
  {
@@@ -192,13 -179,17 +192,18 @@@ struct sctp_datamsg *sctp_datamsg_from_
                         msg, msg->expires_at, jiffies);
        }
  
+       if (asoc->peer.prsctp_capable &&
+           SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+               msg->expires_at =
+                       jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
        /* This is the biggest possible DATA chunk that can fit into
         * the packet
         */
 -      max_data = (asoc->pathmtu -
 -              sctp_sk(asoc->base.sk)->pf->af->net_header_len -
 -              sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
 +      max_data = asoc->pathmtu -
 +                 sctp_sk(asoc->base.sk)->pf->af->net_header_len -
 +                 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
 +      max_data = SCTP_TRUNC4(max_data);
  
        max = asoc->frag_point;
        /* If the peer requested that we authenticate DATA chunks
                struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
  
                if (hmac_desc)
 -                      max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
 -                                          hmac_desc->hmac_len);
 +                      max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
 +                                            hmac_desc->hmac_len);
        }
  
        /* Now, check if we need to reduce our max */
            asoc->outqueue.out_qlen == 0 &&
            list_empty(&asoc->outqueue.retransmit) &&
            msg_len > max)
 -              max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
 +              max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
  
        /* Encourage Cookie-ECHO bundling. */
        if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
@@@ -349,7 -340,7 +354,7 @@@ errout
  /* Check whether this message has expired. */
  int sctp_chunk_abandoned(struct sctp_chunk *chunk)
  {
-       if (!chunk->asoc->prsctp_enable ||
+       if (!chunk->asoc->peer.prsctp_capable ||
            !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
                struct sctp_datamsg *msg = chunk->msg;
  
        }
  
        if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
-           time_after(jiffies, chunk->prsctp_param)) {
+           time_after(jiffies, chunk->msg->expires_at)) {
                if (chunk->sent_count)
                        chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
                else
                        chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
                return 1;
        } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
-                  chunk->sent_count > chunk->prsctp_param) {
+                  chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
                chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
                return 1;
        }
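
/*
 * The WORD_ROUND -> SCTP_PAD4 / SCTP_TRUNC4 conversions above are plain
 * 4-byte alignment helpers.  A stand-alone sketch of the arithmetic,
 * with locally defined macros assuming the usual (x + 3) & ~3 rounding:
 */
#include <stdio.h>

#define PAD4(x)   (((x) + 3u) & ~3u)	/* round up to a multiple of 4 */
#define TRUNC4(x) ((x) & ~3u)		/* round down to a multiple of 4 */

int main(void)
{
	unsigned int chunklen = 21;

	/* prints "pad4(21)=24 trunc4(21)=20 padding=3" */
	printf("pad4(%u)=%u trunc4(%u)=%u padding=%u\n",
	       chunklen, PAD4(chunklen),
	       chunklen, TRUNC4(chunklen),
	       PAD4(chunklen) - chunklen);
	return 0;
}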
diff --combined net/sctp/outqueue.c
@@@ -68,7 -68,7 +68,7 @@@ static void sctp_mark_missing(struct sc
  
  static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
  
 -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
 +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
  
  /* Add data to the front of the queue. */
  static inline void sctp_outq_head_data(struct sctp_outq *q,
@@@ -285,9 -285,10 +285,9 @@@ void sctp_outq_free(struct sctp_outq *q
  }
  
  /* Put a new chunk in an sctp_outq.  */
 -int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
 +void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
  {
        struct net *net = sock_net(q->asoc->base.sk);
 -      int error = 0;
  
        pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
                 chunk && chunk->chunk_hdr ?
         * immediately.
         */
        if (sctp_chunk_is_data(chunk)) {
 -              /* Is it OK to queue data chunks?  */
 -              /* From 9. Termination of Association
 -               *
 -               * When either endpoint performs a shutdown, the
 -               * association on each peer will stop accepting new
 -               * data from its user and only deliver data in queue
 -               * at the time of sending or receiving the SHUTDOWN
 -               * chunk.
 -               */
 -              switch (q->asoc->state) {
 -              case SCTP_STATE_CLOSED:
 -              case SCTP_STATE_SHUTDOWN_PENDING:
 -              case SCTP_STATE_SHUTDOWN_SENT:
 -              case SCTP_STATE_SHUTDOWN_RECEIVED:
 -              case SCTP_STATE_SHUTDOWN_ACK_SENT:
 -                      /* Cannot send after transport endpoint shutdown */
 -                      error = -ESHUTDOWN;
 -                      break;
 -
 -              default:
 -                      pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
 -                               __func__, q, chunk, chunk && chunk->chunk_hdr ?
 -                               sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
 -                               "illegal chunk");
 -
 -                      sctp_chunk_hold(chunk);
 -                      sctp_outq_tail_data(q, chunk);
 -                      if (chunk->asoc->peer.prsctp_capable &&
 -                          SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
 -                              chunk->asoc->sent_cnt_removable++;
 -                      if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
 -                              SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
 -                      else
 -                              SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
 -                      break;
 -              }
 +              pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
 +                       __func__, q, chunk, chunk && chunk->chunk_hdr ?
 +                       sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
 +                       "illegal chunk");
 +
 +              sctp_outq_tail_data(q, chunk);
-               if (chunk->asoc->prsctp_enable &&
++              if (chunk->asoc->peer.prsctp_capable &&
 +                  SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
 +                      chunk->asoc->sent_cnt_removable++;
 +              if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
 +                      SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
 +              else
 +                      SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
        } else {
                list_add_tail(&chunk->list, &q->control_chunk_list);
                SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
        }
  
 -      if (error < 0)
 -              return error;
 -
        if (!q->cork)
 -              error = sctp_outq_flush(q, 0, gfp);
 -
 -      return error;
 +              sctp_outq_flush(q, 0, gfp);
  }
  
  /* Insert a chunk into the sorted list based on the TSNs.  The retransmit list
@@@ -354,7 -383,7 +354,7 @@@ static int sctp_prsctp_prune_sent(struc
  
        list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
                if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-                   chk->prsctp_param <= sinfo->sinfo_timetolive)
+                   chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
                        continue;
  
                list_del_init(&chk->transmitted_list);
@@@ -389,7 -418,7 +389,7 @@@ static int sctp_prsctp_prune_unsent(str
  
        list_for_each_entry_safe(chk, temp, queue, list) {
                if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-                   chk->prsctp_param <= sinfo->sinfo_timetolive)
+                   chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
                        continue;
  
                list_del_init(&chk->list);
@@@ -413,7 -442,7 +413,7 @@@ void sctp_prsctp_prune(struct sctp_asso
  {
        struct sctp_transport *transport;
  
-       if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+       if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable)
                return;
  
        msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
@@@ -530,6 -559,7 +530,6 @@@ void sctp_retransmit(struct sctp_outq *
                     sctp_retransmit_reason_t reason)
  {
        struct net *net = sock_net(q->asoc->base.sk);
 -      int error = 0;
  
        switch (reason) {
        case SCTP_RTXR_T3_RTX:
         * will be flushed at the end.
         */
        if (reason != SCTP_RTXR_FAST_RTX)
 -              error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
 -
 -      if (error)
 -              q->asoc->base.sk->sk_err = -error;
 +              sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
  }
  
  /*
@@@ -745,12 -778,12 +745,12 @@@ redo
  }
  
  /* Cork the outqueue so queued chunks are really queued. */
 -int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
 +void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
  {
        if (q->cork)
                q->cork = 0;
  
 -      return sctp_outq_flush(q, 0, gfp);
 +      sctp_outq_flush(q, 0, gfp);
  }
  
  
   * locking concerns must be made.  Today we use the sock lock to protect
   * this function.
   */
 -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
  {
        struct sctp_packet *packet;
        struct sctp_packet singleton;
                        sctp_packet_config(&singleton, vtag, 0);
                        sctp_packet_append_chunk(&singleton, chunk);
                        error = sctp_packet_transmit(&singleton, gfp);
 -                      if (error < 0)
 -                              return error;
 +                      if (error < 0) {
 +                              asoc->base.sk->sk_err = -error;
 +                              return;
 +                      }
                        break;
  
                case SCTP_CID_ABORT:
                retran:
                        error = sctp_outq_flush_rtx(q, packet,
                                                    rtx_timeout, &start_timer);
 +                      if (error < 0)
 +                              asoc->base.sk->sk_err = -error;
  
                        if (start_timer) {
                                sctp_transport_reset_t3_rtx(transport);
  
                                /* Mark as failed send. */
                                sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
-                               if (asoc->prsctp_enable &&
+                               if (asoc->peer.prsctp_capable &&
                                    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
                                        asoc->sent_cnt_removable--;
                                sctp_chunk_free(chunk);
@@@ -1163,15 -1192,14 +1163,15 @@@ sctp_flush_out
                                                      struct sctp_transport,
                                                      send_ready);
                packet = &t->packet;
 -              if (!sctp_packet_empty(packet))
 +              if (!sctp_packet_empty(packet)) {
                        error = sctp_packet_transmit(packet, gfp);
 +                      if (error < 0)
 +                              asoc->base.sk->sk_err = -error;
 +              }
  
                /* Clear the burst limited state, if any */
                sctp_transport_burst_reset(t);
        }
 -
 -      return error;
  }
  
  /* Update unack_data based on the incoming SACK chunk */
@@@ -1319,7 -1347,7 +1319,7 @@@ int sctp_outq_sack(struct sctp_outq *q
                tsn = ntohl(tchunk->subh.data_hdr->tsn);
                if (TSN_lte(tsn, ctsn)) {
                        list_del_init(&tchunk->transmitted_list);
-                       if (asoc->prsctp_enable &&
+                       if (asoc->peer.prsctp_capable &&
                            SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
                                asoc->sent_cnt_removable--;
                        sctp_chunk_free(tchunk);
@@@ -1719,7 -1747,7 +1719,7 @@@ static int sctp_acked(struct sctp_sackh
  {
        int i;
        sctp_sack_variable_t *frags;
 -      __u16 gap;
 +      __u16 tsn_offset, blocks;
        __u32 ctsn = ntohl(sack->cum_tsn_ack);
  
        if (TSN_lte(tsn, ctsn))
         */
  
        frags = sack->variable;
 -      gap = tsn - ctsn;
 -      for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
 -              if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
 -                  TSN_lte(gap, ntohs(frags[i].gab.end)))
 +      blocks = ntohs(sack->num_gap_ack_blocks);
 +      tsn_offset = tsn - ctsn;
 +      for (i = 0; i < blocks; ++i) {
 +              if (tsn_offset >= ntohs(frags[i].gab.start) &&
 +                  tsn_offset <= ntohs(frags[i].gab.end))
                        goto pass;
        }
  
diff --combined net/sctp/sctp_diag.c
@@@ -106,8 -106,7 +106,8 @@@ static int inet_sctp_diag_fill(struct s
                               const struct inet_diag_req_v2 *req,
                               struct user_namespace *user_ns,
                               int portid, u32 seq, u16 nlmsg_flags,
 -                             const struct nlmsghdr *unlh)
 +                             const struct nlmsghdr *unlh,
 +                             bool net_admin)
  {
        struct sctp_endpoint *ep = sctp_sk(sk)->ep;
        struct list_head *addr_list;
                r->idiag_retrans = 0;
        }
  
 -      if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
 +      if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
                goto errout;
  
        if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
@@@ -204,7 -203,6 +204,7 @@@ struct sctp_comm_param 
        struct netlink_callback *cb;
        const struct inet_diag_req_v2 *r;
        const struct nlmsghdr *nlh;
 +      bool net_admin;
  };
  
  static size_t inet_assoc_attr_size(struct sctp_association *asoc)
                + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
                + nla_total_size(1) /* INET_DIAG_TOS */
                + nla_total_size(1) /* INET_DIAG_TCLASS */
 +              + nla_total_size(4) /* INET_DIAG_MARK */
                + nla_total_size(addrlen * asoc->peer.transport_count)
                + nla_total_size(addrlen * addrcnt)
                + nla_total_size(sizeof(struct inet_diag_meminfo))
@@@ -259,8 -256,7 +259,8 @@@ static int sctp_tsp_dump_one(struct sct
        err = inet_sctp_diag_fill(sk, assoc, rep, req,
                                  sk_user_ns(NETLINK_CB(in_skb).sk),
                                  NETLINK_CB(in_skb).portid,
 -                                nlh->nlmsg_seq, 0, nlh);
 +                                nlh->nlmsg_seq, 0, nlh,
 +                                commp->net_admin);
        release_sock(sk);
        if (err < 0) {
                WARN_ON(err == -EMSGSIZE);
@@@ -276,28 -272,17 +276,17 @@@ out
        return err;
  }
  
- static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
+ static int sctp_sock_dump(struct sock *sk, void *p)
  {
-       struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
        struct sctp_comm_param *commp = p;
-       struct sock *sk = ep->base.sk;
        struct sk_buff *skb = commp->skb;
        struct netlink_callback *cb = commp->cb;
        const struct inet_diag_req_v2 *r = commp->r;
-       struct sctp_association *assoc =
-               list_entry(ep->asocs.next, struct sctp_association, asocs);
+       struct sctp_association *assoc;
        int err = 0;
  
-       /* find the ep only once through the transports by this condition */
-       if (tsp->asoc != assoc)
-               goto out;
-       if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
-               goto out;
        lock_sock(sk);
-       if (sk != assoc->base.sk)
-               goto release;
        list_for_each_entry(assoc, &ep->asocs, asocs) {
                if (cb->args[4] < cb->args[1])
                        goto next;
                                        sk_user_ns(NETLINK_CB(cb->skb).sk),
                                        NETLINK_CB(cb->skb).portid,
                                        cb->nlh->nlmsg_seq,
 -                                      NLM_F_MULTI, cb->nlh) < 0) {
 +                                      NLM_F_MULTI, cb->nlh,
 +                                      commp->net_admin) < 0) {
                        cb->args[3] = 1;
-                       err = 2;
+                       err = 1;
                        goto release;
                }
                cb->args[3] = 1;
                if (inet_sctp_diag_fill(sk, assoc, skb, r,
                                        sk_user_ns(NETLINK_CB(cb->skb).sk),
                                        NETLINK_CB(cb->skb).portid,
 -                                      cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
 +                                      cb->nlh->nlmsg_seq, 0, cb->nlh,
 +                                      commp->net_admin) < 0) {
-                       err = 2;
+                       err = 1;
                        goto release;
                }
  next:
        cb->args[4] = 0;
  release:
        release_sock(sk);
+       sock_put(sk);
        return err;
+ }
+ static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+ {
+       struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       struct netlink_callback *cb = commp->cb;
+       const struct inet_diag_req_v2 *r = commp->r;
+       struct sctp_association *assoc =
+               list_entry(ep->asocs.next, struct sctp_association, asocs);
+       /* find the ep only once through the transports by this condition */
+       if (tsp->asoc != assoc)
+               goto out;
+       if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+               goto out;
+       sock_hold(sk);
+       cb->args[5] = (long)sk;
+       return 1;
  out:
        cb->args[2]++;
-       return err;
+       return 0;
  }
  
  static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
                                sk_user_ns(NETLINK_CB(cb->skb).sk),
                                NETLINK_CB(cb->skb).portid,
                                cb->nlh->nlmsg_seq, NLM_F_MULTI,
 -                              cb->nlh) < 0) {
 +                              cb->nlh, commp->net_admin) < 0) {
                err = 2;
                goto out;
        }
@@@ -418,7 -426,6 +432,7 @@@ static int sctp_diag_dump_one(struct sk
                .skb = in_skb,
                .r = req,
                .nlh = nlh,
 +              .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
        };
  
        if (req->sdiag_family == AF_INET) {
@@@ -454,7 -461,6 +468,7 @@@ static void sctp_diag_dump(struct sk_bu
                .skb = skb,
                .cb = cb,
                .r = r,
 +              .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
        };
  
        /* eps hashtable dumps
@@@ -480,10 -486,18 +494,18 @@@ skip
         * 2 : to record the transport pos of this time's traversal
         * 3 : to mark if we have dumped the ep info of the current asoc
         * 4 : to work as a temporary variable to traversal list
+        * 5 : to save the sk we get from traversing the tsp list.
         */
        if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
                goto done;
-       sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
+ next:
+       cb->args[5] = 0;
+       sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
+       if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
+               goto next;
  done:
        cb->args[1] = cb->args[4];
        cb->args[4] = 0;
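
/*
 * Sketch of the resumable-dump pattern introduced above, in plain C: the
 * walk callback skips items that fail a filter, stashes the first match
 * in a cursor and returns non-zero to stop the walk; the caller then
 * processes the item outside the walk and resumes from the saved
 * position.  The names and the parity filter are illustrative, not the
 * kernel API.
 */
#include <stdio.h>

struct cursor {
	int pos;	/* like cb->args[2]: where to resume the walk */
	int item;	/* like cb->args[5]: the object handed back   */
};

static int get_one(const int *items, int n, struct cursor *c)
{
	for (; c->pos < n; c->pos++) {
		if (items[c->pos] % 2)	/* filter, like the family check */
			continue;
		c->item = items[c->pos];
		return 1;		/* stop the walk, item saved */
	}
	return 0;			/* nothing left */
}

int main(void)
{
	int items[] = { 3, 1, 4, 1, 6 };
	struct cursor c = { 0, 0 };

	while (get_one(items, 5, &c)) {
		printf("dumping %d\n", c.item);	/* done outside the walk */
		c.pos++;			/* advance before resuming */
	}
	return 0;
}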
diff --combined net/sctp/sm_make_chunk.c
@@@ -253,7 -253,7 +253,7 @@@ struct sctp_chunk *sctp_make_init(cons
        num_types = sp->pf->supported_addrs(sp, types);
  
        chunksize = sizeof(init) + addrs_len;
 -      chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
 +      chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
        chunksize += sizeof(ecap_param);
  
        if (asoc->prsctp_enable)
                /* Add HMACS parameter length if any were defined */
                auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
                if (auth_hmacs->length)
 -                      chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
 +                      chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
                else
                        auth_hmacs = NULL;
  
                /* Add CHUNKS parameter length */
                auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
                if (auth_chunks->length)
 -                      chunksize += WORD_ROUND(ntohs(auth_chunks->length));
 +                      chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
                else
                        auth_chunks = NULL;
  
  
        /* If we have any extensions to report, account for that */
        if (num_ext)
 -              chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
 -                                      num_ext);
 +              chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
 +                                     num_ext);
  
        /* RFC 2960 3.3.2 Initiation (INIT) (1)
         *
@@@ -443,13 -443,13 +443,13 @@@ struct sctp_chunk *sctp_make_init_ack(c
  
                auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
                if (auth_hmacs->length)
 -                      chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
 +                      chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
                else
                        auth_hmacs = NULL;
  
                auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
                if (auth_chunks->length)
 -                      chunksize += WORD_ROUND(ntohs(auth_chunks->length));
 +                      chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
                else
                        auth_chunks = NULL;
  
        }
  
        if (num_ext)
 -              chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
 -                                      num_ext);
 +              chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
 +                                     num_ext);
  
        /* Now allocate and fill out the chunk.  */
        retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@@ -706,20 -706,6 +706,6 @@@ nodata
        return retval;
  }
  
- static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
-                                  const struct sctp_sndrcvinfo *sinfo)
- {
-       if (!chunk->asoc->prsctp_enable)
-               return;
-       if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
-               chunk->prsctp_param =
-                       jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
-       else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
-                SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
-               chunk->prsctp_param = sinfo->sinfo_timetolive;
- }
  /* Make a DATA chunk for the given association from the provided
   * parameters.  However, do not populate the data payload.
   */
@@@ -753,7 -739,6 +739,6 @@@ struct sctp_chunk *sctp_make_datafrag_e
  
        retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
        memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
-       sctp_set_prsctp_policy(retval, sinfo);
  
  nodata:
        return retval;
@@@ -1390,7 -1375,7 +1375,7 @@@ static struct sctp_chunk *_sctp_make_ch
        struct sock *sk;
  
        /* No need to allocate LL here, as this is only a chunk. */
 -      skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
 +      skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
        if (!skb)
                goto nodata;
  
@@@ -1482,7 -1467,7 +1467,7 @@@ void *sctp_addto_chunk(struct sctp_chun
        void *target;
        void *padding;
        int chunklen = ntohs(chunk->chunk_hdr->length);
 -      int padlen = WORD_ROUND(chunklen) - chunklen;
 +      int padlen = SCTP_PAD4(chunklen) - chunklen;
  
        padding = skb_put(chunk->skb, padlen);
        target = skb_put(chunk->skb, len);
@@@ -1900,7 -1885,7 +1885,7 @@@ static int sctp_process_missing_param(c
        struct __sctp_missing report;
        __u16 len;
  
 -      len = WORD_ROUND(sizeof(report));
 +      len = SCTP_PAD4(sizeof(report));
  
        /* Make an ERROR chunk, preparing enough room for
         * returning multiple unknown parameters.
@@@ -2098,9 -2083,9 +2083,9 @@@ static sctp_ierror_t sctp_process_unk_p
  
                if (*errp) {
                        if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
 -                                      WORD_ROUND(ntohs(param.p->length))))
 +                                      SCTP_PAD4(ntohs(param.p->length))))
                                sctp_addto_chunk_fixed(*errp,
 -                                              WORD_ROUND(ntohs(param.p->length)),
 +                                              SCTP_PAD4(ntohs(param.p->length)),
                                                param.v);
                } else {
                        /* If there is no memory for generating the ERROR
diff --combined net/sctp/socket.c
@@@ -1958,8 -1958,6 +1958,8 @@@ static int sctp_sendmsg(struct sock *sk
  
        /* Now send the (possibly) fragmented message. */
        list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
 +              sctp_chunk_hold(chunk);
 +
                /* Do accounting for the write space.  */
                sctp_set_owner_w(chunk);
  
         * breaks.
         */
        err = sctp_primitive_SEND(net, asoc, datamsg);
 -      sctp_datamsg_put(datamsg);
        /* Did the lower layer accept the chunk? */
 -      if (err)
 +      if (err) {
 +              sctp_datamsg_free(datamsg);
                goto out_free;
 +      }
  
        pr_debug("%s: we sent primitively\n", __func__);
  
 +      sctp_datamsg_put(datamsg);
        err = msg_len;
  
        if (unlikely(wait_connect)) {
@@@ -4473,17 -4469,21 +4473,21 @@@ int sctp_transport_lookup_process(int (
                                  const union sctp_addr *paddr, void *p)
  {
        struct sctp_transport *transport;
-       int err = 0;
+       int err = -ENOENT;
  
        rcu_read_lock();
        transport = sctp_addrs_lookup_transport(net, laddr, paddr);
        if (!transport || !sctp_transport_hold(transport))
                goto out;
-       err = cb(transport, p);
+       sctp_association_hold(transport->asoc);
        sctp_transport_put(transport);
  
- out:
        rcu_read_unlock();
+       err = cb(transport, p);
+       sctp_association_put(transport->asoc);
+ out:
        return err;
  }
  EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);