Three sets of overlapping changes. Nothing serious.
Signed-off-by: David S. Miller <davem@davemloft.net>
F: include/linux/altera_uart.h
F: include/linux/altera_jtaguart.h
+AMAZON ETHERNET DRIVERS
+M: Netanel Belgazal <netanel@annapurnalabs.com>
+R: Saeed Bishara <saeed@annapurnalabs.com>
+R: Zorik Machulsky <zorik@annapurnalabs.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/ena.txt
+F: drivers/net/ethernet/amazon/
+
AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
M: Gary Hook <gary.hook@amd.com>
HOST AP DRIVER
M: Jouni Malinen <j@w1.fi>
-L: hostap@shmoo.com (subscribers-only)
L: linux-wireless@vger.kernel.org
-W: http://hostap.epitest.fi/
-S: Maintained
+W: http://w1.fi/hostap-driver.html
+S: Obsolete
F: drivers/net/wireless/intersil/hostap/
HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
- M: Mark Fasheh <mfasheh@suse.com>
+ M: Mark Fasheh <mfasheh@versity.com>
M: Joel Becker <jlbec@evilplan.org>
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
S: Supported
F: drivers/net/wireless/ath/ath10k/
+QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+M: Timur Tabi <timur@codeaurora.org>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/qualcomm/emac/
+
QUALCOMM HEXAGON ARCHITECTURE
M: Richard Kuo <rkuo@codeaurora.org>
L: linux-hexagon@vger.kernel.org
RHASHTABLE
M: Thomas Graf <tgraf@suug.ch>
+M: Herbert Xu <herbert@gondor.apana.org.au>
L: netdev@vger.kernel.org
S: Maintained
F: lib/rhashtable.c
THERMAL/CPU_COOLING
M: Amit Daniel Kachhap <amit.kachhap@gmail.com>
M: Viresh Kumar <viresh.kumar@linaro.org>
- M: Javi Merino <javi.merino@arm.com>
+ M: Javi Merino <javi.merino@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/thermal/cpu-cooling-api.txt
USB SMSC95XX ETHERNET DRIVER
M: Steve Glendinning <steve.glendinning@shawell.net>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/usb/smsc95xx.*
return ret;
}
-static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tg3_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct tg3 *tp = netdev_priv(dev);
+ u32 supported, advertising;
if (tg3_flag(tp, USE_PHYLIB)) {
struct phy_device *phydev;
if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
return -EAGAIN;
phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
- return phy_ethtool_gset(phydev, cmd);
+ return phy_ethtool_ksettings_get(phydev, cmd);
}
- cmd->supported = (SUPPORTED_Autoneg);
+ supported = (SUPPORTED_Autoneg);
if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY))
- cmd->supported |= (SUPPORTED_1000baseT_Half |
- SUPPORTED_1000baseT_Full);
+ supported |= (SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full);
if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
- cmd->supported |= (SUPPORTED_100baseT_Half |
- SUPPORTED_100baseT_Full |
- SUPPORTED_10baseT_Half |
- SUPPORTED_10baseT_Full |
- SUPPORTED_TP);
- cmd->port = PORT_TP;
+ supported |= (SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_TP);
+ cmd->base.port = PORT_TP;
} else {
- cmd->supported |= SUPPORTED_FIBRE;
- cmd->port = PORT_FIBRE;
+ supported |= SUPPORTED_FIBRE;
+ cmd->base.port = PORT_FIBRE;
}
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
- cmd->advertising = tp->link_config.advertising;
+ advertising = tp->link_config.advertising;
if (tg3_flag(tp, PAUSE_AUTONEG)) {
if (tp->link_config.flowctrl & FLOW_CTRL_RX) {
if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
- cmd->advertising |= ADVERTISED_Pause;
+ advertising |= ADVERTISED_Pause;
} else {
- cmd->advertising |= ADVERTISED_Pause |
- ADVERTISED_Asym_Pause;
+ advertising |= ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause;
}
} else if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
- cmd->advertising |= ADVERTISED_Asym_Pause;
+ advertising |= ADVERTISED_Asym_Pause;
}
}
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+
if (netif_running(dev) && tp->link_up) {
- ethtool_cmd_speed_set(cmd, tp->link_config.active_speed);
- cmd->duplex = tp->link_config.active_duplex;
- cmd->lp_advertising = tp->link_config.rmt_adv;
+ cmd->base.speed = tp->link_config.active_speed;
+ cmd->base.duplex = tp->link_config.active_duplex;
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.lp_advertising,
+ tp->link_config.rmt_adv);
+
if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE)
- cmd->eth_tp_mdix = ETH_TP_MDI_X;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
else
- cmd->eth_tp_mdix = ETH_TP_MDI;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI;
}
} else {
- ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
- cmd->duplex = DUPLEX_UNKNOWN;
- cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
- }
- cmd->phy_address = tp->phy_addr;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->autoneg = tp->link_config.autoneg;
- cmd->maxtxpkt = 0;
- cmd->maxrxpkt = 0;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+ }
+ cmd->base.phy_address = tp->phy_addr;
+ cmd->base.autoneg = tp->link_config.autoneg;
return 0;
}
-static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tg3_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct tg3 *tp = netdev_priv(dev);
- u32 speed = ethtool_cmd_speed(cmd);
+ u32 speed = cmd->base.speed;
+ u32 advertising;
if (tg3_flag(tp, USE_PHYLIB)) {
struct phy_device *phydev;
if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
return -EAGAIN;
phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
- return phy_ethtool_sset(phydev, cmd);
+ return phy_ethtool_ksettings_set(phydev, cmd);
}
- if (cmd->autoneg != AUTONEG_ENABLE &&
- cmd->autoneg != AUTONEG_DISABLE)
+ if (cmd->base.autoneg != AUTONEG_ENABLE &&
+ cmd->base.autoneg != AUTONEG_DISABLE)
return -EINVAL;
- if (cmd->autoneg == AUTONEG_DISABLE &&
- cmd->duplex != DUPLEX_FULL &&
- cmd->duplex != DUPLEX_HALF)
+ if (cmd->base.autoneg == AUTONEG_DISABLE &&
+ cmd->base.duplex != DUPLEX_FULL &&
+ cmd->base.duplex != DUPLEX_HALF)
return -EINVAL;
- if (cmd->autoneg == AUTONEG_ENABLE) {
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ cmd->link_modes.advertising);
+
+ if (cmd->base.autoneg == AUTONEG_ENABLE) {
u32 mask = ADVERTISED_Autoneg |
ADVERTISED_Pause |
ADVERTISED_Asym_Pause;
else
mask |= ADVERTISED_FIBRE;
- if (cmd->advertising & ~mask)
+ if (advertising & ~mask)
return -EINVAL;
mask &= (ADVERTISED_1000baseT_Half |
ADVERTISED_10baseT_Half |
ADVERTISED_10baseT_Full);
- cmd->advertising &= mask;
+ advertising &= mask;
} else {
if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) {
if (speed != SPEED_1000)
return -EINVAL;
- if (cmd->duplex != DUPLEX_FULL)
+ if (cmd->base.duplex != DUPLEX_FULL)
return -EINVAL;
} else {
if (speed != SPEED_100 &&
tg3_full_lock(tp, 0);
- tp->link_config.autoneg = cmd->autoneg;
- if (cmd->autoneg == AUTONEG_ENABLE) {
- tp->link_config.advertising = (cmd->advertising |
+ tp->link_config.autoneg = cmd->base.autoneg;
+ if (cmd->base.autoneg == AUTONEG_ENABLE) {
+ tp->link_config.advertising = (advertising |
ADVERTISED_Autoneg);
tp->link_config.speed = SPEED_UNKNOWN;
tp->link_config.duplex = DUPLEX_UNKNOWN;
} else {
tp->link_config.advertising = 0;
tp->link_config.speed = speed;
- tp->link_config.duplex = cmd->duplex;
+ tp->link_config.duplex = cmd->base.duplex;
}
tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED;
}
static const struct ethtool_ops tg3_ethtool_ops = {
- .get_settings = tg3_get_settings,
- .set_settings = tg3_set_settings,
.get_drvinfo = tg3_get_drvinfo,
.get_regs_len = tg3_get_regs_len,
.get_regs = tg3_get_regs,
.get_ts_info = tg3_get_ts_info,
.get_eee = tg3_get_eee,
.set_eee = tg3_set_eee,
+ .get_link_ksettings = tg3_get_link_ksettings,
+ .set_link_ksettings = tg3_set_link_ksettings,
};
static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
rtnl_lock();
- /* We needn't recover from permanent error */
- if (state == pci_channel_io_frozen)
- tp->pcierr_recovery = true;
-
/* We probably don't have netdev yet */
if (!netdev || !netif_running(netdev))
goto done;
+ /* We needn't recover from permanent error */
+ if (state == pci_channel_io_frozen)
+ tp->pcierr_recovery = true;
+
tg3_phy_stop(tp);
tg3_netif_stop(tp);
rtnl_lock();
- if (!netif_running(netdev))
+ if (!netdev || !netif_running(netdev))
goto done;
tg3_full_lock(tp, 0);
.driver_data = 0,
}, {
.name = "imx25-fec",
- .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
+ .driver_data = FEC_QUIRK_USE_GASKET,
}, {
.name = "imx27-fec",
- .driver_data = FEC_QUIRK_HAS_RACC,
+ .driver_data = 0,
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
/* FEC receive acceleration */
#define FEC_RACC_IPDIS (1 << 1)
#define FEC_RACC_PRODIS (1 << 2)
+ #define FEC_RACC_SHIFT16 BIT(7)
#define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS)
/*
#if !defined(CONFIG_M5272)
if (fep->quirks & FEC_QUIRK_HAS_RACC) {
- /* set RX checksum */
val = readl(fep->hwp + FEC_RACC);
+ /* align IP header */
+ val |= FEC_RACC_SHIFT16;
if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+ /* set RX checksum */
val |= FEC_RACC_OPTIONS;
else
val &= ~FEC_RACC_OPTIONS;
prefetch(skb->data - NET_IP_ALIGN);
skb_put(skb, pkt_len - 4);
data = skb->data;
+
+ #if !defined(CONFIG_M5272)
+ if (fep->quirks & FEC_QUIRK_HAS_RACC)
+ data = skb_pull_inline(skb, 2);
+ #endif
+
if (!is_copybreak && need_swap)
swap_buffer(data, pkt_len);
* this kind of feature?).
*/
-#define HASH_BITS 6 /* #bits in hash */
+#define FEC_HASH_BITS 6 /* #bits in hash */
#define CRC32_POLY 0xEDB88320
static void set_multicast_list(struct net_device *ndev)
}
}
- /* only upper 6 bits (HASH_BITS) are used
+ /* only upper 6 bits (FEC_HASH_BITS) are used
* which point to specific bit in he hash registers
*/
- hash = (crc >> (32 - HASH_BITS)) & 0x3f;
+ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
if (hash > 31) {
tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
struct sctp_sndrcvinfo *,
struct iov_iter *);
+void sctp_datamsg_free(struct sctp_datamsg *);
void sctp_datamsg_put(struct sctp_datamsg *);
void sctp_chunk_fail(struct sctp_chunk *, int error);
int sctp_chunk_abandoned(struct sctp_chunk *);
atomic_t refcnt;
+ /* How many times this chunk have been sent, for prsctp RTX policy */
+ int sent_count;
+
/* This is our link to the per-transport transmitted list. */
struct list_head transmitted_list;
/* This needs to be recoverable for SCTP_SEND_FAILED events. */
struct sctp_sndrcvinfo sinfo;
- /* We use this field to record param for prsctp policies,
- * for TTL policy, it is the time_to_drop of this chunk,
- * for RTX policy, it is the max_sent_count of this chunk,
- * for PRIO policy, it is the priority of this chunk.
- */
- unsigned long prsctp_param;
-
- /* How many times this chunk have been sent, for prsctp RTX policy */
- int sent_count;
-
/* Which association does this belong to? */
struct sctp_association *asoc;
void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
void sctp_outq_teardown(struct sctp_outq *);
void sctp_outq_free(struct sctp_outq*);
-int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
+void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
int sctp_outq_is_empty(const struct sctp_outq *);
void sctp_outq_restart(struct sctp_outq *);
void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
sctp_retransmit_reason_t);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
-int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
+void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
void sctp_prsctp_prune(struct sctp_association *asoc,
struct sctp_sndrcvinfo *sinfo, int msg_len);
/* Uncork and flush an outqueue. */
static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
{
- if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+ if ((e1->pmu == e2->pmu) &&
(e1->cpu == e2->cpu ||
e1->cpu == -1 ||
e2->cpu == -1))
irq_work_queue(&event->pending);
}
- event->overflow_handler(event, data, regs);
+ READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
ftrace_profile_free_filter(event);
}
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct bpf_perf_event_data_kern ctx = {
+ .data = data,
+ .regs = regs,
+ };
+ int ret = 0;
+
+ preempt_disable();
+ if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+ goto out;
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+ rcu_read_unlock();
+out:
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ if (!ret)
+ return;
+
+ event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ struct bpf_prog *prog;
+
+ if (event->overflow_handler_context)
+ /* hw breakpoint or kernel counter */
+ return -EINVAL;
+
+ if (event->prog)
+ return -EEXIST;
+
+ prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ event->prog = prog;
+ event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+ WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+ return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+ struct bpf_prog *prog = event->prog;
+
+ if (!prog)
+ return;
+
+ WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+ event->prog = NULL;
+ bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
bool is_kprobe, is_tracepoint;
struct bpf_prog *prog;
+ if (event->attr.type == PERF_TYPE_HARDWARE ||
+ event->attr.type == PERF_TYPE_SOFTWARE)
+ return perf_event_set_bpf_handler(event, prog_fd);
+
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
{
struct bpf_prog *prog;
+ perf_event_free_bpf_handler(event);
+
if (!event->tp_event)
return;
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+ if (overflow_handler == bpf_overflow_handler) {
+ struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto err_ns;
+ }
+ event->prog = prog;
+ event->orig_overflow_handler =
+ parent_event->orig_overflow_handler;
+ }
+#endif
}
if (overflow_handler) {
mtu = 576;
}
- return min_t(unsigned int, mtu, IP_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+ fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
return ERR_PTR(-EINVAL);
if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
- if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ if (ipv4_is_loopback(fl4->saddr) &&
+ !(dev_out->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(dev_out))
return ERR_PTR(-EINVAL);
if (ipv4_is_lbcast(fl4->daddr))
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- int master_idx;
int orig_oif;
int err = -ENETUNREACH;
orig_oif = fl4->flowi4_oif;
- master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
- if (master_idx)
- fl4->flowi4_oif = master_idx;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
-
- rth = l3mdev_get_rtable(dev_out, fl4);
- if (rth)
- goto out;
}
if (!fl4->daddr) {
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif &&
- !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+ if (fl4->flowi4_oif) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
else
fl4->saddr = fl4->daddr;
}
- dev_out = net->loopback_dev;
+
+ /* L3 master device is the loopback for that domain */
+ dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
- r, nowait);
+ r, nowait, portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
- if (netif_index_is_l3_master(net, fl4.flowi4_oif))
- fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
if (iif) {
struct net_device *dev;
static void tcp_sndbuf_expand(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
int sndmem, per_mss;
u32 nr_segs;
* Cubic needs 1.7 factor, rounded to 2 to include
* extra cushion (application might react slowly to POLLOUT)
*/
- sndmem = 2 * nr_segs * per_mss;
+ sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
+ sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
}
+/* Sum the number of packets on the wire we have marked as lost.
+ * There are two cases we care about here:
+ * a) Packet hasn't been marked lost (nor retransmitted),
+ * and this is the first loss.
+ * b) Packet has been marked both lost and retransmitted,
+ * and this means we think it was lost again.
+ */
+static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ __u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+ if (!(sacked & TCPCB_LOST) ||
+ ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
+ tp->lost += tcp_skb_pcount(skb);
+}
+
static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tcp_verify_retransmit_hint(tp, skb);
tp->lost_out += tcp_skb_pcount(skb);
+ tcp_sum_lost(tp, skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
}
}
{
tcp_verify_retransmit_hint(tp, skb);
+ tcp_sum_lost(tp, skb);
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
+ struct rate_sample *rate;
int flag;
};
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
&skb->skb_mstamp);
+ tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
tcp_advance_highest_sack(sk, skb);
tcp_skb_collapse_tstamp(prev, skb);
+ if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
+ TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
dup_sack,
tcp_skb_pcount(skb),
&skb->skb_mstamp);
+ tcp_rate_skb_delivered(sk, skb, state->rate);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
- if (found_dup_sack)
+ if (found_dup_sack) {
state->flag |= FLAG_DSACKING_ACK;
+ tp->delivered++; /* A spurious retransmission is delivered */
+ }
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
struct sk_buff *skb;
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
+ bool mark_lost;
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
if (skb == tcp_send_head(sk))
break;
+ mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+ is_reneg);
+ if (mark_lost)
+ tcp_sum_lost(tp, skb);
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
+ if (mark_lost) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
- &np->daddr, ntohs(inet->inet_dport),
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
tp->snd_cwnd, tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (inet_csk(sk)->icsk_ca_ops->cong_control)
+ return;
+
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
*rexmit = REXMIT_LOST;
}
-/* Kathleen Nichols' algorithm for tracking the minimum value of
- * a data stream over some fixed time interval. (E.g., the minimum
- * RTT over the past five minutes.) It uses constant space and constant
- * time per update yet almost always delivers the same minimum as an
- * implementation that has to keep all the data in the window.
- *
- * The algorithm keeps track of the best, 2nd best & 3rd best min
- * values, maintaining an invariant that the measurement time of the
- * n'th best >= n-1'th best. It also makes sure that the three values
- * are widely separated in the time window since that bounds the worse
- * case error when that data is monotonically increasing over the window.
- *
- * Upon getting a new min, we can forget everything earlier because it
- * has no value - the new min is <= everything else in the window by
- * definition and it's the most recent. So we restart fresh on every new min
- * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
- * best.
- */
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
- const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
- struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = {
- .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
- .ts = now,
- };
- u32 elapsed;
-
- /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
- if (unlikely(rttm.rtt <= m[0].rtt))
- m[0] = m[1] = m[2] = rttm;
- else if (rttm.rtt <= m[1].rtt)
- m[1] = m[2] = rttm;
- else if (rttm.rtt <= m[2].rtt)
- m[2] = rttm;
-
- elapsed = now - m[0].ts;
- if (unlikely(elapsed > wlen)) {
- /* Passed entire window without a new min so make 2nd choice
- * the new min & 3rd choice the new 2nd. So forth and so on.
- */
- m[0] = m[1];
- m[1] = m[2];
- m[2] = rttm;
- if (now - m[0].ts > wlen) {
- m[0] = m[1];
- m[1] = rttm;
- if (now - m[0].ts > wlen)
- m[0] = rttm;
- }
- } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
- /* Passed a quarter of the window without a new min so
- * take 2nd choice from the 2nd quarter of the window.
- */
- m[2] = m[1] = rttm;
- } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
- /* Passed half the window without a new min so take the 3rd
- * choice from the last half of the window.
- */
- m[2] = rttm;
- }
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
+
+ minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ rtt_us ? : jiffies_to_usecs(1));
}
static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_snd_una, int *acked,
- struct tcp_sacktag_state *sack)
+ struct tcp_sacktag_state *sack,
+ struct skb_mstamp *now)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct skb_mstamp first_ackt, last_ackt, now;
+ struct skb_mstamp first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
acked_pcount = tcp_tso_acked(sk, skb);
if (!acked_pcount)
break;
-
fully_acked = false;
} else {
/* Speedup tcp_unlink_write_queue() and next loop */
tp->packets_out -= acked_pcount;
pkts_acked += acked_pcount;
+ tcp_rate_skb_delivered(sk, skb, sack->rate);
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- skb_mstamp_get(&now);
if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
+ ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
}
if (sack->first_sackt.v64) {
- sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
- ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
+ sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
+ ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
}
-
+ sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
ca_rtt_us);
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
+ sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
* information. All transmission or retransmission are delayed afterwards.
*/
static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
- int flag)
+ int flag, const struct rate_sample *rs)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ca_ops->cong_control) {
+ icsk->icsk_ca_ops->cong_control(sk, rs);
+ return;
+ }
+
if (tcp_in_cwnd_reduction(sk)) {
/* Reduce cwnd if state mandates */
tcp_cwnd_reduction(sk, acked_sacked, flag);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_sacktag_state sack_state;
+ struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
u32 prior_fackets;
int prior_packets = tp->packets_out;
- u32 prior_delivered = tp->delivered;
+ u32 delivered = tp->delivered;
+ u32 lost = tp->lost;
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ struct skb_mstamp now;
sack_state.first_sackt.v64 = 0;
+ sack_state.rate = &rs;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
if (after(ack, tp->snd_nxt))
goto invalid_ack;
+ skb_mstamp_get(&now);
+
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
}
prior_fackets = tp->fackets_out;
+ rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
* is in window.
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ &sack_state, &now);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+ delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
+ lost = tp->lost - lost; /* freshly marked lost */
+ tcp_rate_gen(sk, delivered, lost, &now, &rs);
+ tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_xmit_recovery(sk, rexmit);
return 1;
/* It _is_ possible, that we have something out-of-order _after_ FIN.
* Probably, we should reset in this case. For now drop them.
*/
- __skb_queue_purge(&tp->out_of_order_queue);
+ skb_rbtree_purge(&tp->out_of_order_queue);
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
sk_mem_reclaim(sk);
int this_sack;
/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
- if (skb_queue_empty(&tp->out_of_order_queue)) {
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tp->rx_opt.num_sacks = 0;
return;
}
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt;
+ bool fin, fragstolen, eaten;
struct sk_buff *skb, *tail;
- bool fragstolen, eaten;
+ struct rb_node *p;
- while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+ p = rb_first(&tp->out_of_order_queue);
+ while (p) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
dsack_high = TCP_SKB_CB(skb)->end_seq;
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- __skb_unlink(skb, &tp->out_of_order_queue);
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
tcp_drop(sk, skb);
continue;
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+ fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- tcp_fin(sk);
- if (eaten)
+ else
kfree_skb_partial(skb, fragstolen);
+
+ if (unlikely(fin)) {
+ tcp_fin(sk);
+ /* tcp_fin() purges tp->out_of_order_queue,
+ * so we must end this loop right now.
+ */
+ break;
+ }
}
}
if (tcp_prune_queue(sk) < 0)
return -1;
- if (!sk_rmem_schedule(sk, skb, size)) {
+ while (!sk_rmem_schedule(sk, skb, size)) {
if (!tcp_prune_ofo_queue(sk))
return -1;
-
- if (!sk_rmem_schedule(sk, skb, size))
- return -1;
}
}
return 0;
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct rb_node **p, *q, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
+ bool fragstolen;
tcp_ecn_check_ce(tp, skb);
inet_csk_schedule_ack(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ seq = TCP_SKB_CB(skb)->seq;
+ end_seq = TCP_SKB_CB(skb)->end_seq;
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ tp->rcv_nxt, seq, end_seq);
- skb1 = skb_peek_tail(&tp->out_of_order_queue);
- if (!skb1) {
+ p = &tp->out_of_order_queue.rb_node;
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
/* Initial out of order segment, build 1 SACK. */
if (tcp_is_sack(tp)) {
tp->rx_opt.num_sacks = 1;
- tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- tp->selective_acks[0].end_seq =
- TCP_SKB_CB(skb)->end_seq;
+ tp->selective_acks[0].start_seq = seq;
+ tp->selective_acks[0].end_seq = end_seq;
}
- __skb_queue_head(&tp->out_of_order_queue, skb);
+ rb_link_node(&skb->rbnode, NULL, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+ tp->ooo_last_skb = skb;
goto end;
}
- seq = TCP_SKB_CB(skb)->seq;
- end_seq = TCP_SKB_CB(skb)->end_seq;
-
- if (seq == TCP_SKB_CB(skb1)->end_seq) {
- bool fragstolen;
-
- if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
- } else {
- tcp_grow_window(sk, skb);
- kfree_skb_partial(skb, fragstolen);
- skb = NULL;
- }
-
- if (!tp->rx_opt.num_sacks ||
- tp->selective_acks[0].end_seq != seq)
- goto add_sack;
-
- /* Common case: data arrive in order after hole. */
- tp->selective_acks[0].end_seq = end_seq;
- goto end;
- }
-
- /* Find place to insert this segment. */
- while (1) {
- if (!after(TCP_SKB_CB(skb1)->seq, seq))
- break;
- if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
- skb1 = NULL;
- break;
- }
- skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
- }
-
- /* Do skb overlap to previous one? */
- if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
- if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- /* All the bits are present. Drop. */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
- skb = NULL;
- tcp_dsack_set(sk, seq, end_seq);
- goto add_sack;
+ /* In the typical case, we are adding an skb to the end of the list.
+ * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+ */
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+coalesce_done:
+ tcp_grow_window(sk, skb);
+ kfree_skb_partial(skb, fragstolen);
+ skb = NULL;
+ goto add_sack;
+ }
+ /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+ if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
+ parent = &tp->ooo_last_skb->rbnode;
+ p = &parent->rb_right;
+ goto insert;
+ }
+
+ /* Find place to insert this segment. Handle overlaps on the way. */
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+ p = &parent->rb_left;
+ continue;
}
- if (after(seq, TCP_SKB_CB(skb1)->seq)) {
- /* Partial overlap. */
- tcp_dsack_set(sk, seq,
- TCP_SKB_CB(skb1)->end_seq);
- } else {
- if (skb_queue_is_first(&tp->out_of_order_queue,
- skb1))
- skb1 = NULL;
- else
- skb1 = skb_queue_prev(
- &tp->out_of_order_queue,
- skb1);
+ if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ /* All the bits are present. Drop. */
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb);
+ skb = NULL;
+ tcp_dsack_set(sk, seq, end_seq);
+ goto add_sack;
+ }
+ if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+ /* Partial overlap. */
+ tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+ } else {
+ /* skb's seq == skb1's seq and skb covers skb1.
+ * Replace skb1 with skb.
+ */
+ rb_replace_node(&skb1->rbnode, &skb->rbnode,
+ &tp->out_of_order_queue);
+ tcp_dsack_extend(sk,
+ TCP_SKB_CB(skb1)->seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb1);
+ goto merge_right;
+ }
+ } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ goto coalesce_done;
}
+ p = &parent->rb_right;
}
- if (!skb1)
- __skb_queue_head(&tp->out_of_order_queue, skb);
- else
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+insert:
+ /* Insert segment into RB tree. */
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
- /* And clean segments covered by new one as whole. */
- while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
- skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+merge_right:
+ /* Remove other segments covered by skb. */
+ while ((q = rb_next(&skb->rbnode)) != NULL) {
+ skb1 = rb_entry(q, struct sk_buff, rbnode);
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
end_seq);
break;
}
- __skb_unlink(skb1, &tp->out_of_order_queue);
+ rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb1);
}
+ /* If there is no skb after us, we are the last_skb ! */
+ if (!q)
+ tp->ooo_last_skb = skb;
add_sack:
if (tcp_is_sack(tp))
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tcp_ofo_queue(sk);
/* RFC2581. 4.2. SHOULD send immediate ACK, when
* gap in queue is filled.
*/
- if (skb_queue_empty(&tp->out_of_order_queue))
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
inet_csk(sk)->icsk_ack.pingpong = 0;
}
tcp_data_queue_ofo(sk, skb);
}
+static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ if (list)
+ return !skb_queue_is_last(list, skb) ? skb->next : NULL;
+
+ return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+}
+
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *list)
+ struct sk_buff_head *list,
+ struct rb_root *root)
{
- struct sk_buff *next = NULL;
+ struct sk_buff *next = tcp_skb_next(skb, list);
- if (!skb_queue_is_last(list, skb))
- next = skb_queue_next(list, skb);
+ if (list)
+ __skb_unlink(skb, list);
+ else
+ rb_erase(&skb->rbnode, root);
- __skb_unlink(skb, list);
__kfree_skb(skb);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
return next;
}
+/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sk_buff *skb1;
+
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, root);
+}
+
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
*
- * If tail is NULL, this means until the end of the list.
+ * If tail is NULL, this means until the end of the queue.
*
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
static void
-tcp_collapse(struct sock *sk, struct sk_buff_head *list,
- struct sk_buff *head, struct sk_buff *tail,
- u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+ struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
{
- struct sk_buff *skb, *n;
+ struct sk_buff *skb = head, *n;
+ struct sk_buff_head tmp;
bool end_of_skbs;
/* First, check that queue is collapsible and find
- * the point where collapsing can be useful. */
- skb = head;
+ * the point where collapsing can be useful.
+ */
restart:
- end_of_skbs = true;
- skb_queue_walk_from_safe(list, skb, n) {
- if (skb == tail)
- break;
+ for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+ n = tcp_skb_next(skb, list);
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb)
break;
goto restart;
break;
}
- if (!skb_queue_is_last(list, skb)) {
- struct sk_buff *next = skb_queue_next(list, skb);
- if (next != tail &&
- TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
- end_of_skbs = false;
- break;
- }
+ if (n && n != tail &&
+ TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+ end_of_skbs = false;
+ break;
}
/* Decided to skip this, advance start seq. */
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
+ __skb_queue_head_init(&tmp);
+
while (before(start, end)) {
int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
- return;
+ break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
- __skb_queue_before(list, skb, nskb);
+ if (list)
+ __skb_queue_before(list, skb, nskb);
+ else
+ __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk);
/* Copy data, releasing collapsed skbs. */
start += size;
}
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
- return;
+ goto end;
}
}
}
+end:
+ skb_queue_walk_safe(&tmp, skb, n)
+ tcp_rbtree_insert(root, skb);
}
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
- struct sk_buff *head;
+ struct sk_buff *skb, *head;
+ struct rb_node *p;
u32 start, end;
- if (!skb)
+ p = rb_first(&tp->out_of_order_queue);
+ skb = rb_entry_safe(p, struct sk_buff, rbnode);
+new_range:
+ if (!skb) {
+ p = rb_last(&tp->out_of_order_queue);
+ /* Note: This is possible p is NULL here. We do not
+ * use rb_entry_safe(), as ooo_last_skb is valid only
+ * if rbtree is not empty.
+ */
+ tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
return;
-
+ }
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
- head = skb;
-
- for (;;) {
- struct sk_buff *next = NULL;
- if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
- next = skb_queue_next(&tp->out_of_order_queue, skb);
- skb = next;
+ for (head = skb;;) {
+ skb = tcp_skb_next(skb, NULL);
- /* Segment is terminated when we see gap or when
- * we are at the end of all the queue. */
+ /* Range is terminated when we see a gap or when
+ * we are at the queue end.
+ */
if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
- tcp_collapse(sk, &tp->out_of_order_queue,
+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
head, skb, start, end);
- head = skb;
- if (!skb)
- break;
- /* Start new segment */
+ goto new_range;
+ }
+
+ if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
start = TCP_SKB_CB(skb)->seq;
+ if (after(TCP_SKB_CB(skb)->end_seq, end))
end = TCP_SKB_CB(skb)->end_seq;
- } else {
- if (before(TCP_SKB_CB(skb)->seq, start))
- start = TCP_SKB_CB(skb)->seq;
- if (after(TCP_SKB_CB(skb)->end_seq, end))
- end = TCP_SKB_CB(skb)->end_seq;
- }
}
}
/*
- * Purge the out-of-order queue.
- * Return true if queue was pruned.
+ * Clean the out-of-order queue to make room.
+ * We drop high sequences packets to :
+ * 1) Let a chance for holes to be filled.
+ * 2) not add too big latencies if thousands of packets sit there.
+ * (But if application shrinks SO_RCVBUF, we could still end up
+ * freeing whole queue here)
+ *
+ * Return true if queue has shrunk.
*/
static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool res = false;
+ struct rb_node *node, *prev;
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
- __skb_queue_purge(&tp->out_of_order_queue);
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
+ return false;
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tp->rx_opt.sack_ok)
- tcp_sack_reset(&tp->rx_opt);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ node = &tp->ooo_last_skb->rbnode;
+ do {
+ prev = rb_prev(node);
+ rb_erase(node, &tp->out_of_order_queue);
+ tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
sk_mem_reclaim(sk);
- res = true;
- }
- return res;
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !tcp_under_memory_pressure(sk))
+ break;
+ node = prev;
+ } while (node);
+ tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ return true;
}
/* Reduce allocated memory if we can, trying to get
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
- tcp_collapse(sk, &sk->sk_receive_queue,
+ tcp_collapse(sk, &sk->sk_receive_queue, NULL,
skb_peek(&sk->sk_receive_queue),
NULL,
tp->copied_seq, tp->rcv_nxt);
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
- (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+ (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
} else
tcp_init_metrics(sk);
- tcp_update_pacing_rate(sk);
+ if (!inet_csk(sk)->icsk_ca_ops->cong_control)
+ tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */
tp->lsndtime = tcp_time_stamp;
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
+ inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
{
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
- TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->lost_out > tp->retrans_out &&
+ tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tcp_xmit_retransmit_queue(sk);
+
+ tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
+ }
}
/*
* One tasklet per cpu tries to send more skbs.
skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
+ tcp_rate_skb_sent(sk, skb);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
tcp_set_skb_tso_segs(skb, mss_now);
tcp_set_skb_tso_segs(buff, mss_now);
+ /* Update delivered info for the new segment */
+ TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
+
/* If this packet has been sent out already, we must
* adjust the various packet counters.
*/
}
return mtu;
}
+EXPORT_SYMBOL(tcp_mss_to_mtu);
/* MTU probing init per socket */
void tcp_mtup_init(struct sock *sk)
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
* This preserves ACK clocking and is consistent
* with tcp_tso_should_defer() heuristic.
*/
- segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+ segs = max_t(u32, bytes / mss_now, min_tso_segs);
return min_t(u32, segs, sk->sk_gso_max_segs);
}
+EXPORT_SYMBOL(tcp_tso_autosize);
+
+/* Return the number of segments we want in the skb we are transmitting.
+ * See if congestion control module wants to decide; otherwise, autosize.
+ */
+static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+
+ return tso_segs ? :
+ tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+}
/* Returns the portion of skb which can be sent right away */
static unsigned int tcp_mss_split_point(const struct sock *sk,
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed)
+ if (nskb->ip_summed) {
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- else
- nskb->csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, nskb->csum);
+ } else {
+ __wsum csum = skb_copy_and_csum_bits(skb, 0,
+ skb_put(nskb, copy),
+ copy, 0);
+ nskb->csum = csum_block_add(nskb->csum, csum, len);
+ }
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
return -1;
}
+/* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
+ */
+static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ unsigned int factor)
+{
+ unsigned int limit;
+
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit <<= factor;
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ */
+ smp_mb__after_atomic();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ return true;
+ }
+ return false;
+}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
}
}
- max_segs = tcp_tso_autosize(sk, mss_now);
+ max_segs = tcp_tso_segs(sk, mss_now);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
- /* TCP Small Queues :
- * Control number of packets in qdisc/devices to two packets / or ~1 ms.
- * This allows for :
- * - better RTT estimation and ACK scheduling
- * - faster recovery
- * - high rates
- * Alas, some drivers / subsystems require a fair amount
- * of queued bytes to ensure line rate.
- * One example is wifi aggregation (802.11 AMPDU)
- */
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
-
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- /* It is possible TX completion already happened
- * before we set TSQ_THROTTLED, so we must
- * test again the condition.
- */
- smp_mb__after_atomic();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
- break;
- }
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
last_lost = tp->snd_una;
}
- max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
+ max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
tcp_for_write_queue_from(skb, sk) {
- __u8 sacked = TCP_SKB_CB(skb)->sacked;
+ __u8 sacked;
int segs;
if (skb == tcp_send_head(sk))
segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
if (segs <= 0)
return;
+ sacked = TCP_SKB_CB(skb)->sacked;
/* In case tcp_shift_skb_data() have aggregated large skbs,
* we need to make sure not sending too bigs TSO packets
*/
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
continue;
+ if (tcp_small_queue_check(sk, skb, 1))
+ return;
+
if (tcp_retransmit_skb(sk, skb, segs))
return;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT 5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
static int ip6gre_net_id __read_mostly;
struct ip6gre_net {
- struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+ struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct net_device *fb_tunnel_dev;
};
will match fallback tunnel.
*/
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
static u32 HASH_ADDR(const struct in6_addr *addr)
{
u32 hash = ipv6_addr_hash(addr);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
}
#define tunnels_r_l tunnels[3]
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = skb->protocol;
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
struct ip6_tnl *t;
t = rtnl_dereference(ign->tunnels[prio][h]);
parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
if (data[IFLA_GRE_FLOWINFO])
- parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+ parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
if (data[IFLA_GRE_FLAGS])
parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
- struct net_device *dev,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
void ip6_route_input(struct sk_buff *skb)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = l3mdev_fib_oif(skb->dev),
+ .flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags)
{
- struct dst_entry *dst;
bool any_src;
- dst = l3mdev_get_rt6_dst(net, fl6);
- if (dst)
- return dst;
+ if (rt6_need_strict(&fl6->daddr)) {
+ struct dst_entry *dst;
+
+ dst = l3mdev_link_scope_lookup(net, fl6);
+ if (dst)
+ return dst;
+ }
fl6->flowi6_iif = LOOPBACK_IFINDEX;
rcu_read_unlock();
out:
- return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct dst_entry *icmp6_dst_gc_list;
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT);
+ struct net_device *dev = net->loopback_dev;
+ struct rt6_info *rt;
+
+ /* use L3 Master device as loopback for host routes if device
+ * is enslaved and address is not link local or multicast
+ */
+ if (!rt6_need_strict(addr))
+ dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+ rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
- int err = ip6mr_get_route(net, skb, rtm, nowait);
+ int err = ip6mr_get_route(net, skb, rtm, nowait,
+ portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
} else {
fl6.flowi6_oif = oif;
- if (netif_index_is_l3_master(net, oif)) {
- fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF;
- }
-
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
u32 *tlv = (u32 *)(skbdata);
u16 totlen = nla_total_size(dlen); /*alignment + hdr */
char *dptr = (char *)tlv + NLA_HDRLEN;
- u32 htlv = attrtype << 16 | dlen;
+ u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
*tlv = htonl(htlv);
memset(dptr, 0, totlen - NLA_HDRLEN);
}
EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
+int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
+{
+ u16 edata = 0;
+
+ if (mi->metaval)
+ edata = *(u16 *)mi->metaval;
+ else if (metaval)
+ edata = metaval;
+
+ if (!edata) /* will not encode */
+ return 0;
+
+ edata = htons(edata);
+ return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata);
+}
+EXPORT_SYMBOL_GPL(ife_encode_meta_u16);
+
int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
{
if (mi->metaval)
}
EXPORT_SYMBOL_GPL(ife_check_meta_u32);
+int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi)
+{
+ if (metaval || mi->metaval)
+ return 8; /* T+L+(V) == 2+2+(2+2bytepad) */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ife_check_meta_u16);
+
int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
{
u32 edata = metaval;
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
- u16 ifehdrln = ifehdr->metalen;
+ int ifehdrln = (int)ifehdr->metalen;
struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
spin_lock(&ife->tcf_lock);
return TC_ACT_SHOT;
}
- iethh = eth_hdr(skb);
-
err = skb_cow_head(skb, hdrm);
if (unlikely(err)) {
ife->tcf_qstats.drops++;
if (!(at & AT_EGRESS))
skb_push(skb, skb->dev->hard_header_len);
+ iethh = (struct ethhdr *)skb->data;
__skb_push(skb, hdrm);
memcpy(skb->data, iethh, skb->mac_len);
skb_reset_mac_header(skb);
return msg;
}
+void sctp_datamsg_free(struct sctp_datamsg *msg)
+{
+ struct sctp_chunk *chunk;
+
+ /* This doesn't have to be a _safe vairant because
+ * sctp_chunk_free() only drops the refs.
+ */
+ list_for_each_entry(chunk, &msg->chunks, frag_list)
+ sctp_chunk_free(chunk);
+
+ sctp_datamsg_put(msg);
+}
+
/* Final destructruction of datamsg memory. */
static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
{
msg, msg->expires_at, jiffies);
}
+ if (asoc->peer.prsctp_capable &&
+ SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+ msg->expires_at =
+ jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- max_data = (asoc->pathmtu -
- sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
+ max_data = asoc->pathmtu -
+ sctp_sk(asoc->base.sk)->pf->af->net_header_len -
+ sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+ max_data = SCTP_TRUNC4(max_data);
max = asoc->frag_point;
/* If the the peer requested that we authenticate DATA chunks
struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
if (hmac_desc)
- max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
- hmac_desc->hmac_len);
+ max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+ hmac_desc->hmac_len);
}
/* Now, check if we need to reduce our max */
asoc->outqueue.out_qlen == 0 &&
list_empty(&asoc->outqueue.retransmit) &&
msg_len > max)
- max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
+ max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
/* Encourage Cookie-ECHO bundling. */
if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- if (!chunk->asoc->prsctp_enable ||
+ if (!chunk->asoc->peer.prsctp_capable ||
!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
struct sctp_datamsg *msg = chunk->msg;
}
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
- time_after(jiffies, chunk->prsctp_param)) {
+ time_after(jiffies, chunk->msg->expires_at)) {
if (chunk->sent_count)
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
else
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
- chunk->sent_count > chunk->prsctp_param) {
+ chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
}
static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
}
/* Put a new chunk in an sctp_outq. */
-int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
+void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
{
struct net *net = sock_net(q->asoc->base.sk);
- int error = 0;
pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
chunk && chunk->chunk_hdr ?
* immediately.
*/
if (sctp_chunk_is_data(chunk)) {
- /* Is it OK to queue data chunks? */
- /* From 9. Termination of Association
- *
- * When either endpoint performs a shutdown, the
- * association on each peer will stop accepting new
- * data from its user and only deliver data in queue
- * at the time of sending or receiving the SHUTDOWN
- * chunk.
- */
- switch (q->asoc->state) {
- case SCTP_STATE_CLOSED:
- case SCTP_STATE_SHUTDOWN_PENDING:
- case SCTP_STATE_SHUTDOWN_SENT:
- case SCTP_STATE_SHUTDOWN_RECEIVED:
- case SCTP_STATE_SHUTDOWN_ACK_SENT:
- /* Cannot send after transport endpoint shutdown */
- error = -ESHUTDOWN;
- break;
-
- default:
- pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
- __func__, q, chunk, chunk && chunk->chunk_hdr ?
- sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
- "illegal chunk");
-
- sctp_chunk_hold(chunk);
- sctp_outq_tail_data(q, chunk);
- if (chunk->asoc->peer.prsctp_capable &&
- SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
- chunk->asoc->sent_cnt_removable++;
- if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
- else
- SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
- break;
- }
+ pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
+ __func__, q, chunk, chunk && chunk->chunk_hdr ?
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
+ "illegal chunk");
+
+ sctp_outq_tail_data(q, chunk);
- if (chunk->asoc->prsctp_enable &&
++ if (chunk->asoc->peer.prsctp_capable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ chunk->asoc->sent_cnt_removable++;
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
+ else
+ SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
- if (error < 0)
- return error;
-
if (!q->cork)
- error = sctp_outq_flush(q, 0, gfp);
-
- return error;
+ sctp_outq_flush(q, 0, gfp);
}
/* Insert a chunk into the sorted list based on the TSNs. The retransmit list
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->transmitted_list);
list_for_each_entry_safe(chk, temp, queue, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->list);
{
struct sctp_transport *transport;
- if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable)
return;
msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
sctp_retransmit_reason_t reason)
{
struct net *net = sock_net(q->asoc->base.sk);
- int error = 0;
switch (reason) {
case SCTP_RTXR_T3_RTX:
* will be flushed at the end.
*/
if (reason != SCTP_RTXR_FAST_RTX)
- error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
-
- if (error)
- q->asoc->base.sk->sk_err = -error;
+ sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
}
/*
}
/* Cork the outqueue so queued chunks are really queued. */
-int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
+void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
{
if (q->cork)
q->cork = 0;
- return sctp_outq_flush(q, 0, gfp);
+ sctp_outq_flush(q, 0, gfp);
}
* locking concerns must be made. Today we use the sock lock to protect
* this function.
*/
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
{
struct sctp_packet *packet;
struct sctp_packet singleton;
sctp_packet_config(&singleton, vtag, 0);
sctp_packet_append_chunk(&singleton, chunk);
error = sctp_packet_transmit(&singleton, gfp);
- if (error < 0)
- return error;
+ if (error < 0) {
+ asoc->base.sk->sk_err = -error;
+ return;
+ }
break;
case SCTP_CID_ABORT:
retran:
error = sctp_outq_flush_rtx(q, packet,
rtx_timeout, &start_timer);
+ if (error < 0)
+ asoc->base.sk->sk_err = -error;
if (start_timer) {
sctp_transport_reset_t3_rtx(transport);
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
struct sctp_transport,
send_ready);
packet = &t->packet;
- if (!sctp_packet_empty(packet))
+ if (!sctp_packet_empty(packet)) {
error = sctp_packet_transmit(packet, gfp);
+ if (error < 0)
+ asoc->base.sk->sk_err = -error;
+ }
/* Clear the burst limited state, if any */
sctp_transport_burst_reset(t);
}
-
- return error;
}
/* Update unack_data based on the incoming SACK chunk */
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
{
int i;
sctp_sack_variable_t *frags;
- __u16 gap;
+ __u16 tsn_offset, blocks;
__u32 ctsn = ntohl(sack->cum_tsn_ack);
if (TSN_lte(tsn, ctsn))
*/
frags = sack->variable;
- gap = tsn - ctsn;
- for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
- if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
- TSN_lte(gap, ntohs(frags[i].gab.end)))
+ blocks = ntohs(sack->num_gap_ack_blocks);
+ tsn_offset = tsn - ctsn;
+ for (i = 0; i < blocks; ++i) {
+ if (tsn_offset >= ntohs(frags[i].gab.start) &&
+ tsn_offset <= ntohs(frags[i].gab.end))
goto pass;
}
const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
int portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ const struct nlmsghdr *unlh,
+ bool net_admin)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct list_head *addr_list;
r->idiag_retrans = 0;
}
- if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
goto errout;
if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
struct netlink_callback *cb;
const struct inet_diag_req_v2 *r;
const struct nlmsghdr *nlh;
+ bool net_admin;
};
static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
+ nla_total_size(sizeof(struct inet_diag_meminfo))
err = inet_sctp_diag_fill(sk, assoc, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh);
+ nlh->nlmsg_seq, 0, nlh,
+ commp->net_admin);
release_sock(sk);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
return err;
}
- static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
+ static int sctp_sock_dump(struct sock *sk, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
+ struct sctp_association *assoc;
int err = 0;
- /* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
- goto out;
-
- if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
- goto out;
-
lock_sock(sk);
- if (sk != assoc->base.sk)
- goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->nlh) < 0) {
+ NLM_F_MULTI, cb->nlh,
+ commp->net_admin) < 0) {
cb->args[3] = 1;
- err = 2;
+ err = 1;
goto release;
}
cb->args[3] = 1;
if (inet_sctp_diag_fill(sk, assoc, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
+ cb->nlh->nlmsg_seq, 0, cb->nlh,
+ commp->net_admin) < 0) {
- err = 2;
+ err = 1;
goto release;
}
next:
cb->args[4] = 0;
release:
release_sock(sk);
+ sock_put(sk);
return err;
+ }
+
+ static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+ {
+ struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct netlink_callback *cb = commp->cb;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct sctp_association *assoc =
+ list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+ /* find the ep only once through the transports by this condition */
+ if (tsp->asoc != assoc)
+ goto out;
+
+ if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+ goto out;
+
+ sock_hold(sk);
+ cb->args[5] = (long)sk;
+
+ return 1;
+
out:
cb->args[2]++;
- return err;
+ return 0;
}
static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->nlh) < 0) {
+ cb->nlh, commp->net_admin) < 0) {
err = 2;
goto out;
}
.skb = in_skb,
.r = req,
.nlh = nlh,
+ .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
};
if (req->sdiag_family == AF_INET) {
.skb = skb,
.cb = cb,
.r = r,
+ .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
};
/* eps hashtable dumps
* 2 : to record the transport pos of this time's traversal
* 3 : to mark if we have dumped the ep info of the current asoc
* 4 : to work as a temporary variable to traversal list
+ * 5 : to save the sk we get from travelsing the tsp list.
*/
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
+
+ next:
+ cb->args[5] = 0;
+ sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
+
+ if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
+ goto next;
+
done:
cb->args[1] = cb->args[4];
cb->args[4] = 0;
num_types = sp->pf->supported_addrs(sp, types);
chunksize = sizeof(init) + addrs_len;
- chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
+ chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
if (asoc->prsctp_enable)
/* Add HMACS parameter length if any were defined */
auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
- chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+ chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
/* Add CHUNKS parameter length */
auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
if (auth_chunks->length)
- chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+ chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
auth_chunks = NULL;
/* If we have any extensions to report, account for that */
if (num_ext)
- chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+ num_ext);
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
- chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+ chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
if (auth_chunks->length)
- chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+ chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
auth_chunks = NULL;
}
if (num_ext)
- chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+ num_ext);
/* Now allocate and fill out the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
return retval;
}
- static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
- const struct sctp_sndrcvinfo *sinfo)
- {
- if (!chunk->asoc->prsctp_enable)
- return;
-
- if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param =
- jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
- else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
- SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param = sinfo->sinfo_timetolive;
- }
-
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
- sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
struct sock *sk;
/* No need to allocate LL here, as this is only a chunk. */
- skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+ skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
if (!skb)
goto nodata;
void *target;
void *padding;
int chunklen = ntohs(chunk->chunk_hdr->length);
- int padlen = WORD_ROUND(chunklen) - chunklen;
+ int padlen = SCTP_PAD4(chunklen) - chunklen;
padding = skb_put(chunk->skb, padlen);
target = skb_put(chunk->skb, len);
struct __sctp_missing report;
__u16 len;
- len = WORD_ROUND(sizeof(report));
+ len = SCTP_PAD4(sizeof(report));
/* Make an ERROR chunk, preparing enough room for
* returning multiple unknown parameters.
if (*errp) {
if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
- WORD_ROUND(ntohs(param.p->length))))
+ SCTP_PAD4(ntohs(param.p->length))))
sctp_addto_chunk_fixed(*errp,
- WORD_ROUND(ntohs(param.p->length)),
+ SCTP_PAD4(ntohs(param.p->length)),
param.v);
} else {
/* If there is no memory for generating the ERROR
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
+ sctp_chunk_hold(chunk);
+
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
* breaks.
*/
err = sctp_primitive_SEND(net, asoc, datamsg);
- sctp_datamsg_put(datamsg);
/* Did the lower layer accept the chunk? */
- if (err)
+ if (err) {
+ sctp_datamsg_free(datamsg);
goto out_free;
+ }
pr_debug("%s: we sent primitively\n", __func__);
+ sctp_datamsg_put(datamsg);
err = msg_len;
if (unlikely(wait_connect)) {
const union sctp_addr *paddr, void *p)
{
struct sctp_transport *transport;
- int err = 0;
+ int err = -ENOENT;
rcu_read_lock();
transport = sctp_addrs_lookup_transport(net, laddr, paddr);
if (!transport || !sctp_transport_hold(transport))
goto out;
- err = cb(transport, p);
+
+ sctp_association_hold(transport->asoc);
sctp_transport_put(transport);
- out:
rcu_read_unlock();
+ err = cb(transport, p);
+ sctp_association_put(transport->asoc);
+
+ out:
return err;
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);