From: Linus Torvalds Date: Mon, 10 Oct 2016 00:04:33 +0000 (-0700) Subject: Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma X-Git-Tag: v4.9-rc1~71 X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=commitdiff_plain;h=b9044ac8292fc94bee33f6f08acaed3ac55f0c75;hp=-c Merge tag 'for-linus' of git://git./linux/kernel/git/dledford/rdma Pull main rdma updates from Doug Ledford: "This is the main pull request for the rdma stack this release. The code has been through 0day and I had it tagged for linux-next testing for a couple days. Summary: - updates to mlx5 - updates to mlx4 (two conflicts, both minor and easily resolved) - updates to iw_cxgb4 (one conflict, not so obvious to resolve, proper resolution is to keep the code in cxgb4_main.c as it is in Linus' tree as attach_uld was refactored and moved into cxgb4_uld.c) - improvements to uAPI (moved vendor specific API elements to uAPI area) - add hns-roce driver and hns and hns-roce ACPI reset support - conversion of all rdma code away from deprecated create_singlethread_workqueue - security improvement: remove unsafe ib_get_dma_mr (breaks lustre in staging)" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (75 commits) staging/lustre: Disable InfiniBand support iw_cxgb4: add fast-path for small REG_MR operations cxgb4: advertise support for FR_NSMR_TPTE_WR IB/core: correctly handle rdma_rw_init_mrs() failure IB/srp: Fix infinite loop when FMR sg[0].offset != 0 IB/srp: Remove an unused argument IB/core: Improve ib_map_mr_sg() documentation IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets IB/mthca: Move user vendor structures IB/nes: Move user vendor structures IB/ocrdma: Move user vendor structures IB/mlx4: Move user vendor structures IB/cxgb4: Move user vendor structures IB/cxgb3: Move user vendor structures IB/mlx5: Move and decouple user vendor structures IB/{core,hw}: Add constant for node_desc ipoib: Make ipoib_warn ratelimited IB/mlx4/alias_GUID: Remove deprecated create_singlethread_workqueue IB/ipoib_verbs: Remove deprecated create_singlethread_workqueue IB/ipoib: Remove deprecated create_singlethread_workqueue ... --- b9044ac8292fc94bee33f6f08acaed3ac55f0c75 diff --combined MAINTAINERS index 5327bbe00d13,cc77b9ab9d52..a009e004f8f7 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -807,7 -807,6 +807,7 @@@ M: Laura Abbott L: devel@driverdev.osuosl.org S: Supported +F: Documentation/devicetree/bindings/staging/ion/ F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h F: drivers/staging/android/uapi/ion_test.h @@@ -819,11 -818,11 +819,11 @@@ L: alsa-devel@alsa-project.org (moderat S: Maintained F: sound/aoa/ -APEX EMBEDDED SYSTEMS STX104 DAC DRIVER +APEX EMBEDDED SYSTEMS STX104 IIO DRIVER M: William Breathitt Gray L: linux-iio@vger.kernel.org S: Maintained -F: drivers/iio/dac/stx104.c +F: drivers/iio/adc/stx104.c APM DRIVER M: Jiri Kosina @@@ -866,13 -865,6 +866,13 @@@ F: drivers/net/phy/mdio-xgene. F: Documentation/devicetree/bindings/net/apm-xgene-enet.txt F: Documentation/devicetree/bindings/net/apm-xgene-mdio.txt +APPLIED MICRO (APM) X-GENE SOC PMU +M: Tai Nguyen +S: Supported +F: drivers/perf/xgene_pmu.c +F: Documentation/perf/xgene-pmu.txt +F: Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt + APTINA CAMERA SENSOR PLL M: Laurent Pinchart L: linux-media@vger.kernel.org @@@ -898,15 -890,6 +898,15 @@@ S: Supporte F: drivers/gpu/drm/arc/ F: Documentation/devicetree/bindings/display/snps,arcpgu.txt +ARM ARCHITECTED TIMER DRIVER +M: Mark Rutland +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/include/asm/arch_timer.h +F: arch/arm64/include/asm/arch_timer.h +F: drivers/clocksource/arm_arch_timer.c + ARM HDLCD DRM DRIVER M: Liviu Dudau S: Supported @@@ -929,17 -912,15 +929,17 @@@ F: arch/arm/include/asm/floppy. ARM PMU PROFILING AND DEBUGGING M: Will Deacon -R: Mark Rutland +M: Mark Rutland S: Maintained +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) F: arch/arm*/kernel/perf_* F: arch/arm/oprofile/common.c F: arch/arm*/kernel/hw_breakpoint.c F: arch/arm*/include/asm/hw_breakpoint.h F: arch/arm*/include/asm/perf_event.h -F: drivers/perf/arm_pmu.c +F: drivers/perf/* F: include/linux/perf/arm_pmu.h +F: Documentation/devicetree/bindings/arm/pmu.txt ARM PORT M: Russell King @@@ -1010,7 -991,6 +1010,7 @@@ M: Chen-Yu Tsai @@@ -1142,11 -1122,6 +1142,11 @@@ F: drivers/hwtracing/coresight/ F: Documentation/trace/coresight.txt F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* +F: tools/perf/arch/arm/util/pmu.c +F: tools/perf/arch/arm/util/auxtrace.c +F: tools/perf/arch/arm/util/cs-etm.c +F: tools/perf/arch/arm/util/cs-etm.h +F: tools/perf/util/cs-etm.h ARM/CORGI MACHINE SUPPORT M: Richard Purdie @@@ -1467,7 -1442,6 +1467,7 @@@ F: arch/arm/mach-orion5x/ts78xx- ARM/OXNAS platform support M: Neil Armstrong L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-oxnas/ F: arch/arm/boot/dts/oxnas* @@@ -1649,8 -1623,7 +1649,8 @@@ N: rockchi ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +R: Javier Martinez Canillas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained @@@ -1670,6 -1643,7 +1670,6 @@@ F: drivers/*/*s3c64xx F: drivers/*/*s5pv210* F: drivers/memory/samsung/* F: drivers/soc/samsung/* -F: drivers/spi/spi-s3c* F: Documentation/arm/Samsung/ F: Documentation/devicetree/bindings/arm/samsung/ F: Documentation/devicetree/bindings/sram/samsung-sram.txt @@@ -1857,7 -1831,6 +1857,7 @@@ T: git git://git.kernel.org/pub/scm/lin ARM/UNIPHIER ARCHITECTURE M: Masahiro Yamada L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git S: Maintained F: arch/arm/boot/dts/uniphier* F: arch/arm/include/asm/hardware/cache-uniphier.h @@@ -1865,10 -1838,8 +1865,10 @@@ F: arch/arm/mach-uniphier F: arch/arm/mm/cache-uniphier.c F: arch/arm64/boot/dts/socionext/ F: drivers/bus/uniphier-system-bus.c +F: drivers/clk/uniphier/ F: drivers/i2c/busses/i2c-uniphier* F: drivers/pinctrl/uniphier/ +F: drivers/reset/reset-uniphier.c F: drivers/tty/serial/8250/8250_uniphier.c N: uniphier @@@ -2003,13 -1974,6 +2003,13 @@@ S: Maintaine F: drivers/media/i2c/as3645a.c F: include/media/i2c/as3645a.h +ASAHI KASEI AK8974 DRIVER +M: Linus Walleij +L: linux-iio@vger.kernel.org +W: http://www.akm.com/ +S: Supported +F: drivers/iio/magnetometer/ak8974.c + ASC7621 HARDWARE MONITOR DRIVER M: George Joseph L: linux-hwmon@vger.kernel.org @@@ -2148,6 -2112,11 +2148,6 @@@ M: Ludovic Desroches -S: Supported -F: drivers/tty/serial/atmel_serial.c - ATMEL AT91 SAMA5D2-Compatible Shutdown Controller M: Nicolas Ferre S: Supported @@@ -2482,7 -2451,6 +2482,7 @@@ T: git git://git.kernel.org/pub/scm/lin S: Maintained F: block/ F: kernel/trace/blktrace.c +F: lib/sbitmap.c BLOCK2MTD DRIVER M: Joern Engel @@@ -2516,7 -2484,7 +2516,7 @@@ F: include/net/bluetooth BONDING DRIVER M: Jay Vosburgh M: Veaceslav Falico -M: Andy Gospodarek +M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ S: Supported @@@ -2531,7 -2499,7 +2531,7 @@@ S: Supporte F: kernel/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER -M: Gary Zambrano +M: Michael Chan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* @@@ -2606,13 -2574,6 +2606,13 @@@ F: arch/arm/mach-bcm/bcm_5301x. F: arch/arm/boot/dts/bcm5301x*.dtsi F: arch/arm/boot/dts/bcm470* +BROADCOM BCM53573 ARM ARCHITECTURE +M: Rafał Miłecki +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: arch/arm/boot/dts/bcm53573* +F: arch/arm/boot/dts/bcm47189* + BROADCOM BCM63XX ARM ARCHITECTURE M: Florian Fainelli M: bcm-kernel-feedback-list@broadcom.com @@@ -2926,14 -2887,6 +2926,14 @@@ S: Maintaine F: drivers/iio/light/cm* F: Documentation/devicetree/bindings/i2c/trivial-devices.txt +CAVIUM I2C DRIVER +M: Jan Glauber +M: David Daney +W: http://www.cavium.com +S: Supported +F: drivers/i2c/busses/i2c-octeon* +F: drivers/i2c/busses/i2c-thunderx* + CAVIUM LIQUIDIO NETWORK DRIVER M: Derek Chickles M: Satanand Burla @@@ -3181,7 -3134,7 +3181,7 @@@ L: cocci@systeme.lip6.fr (moderated fo T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc W: http://coccinelle.lip6.fr/ S: Supported -F: Documentation/coccinelle.txt +F: Documentation/dev-tools/coccinelle.rst F: scripts/coccinelle/ F: scripts/coccicheck @@@ -3207,7 -3160,6 +3207,7 @@@ COMMON CLK FRAMEWOR M: Michael Turquette M: Stephen Boyd L: linux-clk@vger.kernel.org +Q: http://patchwork.kernel.org/project/linux-clk/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git S: Maintained F: Documentation/devicetree/bindings/clock/ @@@ -3295,7 -3247,7 +3295,7 @@@ F: kernel/cpuset. CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner M: Michal Hocko -M: Vladimir Davydov +M: Vladimir Davydov L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained @@@ -3316,7 -3268,7 +3316,7 @@@ S: Maintaine F: drivers/net/wan/cosa* CPMAC ETHERNET DRIVER -M: Florian Fainelli +M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ti/cpmac.c @@@ -3328,7 -3280,6 +3328,7 @@@ L: linux-pm@vger.kernel.or S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) +F: Documentation/cpu-freq/ F: drivers/cpufreq/ F: include/linux/cpufreq.h @@@ -3509,6 -3460,7 +3509,7 @@@ L: linux-rdma@vger.kernel.or W: http://www.openfabrics.org S: Supported F: drivers/infiniband/hw/cxgb3/ + F: include/uapi/rdma/cxgb3-abi.h CXGB4 ETHERNET DRIVER (CXGB4) M: Hariprasad S @@@ -3530,6 -3482,7 +3531,7 @@@ L: linux-rdma@vger.kernel.or W: http://www.openfabrics.org S: Supported F: drivers/infiniband/hw/cxgb4/ + F: include/uapi/rdma/cxgb4-abi.h CXGB4VF ETHERNET DRIVER (CXGB4VF) M: Casey Leedom @@@ -3540,14 -3493,14 +3542,14 @@@ F: drivers/net/ethernet/chelsio/cxgb4vf CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER M: Ian Munsie -M: Michael Neuling +M: Frederic Barrat L: linuxppc-dev@lists.ozlabs.org S: Supported +F: arch/powerpc/platforms/powernv/pci-cxl.c F: drivers/misc/cxl/ F: include/misc/cxl* F: include/uapi/misc/cxl.h F: Documentation/powerpc/cxl.txt -F: Documentation/powerpc/cxl.txt F: Documentation/ABI/testing/sysfs-class-cxl CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER @@@ -3810,8 -3763,8 +3812,8 @@@ F: drivers/leds/leds-da90??. F: drivers/mfd/da903x.c F: drivers/mfd/da90??-*.c F: drivers/mfd/da91??-*.c -F: drivers/power/da9052-battery.c -F: drivers/power/da91??-*.c +F: drivers/power/supply/da9052-battery.c +F: drivers/power/supply/da91??-*.c F: drivers/regulator/da903x.c F: drivers/regulator/da9???-regulator.[ch] F: drivers/rtc/rtc-da90??.c @@@ -3827,12 -3780,6 +3829,12 @@@ F: include/linux/regulator/da9211. F: include/sound/da[79]*.h F: sound/soc/codecs/da[79]*.[ch] +DIAMOND SYSTEMS GPIO-MM GPIO DRIVER +M: William Breathitt Gray +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/gpio/gpio-gpio-mm.c + DIGI NEO AND CLASSIC PCI PRODUCTS M: Lidza Louina M: Mark Hounschell @@@ -4458,6 -4405,7 +4460,6 @@@ F: Documentation/filesystems/ecryptfs.t F: fs/ecryptfs/ EDAC-CORE -M: Doug Thompson M: Borislav Petkov M: Mauro Carvalho Chehab M: Mauro Carvalho Chehab @@@ -4470,12 -4418,14 +4472,12 @@@ F: drivers/edac F: include/linux/edac.h EDAC-AMD64 -M: Doug Thompson M: Borislav Petkov L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/amd64_edac* EDAC-CALXEDA -M: Doug Thompson M: Robert Richter L: linux-edac@vger.kernel.org S: Maintained @@@ -4491,21 -4441,17 +4493,21 @@@ F: drivers/edac/octeon_edac EDAC-E752X M: Mark Gross -M: Doug Thompson L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/e752x_edac.c EDAC-E7XXX -M: Doug Thompson L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/e7xxx_edac.c +EDAC-FSL_DDR +M: York Sun +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/fsl_ddr_edac.* + EDAC-GHES M: Mauro Carvalho Chehab M: Mauro Carvalho Chehab @@@ -4520,11 -4466,13 +4522,11 @@@ S: Maintaine F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 -M: Jason Uhlenkott L: linux-edac@vger.kernel.org -S: Maintained +S: Orphan F: drivers/edac/i3000_edac.c EDAC-I5000 -M: Doug Thompson L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/i5000_edac.c @@@ -4588,12 -4536,6 +4590,12 @@@ L: linux-edac@vger.kernel.or S: Maintained F: drivers/edac/sb_edac.c +EDAC-SKYLAKE +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/skx_edac.c + EDAC-XGENE APPLIED MICRO (APM) X-GENE SOC EDAC M: Loc Ho @@@ -4636,13 -4578,6 +4638,13 @@@ M: Peter Jones +M: Matt Fleming +S: Maintained +F: drivers/firmware/efi/test/ + EFS FILESYSTEM W: http://aeschi.ch.eu.org/efs/ S: Orphan @@@ -4910,7 -4845,6 +4912,7 @@@ F: tools/firewire FIRMWARE LOADER (request_firmware) M: Ming Lei +M: Luis R. Rodriguez L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/firmware_class/ @@@ -5131,9 -5065,10 +5133,9 @@@ F: include/linux/fscrypto. F2FS FILE SYSTEM M: Jaegeuk Kim -M: Changman Lee -R: Chao Yu +M: Chao Yu L: linux-f2fs-devel@lists.sourceforge.net -W: http://en.wikipedia.org/wiki/F2FS +W: https://f2fs.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git S: Maintained F: Documentation/filesystems/f2fs.txt @@@ -5195,7 -5130,7 +5197,7 @@@ GCOV BASED KERNEL PROFILIN M: Peter Oberparleiter S: Maintained F: kernel/gcov/ -F: Documentation/gcov.txt +F: Documentation/dev-tools/gcov.rst GDT SCSI DISK ARRAY CONTROLLER DRIVER M: Achim Leubner @@@ -5312,13 -5247,6 +5314,13 @@@ L: linux-input@vger.kernel.or S: Maintained F: drivers/input/touchscreen/goodix.c +GPIO MOCKUP DRIVER +M: Bamvor Jian Zhang +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/gpio/gpio-mockup.c +F: tools/testing/selftests/gpio/ + GPIO SUBSYSTEM M: Linus Walleij M: Alexandre Courbot @@@ -5350,77 -5278,6 +5352,77 @@@ L: netdev@vger.kernel.or S: Maintained F: drivers/net/ethernet/aeroflex/ +GREYBUS SUBSYSTEM +M: Johan Hovold +M: Alex Elder +M: Greg Kroah-Hartman +S: Maintained +F: drivers/staging/greybus/ + +GREYBUS AUDIO PROTOCOLS DRIVERS +M: Vaibhav Agarwal +M: Mark Greer +S: Maintained +F: drivers/staging/greybus/audio_apbridgea.c +F: drivers/staging/greybus/audio_apbridgea.h +F: drivers/staging/greybus/audio_codec.c +F: drivers/staging/greybus/audio_codec.h +F: drivers/staging/greybus/audio_gb.c +F: drivers/staging/greybus/audio_manager.c +F: drivers/staging/greybus/audio_manager.h +F: drivers/staging/greybus/audio_manager_module.c +F: drivers/staging/greybus/audio_manager_private.h +F: drivers/staging/greybus/audio_manager_sysfs.c +F: drivers/staging/greybus/audio_module.c +F: drivers/staging/greybus/audio_topology.c + +GREYBUS PROTOCOLS DRIVERS +M: Rui Miguel Silva +S: Maintained +F: drivers/staging/greybus/sdio.c +F: drivers/staging/greybus/light.c +F: drivers/staging/greybus/gpio.c +F: drivers/staging/greybus/power_supply.c +F: drivers/staging/greybus/spi.c +F: drivers/staging/greybus/spilib.c + +GREYBUS PROTOCOLS DRIVERS +M: Bryan O'Donoghue +S: Maintained +F: drivers/staging/greybus/loopback.c +F: drivers/staging/greybus/timesync.c +F: drivers/staging/greybus/timesync_platform.c + +GREYBUS PROTOCOLS DRIVERS +M: Viresh Kumar +S: Maintained +F: drivers/staging/greybus/authentication.c +F: drivers/staging/greybus/bootrom.c +F: drivers/staging/greybus/firmware.h +F: drivers/staging/greybus/fw-core.c +F: drivers/staging/greybus/fw-download.c +F: drivers/staging/greybus/fw-managament.c +F: drivers/staging/greybus/greybus_authentication.h +F: drivers/staging/greybus/greybus_firmware.h +F: drivers/staging/greybus/hid.c +F: drivers/staging/greybus/i2c.c +F: drivers/staging/greybus/spi.c +F: drivers/staging/greybus/spilib.c +F: drivers/staging/greybus/spilib.h + +GREYBUS PROTOCOLS DRIVERS +M: David Lin +S: Maintained +F: drivers/staging/greybus/uart.c +F: drivers/staging/greybus/log.c + +GREYBUS PLATFORM DRIVERS +M: Vaibhav Hiremath +S: Maintained +F: drivers/staging/greybus/arche-platform.c +F: drivers/staging/greybus/arche-apb-ctrl.c +F: drivers/staging/greybus/arche_platform.h + GSPCA FINEPIX SUBDRIVER M: Frank Zago L: linux-media@vger.kernel.org @@@ -5712,6 -5569,14 +5714,14 @@@ S: Maintaine F: drivers/net/ethernet/hisilicon/ F: Documentation/devicetree/bindings/net/hisilicon*.txt + HISILICON ROCE DRIVER + M: Lijun Ou + M: Wei Hu(Xavier) + L: linux-rdma@vger.kernel.org + S: Maintained + F: drivers/infiniband/hw/hns/ + F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt + HISILICON SAS Controller M: John Garry W: http://www.hisilicon.com @@@ -5721,9 -5586,10 +5731,9 @@@ F: Documentation/devicetree/bindings/sc HOST AP DRIVER M: Jouni Malinen -L: hostap@shmoo.com (subscribers-only) L: linux-wireless@vger.kernel.org -W: http://hostap.epitest.fi/ -S: Maintained +W: http://w1.fi/hostap-driver.html +S: Obsolete F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER @@@ -5760,7 -5626,7 +5770,7 @@@ M: Sebastian Reichel +M: Jiri Kosina +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/intel-ish-hid/ + INTEL PSTATE DRIVER M: Srinivas Pandruvada M: Len Brown @@@ -6248,7 -6105,7 +6258,7 @@@ S: Supporte F: drivers/cpufreq/intel_pstate.c INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) -M: Maik Broemme +M: Maik Broemme L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt @@@ -6750,7 -6607,7 +6760,7 @@@ L: kasan-dev@googlegroups.co S: Maintained F: arch/*/include/asm/kasan.h F: arch/*/mm/kasan_init* -F: Documentation/kasan.txt +F: Documentation/dev-tools/kasan.rst F: include/linux/kasan*.h F: lib/test_kasan.c F: mm/kasan/ @@@ -6966,7 -6823,7 +6976,7 @@@ KMEMCHEC M: Vegard Nossum M: Pekka Enberg S: Maintained -F: Documentation/kmemcheck.txt +F: Documentation/dev-tools/kmemcheck.rst F: arch/x86/include/asm/kmemcheck.h F: arch/x86/mm/kmemcheck/ F: include/linux/kmemcheck.h @@@ -6975,7 -6832,7 +6985,7 @@@ F: mm/kmemcheck. KMEMLEAK M: Catalin Marinas S: Maintained -F: Documentation/kmemleak.txt +F: Documentation/dev-tools/kmemleak.rst F: include/linux/kmemleak.h F: mm/kmemleak.c F: mm/kmemleak-test.c @@@ -7588,8 -7445,9 +7598,8 @@@ F: Documentation/hwmon/max2075 F: drivers/hwmon/max20751.c MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER -M: "Hans J. Koch" L: linux-hwmon@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/hwmon/max6650 F: drivers/hwmon/max6650.c @@@ -7610,12 -7468,11 +7620,12 @@@ F: Documentation/devicetree/bindings/so F: sound/soc/codecs/max9860.* MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-pm@vger.kernel.org S: Supported -F: drivers/power/max14577_charger.c -F: drivers/power/max77693_charger.c +F: drivers/power/supply/max14577_charger.c +F: drivers/power/supply/max77693_charger.c MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS M: Javier Martinez Canillas @@@ -7627,8 -7484,7 +7637,8 @@@ F: include/dt-bindings/*/*max77802. MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org S: Supported F: drivers/*/max14577*.c @@@ -7660,12 -7516,6 +7670,12 @@@ L: linux-iio@vger.kernel.or S: Maintained F: drivers/iio/potentiometer/mcp4531.c +MEASUREMENT COMPUTING CIO-DAC IIO DRIVER +M: William Breathitt Gray +L: linux-iio@vger.kernel.org +S: Maintained +F: drivers/iio/dac/cio-dac.c + MEDIA DRIVERS FOR RENESAS - FCP M: Laurent Pinchart L: linux-media@vger.kernel.org @@@ -7818,26 -7668,13 +7828,26 @@@ W: http://www.mellanox.co Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlxsw/ +MELLANOX MLXCPLD LED DRIVER +M: Vadim Pasternak +L: linux-leds@vger.kernel.org +S: Supported +F: drivers/leds/leds-mlxcpld.c +F: Documentation/leds/leds-mlxcpld.txt + +MELLANOX PLATFORM DRIVER +M: Vadim Pasternak +L: platform-driver-x86@vger.kernel.org +S: Supported +F: arch/x86/platform/mellanox/mlx-platform.c + SOFT-ROCE DRIVER (rxe) M: Moni Shoua L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home Q: http://patchwork.kernel.org/project/linux-rdma/list/ -F: drivers/infiniband/hw/rxe/ +F: drivers/infiniband/sw/rxe/ F: include/uapi/rdma/rdma_user_rxe.h MEMBARRIER SUPPORT @@@ -7916,12 -7753,6 +7926,12 @@@ T: git git://git.monstr.eu/linux-2.6-mi S: Supported F: arch/microblaze/ +MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER +M: Richard Genoud +S: Maintained +F: drivers/tty/serial/atmel_serial.c +F: include/linux/atmel_serial.h + MICROSOFT SURFACE PRO 3 BUTTON DRIVER M: Chen Yu L: platform-driver-x86@vger.kernel.org @@@ -7970,6 -7801,7 +7980,7 @@@ Q: http://patchwork.ozlabs.org/project/ S: Supported F: drivers/net/ethernet/mellanox/mlx4/ F: include/linux/mlx4/ + F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX4 IB driver M: Yishai Hadas @@@ -7990,6 -7822,7 +8001,7 @@@ Q: http://patchwork.ozlabs.org/project/ S: Supported F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ + F: include/uapi/rdma/mlx5-abi.h MELLANOX MLX5 IB driver M: Matan Barak @@@ -8008,18 -7841,6 +8020,18 @@@ W: http://www.melexis.co S: Supported F: drivers/iio/temperature/mlx90614.c +MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi) +M: Don Brace +L: esc.storagedev@microsemi.com +L: linux-scsi@vger.kernel.org +S: Supported +F: drivers/scsi/smartpqi/smartpqi*.[ch] +F: drivers/scsi/smartpqi/Kconfig +F: drivers/scsi/smartpqi/Makefile +F: include/linux/cciss*.h +F: include/uapi/linux/cciss*.h +F: Documentation/scsi/smartpqi.txt + MN88472 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org @@@ -8145,7 -7966,6 +8157,7 @@@ MULTIFUNCTION DEVICES (MFD M: Lee Jones T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git S: Supported +F: Documentation/devicetree/bindings/mfd/ F: drivers/mfd/ F: include/linux/mfd/ @@@ -8232,16 -8052,20 +8244,16 @@@ M: Michael Schmitz @@@ -8262,6 -8086,7 +8274,7 @@@ L: linux-rdma@vger.kernel.or W: http://www.intel.com/Products/Server/Adapters/Server-Cluster/Server-Cluster-overview.htm S: Supported F: drivers/infiniband/hw/nes/ + F: include/uapi/rdma/nes-abi.h NETEM NETWORK EMULATOR M: Stephen Hemminger @@@ -8339,15 -8164,6 +8352,15 @@@ S: Maintaine W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c +NETWORKING [DSA] +M: Andrew Lunn +M: Vivien Didelot +M: Florian Fainelli +S: Maintained +F: net/dsa/ +F: include/net/dsa.h +F: drivers/net/dsa/ + NETWORKING [GENERAL] M: "David S. Miller" L: netdev@vger.kernel.org @@@ -8530,11 -8346,11 +8543,11 @@@ R: Pali Rohár @@@ -8923,7 -8739,7 +8936,7 @@@ F: drivers/oprofile F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) -M: Mark Fasheh +M: Mark Fasheh M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://ocfs2.wiki.kernel.org @@@ -9035,7 -8851,6 +9048,7 @@@ S: Supporte F: Documentation/virtual/paravirt_ops.txt F: arch/*/kernel/paravirt* F: arch/*/include/asm/paravirt.h +F: include/linux/hypervisor.h PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES M: Tim Waugh @@@ -9291,15 -9106,6 +9304,15 @@@ S: Maintaine F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt F: drivers/pci/host/pcie-hisi.c +PCIE DRIVER FOR ROCKCHIP +M: Shawn Lin +M: Wenrui Li +L: linux-pci@vger.kernel.org +L: linux-rockchip@lists.infradead.org +S: Maintained +F: Documentation/devicetree/bindings/pci/rockchip-pcie.txt +F: drivers/pci/host/pcie-rockchip.c + PCIE DRIVER FOR QUALCOMM MSM M: Stanimir Varbanov L: linux-pci@vger.kernel.org @@@ -9447,14 -9253,12 +9460,14 @@@ F: drivers/pinctrl/sh-pfc PIN CONTROLLER - SAMSUNG M: Tomasz Figa -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: drivers/pinctrl/samsung/ +F: include/dt-bindings/pinctrl/samsung.h +F: Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt PIN CONTROLLER - SINGLE M: Tony Lindgren @@@ -9545,12 -9349,16 +9558,12 @@@ F: drivers/powercap POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS M: Sebastian Reichel -M: Dmitry Eremin-Solenikov -M: David Woodhouse L: linux-pm@vger.kernel.org -T: git git://git.infradead.org/battery-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git S: Maintained -F: Documentation/devicetree/bindings/power/ -F: Documentation/devicetree/bindings/power_supply/ +F: Documentation/devicetree/bindings/power/supply/ F: include/linux/power_supply.h -F: drivers/power/ -X: drivers/power/avs/ +F: drivers/power/supply/ POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland @@@ -9886,12 -9694,6 +9899,12 @@@ T: git git://git.kernel.org/pub/scm/lin S: Supported F: drivers/net/wireless/ath/ath10k/ +QUALCOMM EMAC GIGABIT ETHERNET DRIVER +M: Timur Tabi +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/qualcomm/emac/ + QUALCOMM HEXAGON ARCHITECTURE M: Richard Kuo L: linux-hexagon@vger.kernel.org @@@ -10112,12 -9914,6 +10125,12 @@@ F: drivers/rpmsg F: Documentation/rpmsg.txt F: include/linux/rpmsg.h +RENESAS CLOCK DRIVERS +M: Geert Uytterhoeven +L: linux-renesas-soc@vger.kernel.org +S: Supported +F: drivers/clk/renesas/ + RENESAS ETHERNET DRIVERS R: Sergei Shtylyov L: netdev@vger.kernel.org @@@ -10297,8 -10093,8 +10310,8 @@@ S: Supporte F: drivers/s390/cio/ S390 DASD DRIVER -M: Stefan Weinhuber -M: Stefan Haberland +M: Stefan Haberland +M: Jan Hoeppner L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported @@@ -10391,7 -10187,7 +10404,7 @@@ S: Maintaine F: drivers/platform/x86/samsung-laptop.c SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sangbeom Kim M: Sylwester Nawrocki L: alsa-devel@alsa-project.org (moderated for non-subscribers) @@@ -10406,8 -10202,7 +10419,8 @@@ F: drivers/video/fbdev/s3c-fb. SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS M: Sangbeom Kim -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Supported @@@ -10462,23 -10257,9 +10475,23 @@@ F: drivers/nfc/s3fwrn SAMSUNG SOC CLOCK DRIVERS M: Sylwester Nawrocki M: Tomasz Figa +M: Chanwoo Choi S: Supported L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) F: drivers/clk/samsung/ +F: include/dt-bindings/clock/exynos*.h +F: Documentation/devicetree/bindings/clock/exynos*.txt + +SAMSUNG SPI DRIVERS +M: Kukjin Kim +M: Krzysztof Kozlowski +M: Andi Shyti +L: linux-spi@vger.kernel.org +L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/spi/spi-samsung.txt +F: drivers/spi/spi-s3c* +F: include/linux/platform_data/spi-s3c64xx.h SAMSUNG SXGBE DRIVERS M: Byungho An @@@ -10565,8 -10346,8 +10578,8 @@@ F: drivers/thunderbolt TI BQ27XXX POWER SUPPLY DRIVER R: Andrew F. Davis F: include/linux/power/bq27xxx_battery.h -F: drivers/power/bq27xxx_battery.c -F: drivers/power/bq27xxx_battery_i2c.c +F: drivers/power/supply/bq27xxx_battery.c +F: drivers/power/supply/bq27xxx_battery_i2c.c TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz @@@ -10794,12 -10575,12 +10807,12 @@@ S: Maintaine F: drivers/misc/phantom.c F: include/uapi/linux/phantom.h -SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER -M: Jayamohan Kallickal -M: Ketan Mukadam -M: John Soni Jose +Emulex 10Gbps iSCSI - OneConnect DRIVER +M: Subbu Seetharaman +M: Ketan Mukadam +M: Jitendra Bhivare L: linux-scsi@vger.kernel.org -W: http://www.avagotech.com +W: http://www.broadcom.com S: Supported F: drivers/scsi/be2iscsi/ @@@ -10821,6 -10602,7 +10834,7 @@@ L: linux-rdma@vger.kernel.or W: http://www.emulex.com S: Supported F: drivers/infiniband/hw/ocrdma/ + F: include/uapi/rdma/ocrdma-abi.h SFC NETWORK DRIVER M: Solarflare linux maintainers @@@ -11326,7 -11108,6 +11340,7 @@@ F: Documentation/spi F: drivers/spi/ F: include/linux/spi/ F: include/uapi/linux/spi/ +F: tools/spi/ SPIDERNET NETWORK DRIVER for CELL M: Ishizaki Kou @@@ -11397,7 -11178,6 +11411,7 @@@ F: drivers/staging/media/lirc STAGING - LUSTRE PARALLEL FILESYSTEM M: Oleg Drokin M: Andreas Dilger +M: James Simmons L: lustre-devel@lists.lustre.org (moderated for non-subscribers) W: http://wiki.lustre.org/ S: Maintained @@@ -11424,6 -11204,13 +11438,6 @@@ M: Florian Schilhabel -M: Jes Sorensen -L: linux-wireless@vger.kernel.org -S: Maintained -F: drivers/staging/rtl8723au/ - STAGING - SILICON MOTION SM750 FRAME BUFFER DRIVER M: Sudip Mukherjee M: Teddy Wang @@@ -11454,8 -11241,12 +11468,8 @@@ S: Odd Fixe F: drivers/staging/vt665?/ STAGING - WILC1000 WIFI DRIVER -M: Johnny Kim -M: Austin Shin -M: Chris Park -M: Tony Cho -M: Glen Lee -M: Leo Kim +M: Aditya Shankar +M: Ganesh Krishna L: linux-wireless@vger.kernel.org S: Supported F: drivers/staging/wilc1000/ @@@ -11563,14 -11354,6 +11577,14 @@@ T: git git://git.kernel.org/pub/scm/lin S: Supported F: drivers/mfd/syscon.c +SYSTEM RESET/SHUTDOWN DRIVERS +M: Sebastian Reichel +L: linux-pm@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git +S: Maintained +F: Documentation/devicetree/bindings/power/reset/ +F: drivers/power/reset/ + SYSV FILESYSTEM M: Christoph Hellwig S: Maintained @@@ -11831,7 -11614,7 +11845,7 @@@ F: Documentation/devicetree/bindings/th THERMAL/CPU_COOLING M: Amit Daniel Kachhap M: Viresh Kumar -M: Javi Merino +M: Javi Merino L: linux-pm@vger.kernel.org S: Supported F: Documentation/thermal/cpu-cooling-api.txt @@@ -11919,7 -11702,7 +11933,7 @@@ F: include/linux/platform_data/lp855x. TI LP8727 CHARGER DRIVER M: Milo Kim S: Maintained -F: drivers/power/lp8727_charger.c +F: drivers/power/supply/lp8727_charger.c F: include/linux/platform_data/lp8727.h TI LP8788 MFD DRIVER @@@ -11928,7 -11711,7 +11942,7 @@@ S: Maintaine F: drivers/iio/adc/lp8788_adc.c F: drivers/leds/leds-lp8788.c F: drivers/mfd/lp8788*.c -F: drivers/power/lp8788-charger.c +F: drivers/power/supply/lp8788-charger.c F: drivers/regulator/lp8788-*.c F: include/linux/mfd/lp8788*.h @@@ -12197,6 -11980,12 +12211,6 @@@ S: Maintaine F: drivers/tc/ F: include/linux/tc.h -U14-34F SCSI DRIVER -M: Dario Ballabio -L: linux-scsi@vger.kernel.org -S: Maintained -F: drivers/scsi/u14-34f.c - UBI FILE SYSTEM (UBIFS) M: Richard Weinberger M: Artem Bityutskiy @@@ -12392,7 -12181,7 +12406,7 @@@ S: Maintaine F: drivers/net/usb/lan78xx.* USB MASS STORAGE DRIVER -M: Matthew Dharm +M: Alan Stern L: linux-usb@vger.kernel.org L: usb-storage@lists.one-eyed-alien.net S: Maintained @@@ -12476,7 -12265,6 +12490,7 @@@ F: drivers/net/usb/rtl8150. USB SERIAL SUBSYSTEM M: Johan Hovold L: linux-usb@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git S: Maintained F: Documentation/usb/usb-serial.txt F: drivers/usb/serial/ @@@ -12490,7 -12278,6 +12504,7 @@@ F: drivers/net/usb/smsc75xx. USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning +M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* @@@ -12597,6 -12384,7 +12611,6 @@@ F: fs/hostfs F: fs/hppfs/ USERSPACE I/O (UIO) -M: "Hans J. Koch" M: Greg Kroah-Hartman S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git @@@ -12778,7 -12566,7 +12792,7 @@@ F: include/linux/if_*vlan. F: net/8021q/ VLYNQ BUS -M: Florian Fainelli +M: Florian Fainelli L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Maintained F: drivers/vlynq/vlynq.c @@@ -12924,6 -12712,12 +12938,6 @@@ F: drivers/watchdog F: include/linux/watchdog.h F: include/uapi/linux/watchdog.h -WD7000 SCSI DRIVER -M: Miroslav Zagorac -L: linux-scsi@vger.kernel.org -S: Maintained -F: drivers/scsi/wd7000.c - WIIMOTE HID DRIVER M: David Herrmann L: linux-input@vger.kernel.org @@@ -12993,7 -12787,7 +13007,7 @@@ F: drivers/input/touchscreen/wm97*. F: drivers/mfd/arizona* F: drivers/mfd/wm*.c F: drivers/mfd/cs47l24* -F: drivers/power/wm83*.c +F: drivers/power/supply/wm83*.c F: drivers/rtc/rtc-wm83*.c F: drivers/regulator/wm8*.c F: drivers/video/backlight/wm83*_bl.c @@@ -13145,10 -12939,11 +13159,10 @@@ F: arch/x86/xen/*swiotlb F: drivers/xen/*swiotlb* XFS FILESYSTEM -P: Silicon Graphics Inc M: Dave Chinner -M: xfs@oss.sgi.com -L: xfs@oss.sgi.com -W: http://oss.sgi.com/projects/xfs +M: linux-xfs@vger.kernel.org +L: linux-xfs@vger.kernel.org +W: http://xfs.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git S: Supported F: Documentation/filesystems/xfs.txt diff --combined drivers/infiniband/core/cma.c index 5f65a78b27c9,8954792f1acc..36bf50ebb187 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@@ -2462,24 -2462,18 +2462,24 @@@ static int cma_resolve_iboe_route(struc if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; @@@ -4369,7 -4363,7 +4369,7 @@@ static int __init cma_init(void { int ret; - cma_wq = create_singlethread_workqueue("rdma_cm"); + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; diff --combined drivers/infiniband/core/multicast.c index 51c79b2fb0b8,49ecde98a3d9..e51b739f6ea3 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@@ -106,6 -106,7 +106,6 @@@ struct mcast_group atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; - int query_id; u16 pkey_index; u8 leave_state; int retries; @@@ -339,7 -340,11 +339,7 @@@ static int send_join(struct mcast_grou member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) @@@ -359,7 -364,11 +359,7 @@@ IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, @@@ -873,7 -882,7 +873,7 @@@ int mcast_init(void { int ret; - mcast_wq = create_singlethread_workqueue("ib_mcast"); + mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; diff --combined drivers/infiniband/hw/cxgb4/cm.c index 71c8867ef66b,c9661d8f11dc..f1510cc76d2d --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@@ -49,7 -49,6 +49,7 @@@ #include +#include #include "iw_cxgb4.h" #include "clip_tbl.h" @@@ -240,13 -239,15 +240,13 @@@ int c4iw_ofld_send(struct c4iw_rdev *rd static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } @@@ -332,8 -333,6 +332,8 @@@ static void remove_ep_tid(struct c4iw_e spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } @@@ -465,6 -464,72 +465,6 @@@ static struct net_device *get_real_dev( return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@@ -639,32 -704,56 +639,32 @@@ static int send_flowc(struct c4iw_ep *e static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); -} + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static int send_connect(struct c4iw_ep *ep) @@@ -679,7 -768,7 +679,7 @@@ u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@@ -726,10 -815,10 +726,10 @@@ } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@@ -1356,9 -1445,9 +1356,9 @@@ static void established_upcall(struct c static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@@ -1375,12 -1464,15 +1375,12 @@@ if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } @@@ -1735,12 -1827,8 +1735,12 @@@ static int process_mpa_request(struct c (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) @@@ -1878,7 -1966,7 +1878,7 @@@ static int send_fw_act_open_req(struct struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@@ -1903,10 -1991,10 +1903,10 @@@ htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@@ -1960,6 -2048,15 +1960,6 @@@ static inline int act_open_has_tid(int status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@@ -2016,10 -2113,8 +2016,10 @@@ static int import_ep(struct c4iw_ep *ep } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, @@@ -2115,21 -2210,16 +2115,21 @@@ static int c4iw_reconnect(struct c4iw_e /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@@ -2201,7 -2291,7 +2201,7 @@@ static int act_open_rpl(struct c4iw_de PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@@ -2328,7 -2418,7 +2328,7 @@@ static int accept_cr(struct c4iw_ep *ep unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@@ -2349,10 -2439,10 +2349,10 @@@ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@@ -2424,6 -2514,42 +2424,6 @@@ static void reject_cr(struct c4iw_dev * return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@@ -2452,8 -2578,8 +2452,8 @@@ goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@@ -2461,19 -2587,18 +2461,19 @@@ , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@@ -2706,18 -2831,18 +2706,18 @@@ static int peer_abort(struct c4iw_dev * { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@@ -2810,9 -2935,11 +2810,9 @@@ release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) @@@ -3009,7 -3136,7 +3009,7 @@@ int c4iw_accept_cr(struct iw_cm_id *cm_ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; @@@ -3244,11 -3371,9 +3244,11 @@@ int c4iw_connect(struct iw_cm_id *cm_id PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@@ -3267,12 -3392,10 +3267,12 @@@ __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); @@@ -3914,9 -4037,8 +3914,9 @@@ static int rx_pkt(struct c4iw_dev *dev ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); @@@ -4191,7 -4313,7 +4191,7 @@@ static int peer_abort_intr(struct c4iw_ kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@@ -4235,7 -4357,7 +4235,7 @@@ int __init c4iw_cm_init(void spin_lock_init(&timeout_lock); skb_queue_head_init(&rxq); - workq = create_singlethread_workqueue("iw_cxgb4"); + workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); if (!workq) return -ENOMEM; diff --combined drivers/infiniband/hw/cxgb4/cq.c index ac926c942fee,bc522a9b2bfa..867b8cf82be8 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@@ -666,6 -666,18 +666,18 @@@ skip_cqe return ret; } + static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) + { + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); + } + /* * Get one cq entry from c4iw and map it to openib. * @@@ -721,6 -733,7 +733,7 @@@ static int c4iw_poll_cq_one(struct c4iw CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; + invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@@ -746,6 -759,10 +759,10 @@@ break; case FW_RI_FAST_REGISTER: wc->opcode = IB_WC_REG_MR; + + /* Invalidate the MR if the fastreg failed */ + if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) + invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " @@@ -1016,15 -1033,15 +1033,15 @@@ int c4iw_resize_cq(struct ib_cq *cq, in int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --combined drivers/infiniband/hw/cxgb4/iw_cxgb4.h index cdcf3eeb6f4a,f83604b2f82d..7e7f79e55006 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@@ -58,7 -58,7 +58,7 @@@ #include "cxgb4.h" #include "cxgb4_uld.h" #include "l2t.h" - #include "user.h" + #include #define DRV_NAME "iw_cxgb4" #define MOD DRV_NAME ":" @@@ -263,7 -263,6 +263,7 @@@ struct c4iw_dev struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@@ -882,6 -881,15 +882,6 @@@ static inline struct c4iw_listen_ep *to return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<pbl; + + fr->r2 = cpu_to_be32(0); + fr->stag = cpu_to_be32(mhp->ibmr.rkey); + + fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) | + FW_RI_TPTE_STAGSTATE_V(1) | + FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) | + FW_RI_TPTE_PDID_V(mhp->attr.pdid)); + fr->tpte.locread_to_qpid = cpu_to_be32( + FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) | + FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) | + FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12)); + fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V( + PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3)); + fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0); + fr->tpte.len_hi = cpu_to_be32(0); + fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length); + fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + + p[0] = cpu_to_be64((u64)mhp->mpl[0]); + p[1] = cpu_to_be64((u64)mhp->mpl[1]); + + *len16 = DIV_ROUND_UP(sizeof(*fr), 16); + } + static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported) + struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, + bool dsgl_supported) { - struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; @@@ -674,16 -706,19 +706,19 @@@ return 0; } - static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, - u8 *len16) + static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { + struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); + + mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); return 0; } -void _free_qp(struct kref *kref) +static void _free_qp(struct kref *kref) { struct c4iw_qp *qhp; @@@ -816,18 -851,32 +851,32 @@@ int c4iw_post_send(struct ib_qp *ibqp, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_REG_MR: - fw_opcode = FW_RI_FR_NSMR_WR; + case IB_WR_REG_MR: { + struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); + swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + !mhp->attr.state && mhp->mpl_len <= 2) { + fw_opcode = FW_RI_FR_NSMR_TPTE_WR; + build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), + mhp, &len16); + } else { + fw_opcode = FW_RI_FR_NSMR_WR; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), + mhp, &len16, + qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (err) + break; + } + mhp->attr.state = 1; break; + } case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(wqe, wr, &len16); + err = build_inv_stag(qhp->rhp, wqe, wr, &len16); break; default: PDBG("%s post of type=%d TBD!\n", __func__, diff --combined drivers/infiniband/hw/cxgb4/t4.h index 02173f4315fa,b2bfbb1eef1a..862381aa83c8 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@@ -95,6 -95,7 +95,7 @@@ union t4_wr struct fw_ri_rdma_read_wr read; struct fw_ri_bind_mw_wr bind; struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; @@@ -170,7 -171,7 +171,7 @@@ struct t4_cqe __be32 msn; } rcqe; struct { - u32 nada1; + u32 stag; u16 nada2; u16 cidx; } scqe; @@@ -232,6 -233,7 +233,7 @@@ /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) + #define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) /* generic accessor macros */ #define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) @@@ -634,11 -636,6 +636,11 @@@ static inline int t4_valid_cqe(struct t return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; diff --combined drivers/infiniband/hw/hfi1/verbs.c index f2f6b5a78e0e,f803f7b5ef5d..4b7a16ceb362 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@@ -76,7 -76,7 +76,7 @@@ static unsigned int hfi1_max_ahs = 0xFF module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); -unsigned int hfi1_max_cqes = 0x2FFFF; +unsigned int hfi1_max_cqes = 0x2FFFFF; module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); MODULE_PARM_DESC(max_cqes, "Maximum number of completion queue entries to support"); @@@ -89,7 -89,7 +89,7 @@@ unsigned int hfi1_max_qp_wrs = 0x3FFF module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); -unsigned int hfi1_max_qps = 16384; +unsigned int hfi1_max_qps = 32768; module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); @@@ -335,7 -335,7 +335,7 @@@ const u8 hdr_len_by_opcode[256] = [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, - [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, @@@ -403,28 -403,6 +403,28 @@@ static const opcode_handler opcode_hand [IB_OPCODE_CNP] = &hfi1_cnp_rcv }; +#define OPMASK 0x1f + +static const u32 pio_opmask[BIT(3)] = { + /* RC */ + [IB_OPCODE_RC >> 5] = + BIT(RC_OP(SEND_ONLY) & OPMASK) | + BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | + BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(COMPARE_SWAP) & OPMASK) | + BIT(RC_OP(FETCH_ADD) & OPMASK), + /* UC */ + [IB_OPCODE_UC >> 5] = + BIT(UC_OP(SEND_ONLY) & OPMASK) | + BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), +}; + /* * System image GUID. */ @@@ -589,7 -567,7 +589,7 @@@ static inline opcode_handler qp_ok(int void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; @@@ -741,7 -719,7 +741,7 @@@ static void verbs_sdma_complete if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct hfi1_ib_header *hdr; + struct ib_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@@ -770,7 -748,7 +770,7 @@@ static int wait_kmem(struct hfi1_ibdev qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@@ -981,7 -959,7 +981,7 @@@ static int pio_wait(struct rvt_qp *qp was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); @@@ -1222,7 -1200,7 +1222,7 @@@ static inline send_routine get_send_rou { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ib_header *h = &tx->phdr.hdr; + struct ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@@ -1232,18 -1210,22 +1232,18 @@@ case IB_QPT_GSI: case IB_QPT_UD: break; - case IB_QPT_RC: - if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0 && - !sdma_txreq_built(&tx->txreq)) - return dd->process_pio_send; - break; case IB_QPT_UC: + case IB_QPT_RC: { + u8 op = get_opcode(h); + if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; + } default: break; } @@@ -1262,8 -1244,8 +1262,8 @@@ int hfi1_verbs_send(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; - struct hfi1_ib_header *hdr; + struct ib_other_headers *ohdr; + struct ib_header *hdr; send_routine sr; int ret; u8 lnh; @@@ -1441,7 -1423,8 +1441,8 @@@ static int modify_device(struct ib_devi } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; @@@ -1772,7 -1755,7 +1773,7 @@@ void hfi1_cnp_rcv(struct hfi1_packet *p { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; @@@ -1799,7 -1782,7 +1800,7 @@@ return; } - sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + sc5 = hdr2sc(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --combined drivers/infiniband/hw/i40iw/i40iw_cm.c index 7ca0638579c0,c490f8d49864..85637696f6e9 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@@ -535,8 -535,8 +535,8 @@@ static struct i40iw_puda_buf *i40iw_for buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); @@@ -3166,8 -3166,11 +3166,11 @@@ void i40iw_setup_cm_core(struct i40iw_d spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); - cm_core->event_wq = create_singlethread_workqueue("iwewq"); - cm_core->disconn_wq = create_singlethread_workqueue("iwdwq"); + cm_core->event_wq = alloc_ordered_workqueue("iwewq", + WQ_MEM_RECLAIM); + + cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", + WQ_MEM_RECLAIM); } /** @@@ -3346,6 -3349,26 +3349,6 @@@ int i40iw_cm_disconn(struct i40iw_qp *i return 0; } -/** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - /** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection @@@ -3618,7 -3641,7 +3621,7 @@@ int i40iw_accept(struct iw_cm_id *cm_id } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) diff --combined drivers/infiniband/hw/i40iw/i40iw_main.c index 445e230d5ff8,798335fa3105..ac2f3cd9478c --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@@ -100,7 -100,7 +100,7 @@@ static struct notifier_block i40iw_net_ .notifier_call = i40iw_net_event }; -static int i40iw_notifiers_registered; +static atomic_t i40iw_notifiers_registered; /** * i40iw_find_i40e_handler - find a handler given a client info @@@ -1342,11 -1342,12 +1342,11 @@@ exit */ static void i40iw_register_notifiers(void) { - if (!i40iw_notifiers_registered) { + if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { register_inetaddr_notifier(&i40iw_inetaddr_notifier); register_inet6addr_notifier(&i40iw_inetaddr6_notifier); register_netevent_notifier(&i40iw_net_notifier); } - i40iw_notifiers_registered++; } /** @@@ -1428,7 -1429,8 +1428,7 @@@ static void i40iw_deinit_device(struct i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: - if (i40iw_notifiers_registered > 0) { - i40iw_notifiers_registered--; + if (!atomic_dec_return(&i40iw_notifiers_registered)) { unregister_netevent_notifier(&i40iw_net_notifier); unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); @@@ -1556,10 -1558,6 +1556,10 @@@ static int i40iw_open(struct i40e_info enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; @@@ -1615,7 -1613,7 +1615,7 @@@ status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) break; - iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch"); + iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM); i40iw_register_notifiers(); iwdev->init_state = INET_NOTIFIER; status = i40iw_add_mac_ip(iwdev); diff --combined drivers/infiniband/hw/mlx4/cq.c index 5df63dacaaa3,2f0b4eed7eae..1ea686b9e0f9 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@@ -37,7 -37,7 +37,7 @@@ #include #include "mlx4_ib.h" - #include "user.h" + #include static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { @@@ -576,8 -576,8 +576,8 @@@ static int mlx4_ib_ipoib_csum_ok(__be1 checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@@ -600,6 -600,8 +600,6 @@@ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@@ -687,6 -689,12 +687,6 @@@ repoll is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && - is_send)) { - pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; - } - /* Resize CQ in progress */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { if (cq->resize_buf) { @@@ -712,6 -720,12 +712,6 @@@ */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); - if (unlikely(!mqp)) { - pr_warn("CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@@ -724,6 -738,11 +724,6 @@@ /* SRQ is also in the radix tree */ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, srq_num); - if (unlikely(!msrq)) { - pr_warn("CQ %06x with entry for unknown SRQN %06x\n", - cq->mcq.cqn, srq_num); - return -EINVAL; - } } if (is_send) { @@@ -833,11 -852,9 +833,11 @@@ if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); @@@ -874,6 -891,7 +874,6 @@@ int mlx4_ib_poll_cq(struct ib_cq *ibcq struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; - int err = 0; struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); @@@ -883,7 -901,8 +883,7 @@@ } for (npolled = 0; npolled < num_entries; ++npolled) { - err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) break; } @@@ -892,7 -911,10 +892,7 @@@ out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return npolled; } int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --combined drivers/infiniband/hw/mlx4/mad.c index 0f21c3a25552,1301a1db958c..1672907ff219 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@@ -230,6 -230,8 +230,8 @@@ static void smp_snoop(struct ib_device mad->mad_hdr.method == IB_MGMT_METHOD_SET) switch (mad->mad_hdr.attr_id) { case IB_SMP_ATTR_PORT_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; lid = be16_to_cpu(pinfo->lid); @@@ -245,6 -247,8 +247,8 @@@ break; case IB_SMP_ATTR_PKEY_TABLE: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; if (!mlx4_is_mfunc(dev->dev)) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); @@@ -281,6 -285,8 +285,8 @@@ break; case IB_SMP_ATTR_GUID_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, @@@ -296,6 -302,26 +302,26 @@@ } break; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV && + dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT) + return; + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port_num, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64); + } + break; + default: break; } @@@ -345,7 -371,8 +371,8 @@@ static void node_desc_override(struct i mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } @@@ -805,8 -832,7 +832,7 @@@ static int ib_process_mad(struct ib_dev return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) - smp_snoop(ibdev, port_num, in_mad, prev_lid); + smp_snoop(ibdev, port_num, in_mad, prev_lid); /* slaves get node desc from FW */ if (!mlx4_is_slave(to_mdev(ibdev)->dev)) node_desc_override(ibdev, out_mad); @@@ -1037,6 -1063,23 +1063,23 @@@ static void handle_client_rereg_event(s MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); } } + + /* Update the sl to vl table from inside client rereg + * only if in secure-host mode (snooping is not possible) + * and the sl-to-vl change event is not generated by FW. + */ + if (!mlx4_is_slave(dev->dev) && + dev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) { + if (mlx4_is_master(dev->dev)) + /* already in work queue from mlx4_ib_event queueing + * mlx4_handle_port_mgmt_change_event, which calls + * this procedure. Therefore, call sl2vl_update directly. + */ + mlx4_ib_sl2vl_update(dev, port_num); + else + mlx4_sched_ib_sl2vl_update_work(dev, port_num); + } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); } @@@ -1128,27 -1171,6 +1171,27 @@@ void handle_port_mgmt_change_event(stru /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@@ -1176,6 -1198,24 +1219,24 @@@ handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); } break; + + case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = + eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64); + } + break; default: pr_warn("Unsupported subtype 0x%x for " "Port Management Change event\n", eqe->subtype); @@@ -1918,7 -1958,7 +1979,7 @@@ static int create_pv_resources(struct i goto err_buf; } - ctx->pd = ib_alloc_pd(ctx->ib_dev); + ctx->pd = ib_alloc_pd(ctx->ib_dev, 0); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); pr_err("Couldn't create tunnel PD (%d)\n", ret); @@@ -2091,7 -2131,7 +2152,7 @@@ static int mlx4_ib_alloc_demux_ctx(stru } snprintf(name, sizeof name, "mlx4_ibt%d", port); - ctx->wq = create_singlethread_workqueue(name); + ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); ret = -ENOMEM; @@@ -2099,7 -2139,7 +2160,7 @@@ } snprintf(name, sizeof name, "mlx4_ibud%d", port); - ctx->ud_wq = create_singlethread_workqueue(name); + ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); ret = -ENOMEM; @@@ -2223,8 -2263,6 +2284,8 @@@ int mlx4_ib_init_sriov(struct mlx4_ib_d if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --combined drivers/infiniband/hw/mlx4/main.c index 87ba9bca4181,1811eb5b6aab..b597e8227591 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@@ -55,7 -55,7 +55,7 @@@ #include #include "mlx4_ib.h" - #include "user.h" + #include #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "2.2-1" @@@ -832,6 -832,66 +832,66 @@@ static int mlx4_ib_query_gid(struct ib_ return ret; } + static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl) + { + union sl2vl_tbl_to_u64 sl2vl64; + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; + int err = -ENOMEM; + int jj; + + if (mlx4_is_slave(to_mdev(ibdev)->dev)) { + *sl2vl_tbl = 0; + return 0; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE; + in_mad->attr_mod = 0; + + if (mlx4_is_mfunc(to_mdev(ibdev)->dev)) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + + for (jj = 0; jj < 8; jj++) + sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj]; + *sl2vl_tbl = sl2vl64.sl64; + + out: + kfree(in_mad); + kfree(out_mad); + return err; + } + + static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev) + { + u64 sl2vl; + int i; + int err; + + for (i = 1; i <= mdev->dev->caps.num_ports; i++) { + if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + continue; + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl); + if (err) { + pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n", + i, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[i - 1], sl2vl); + } + } + int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { @@@ -886,7 -946,7 +946,7 @@@ static int mlx4_ib_modify_device(struc return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* @@@ -897,7 -957,7 +957,7 @@@ if (IS_ERR(mailbox)) return 0; - memcpy(mailbox->buf, props->node_desc, 64); + memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@@ -1259,7 -1319,7 +1319,7 @@@ static struct ib_xrcd *mlx4_ib_alloc_xr if (err) goto err1; - xrcd->pd = ib_alloc_pd(ibdev); + xrcd->pd = ib_alloc_pd(ibdev, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; @@@ -1361,6 -1421,19 +1421,19 @@@ struct mlx4_ib_steering union ib_gid gid; }; + #define LAST_ETH_FIELD vlan_tag + #define LAST_IB_FIELD sl + #define LAST_IPV4_FIELD dst_ip + #define LAST_TCP_UDP_FIELD src_port + + /* Field is the last supported field */ + #define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(struct mlx4_dev *dev, u32 qp_num, union ib_flow_spec *ib_spec, @@@ -1370,6 -1443,9 +1443,9 @@@ switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, ETH_ALEN); @@@ -1379,6 -1455,9 +1455,9 @@@ mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; break; case IB_FLOW_SPEC_IB: + if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IB; mlx4_spec->ib.l3_qpn = cpu_to_be32(qp_num); @@@ -1388,6 -1467,9 +1467,9 @@@ case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IPV4; mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; @@@ -1397,6 -1479,9 +1479,9 @@@ case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; + type = ib_spec->type == IB_FLOW_SPEC_TCP ? MLX4_NET_TRANS_RULE_ID_TCP : MLX4_NET_TRANS_RULE_ID_UDP; @@@ -2000,7 -2085,7 +2085,7 @@@ static int init_node_data(struct mlx4_i if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; @@@ -2202,9 -2287,6 +2287,9 @@@ static int mlx4_ib_alloc_diag_counters( bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) @@@ -2653,6 -2735,7 +2738,7 @@@ static void *mlx4_ib_add(struct mlx4_de if (init_node_data(ibdev)) goto err_map; + mlx4_init_sl2vl_tbl(ibdev); for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); @@@ -3101,6 -3184,47 +3187,47 @@@ static void handle_bonded_port_state_ev ib_dispatch_event(&ibev); } + void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port) + { + u64 sl2vl; + int err; + + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl); + if (err) { + pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n", + port, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[port - 1], sl2vl); + } + + static void ib_sl2vl_update_work(struct work_struct *work) + { + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *mdev = ew->ib_dev; + int port = ew->port; + + mlx4_ib_sl2vl_update(mdev, port); + + kfree(ew); + } + + void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port) + { + struct ib_event_work *ew; + + ew = kmalloc(sizeof(*ew), GFP_ATOMIC); + if (ew) { + INIT_WORK(&ew->work, ib_sl2vl_update_work); + ew->port = port; + ew->ib_dev = ibdev; + queue_work(wq, &ew->work); + } else { + pr_err("failed to allocate memory for sl2vl update work\n"); + } + } + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@@ -3131,10 -3255,14 +3258,14 @@@ case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; - if (mlx4_is_master(dev) && + if (!mlx4_is_slave(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { - mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (mlx4_is_master(dev)) + mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) + mlx4_sched_ib_sl2vl_update_work(ibdev, p); } ibev.event = IB_EVENT_PORT_ACTIVE; break; @@@ -3222,7 -3350,7 +3353,7 @@@ static int __init mlx4_ib_init(void { int err; - wq = create_singlethread_workqueue("mlx4_ib"); + wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM); if (!wq) return -ENOMEM; diff --combined drivers/infiniband/hw/mlx4/mcg.c index 097bfcc4ee99,7d30be0f287b..a21d37f02f35 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@@ -489,7 -489,7 +489,7 @@@ static u8 get_leave_state(struct mcast_ if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@@ -564,8 -564,8 +564,8 @@@ static void mlx4_ib_mcg_timeout_handler } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@@ -605,7 -605,7 +605,7 @@@ static int handle_leave_req(struct mcas static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@@ -690,8 -690,8 +690,8 @@@ static void mlx4_ib_mcg_work_handler(st u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@@ -710,7 -710,7 +710,7 @@@ process_requests req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent @@@ -1045,7 -1045,7 +1045,7 @@@ int mlx4_ib_mcg_port_init(struct mlx4_i atomic_set(&ctx->tid, 0); sprintf(name, "mlx4_ib_mcg%d", ctx->port); - ctx->mcg_wq = create_singlethread_workqueue(name); + ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->mcg_wq) return -ENOMEM; @@@ -1246,7 -1246,7 +1246,7 @@@ void clean_vf_mcast(struct mlx4_ib_demu int mlx4_ib_mcg_init(void) { - clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); + clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM); if (!clean_wq) return -ENOMEM; diff --combined drivers/infiniband/hw/mlx4/mlx4_ib.h index 686ab48ff644,8db7cb1a3716..35141f451e5c --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@@ -448,7 -448,7 +448,7 @@@ struct mlx4_ib_demux_ctx struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ @@@ -570,6 -570,7 +570,7 @@@ struct mlx4_ib_dev struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; + atomic64_t sl2vl[MLX4_MAX_PORTS]; struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; @@@ -600,6 -601,7 +601,7 @@@ struct ib_event_work struct work_struct work; struct mlx4_ib_dev *ib_dev; struct mlx4_eqe ib_eqe; + int port; }; struct mlx4_ib_qp_tunnel_init_attr { @@@ -883,4 -885,9 +885,9 @@@ int mlx4_ib_rereg_user_mr(struct ib_mr int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, u8 port_num, int index); + void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port); + + void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); + #endif /* MLX4_IB_H */ diff --combined drivers/infiniband/hw/mlx4/qp.c index 7fb9629bd12b,16f654dc8a46..570bc866b1d6 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@@ -47,7 -47,7 +47,7 @@@ #include #include "mlx4_ib.h" - #include "user.h" + #include static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); @@@ -2405,6 -2405,22 +2405,22 @@@ static int build_sriov_qp0_header(struc return 0; } + static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) + { + union sl2vl_tbl_to_u64 tmp_vltab; + u8 vl; + + if (sl > 15) + return 0xf; + tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]); + vl = tmp_vltab.sl8[sl >> 1]; + if (sl & 1) + vl &= 0x0f; + else + vl >>= 4; + return vl; + } + #define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) @@@ -2493,27 -2509,24 +2509,27 @@@ sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); @@@ -2590,7 -2603,12 +2606,12 @@@ sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + sqp->qp.port); + if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } diff --combined drivers/infiniband/hw/mlx5/cq.c index 5de9a65f53bc,1188fef08450..79d017baf6f4 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@@ -35,7 -35,6 +35,6 @@@ #include #include #include "mlx5_ib.h" - #include "user.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@@ -553,6 -552,12 +552,6 @@@ repoll * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@@ -613,6 -618,13 +612,6 @@@ read_lock(&dev->mdev->priv.mkey_table.lock); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmkey)) { - read_unlock(&dev->mdev->priv.mkey_table.lock); - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", - cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); - return -EINVAL; - } - mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@@ -663,6 -675,7 +662,6 @@@ int mlx5_ib_poll_cq(struct ib_cq *ibcq unsigned long flags; int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@@ -674,7 -687,8 +673,7 @@@ soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); - if (err) + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } @@@ -683,7 -697,10 +682,7 @@@ out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return soft_polled + npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --combined drivers/infiniband/hw/mlx5/main.c index 551aa0e789aa,f4160d56dc4f..22174774dbb8 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@@ -37,6 -37,7 +37,6 @@@ #include #include #include -#include #if defined(CONFIG_X86) #include #endif @@@ -53,7 -54,6 +53,6 @@@ #include #include #include - #include "user.h" #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" @@@ -106,13 -106,42 +105,42 @@@ static int mlx5_netdev_event(struct not struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, roce.nb); - if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) - return NOTIFY_DONE; + switch (event) { + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + write_lock(&ibdev->roce.netdev_lock); + if (ndev->dev.parent == &ibdev->mdev->pdev->dev) + ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? + NULL : ndev; + write_unlock(&ibdev->roce.netdev_lock); + break; + + case NETDEV_UP: + case NETDEV_DOWN: { + struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + struct net_device *upper = NULL; + + if (lag_ndev) { + upper = netdev_master_upper_dev_get(lag_ndev); + dev_put(lag_ndev); + } + + if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) + && ibdev->ib_active) { + struct ib_event ibev = {0}; + + ibev.device = &ibdev->ib_dev; + ibev.event = (event == NETDEV_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + ibev.element.port_num = 1; + ib_dispatch_event(&ibev); + } + break; + } - write_lock(&ibdev->roce.netdev_lock); - if (ndev->dev.parent == &ibdev->mdev->pdev->dev) - ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - write_unlock(&ibdev->roce.netdev_lock); + default: + break; + } return NOTIFY_DONE; } @@@ -123,6 -152,10 +151,10 @@@ static struct net_device *mlx5_ib_get_n struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; + ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + if (ndev) + return ndev; + /* Ensure ndev does not disappear before we invoke dev_hold() */ read_lock(&ibdev->roce.netdev_lock); @@@ -138,7 -171,7 +170,7 @@@ static int mlx5_query_port_roce(struct struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); - struct net_device *ndev; + struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; @@@ -162,6 -195,17 +194,17 @@@ if (!ndev) return 0; + if (mlx5_lag_is_active(dev->mdev)) { + rcu_read_lock(); + upper = netdev_master_upper_dev_get_rcu(ndev); + if (upper) { + dev_put(ndev); + ndev = upper; + dev_hold(ndev); + } + rcu_read_unlock(); + } + if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; props->phys_state = 5; @@@ -284,9 -328,7 +327,9 @@@ __be16 mlx5_get_roce_udp_sport(struct m static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { @@@ -429,7 -471,7 +472,7 @@@ static int mlx5_query_node_guid(struct } struct mlx5_reg_node_desc { - u8 desc[64]; + u8 desc[IB_DEVICE_NODE_DESC_MAX]; }; static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) @@@ -532,6 -574,26 +575,26 @@@ static int mlx5_ib_query_device(struct resp.response_length += sizeof(resp.tso_caps); } } + + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + resp.rss_caps.rx_hash_function = + MLX5_RX_HASH_FUNC_TOEPLITZ; + resp.rss_caps.rx_hash_fields_mask = + MLX5_RX_HASH_SRC_IPV4 | + MLX5_RX_HASH_DST_IPV4 | + MLX5_RX_HASH_SRC_IPV6 | + MLX5_RX_HASH_DST_IPV6 | + MLX5_RX_HASH_SRC_PORT_TCP | + MLX5_RX_HASH_DST_PORT_TCP | + MLX5_RX_HASH_SRC_PORT_UDP | + MLX5_RX_HASH_DST_PORT_UDP; + resp.response_length += sizeof(resp.rss_caps); + } + } else { + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + resp.response_length += sizeof(resp.tso_caps); + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + resp.response_length += sizeof(resp.rss_caps); } if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { @@@ -595,6 -657,17 +658,17 @@@ if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (mlx5_ib_port_link_layer(ibdev, 1) == + IB_LINK_LAYER_ETHERNET) { + props->rss_caps.max_rwq_indirection_tables = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); + props->rss_caps.max_rwq_indirection_table_size = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; + props->max_wq_type_rq = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@@ -846,13 -919,13 +920,13 @@@ static int mlx5_ib_modify_device(struc * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ - memcpy(&in, props->node_desc, 64); + memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); return err; } @@@ -1395,28 -1468,77 +1469,77 @@@ static int mlx5_ib_dealloc_pd(struct ib return 0; } - static bool outer_header_zero(u32 *match_criteria) + enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT + }; + + #define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + + static u8 get_match_criteria_enable(u32 *match_criteria) { - int size = MLX5_ST_SZ_BYTES(fte_match_param); - char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); + u8 match_criteria_enable; - return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, - outer_headers_c + 1, - size - 1); + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; + + return match_criteria_enable; + } + + static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) + { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } + static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) + { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); + } + + #define LAST_ETH_FIELD vlan_tag + #define LAST_IB_FIELD sl + #define LAST_IPV4_FIELD tos + #define LAST_IPV6_FIELD traffic_class + #define LAST_TCP_UDP_FIELD src_port + + /* Field is the last supported field */ + #define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(u32 *match_c, u32 *match_v, - union ib_flow_spec *ib_spec) + const union ib_flow_spec *ib_spec) { void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - if (ib_spec->size != sizeof(ib_spec->eth)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), @@@ -1425,13 -1547,6 +1548,13 @@@ dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); @@@ -1463,8 -1578,8 +1586,8 @@@ ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: - if (ib_spec->size != sizeof(ib_spec->ipv4)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@@ -1487,10 -1602,16 +1610,16 @@@ dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: - if (ib_spec->size != sizeof(ib_spec->ipv6)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@@ -1513,10 -1634,26 +1642,26 @@@ dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr); + + MLX5_SET(fte_match_set_misc, misc_params_c, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.mask.flow_label)); + MLX5_SET(fte_match_set_misc, misc_params_v, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.val.flow_label)); break; case IB_FLOW_SPEC_TCP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); @@@ -1534,8 -1671,9 +1679,9 @@@ ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); @@@ -1582,7 -1720,7 +1728,7 @@@ static bool flow_is_multicast_only(stru is_multicast_ether_addr(eth_spec->val.dst_mac); } - static bool is_valid_attr(struct ib_flow_attr *flow_attr) + static bool is_valid_attr(const struct ib_flow_attr *flow_attr) { union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); bool has_ipv4_spec = false; @@@ -1626,12 -1764,13 +1772,13 @@@ static int mlx5_ib_destroy_flow(struct list_for_each_entry_safe(iter, tmp, &handler->list, list) { mlx5_del_flow_rule(iter->rule); + put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } mlx5_del_flow_rule(handler->rule); - put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); kfree(handler); @@@ -1647,10 -1786,16 +1794,16 @@@ static int ib_prio_to_core_prio(unsigne return priority; } + enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX + }; + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr) + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; @@@ -1681,6 -1826,19 +1834,19 @@@ &num_entries, &num_groups); prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-ENOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db.sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; } if (!ns) @@@ -1706,13 -1864,13 +1872,13 @@@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, + const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; - void *ib_flow = flow_attr + 1; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; u32 action; int err = 0; @@@ -1738,9 -1896,7 +1904,7 @@@ ib_flow += ((union ib_flow_spec *)ib_flow)->size; } - /* Outer header support only */ - spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) - << 0; + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, spec, @@@ -1753,7 -1909,8 +1917,8 @@@ goto free; } - handler->prio = ft_prio - dev->flow_db.prios; + ft_prio->refcount++; + handler->prio = ft_prio; ft_prio->flow_table = ft; free: @@@ -1777,6 -1934,7 +1942,7 @@@ static struct mlx5_ib_flow_handler *cre flow_attr, dst); if (IS_ERR(handler_dst)) { mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_dst; } else { @@@ -1838,6 -1996,8 +2004,8 @@@ static struct mlx5_ib_flow_handler *cre &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_ucast; } else { @@@ -1848,14 -2008,51 +2016,52 @@@ return handler; } + static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) + { + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + + err_tx: + mlx5_del_flow_rule(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); + err: + return ERR_PTR(err); + } + static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; @@@ -1873,17 -2070,22 +2079,25 @@@ mutex_lock(&dev->flow_db.lock); - ft_prio = get_flow_table(dev, flow_attr); + ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { @@@ -1897,6 -2099,8 +2111,8 @@@ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); } else { err = -EINVAL; goto destroy_ft; @@@ -1908,7 -2112,6 +2124,6 @@@ goto destroy_ft; } - ft_prio->refcount++; mutex_unlock(&dev->flow_db.lock); kfree(dst); @@@ -1916,6 -2119,8 +2131,8 @@@ destroy_ft: put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); unlock: mutex_unlock(&dev->flow_db.lock); kfree(dst); @@@ -2105,14 -2310,19 +2322,19 @@@ static void mlx5_ib_event(struct mlx5_c break; case MLX5_DEV_EVENT_PORT_UP: - ibev.event = IB_EVENT_PORT_ACTIVE; - port = (u8)param; - break; - case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: - ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; + + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return; + + ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; case MLX5_DEV_EVENT_LID_CHANGE: @@@ -2235,7 -2445,7 +2457,7 @@@ static int create_umr_res(struct mlx5_i goto error_0; } - pd = ib_alloc_pd(&dev->ib_dev); + pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); ret = PTR_ERR(pd); @@@ -2517,30 -2727,88 +2739,88 @@@ static void get_dev_fw_str(struct ib_de fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } + static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) + { + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, + MLX5_FLOW_NAMESPACE_LAG); + struct mlx5_flow_table *ft; + int err; + + if (!ns || !mlx5_lag_is_active(mdev)) + return 0; + + err = mlx5_cmd_create_vport_lag(mdev); + if (err) + return err; + + ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_destroy_vport_lag; + } + + dev->flow_db.lag_demux_ft = ft; + return 0; + + err_destroy_vport_lag: + mlx5_cmd_destroy_vport_lag(mdev); + return err; + } + + static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) + { + struct mlx5_core_dev *mdev = dev->mdev; + + if (dev->flow_db.lag_demux_ft) { + mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); + dev->flow_db.lag_demux_ft = NULL; + + mlx5_cmd_destroy_vport_lag(mdev); + } + } + + static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) + { + if (dev->roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->roce.nb); + dev->roce.nb.notifier_call = NULL; + } + } + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; dev->roce.nb.notifier_call = mlx5_netdev_event; err = register_netdevice_notifier(&dev->roce.nb); - if (err) + if (err) { + dev->roce.nb.notifier_call = NULL; return err; + } err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) goto err_unregister_netdevice_notifier; + err = mlx5_roce_lag_init(dev); + if (err) + goto err_disable_roce; + return 0; + err_disable_roce: + mlx5_nic_vport_disable_roce(dev->mdev); + err_unregister_netdevice_notifier: - unregister_netdevice_notifier(&dev->roce.nb); + mlx5_remove_roce_notifier(dev); return err; } static void mlx5_disable_roce(struct mlx5_ib_dev *dev) { + mlx5_roce_lag_cleanup(dev); mlx5_nic_vport_disable_roce(dev->mdev); - unregister_netdevice_notifier(&dev->roce.nb); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@@ -2655,6 -2923,7 +2935,7 @@@ static void *mlx5_ib_add(struct mlx5_co struct mlx5_ib_dev *dev; enum rdma_link_layer ll; int port_type_cap; + const char *name; int err; int i; @@@ -2687,7 -2956,12 +2968,12 @@@ MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); - strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + if (!mlx5_lag_is_active(mdev)) + name = "mlx5_%d"; + else + name = "mlx5_bond_%d"; + + strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; @@@ -2889,8 -3163,10 +3175,10 @@@ err_rsrc destroy_dev_resources(&dev->devr); err_disable_roce: - if (ll == IB_LINK_LAYER_ETHERNET) + if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_roce(dev); + mlx5_remove_roce_notifier(dev); + } err_free_port: kfree(dev->port); @@@ -2906,6 -3182,7 +3194,7 @@@ static void mlx5_ib_remove(struct mlx5_ struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); diff --combined drivers/infiniband/hw/mlx5/mlx5_ib.h index 67cc7416fdff,1df8a67d4f02..dcdcd195fe53 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@@ -44,6 -44,7 +44,7 @@@ #include #include #include + #include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@@ -142,6 -143,7 +143,7 @@@ struct mlx5_ib_pd #define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) + #define MLX5_IB_NUM_SNIFFER_FTS 2 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@@ -150,12 -152,14 +152,14 @@@ struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; - unsigned int prio; + struct mlx5_ib_flow_prio *prio; struct mlx5_flow_rule *rule; }; struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; + struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done @@@ -225,7 -229,7 +229,7 @@@ struct mlx5_ib_wq struct mlx5_ib_rwq { struct ib_wq ibwq; - u32 rqn; + struct mlx5_core_qp core_qp; u32 rq_num_pas; u32 log_rq_stride; u32 log_rq_size; @@@ -402,7 -406,6 +406,7 @@@ enum mlx5_ib_qp_flags /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, + MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { @@@ -603,6 -606,7 +607,7 @@@ struct mlx5_roce rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; + atomic_t next_port; }; struct mlx5_ib_dev { @@@ -663,6 -667,11 +668,11 @@@ static inline struct mlx5_ib_qp *to_mib return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } + static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) + { + return container_of(core_qp, struct mlx5_ib_rwq, core_qp); + } + static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); @@@ -947,4 -956,40 +957,40 @@@ static inline int verify_assign_uidx(u return 0; } + + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_qp *ucmd, + int inlen, + u32 *user_index) + { + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); + } + + static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_srq *ucmd, + int inlen, + u32 *user_index) + { + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); + } #endif /* MLX5_IB_H */ diff --combined drivers/infiniband/hw/mlx5/qp.c index 9529b464fbdc,9d97a71a1335..41f4c2afbcdd --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@@ -35,7 -35,6 +35,6 @@@ #include #include #include "mlx5_ib.h" - #include "user.h" /* not supported currently */ static int wq_signature; @@@ -77,6 -76,17 +76,17 @@@ struct mlx5_wqe_eth_pad u8 rsvd0[16]; }; + enum raw_qp_set_mask_map { + MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, + }; + + struct mlx5_modify_raw_qp_param { + u16 operation; + + u32 set_mask; /* raw_qp_set_mask_map */ + u8 rq_q_ctr_id; + }; + static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); @@@ -1457,7 -1467,6 +1467,7 @@@ create_tir kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; + qp->flags |= MLX5_IB_QP_RSS; return 0; err: @@@ -1863,7 -1872,8 +1873,8 @@@ static void get_cqs(enum ib_qp_type qp_ } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation); + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 lag_tx_affinity); static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { @@@ -1888,8 -1898,11 +1899,11 @@@ MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); } else { - err = modify_raw_packet_qp(dev, qp, - MLX5_CMD_OP_2RST_QP); + struct mlx5_modify_raw_qp_param raw_qp_param = { + .operation = MLX5_CMD_OP_2RST_QP + }; + + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0); } if (err) mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", @@@ -2153,6 -2166,31 +2167,31 @@@ static int modify_raw_packet_eth_prio(s return err; } + static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, u8 tx_affinity) + { + void *in; + void *tisc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tis_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); + + tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); + MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); + + err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); + + kvfree(in); + + return err; + } + static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, @@@ -2363,8 -2401,9 +2402,9 @@@ static int ib_mask_to_mlx5_opt(int ib_m return result; } - static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, - struct mlx5_ib_rq *rq, int new_state) + static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { void *in; void *rqc; @@@ -2381,7 -2420,17 +2421,17 @@@ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); - err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen); + if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) { + if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID); + MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); + } else + pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", + dev->ib_dev.name); + } + + err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); if (err) goto out; @@@ -2422,7 -2471,8 +2472,8 @@@ out } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation) + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 tx_affinity) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; @@@ -2431,7 -2481,7 +2482,7 @@@ int sq_state; int err; - switch (operation) { + switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; sq_state = MLX5_SQC_STATE_RDY; @@@ -2448,21 -2498,31 +2499,31 @@@ case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - /* Nothing to do here... */ - return 0; + if (raw_qp_param->set_mask) + return -EINVAL; + else + return 0; default: WARN_ON(1); return -EINVAL; } if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state); + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } - if (qp->sq.wqe_cnt) + if (qp->sq.wqe_cnt) { + if (tx_affinity) { + err = modify_raw_packet_tx_affinity(dev->mdev, sq, + tx_affinity); + if (err) + return err; + } + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + } return 0; } @@@ -2514,12 -2574,14 +2575,14 @@@ static int __mlx5_ib_modify_qp(struct i struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; + struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; int sqd_event; int mlx5_st; int err; u16 op; + u8 tx_affinity = 0; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@@ -2549,6 -2611,23 +2612,23 @@@ } } + if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { + if ((ibqp->qp_type == IB_QPT_RC) || + (ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_SQPN_QP1)) || + (ibqp->qp_type == IB_QPT_UC) || + (ibqp->qp_type == IB_QPT_RAW_PACKET) || + (ibqp->qp_type == IB_QPT_XRC_INI) || + (ibqp->qp_type == IB_QPT_XRC_TGT)) { + if (mlx5_lag_is_active(dev->mdev)) { + tx_affinity = (unsigned int)atomic_add_return(1, + &dev->roce.next_port) % + MLX5_MAX_PORTS + 1; + context->flags |= cpu_to_be32(tx_affinity << 24); + } + } + } + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || @@@ -2654,8 -2733,7 +2734,7 @@@ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; - struct mlx5_ib_port *mibport = &dev->port[port_num]; - + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->q_cnt_id) << 24); } @@@ -2690,11 -2768,20 +2769,20 @@@ optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) - err = modify_raw_packet_qp(dev, qp, op); - else + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + struct mlx5_modify_raw_qp_param raw_qp_param = {}; + + raw_qp_param.operation = op; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; + raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; + } + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); + } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); + } + if (err) goto out; @@@ -3657,8 -3744,12 +3745,8 @@@ static int begin_wqe(struct mlx5_ib_qp struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { - int err = 0; - - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { - err = -ENOMEM; - return err; - } + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); @@@ -3674,7 -3765,7 +3762,7 @@@ *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; - return err; + return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, @@@ -3753,7 -3844,7 +3841,7 @@@ int mlx5_ib_post_send(struct ib_qp *ibq num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; + err = -EINVAL; *bad_wr = wr; goto out; } @@@ -4497,6 -4588,28 +4585,28 @@@ int mlx5_ib_dealloc_xrcd(struct ib_xrc return 0; } + static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) + { + struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp); + struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device); + struct ib_event event; + + if (rwq->ibwq.event_handler) { + event.device = rwq->ibwq.device; + event.element.wq = &rwq->ibwq; + switch (type) { + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_WQ_FATAL; + break; + default: + mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn); + return; + } + + rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context); + } + } + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@@ -4534,7 -4647,7 +4644,7 @@@ MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); - err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); kvfree(in); return err; } @@@ -4650,7 -4763,7 +4760,7 @@@ struct ib_wq *mlx5_ib_create_wq(struct return ERR_PTR(-EINVAL); } - rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + @@@ -4660,10 -4773,12 +4770,12 @@@ goto err_copy; } + rwq->core_qp.event = mlx5_ib_wq_event; + rwq->ibwq.event_handler = init_attr->event_handler; return &rwq->ibwq; err_copy: - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: destroy_user_rq(pd, rwq); err: @@@ -4676,7 -4791,7 +4788,7 @@@ int mlx5_ib_destroy_wq(struct ib_wq *wq struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(wq->pd, rwq); kfree(rwq); @@@ -4808,7 -4923,7 +4920,7 @@@ int mlx5_ib_modify_wq(struct ib_wq *wq MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); - err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); kvfree(in); if (!err) rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; diff --combined drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 0aa854737e74,71d0534960d6..6af44f8db3d5 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@@ -51,7 -51,7 +51,7 @@@ #include "ocrdma.h" #include "ocrdma_hw.h" #include "ocrdma_verbs.h" - #include "ocrdma_abi.h" + #include int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { @@@ -125,8 -125,8 +125,8 @@@ int ocrdma_query_device(struct ib_devic IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; diff --combined drivers/infiniband/hw/qib/qib_verbs.c index 876ebb442d38,2d7e52619b55..954f15064514 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@@ -313,7 -313,7 +313,7 @@@ static void qib_copy_from_sge(void *dat * for the given QP. * Called at interrupt level. */ -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@@ -366,10 -366,10 +366,10 @@@ void qib_ib_rcv(struct qib_ctxtdata *rc { struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_ib_header *hdr = rhdr; + struct ib_header *hdr = rhdr; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; int lnh; @@@ -841,7 -841,7 +841,7 @@@ static void sdma_complete(struct qib_sd if (tx->wqe) qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct qib_ib_header *hdr; + struct ib_header *hdr; if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) hdr = &tx->align_buf->hdr; @@@ -889,7 -889,7 +889,7 @@@ static int wait_kmem(struct qib_ibdev * return ret; } -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@@ -1025,7 -1025,7 +1025,7 @@@ static int no_bufs_available(struct rvt return ret; } -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@@ -1133,7 -1133,7 +1133,7 @@@ done * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@@ -1370,7 -1370,8 +1370,8 @@@ static int qib_modify_device(struct ib_ } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct qib_ibport *ibp = &dd->pport[i].ibport_data; @@@ -1606,6 -1607,8 +1607,6 @@@ int qib_register_ib_device(struct qib_d /* Only need to initialize non-zero fields. */ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); - qpt_mask = dd->qpn_mask; - INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); diff --combined drivers/infiniband/ulp/ipoib/ipoib.h index 9dbfcc0ab577,7899167536e3..7b8d2d9e2263 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@@ -478,7 -478,6 +478,7 @@@ void ipoib_send(struct net_device *dev struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); @@@ -772,7 -771,13 +772,13 @@@ static inline void ipoib_unregister_deb #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) \ - ipoib_printk(KERN_WARNING, priv, format , ## arg) + do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + 10 * HZ /*10 seconds */, \ + 100); \ + if (__ratelimit(&_rs)) \ + ipoib_printk(KERN_WARNING, priv, format , ## arg);\ + } while (0) extern int ipoib_sendq_size; extern int ipoib_recvq_size; diff --combined drivers/infiniband/ulp/ipoib/ipoib_main.c index cc1c1b062ea5,e95c02ee05c0..5636fc3da6b8 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@@ -485,7 -485,7 +485,7 @@@ int ipoib_set_mode(struct net_device *d return -EINVAL; } -static struct ipoib_path *__path_find(struct net_device *dev, void *gid) +struct ipoib_path *__path_find(struct net_device *dev, void *gid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; @@@ -2196,7 -2196,8 +2196,8 @@@ static int __init ipoib_init_module(voi * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = create_singlethread_workqueue("ipoib_flush"); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", + WQ_MEM_RECLAIM); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; diff --combined drivers/infiniband/ulp/isert/ib_isert.c index cae9bbcc27e7,8df608ede366..6dd43f63238e --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@@ -309,7 -309,7 +309,7 @@@ isert_create_device_ib_res(struct isert if (ret) goto out; - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", @@@ -403,7 -403,6 +403,7 @@@ isert_init_conn(struct isert_conn *iser INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); + init_waitqueue_head(&isert_conn->rem_wait); kref_init(&isert_conn->kref); mutex_init(&isert_conn->mutex); INIT_WORK(&isert_conn->release_work, isert_release_work); @@@ -449,7 -448,7 +449,7 @@@ isert_alloc_login_buf(struct isert_con isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } @@@ -579,8 -578,7 +579,8 @@@ isert_connect_release(struct isert_con BUG_ON(!device); isert_free_rx_descriptors(isert_conn); - if (isert_conn->cm_id) + if (isert_conn->cm_id && + !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); if (isert_conn->qp) { @@@ -595,10 -593,7 +595,10 @@@ isert_device_put(device); - kfree(isert_conn); + if (isert_conn->dev_removed) + wake_up_interruptible(&isert_conn->rem_wait); + else + kfree(isert_conn); } static void @@@ -758,7 -753,6 +758,7 @@@ static in isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", @@@ -779,21 -773,10 +779,21 @@@ break; case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ - case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_conn = cma_id->qp->qp_context; + isert_conn->dev_removed = true; + isert_disconnected_handler(cma_id, event->event); + wait_event_interruptible(isert_conn->rem_wait, + isert_conn->state == ISER_CONN_DOWN); + kfree(isert_conn); + /* + * return non-zero from the callback to destroy + * the rdma cm id + */ + return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: diff --combined drivers/infiniband/ulp/srpt/ib_srpt.c index 883bbfe08e0e,48a44af740a6..0b1f69ed2e92 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@@ -522,11 -522,6 +522,11 @@@ static int srpt_refresh_port(struct srp if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@@ -2480,7 -2475,7 +2480,7 @@@ static void srpt_add_one(struct ib_devi init_waitqueue_head(&sdev->ch_releaseQ); mutex_init(&sdev->mutex); - sdev->pd = ib_alloc_pd(device); + sdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(sdev->pd)) goto free_dev; @@@ -2553,6 -2548,10 +2553,6 @@@ sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 28e653e9c856,f6099d0c6351..2125903043fb --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@@ -1,7 -1,7 +1,7 @@@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@@ -347,9 -347,9 +347,10 @@@ struct adapter_params unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + unsigned int nsched_cls; /* number of traffic classes */ unsigned int max_ordird_qp; /* Max read depth per RDMA QP */ unsigned int max_ird_adapter; /* Max read depth per adapter */ + bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ }; /* State needed to monitor the forward progress of SGE Ingress DMA activities @@@ -422,8 -422,8 +423,8 @@@ struct link_config unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ @@@ -437,6 -437,11 +438,6 @@@ enum MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */ MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */ MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ - MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ - MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ - - /* # of streaming iSCSIT Rx queues */ - MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS, }; enum { @@@ -453,7 -458,8 +454,7 @@@ enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + - MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS, + MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS, }; struct adapter; @@@ -490,7 -496,6 +491,7 @@@ struct port_info #endif /* CONFIG_CHELSIO_T4_FCOE */ bool rxtstamp; /* Enable TS */ struct hwtstamp_config tstamp_config; + struct sched_table *sched_tbl; }; struct dentry; @@@ -698,6 -703,10 +699,6 @@@ struct sge struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES]; struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; - struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS]; - struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES]; - struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; - struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; struct sge_uld_rxq_info **uld_rxq_info; @@@ -707,8 -716,15 +708,8 @@@ u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ - u16 iscsiqsets; /* # of active iSCSI queue sets */ - u16 niscsitq; /* # of available iSCST Rx queues */ - u16 rdmaqs; /* # of available RDMA Rx queues */ - u16 rdmaciqs; /* # of available RDMA concentrator IQs */ + u16 ofldqsets; /* # of active ofld queue sets */ u16 nqs_per_uld; /* # of Rx queues per ULD */ - u16 iscsi_rxq[MAX_OFLD_QSETS]; - u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; - u16 rdma_rxq[MAX_RDMA_QUEUES]; - u16 rdma_ciq[MAX_RDMA_CIQS]; u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; u32 fl_pg_order; /* large page allocation size */ @@@ -732,7 -748,10 +733,7 @@@ }; #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++) -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++) -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) struct l2t_data; @@@ -766,12 -785,6 +767,12 @@@ struct uld_msix_bmap struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + unsigned int idx; +}; + +struct vf_info { + unsigned char vf_mac_addr[ETH_ALEN]; + bool pf_set_mac; }; struct adapter { @@@ -784,7 -797,6 +785,7 @@@ unsigned int mbox; unsigned int pf; unsigned int flags; + unsigned int adap_idx; enum chip_type chip; int msg_enable; @@@ -799,7 -811,7 +800,7 @@@ } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ - unsigned int msi_idx; + int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@@ -807,9 -819,6 +808,9 @@@ struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + struct vf_info *vfinfo; + u8 num_vfs; + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; @@@ -817,10 -826,9 +818,10 @@@ unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; - struct cxgb4_pci_uld_info *uld; + struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; + unsigned int num_ofld_uld; struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@@ -840,8 -848,6 +841,8 @@@ #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; + struct mutex uld_mutex; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@@ -851,58 -857,6 +852,58 @@@ spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; +}; + +/* Support for "sched-class" command to allow a TX Scheduling Class to be + * programmed with various parameters. + */ +struct ch_sched_params { + s8 type; /* packet or flow */ + union { + struct { + s8 level; /* scheduler hierarchy level */ + s8 mode; /* per-class or per-flow */ + s8 rateunit; /* bit or packet rate */ + s8 ratemode; /* %port relative or kbps absolute */ + s8 channel; /* scheduler channel [0..N] */ + s8 class; /* scheduler class [0..N] */ + s32 minrate; /* minimum rate */ + s32 maxrate; /* maximum rate */ + s16 weight; /* percent weight */ + s16 pktsize; /* average packet size */ + } params; + } u; +}; + +enum { + SCHED_CLASS_TYPE_PACKET = 0, /* class type */ +}; + +enum { + SCHED_CLASS_LEVEL_CL_RL = 0, /* class rate limiter */ +}; + +enum { + SCHED_CLASS_MODE_CLASS = 0, /* per-class scheduling */ +}; + +enum { + SCHED_CLASS_RATEUNIT_BITS = 0, /* bit rate scheduling */ +}; + +enum { + SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ +}; + +/* Support for "sched_queue" command to allow one or more NIC TX Queues + * to be bound to a TX Scheduling Class. + */ +struct ch_sched_queue { + s8 queue; /* queue index */ + s8 class; /* class index */ }; /* Defined bit width of user definable filter tuples @@@ -1028,32 -982,6 +1029,32 @@@ enum VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; @@@ -1064,11 -992,6 +1065,11 @@@ static inline int is_pci_uld(const stru return adap->params.crypto; } +static inline int is_uld(const struct adapter *adap) +{ + return (adap->params.offload || adap->params.crypto); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@@ -1295,8 -1218,6 +1296,8 @@@ int t4_sge_alloc_eth_txq(struct adapte int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid); int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); @@@ -1643,9 -1564,6 +1644,9 @@@ void t4_get_trace_filter(struct adapte int filter_index, int *enabled); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); +int t4_sched_params(struct adapter *adapter, int type, int level, int mode, + int rateunit, int ratemode, int channel, int class, + int minrate, int maxrate, int weight, int pktsize); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); void t4_idma_monitor_init(struct adapter *adapter, @@@ -1655,9 -1573,7 +1656,9 @@@ void t4_idma_monitor(struct adapter *ad int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); -void uld_mem_free(struct adapter *adap); -int uld_mem_alloc(struct adapter *adap); +void t4_uld_mem_free(struct adapter *adap); +int t4_uld_mem_alloc(struct adapter *adap); +void t4_uld_clean_up(struct adapter *adap); +void t4_register_netevent_notifier(void); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index cf147ca419a8,7e858b2768b7..f320497368f4 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@@ -1,7 -1,7 +1,7 @@@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@@ -67,7 -67,6 +67,7 @@@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@@ -77,8 -76,6 +77,8 @@@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@@ -89,6 -86,30 +89,6 @@@ const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@@ -204,6 -225,11 +204,6 @@@ static struct dentry *cxgb4_debugfs_roo LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; static void link_report(struct net_device *dev) { @@@ -277,9 -303,11 +277,9 @@@ static void dcb_tx_queue_prio_enable(st txq->dcb_prio = value; } } -#endif /* CONFIG_CHELSIO_T4_DCB */ -int cxgb4_dcb_enabled(const struct net_device *dev) +static int cxgb4_dcb_enabled(const struct net_device *dev) { -#ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); if (!pi->dcb.enabled) @@@ -287,8 -315,11 +287,8 @@@ return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || (pi->dcb.state == CXGB4_DCB_STATE_HOST)); -#else - return 0; -#endif } -EXPORT_SYMBOL(cxgb4_dcb_enabled); +#endif /* CONFIG_CHELSIO_T4_DCB */ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) { @@@ -500,6 -531,66 +500,6 @@@ static void dcb_rpl(struct adapter *ada } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@@ -586,6 -677,56 +586,6 @@@ out return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. - */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@@ -637,12 -778,30 +637,12 @@@ static void name_msix_vecs(struct adapt snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@@ -659,9 -818,57 +659,9 @@@ goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@@ -677,6 -884,16 +677,6 @@@ static void free_msix_queue_irqs(struc free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@@ -815,11 -1032,42 +815,11 @@@ static void enable_rx(struct adapter *a } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) -{ - int i, err; - - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); - if (err) - return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; - } - return 0; -} -/** - * setup_sge_queues - configure SGE Tx/Rx/response queues - * @adap: the adapter - * - * Determines how many sets of SGE queues to use and initializes them. - * We support multiple queue sets per port if we have MSI-X, otherwise - * just one queue set per port. - */ -static int setup_sge_queues(struct adapter *adap) +static int setup_fw_sge_queues(struct adapter *adap) { - int err, i, j; struct sge *s = &adap->sge; + int err = 0; bitmap_zero(s->starving_fl, s->egr_sz); bitmap_zero(s->txq_maperr, s->egr_sz); @@@ -834,27 -1082,25 +834,27 @@@ adap->msi_idx = -((int)s->intrq.abs_id + 1); } - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], adap->msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + if (err) + t4_free_sge_resources(adap); + return err; +} + +/** + * setup_sge_queues - configure SGE Tx/Rx/response queues + * @adap: the adapter + * + * Determines how many sets of SGE queues to use and initializes them. + * We support multiple queue sets per port if we have MSI-X, otherwise + * just one queue set per port. + */ +static int setup_sge_queues(struct adapter *adap) +{ + int err, i, j; + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@@ -885,8 -1131,8 +885,8 @@@ } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@@ -894,15 -1140,30 +894,15 @@@ goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (adap->msi_idx > 0) \ - adap->msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. */ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@@ -913,9 -1174,6 +913,9 @@@ RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@@ -939,6 -1197,151 +939,6 @@@ void t4_free_mem(void *addr kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@@ -1320,22 -1723,19 +1320,22 @@@ EXPORT_SYMBOL(cxgb4_remove_tid) */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@@ -1345,10 -1745,8 +1345,10 @@@ t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@@ -1363,16 -1761,12 +1363,16 @@@ t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@@ -1922,7 -2316,7 +1922,7 @@@ static void disable_dbs(struct adapter for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@@ -1934,7 -2328,7 +1934,7 @@@ static void enable_dbs(struct adapter * for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@@ -1942,10 -2336,9 +1942,10 @@@ static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@@ -1999,14 -2392,13 +1999,14 @@@ out if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@@ -2071,12 -2463,95 +2071,12 @@@ void t4_db_dropped(struct adapter *adap queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) -{ - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - lli.fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support; - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - +void t4_register_netevent_notifier(void) +{ if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@@ -2086,6 -2561,12 +2086,6 @@@ mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], - CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; - } - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) { adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); @@@ -2096,6 -2577,10 +2096,6 @@@ netevent_registered = false; } mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@@ -2104,12 -2589,65 +2104,12 @@@ mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - ulds[i].state_change(adap->uld_handle[i], new_state); - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) adap->uld[i].state_change(adap->uld[i].handle, new_state); mutex_unlock(&uld_mutex); } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. - */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; - mutex_unlock(&uld_mutex); - return 0; -} -EXPORT_SYMBOL(cxgb4_unregister_uld); - #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, unsigned long event, void *data) @@@ -2214,6 -2752,7 +2214,6 @@@ static int cxgb_up(struct adapter *adap adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@@ -2291,6 -2830,40 +2291,6 @@@ static int cxgb_close(struct net_devic return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) @@@ -2512,53 -3085,10 +2512,53 @@@ static int cxgb_change_mtu(struct net_d } #ifdef CONFIG_PCI_IOV +static int dummy_open(struct net_device *dev) +{ + /* Turn carrier off since we don't have to transmit anything on this + * interface. + */ + netif_carrier_off(dev); + return 0; +} + +/* Fill MAC address that will be assigned by the FW */ +static void fill_vf_station_mac_addr(struct adapter *adap) +{ + unsigned int i; + u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + int err; + u8 *na; + u16 a, b; + + err = t4_get_raw_vpd_params(adap, &adap->params.vpd); + if (!err) { + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (i = 0; i < adap->num_vfs; i++) { + macaddr[5] = adap->pf * 16 + i; + ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); + } + } +} + static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; + int ret; /* verify MAC addr is valid */ if (!is_valid_ether_addr(mac)) { @@@ -2570,23 -3100,7 +2570,23 @@@ dev_info(pi->adapter->pdev_dev, "Setting MAC %pM on VF %d\n", mac, vf); - return t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +} + +static int cxgb_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (vf >= adap->num_vfs) + return -EINVAL; + ivi->vf = vf; + ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr); + return 0; } #endif @@@ -2626,116 -3140,6 +2626,116 @@@ static void cxgb_netpoll(struct net_dev } #endif +static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct ch_sched_params p; + struct ch_sched_queue qe; + u32 req_rate; + int err = 0; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (index < 0 || index > pi->nqsets - 1) + return -EINVAL; + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to rate limit on queue %d. Link Down?\n", + index); + return -EINVAL; + } + + /* Convert from Mbps to Kbps */ + req_rate = rate << 10; + + /* Max rate is 10 Gbps */ + if (req_rate >= SCHED_MAX_RATE_KBPS) { + dev_err(adap->pdev_dev, + "Invalid rate %u Mbps, Max rate is %u Gbps\n", + rate, SCHED_MAX_RATE_KBPS); + return -ERANGE; + } + + /* First unbind the queue from any existing class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = SCHED_CLS_NONE; + + err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) { + dev_err(adap->pdev_dev, + "Unbinding Queue %d on port %d fail. Err: %d\n", + index, pi->port_id, err); + return err; + } + + /* Queue already unbound */ + if (!req_rate) + return 0; + + /* Fetch any available unused or matching scheduling class */ + memset(&p, 0, sizeof(p)); + p.type = SCHED_CLASS_TYPE_PACKET; + p.u.params.level = SCHED_CLASS_LEVEL_CL_RL; + p.u.params.mode = SCHED_CLASS_MODE_CLASS; + p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS; + p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS; + p.u.params.channel = pi->tx_chan; + p.u.params.class = SCHED_CLS_NONE; + p.u.params.minrate = 0; + p.u.params.maxrate = req_rate; + p.u.params.weight = 0; + p.u.params.pktsize = dev->mtu; + + e = cxgb4_sched_class_alloc(dev, &p); + if (!e) + return -ENOMEM; + + /* Bind the queue to a scheduling class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = e->idx; + + err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) + dev_err(adap->pdev_dev, + "Queue rate limiting failed. Err: %d\n", err); + return err; +} + +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@@ -2758,17 -3162,13 +2758,17 @@@ #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = cxgb_busy_poll, #endif + .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, }; -static const struct net_device_ops cxgb4_mgmt_netdev_ops = { #ifdef CONFIG_PCI_IOV +static const struct net_device_ops cxgb4_mgmt_netdev_ops = { + .ndo_open = dummy_open, .ndo_set_vf_mac = cxgb_set_vf_mac, -#endif + .ndo_get_vf_config = cxgb_get_vf_config, }; +#endif static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@@ -3625,12 -4025,6 +3625,12 @@@ static int adap_init0(struct adapter *a adap->clipt_start = val[0]; adap->clipt_end = val[1]; + /* We don't yet have a PARAMs calls to retrieve the number of Traffic + * Classes supported by the hardware/firmware so we hard code it here + * for now. + */ + adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16; + /* query params related to active filter region */ params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START); params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END); @@@ -3668,6 -4062,12 +3668,12 @@@ adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0); } + /* See if FW supports FW_RI_FR_NSMR_TPTE_WR work request */ + params[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0); + /* * Get device capabilities so we can determine what resources we need * to manage. @@@ -3719,7 -4119,6 +3725,7 @@@ adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@@ -3772,7 -4171,6 +3778,7 @@@ "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@@ -3783,8 -4181,6 +3789,8 @@@ goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiaitor */ + adap->num_ofld_uld += 2; } if (caps_cmd.cryptocaps) { /* Should query params here...TODO */ @@@ -3966,17 -4362,10 +3972,17 @@@ static const struct pci_error_handlers .resume = eeh_resume, }; +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } /* @@@ -3991,14 -4380,14 +3997,14 @@@ static void cfg_queues(struct adapter * #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. */ if (is_kdump_kernel()) { adap->params.offload = 0; adap->params.crypto = 0; - } else if (adap->num_uld && uld_mem_alloc(adap)) { + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; } @@@ -4044,18 -4433,33 +4050,18 @@@ s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. - */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@@ -4074,8 -4478,47 +4080,8 @@@ for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@@ -4109,15 -4552,7 +4115,15 @@@ static void reduce_ethqs(struct adapte static int get_msix_info(struct adapter *adap) { struct uld_msix_info *msix_info; - int max_ingq = (MAX_OFLD_QSETS * adap->num_uld); + unsigned int max_ingq = 0; + + if (is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); if (!msix_info) @@@ -4131,13 -4566,12 +4137,13 @@@ } spin_lock_init(&adap->msix_bmap_ulds.lock); adap->msix_info_ulds = msix_info; +out: return 0; } static void free_msix_info(struct adapter *adap) { - if (!adap->num_uld) + if (!(adap->num_uld && adap->num_ofld_uld)) return; kfree(adap->msix_info_ulds); @@@ -4156,32 -4590,32 +4162,32 @@@ static int enable_msix(struct adapter * struct msix_entry *entries; int max_ingq = MAX_INGQ; - max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; /* map for msix */ - if (is_pci_uld(adap) && get_msix_info(adap)) + if (get_msix_info(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; + } for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; } if (is_pci_uld(adap)) { - want += netif_get_num_default_rss_queues() * nchan; - uld_need = nchan; + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for @@@ -4209,25 -4643,43 +4215,25 @@@ if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_pci_uld(adap)) { + if (is_uld(adap)) { if (allocated < want) s->nqs_per_uld = nchan; else - s->nqs_per_uld = netif_get_num_default_rss_queues() * - nchan; - } - - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; - - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; - } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - if (is_pci_uld(adap)) - i -= s->nqs_per_uld * adap->num_uld; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - + s->nqs_per_uld = s->ofldqsets; } - for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i) + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) adap->msix_info[i].vec = entries[i].vector; - if (is_pci_uld(adap)) { - for (j = 0 ; i < allocated; ++i, j++) + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; + } adap->msix_bmap_ulds.mapsize = j; } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs, s->nqs_per_uld); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@@ -4410,12 -4862,8 +4416,12 @@@ static void print_port_info(const struc bufp += sprintf(bufp, "1000/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) bufp += sprintf(bufp, "10G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) + bufp += sprintf(bufp, "25G/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) bufp += sprintf(bufp, "40G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) + bufp += sprintf(bufp, "100G/"); if (bufp != buf) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); @@@ -4441,9 -4889,7 +4447,9 @@@ static void free_some_resources(struct unsigned int i; t4_free_mem(adapter->l2t); + t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@@ -4494,51 -4940,6 +4500,51 @@@ static int get_chip_type(struct pci_de } #ifdef CONFIG_PCI_IOV +static void dummy_setup(struct net_device *dev) +{ + dev->type = ARPHRD_NONE; + dev->mtu = 0; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + + /* Initialize the device structure. */ + dev->netdev_ops = &cxgb4_mgmt_netdev_ops; + dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; + dev->destructor = free_netdev; +} + +static int config_mgmt_dev(struct pci_dev *pdev) +{ + struct adapter *adap = pci_get_drvdata(pdev); + struct net_device *netdev; + struct port_info *pi; + char name[IFNAMSIZ]; + int err; + + snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup); + if (!netdev) + return -ENOMEM; + + pi = netdev_priv(netdev); + pi->adapter = adap; + SET_NETDEV_DEV(netdev, &pdev->dev); + + adap->port[0] = netdev; + + err = register_netdev(adap->port[0]); + if (err) { + pr_info("Unable to register VF mgmt netdev %s\n", name); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + return err; + } + return 0; +} + static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { struct adapter *adap = pci_get_drvdata(pdev); @@@ -4573,14 -4974,8 +4579,14 @@@ */ if (!num_vfs) { pci_disable_sriov(pdev); - if (adap->port[0]->reg_state == NETREG_REGISTERED) + if (adap->port[0]) { unregister_netdev(adap->port[0]); + adap->port[0] = NULL; + } + /* free VF resources */ + kfree(adap->vfinfo); + adap->vfinfo = NULL; + adap->num_vfs = 0; return num_vfs; } @@@ -4589,16 -4984,12 +4595,16 @@@ if (err) return err; - if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) { - err = register_netdev(adap->port[0]); - if (err < 0) - pr_info("Unable to register VF mgmt netdev\n"); - } + adap->num_vfs = num_vfs; + err = config_mgmt_dev(pdev); + if (err) + return err; } + + adap->vfinfo = kcalloc(adap->num_vfs, + sizeof(struct vf_info), GFP_KERNEL); + if (adap->vfinfo) + fill_vf_station_mac_addr(adap); return num_vfs; } #endif @@@ -4610,6 -5001,9 +4616,6 @@@ static int init_one(struct pci_dev *pde bool highdma = false; struct adapter *adapter = NULL; struct net_device *netdev; -#ifdef CONFIG_PCI_IOV - char name[IFNAMSIZ]; -#endif void __iomem *regs; u32 whoami, pl_rev; enum chip_type chip; @@@ -4788,8 -5182,7 +4794,8 @@@ netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@@ -4863,26 -5256,10 +4869,26 @@@ } } #endif - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls); + if (!pi->sched_tbl) + dev_warn(&pdev->dev, + "could not activate scheduling on port %d\n", + i); + } + + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { @@@ -4952,14 -5329,10 +4958,14 @@@ /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); return 0; sriov: @@@ -4981,24 -5354,40 +4987,24 @@@ goto free_pci_region; } - snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func); - netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup); - if (!netdev) { - err = -ENOMEM; - goto free_adapter; - } - adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; adapter->name = pci_name(pdev); adapter->mbox = func; adapter->pf = func; adapter->regs = regs; + adapter->adap_idx = adap_idx; adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + (sizeof(struct mbox_cmd) * T4_OS_LOG_MBOX_CMDS), GFP_KERNEL); if (!adapter->mbox_log) { err = -ENOMEM; - goto free_netdevice; + goto free_adapter; } - pi = netdev_priv(netdev); - pi->adapter = adapter; - SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, adapter); - - adapter->port[0] = netdev; - netdev->netdev_ops = &cxgb4_mgmt_netdev_ops; - netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; - return 0; - free_netdevice: - free_netdev(adapter->port[0]); free_adapter: kfree(adapter); free_pci_region: @@@ -5014,8 -5403,8 +5020,8 @@@ free_some_resources(adapter); if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@@ -5052,7 -5441,7 +5058,7 @@@ static void remove_one(struct pci_dev * */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@@ -5066,15 -5455,21 +5072,15 @@@ /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@@ -5094,56 -5489,10 +5100,56 @@@ } #ifdef CONFIG_PCI_IOV else { - if (adapter->port[0]->reg_state == NETREG_REGISTERED) + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif +} + +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... + */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) unregister_netdev(adapter->port[0]); - free_netdev(adapter->port[0]); iounmap(adapter->regs); + kfree(adapter->vfinfo); kfree(adapter); pci_disable_sriov(pdev); pci_release_regions(pdev); @@@ -5156,7 -5505,7 +5162,7 @@@ static struct pci_driver cxgb4_driver .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index b4b2d20aab3c,aac6e444abf2..0945fa49a5dd --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@@ -36,6 -36,7 +36,6 @@@ */ #include -#include #include #include #include @@@ -82,24 -83,6 +82,24 @@@ static void free_msix_idx_in_bmap(struc spin_unlock_irqrestore(&bmap->lock, flags); } +/* Flush the aggregated lro sessions */ +static void uldrx_flush_handler(struct sge_rspq *q) +{ + struct adapter *adap = q->adap; + + if (adap->uld[q->uld].lro_flush) + adap->uld[q->uld].lro_flush(&q->lro_mgr); +} + +/** + * uldrx_handler - response queue handler for ULD queues + * @q: the response queue that received the packet + * @rsp: the response queue descriptor holding the offload message + * @gl: the gather list of packet fragments + * + * Deliver an ingress offload packet to a ULD. All processing is done by + * the ULD, we just maintain statistics. + */ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) { @@@ -142,8 -125,8 +142,8 @@@ static int alloc_uld_rxqs(struct adapte struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; unsigned short *ids = rxq_info->rspq_id + offset; unsigned int per_chan = nq / adap->params.nports; - unsigned int msi_idx, bmap_idx; - int i, err; + unsigned int bmap_idx = 0; + int i, err, msi_idx; if (adap->flags & USING_MSIX) msi_idx = 1; @@@ -153,14 -136,14 +153,14 @@@ for (i = 0; i < nq; i++, q++) { if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); - adap->msi_idx++; + msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], - adap->msi_idx, + msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, - NULL, + lro ? uldrx_flush_handler : NULL, 0); if (err) goto freeout; @@@ -177,6 -160,7 +177,6 @@@ freeout if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } /* We need to free rxq also in case of ciq allocation failure */ @@@ -186,47 -170,26 +186,47 @@@ if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } } return err; } -int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) +static int +setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int i, ret = 0; if (adap->flags & USING_MSIX) { - rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq, + rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq), + sizeof(unsigned short), GFP_KERNEL); if (!rxq_info->msix_tbl) return -ENOMEM; } - return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, rxq_info->nrxq, lro)); + + /* Tell uP to route control queue completions to rdma rspq */ + if (adap->flags & FULL_INIT_DONE && + !ret && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + unsigned int cmplqid; + u32 param, cmdop; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + return ret; } static void t4_free_uld_rxqs(struct adapter *adap, int n, @@@ -236,28 -199,14 +236,28 @@@ if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } } -void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + u32 param, cmdop, cmplqid = 0; + int i; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + if (rxq_info->nciq) t4_free_uld_rxqs(adap, rxq_info->nciq, rxq_info->uldrxq + rxq_info->nrxq); @@@ -266,39 -215,27 +266,39 @@@ kfree(rxq_info->msix_tbl); } -int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_pci_uld_info *uld_info) +static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; - int i, nrxq; + int i, nrxq, ciq_size; rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); if (!rxq_info) return -ENOMEM; - if (uld_info->nrxq > s->nqs_per_uld) - rxq_info->nrxq = s->nqs_per_uld; - else - rxq_info->nrxq = uld_info->nrxq; - if (!uld_info->nciq) + if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) { + i = s->nqs_per_uld; + rxq_info->nrxq = roundup(i, adap->params.nports); + } else { + i = min_t(int, uld_info->nrxq, + num_online_cpus()); + rxq_info->nrxq = roundup(i, adap->params.nports); + } + if (!uld_info->ciq) { rxq_info->nciq = 0; - else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld) - rxq_info->nciq = s->nqs_per_uld; - else - rxq_info->nciq = uld_info->nciq; + } else { + if (adap->flags & USING_MSIX) + rxq_info->nciq = min_t(int, s->nqs_per_uld, + num_online_cpus()); + else + rxq_info->nciq = min_t(int, MAX_OFLD_QSETS, + num_online_cpus()); + rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) * + adap->params.nports); + rxq_info->nciq = max_t(int, rxq_info->nciq, + adap->params.nports); + } nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), @@@ -309,7 -246,7 +309,7 @@@ } rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); - if (!rxq_info->uldrxq) { + if (!rxq_info->rspq_id) { kfree(rxq_info->uldrxq); kfree(rxq_info); return -ENOMEM; @@@ -323,17 -260,12 +323,17 @@@ r->fl.size = 72; } + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; + if (ciq_size > SGE_MAX_IQ_SIZE) { + dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n"); + ciq_size = SGE_MAX_IQ_SIZE; + } + for (i = rxq_info->nrxq; i < nrxq; i++) { struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; - init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64); + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); r->rspq.uld = uld_type; - r->fl.size = 72; } memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); @@@ -342,7 -274,7 +342,7 @@@ return 0; } -void free_queues_uld(struct adapter *adap, unsigned int uld_type) +static void free_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; @@@ -351,12 -283,10 +351,12 @@@ kfree(rxq_info); } -int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static int +request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx, bmap_idx, err = 0; + int err = 0; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; @@@ -369,7 -299,7 +369,7 @@@ } return 0; unwind: - while (--idx >= 0) { + while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, @@@ -378,14 -308,13 +378,14 @@@ return err; } -void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +static void +free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, @@@ -393,14 -322,14 +393,14 @@@ } } -void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) +static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", adap->port[0]->name, rxq_info->name, idx); @@@ -433,7 -362,7 +433,7 @@@ static void quiesce_rx(struct adapter * } } -void enable_rx_uld(struct adapter *adap, unsigned int uld_type) +static void enable_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; @@@ -442,7 -371,7 +442,7 @@@ enable_rx(adap, &rxq_info->uldrxq[idx].rspq); } -void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) +static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int idx; @@@ -462,15 -391,15 +462,15 @@@ static void uld_queue_init(struct adapt lli->nciq = rxq_info->nciq; } -int uld_mem_alloc(struct adapter *adap) +int t4_uld_mem_alloc(struct adapter *adap) { struct sge *s = &adap->sge; - adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL); + adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL); if (!adap->uld) return -ENOMEM; - s->uld_rxq_info = kzalloc(adap->num_uld * + s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX * sizeof(struct sge_uld_rxq_info *), GFP_KERNEL); if (!s->uld_rxq_info) @@@ -482,7 -411,7 +482,7 @@@ err_uld return -ENOMEM; } -void uld_mem_free(struct adapter *adap) +void t4_uld_mem_free(struct adapter *adap) { struct sge *s = &adap->sge; @@@ -490,26 -419,6 +490,26 @@@ kfree(adap->uld); } +void t4_uld_clean_up(struct adapter *adap) +{ + struct sge_uld_rxq_info *rxq_info; + unsigned int i; + + if (!adap->uld) + return; + for (i = 0; i < CXGB4_ULD_MAX; i++) { + if (!adap->uld[i].handle) + continue; + rxq_info = adap->sge.uld_rxq_info[i]; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, i); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, i); + free_sge_queues_uld(adap, i); + free_queues_uld(adap, i); + } +} + static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) { int i; @@@ -521,15 -430,10 +521,15 @@@ lld->ports = adap->port; lld->vr = &adap->vres; lld->mtus = adap->params.mtus; - lld->ntxq = adap->sge.iscsiqsets; + lld->ntxq = adap->sge.ofldqsets; lld->nchan = adap->params.nports; lld->nports = adap->params.nports; lld->wr_cred = adap->params.ofldq_wr_cred; + lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); + lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); + lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); + lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); + lld->iscsi_ppm = &adap->iscsi_ppm; lld->adapter_type = adap->params.chip; lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; lld->udb_density = 1 << adap->params.sge.eq_qpp; @@@ -550,6 -454,6 +550,7 @@@ lld->max_ird_adapter = adap->params.max_ird_adapter; lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; lld->nodeid = dev_to_node(adap->pdev_dev); ++ lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support; } static void uld_attach(struct adapter *adap, unsigned int uld) @@@ -569,37 -473,23 +570,37 @@@ } adap->uld[uld].handle = handle; + t4_register_netevent_notifier(); if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); } -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p) +/** + * cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods + * + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. + */ +int cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; + unsigned int adap_idx = 0; struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; ret = cfg_queues_uld(adap, type, p); if (ret) @@@ -621,14 -511,11 +622,14 @@@ } adap->uld[type] = *p; uld_attach(adap, type); + adap_idx++; } mutex_unlock(&uld_mutex); return 0; free_irq: + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); free_rxq: @@@ -636,49 -523,21 +637,49 @@@ free_queues: free_queues_uld(adap, type); out: + + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + if (!adap_idx) + break; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + adap_idx--; + } mutex_unlock(&uld_mutex); return ret; } -EXPORT_SYMBOL(cxgb4_register_pci_uld); +EXPORT_SYMBOL(cxgb4_register_uld); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) +/** + * cxgb4_unregister_uld - unregister an upper-layer driver + * @type: the ULD type + * + * Unregisters an existing upper-layer driver. + */ +int cxgb4_unregister_uld(enum cxgb4_uld type) { struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; adap->uld[type].handle = NULL; adap->uld[type].add = NULL; @@@ -693,4 -552,4 +694,4 @@@ return 0; } -EXPORT_SYMBOL(cxgb4_unregister_pci_uld); +EXPORT_SYMBOL(cxgb4_unregister_uld); diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 47bd14f602db,93b1550ac7e6..2996793b1aaa --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@@ -1,7 -1,7 +1,7 @@@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@@ -42,8 -42,6 +42,8 @@@ #include #include "cxgb4.h" +#define MAX_ULD_QSETS 16 + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ @@@ -106,7 -104,6 +106,7 @@@ struct tid_info unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@@ -127,8 -124,6 +127,8 @@@ atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@@ -188,38 -183,15 +188,38 @@@ int cxgb4_create_server_filter(const st int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. + */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); } enum cxgb4_uld { + CXGB4_ULD_INIT, CXGB4_ULD_RDMA, CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, + CXGB4_ULD_CRYPTO, CXGB4_ULD_MAX }; @@@ -308,15 -280,36 +308,16 @@@ struct cxgb4_lld_info unsigned int iscsi_llimit; /* chip's iscsi region llimit */ void **iscsi_ppm; /* iscsi page pod manager */ int nodeid; /* device numa node id */ + bool fr_nsmr_tpte_wr_support; /* FW supports FR_NSMR_TPTE_WR */ }; struct cxgb4_uld_info { const char *name; - void *(*add)(const struct cxgb4_lld_info *p); - int (*rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl); - int (*state_change)(void *handle, enum cxgb4_state new_state); - int (*control)(void *handle, enum cxgb4_control control, ...); - int (*lro_rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl, - struct t4_lro_mgr *lro_mgr, - struct napi_struct *napi); - void (*lro_flush)(struct t4_lro_mgr *); -}; - -enum cxgb4_pci_uld { - CXGB4_PCI_ULD1, - CXGB4_PCI_ULD_MAX -}; - -struct cxgb4_pci_uld_info { - const char *name; - bool lro; void *handle; unsigned int nrxq; - unsigned int nciq; unsigned int rxq_size; - unsigned int ciq_size; + bool ciq; + bool lro; void *(*add)(const struct cxgb4_lld_info *p); int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); @@@ -331,6 -324,9 +332,6 @@@ int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); diff --combined drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 4b58b32105f7,985a521ac511..8d9e4b7a8e84 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@@ -1,7 -1,7 +1,7 @@@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@@ -100,6 -100,7 +100,7 @@@ enum fw_wr_opcodes FW_RI_RECV_WR = 0x17, FW_RI_BIND_MW_WR = 0x18, FW_RI_FR_NSMR_WR = 0x19, + FW_RI_FR_NSMR_TPTE_WR = 0x20, FW_RI_INV_LSTAG_WR = 0x1a, FW_ISCSI_TX_DATA_WR = 0x45, FW_CRYPTO_LOOKASIDE_WR = 0X6d, @@@ -681,7 -682,6 +682,7 @@@ enum fw_cmd_opcodes FW_RSS_IND_TBL_CMD = 0x20, FW_RSS_GLB_CONFIG_CMD = 0x22, FW_RSS_VI_CONFIG_CMD = 0x23, + FW_SCHED_CMD = 0x24, FW_DEVLOG_CMD = 0x25, FW_CLIP_CMD = 0x28, FW_LASTC2E_CMD = 0x40, @@@ -1121,6 -1121,7 +1122,7 @@@ enum fw_params_param_dev FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, FW_PARAMS_PARAM_DEV_FWCACHE = 0x18, + FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, }; /* @@@ -2267,12 -2268,6 +2269,12 @@@ enum fw_port_cap FW_PORT_CAP_802_3_ASM_DIR = 0x8000, }; +#define FW_PORT_CAP_SPEED_S 0 +#define FW_PORT_CAP_SPEED_M 0x3f +#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S) +#define FW_PORT_CAP_SPEED_G(x) \ + (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M) + enum fw_port_mdi { FW_PORT_CAP_MDI_UNCHANGED, FW_PORT_CAP_MDI_AUTO, @@@ -2969,41 -2964,6 +2971,41 @@@ struct fw_rss_vi_config_cmd #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x) ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S) #define FW_RSS_VI_CONFIG_CMD_UDPEN_F FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U) +enum fw_sched_sc { + FW_SCHED_SC_PARAMS = 1, +}; + +struct fw_sched_cmd { + __be32 op_to_write; + __be32 retval_len16; + union fw_sched { + struct fw_sched_config { + __u8 sc; + __u8 type; + __u8 minmaxen; + __u8 r3[5]; + __u8 nclasses[4]; + __be32 r4; + } config; + struct fw_sched_params { + __u8 sc; + __u8 type; + __u8 level; + __u8 mode; + __u8 unit; + __u8 rate; + __u8 ch; + __u8 cl; + __be32 min; + __be32 max; + __be16 weight; + __be16 pktsize; + __be16 burstsize; + __be16 r4; + } params; + } u; +}; + struct fw_clip_cmd { __be32 op_to_write; __be32 alloc_to_len16; diff --combined drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index eb448dff7564,9283bc60bb24..8e5b3f51b47b --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@@ -116,8 -116,10 +116,8 @@@ int hns_dsaf_get_cfg(struct dsaf_devic dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sc_base)) { - dev_err(dsaf_dev->dev, "subctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sc_base)) return PTR_ERR(dsaf_dev->sc_base); - } res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); @@@ -128,8 -130,10 +128,8 @@@ dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sds_base)) { - dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sds_base)) return PTR_ERR(dsaf_dev->sds_base); - } } else { dsaf_dev->sub_ctrl = syscon; } @@@ -144,8 -148,10 +144,8 @@@ } } dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->ppe_base)) { - dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->ppe_base)) return PTR_ERR(dsaf_dev->ppe_base); - } dsaf_dev->ppe_paddr = res->start; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { @@@ -161,8 -167,10 +161,8 @@@ } } dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->io_base)) { - dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->io_base)) return PTR_ERR(dsaf_dev->io_base); - } } ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num); @@@ -2780,7 -2788,7 +2780,7 @@@ module_platform_driver(g_dsaf_driver) * @enable: false - request reset , true - drop reset * retuen 0 - success , negative -fail */ - int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) + int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) { struct dsaf_device *dsaf_dev; struct platform_device *pdev; @@@ -2809,24 -2817,44 +2809,44 @@@ {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, }; - if (!is_of_node(dsaf_fwnode)) { - pr_err("hisi_dsaf: Only support DT node!\n"); + /* find the platform device corresponding to fwnode */ + if (is_of_node(dsaf_fwnode)) { + pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + } else if (is_acpi_device_node(dsaf_fwnode)) { + pdev = hns_dsaf_find_platform_device(dsaf_fwnode); + } else { + pr_err("fwnode is neither OF or ACPI type\n"); return -EINVAL; } - pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + + /* check if we were a success in fetching pdev */ + if (!pdev) { + pr_err("couldn't find platform device for node\n"); + return -ENODEV; + } + + /* retrieve the dsaf_device from the driver data */ dsaf_dev = dev_get_drvdata(&pdev->dev); + if (!dsaf_dev) { + dev_err(&pdev->dev, "dsaf_dev is NULL\n"); + return -ENODEV; + } + + /* now, make sure we are running on compatible SoC */ if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", dsaf_dev->ae_dev.name); return -ENODEV; } - if (!enable) { - /* Reset rocee-channels in dsaf and rocee */ - hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false); - hns_dsaf_roce_srst(dsaf_dev, false); + /* do reset or de-reset according to the flag */ + if (!dereset) { + /* reset rocee-channels in dsaf and rocee */ + dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, + false); + dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, false); } else { - /* Configure dsaf tx roce correspond to port map and sl map */ + /* configure dsaf tx roce correspond to port map and sl map */ mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG); for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) dsaf_set_field(mp, 7 << i * 3, i * 3, @@@ -2840,12 -2868,13 +2860,13 @@@ sl_map[i][DSAF_ROCE_6PORT_MODE]); dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl); - /* De-reset rocee-channels in dsaf and rocee */ - hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true); + /* de-reset rocee-channels in dsaf and rocee */ + dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, + true); msleep(SRST_TIME_INTERVAL); - hns_dsaf_roce_srst(dsaf_dev, true); + dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, true); - /* Eanble dsaf channel rocee credit */ + /* enable dsaf channel rocee credit */ credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG); dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0); dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); diff --combined drivers/net/ethernet/mellanox/mlx4/fw.c index f9cbc67f1694,d87bbe65c2b0..c41ab31a39f8 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@@ -159,7 -159,7 +159,8 @@@ static void dump_dev_cap_flags2(struct [32] = "Loopback source checks support", [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", + [35] = "QinQ VST mode support", + [36] = "sl to vl mapping table change event support" }; int i; @@@ -249,72 -249,6 +250,72 @@@ out return err; } +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@@ -378,18 -312,14 +379,18 @@@ #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 + +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; @@@ -428,24 -358,15 +429,24 @@@ MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); - if (dev->caps.phv_bit[port]) { - field = QUERY_FUNC_CAP_PHV_BIT; - MLX4_PUT(outbox->buf, field, - QUERY_FUNC_CAP_FLAGS0_OFFSET); - } + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + + field = 0; + if (dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ @@@ -519,10 -440,6 +520,10 @@@ size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + } else err = -EINVAL; @@@ -538,12 -455,10 +539,12 @@@ int mlx4_QUERY_FUNC_CAP(struct mlx4_de u32 size, qkey; int err = 0, quotas = 0; u32 in_modifier; + u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ - in_modifier = op_modifier ? gen_or_port : + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) @@@ -698,7 -613,8 +699,7 @@@ MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); - func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: @@@ -775,7 -691,6 +776,7 @@@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 @@@ -789,6 -704,7 +790,7 @@@ #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 + #define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a #define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 @@@ -853,8 -769,12 +855,8 @@@ dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); - dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); - dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); @@@ -904,6 -824,9 +906,9 @@@ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET); + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT; MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN; @@@ -939,9 -862,6 +944,9 @@@ MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); @@@ -2783,7 -2703,6 +2788,6 @@@ static int mlx4_check_smp_firewall_acti int mlx4_config_mad_demux(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; - int secure_host_active; int err; /* Check if mad_demux is supported */ @@@ -2806,7 -2725,8 +2810,8 @@@ goto out; } - secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + if (mlx4_check_smp_firewall_active(dev, mailbox)) + dev->flags |= MLX4_FLAG_SECURE_HOST; /* Config mad_demux to handle all MADs returned by the query above */ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, @@@ -2817,7 -2737,7 +2822,7 @@@ goto out; } - if (secure_host_active) + if (dev->flags & MLX4_FLAG_SECURE_HOST) mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); out: mlx4_free_cmd_mailbox(dev, mailbox); @@@ -2999,7 -2919,7 +3004,7 @@@ int get_phv_bit(struct mlx4_dev *dev, u memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) - *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); @@@ -3023,22 -2943,6 +3028,22 @@@ int set_phv_bit(struct mlx4_dev *dev, u } EXPORT_SYMBOL(set_phv_bit); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; diff --combined drivers/nvme/host/rdma.c index fbdb2267e460,1a1854796797..28632292e85e --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@@ -43,6 -43,10 +43,6 @@@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -#define NVME_RDMA_MAX_PAGES_PER_MR 512 - -#define NVME_RDMA_DEF_RECONNECT_DELAY 20 - /* * We handle AEN commands ourselves and don't even let the * block layer know about them. @@@ -54,7 -58,6 +54,6 @@@ struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct kref ref; struct list_head entry; }; @@@ -73,6 -76,7 +72,6 @@@ struct nvme_rdma_request u32 num_sge; int nents; bool inline_data; - bool need_inval; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; @@@ -82,8 -86,6 +81,8 @@@ enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@@ -283,7 -285,7 +282,7 @@@ static int nvme_rdma_reinit_request(voi struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; - if (!req->need_inval) + if (!req->mr->need_inval) goto out; ib_dereg_mr(req->mr); @@@ -293,10 -295,9 +292,10 @@@ if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } - req->need_inval = false; + req->mr->need_inval = false; out: return ret; @@@ -408,10 -409,7 +407,7 @@@ static void nvme_rdma_free_dev(struct k list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - if (!register_always) - ib_dereg_mr(ndev->mr); ib_dealloc_pd(ndev->pd); - kfree(ndev); } @@@ -444,24 -442,16 +440,16 @@@ nvme_rdma_find_get_device(struct rdma_c ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev); + ndev->pd = ib_alloc_pd(ndev->dev, + register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(ndev->pd)) goto out_free_dev; - if (!register_always) { - ndev->mr = ib_get_dma_mr(ndev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(ndev->mr)) - goto out_free_pd; - } - if (!(ndev->dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { dev_err(&ndev->dev->dev, "Memory registrations not supported.\n"); - goto out_free_mr; + goto out_free_pd; } list_add(&ndev->entry, &device_list); @@@ -469,9 -459,6 +457,6 @@@ out_unlock mutex_unlock(&device_list_mutex); return ndev; - out_free_mr: - if (!register_always) - ib_dereg_mr(ndev->mr); out_free_pd: ib_dealloc_pd(ndev->pd); out_free_dev: @@@ -483,14 -470,9 +468,14 @@@ out_err static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; + dev = queue->device; + ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@@ -541,7 -523,6 +526,7 @@@ static int nvme_rdma_create_queue_ib(st ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@@ -594,13 -575,11 +579,13 @@@ static int nvme_rdma_init_queue(struct goto out_destroy_cm_id; } + clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags); return 0; out_destroy_cm_id: + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@@ -619,7 -598,7 +604,7 @@@ static void nvme_rdma_free_queue(struc static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@@ -651,8 -630,7 +636,8 @@@ static int nvme_rdma_init_io_queues(str int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize); + ret = nvme_rdma_init_queue(ctrl, i, + ctrl->ctrl.opts->queue_size); if (ret) { dev_info(ctrl->ctrl.device, "failed to initialize i/o queue: %d\n", ret); @@@ -663,7 -641,7 +648,7 @@@ return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret; @@@ -772,13 -750,8 +757,13 @@@ static void nvme_rdma_error_recovery_wo { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@@ -861,7 -834,7 +846,7 @@@ static void nvme_rdma_unmap_data(struc if (!blk_rq_bytes(rq)) return; - if (req->need_inval) { + if (req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); if (res < 0) { dev_err(ctrl->ctrl.device, @@@ -915,7 -888,7 +900,7 @@@ static int nvme_rdma_map_sg_single(stru sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); - put_unaligned_le32(queue->device->mr->rkey, sg->key); + put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; } @@@ -947,7 -920,7 +932,7 @@@ static int nvme_rdma_map_sg_fr(struct n IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->need_inval = true; + req->mr->need_inval = true; sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); @@@ -970,7 -943,7 +955,7 @@@ static int nvme_rdma_map_data(struct nv req->num_sge = 1; req->inline_data = false; - req->need_inval = false; + req->mr->need_inval = false; c->common.flags |= NVME_CMD_SGL_METABUF; @@@ -1000,7 -973,7 +985,7 @@@ nvme_rdma_queue_idx(queue)) return nvme_rdma_map_sg_inline(queue, req, c); - if (!register_always) + if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) return nvme_rdma_map_sg_single(queue, req, c); } @@@ -1157,7 -1130,7 +1142,7 @@@ static int nvme_rdma_process_nvme_rsp(s if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) - req->need_inval = false; + req->mr->need_inval = false; blk_mq_complete_request(rq, status); @@@ -1290,22 -1263,8 +1275,22 @@@ static int nvme_rdma_route_resolved(str priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue)); - priv.hrqsize = cpu_to_le16(queue->queue_size); - priv.hsqsize = cpu_to_le16(queue->queue_size); + /* + * set the admin queue depth to the minimum size + * specified by the Fabrics standard. + */ + if (priv.qid == 0) { + priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); + priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + } else { + /* + * current interpretation of the fabrics spec + * is at minimum you make hrqsize sqsize+1, or a + * 1's based representation of sqsize. + */ + priv.hrqsize = cpu_to_le16(queue->queue_size); + priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); + } ret = rdma_connect(queue->cm_id, ¶m); if (ret) { @@@ -1321,6 -1280,58 +1306,6 @@@ out_destroy_queue_ib return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* Queue controller deletion */ - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@@ -1362,8 -1373,8 +1347,8 @@@ nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@@ -1435,7 -1446,7 +1420,7 @@@ static int nvme_rdma_queue_rq(struct bl if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) flush = true; ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); if (ret) { nvme_rdma_unmap_data(queue, rq); goto err; @@@ -1664,19 -1675,15 +1649,19 @@@ static int __nvme_rdma_del_ctrl(struct static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret; + int ret = 0; + /* + * Keep a reference until all work is flushed since + * __nvme_rdma_del_ctrl can free the ctrl mem + */ + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; ret = __nvme_rdma_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } static void nvme_rdma_remove_ctrl_work(struct work_struct *work) @@@ -1794,7 -1801,7 +1779,7 @@@ static int nvme_rdma_create_io_queues(s memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@@ -1892,7 -1899,7 +1877,7 @@@ static struct nvme_ctrl *nvme_rdma_crea spin_lock_init(&ctrl->lock); ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ - ctrl->ctrl.sqsize = opts->queue_size; + ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; @@@ -1973,57 -1980,27 +1958,57 @@@ static struct nvmf_transport_ops nvme_r .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } diff --combined drivers/nvme/target/rdma.c index 1cbe6e053b5b,187763a77355..f8d23999e0f2 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@@ -848,7 -848,7 +848,7 @@@ nvmet_rdma_find_get_device(struct rdma_ ndev->device = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->device); + ndev->pd = ib_alloc_pd(ndev->device, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; @@@ -978,11 -978,10 +978,11 @@@ static void nvmet_rdma_release_queue_wo container_of(w, struct nvmet_rdma_queue, release_work); struct rdma_cm_id *cm_id = queue->cm_id; struct nvmet_rdma_device *dev = queue->dev; + enum nvmet_rdma_queue_state state = queue->state; nvmet_rdma_free_queue(queue); - if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL) + if (state != NVMET_RDMA_IN_DEVICE_REMOVAL) rdma_destroy_id(cm_id); kref_put(&dev->ref, nvmet_rdma_free_dev); @@@ -1004,10 -1003,10 +1004,10 @@@ nvmet_rdma_parse_cm_connect_req(struct queue->host_qid = le16_to_cpu(req->qid); /* - * req->hsqsize corresponds to our recv queue size + * req->hsqsize corresponds to our recv queue size plus 1 * req->hrqsize corresponds to our send queue size */ - queue->recv_queue_size = le16_to_cpu(req->hsqsize); + queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; queue->send_queue_size = le16_to_cpu(req->hrqsize); if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) diff --combined drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index c7a5d49e487f,0e4c6090bf62..9e8802181452 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@@ -128,7 -128,6 +128,7 @@@ static int kiblnd_msgtype2size(int type static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) { struct kib_rdma_desc *rd; + int msg_size; int nob; int n; int i; @@@ -147,6 -146,12 +147,6 @@@ n = rd->rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return 1; - } - nob = offsetof(struct kib_msg, ibm_u) + kiblnd_rd_msg_size(rd, msg->ibm_type, n); @@@ -156,13 -161,6 +156,13 @@@ return 1; } + msg_size = kiblnd_rd_size(rd); + if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) { + CERROR("Bad msg_size: %d, should be 0 < n <= %d\n", + msg_size, LNET_MAX_PAYLOAD); + return 1; + } + if (!flip) return 0; @@@ -620,7 -618,7 +620,7 @@@ static int kiblnd_get_completion_vector } struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, - int state, int version) + int state, int version) { /* * CAVEAT EMPTOR: @@@ -2467,7 -2465,7 +2467,7 @@@ int kiblnd_dev_failover(struct kib_dev hdev->ibh_cmid = cmid; hdev->ibh_ibdev = cmid->device; - pd = ib_alloc_pd(cmid->device); + pd = ib_alloc_pd(cmid->device, 0); if (IS_ERR(pd)) { rc = PTR_ERR(pd); CERROR("Can't allocate PD: %d\n", rc); diff --combined include/linux/mlx4/device.h index 59b50d3eedb4,062d10aaf5cb..f6a164297358 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@@ -71,7 -71,8 +71,8 @@@ enum MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, - MLX4_FLAG_BONDED = 1 << 7 + MLX4_FLAG_BONDED = 1 << 7, + MLX4_FLAG_SECURE_HOST = 1 << 8, }; enum { @@@ -221,7 -222,7 +222,8 @@@ enum MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, - MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 36, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, ++ MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37, }; enum { @@@ -449,6 -450,7 +451,7 @@@ enum MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, + MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP = 0x17, }; /* Port mgmt change event handling */ @@@ -460,6 -462,11 +463,11 @@@ enum MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; + union sl2vl_tbl_to_u64 { + u8 sl8[8]; + u64 sl64; + }; + enum { MLX4_DEVICE_STATE_UP = 1 << 0, MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, @@@ -946,6 -953,9 +954,9 @@@ struct mlx4_eqe __be32 block_ptr; __be32 tbl_entries_mask; } __packed tbl_change_info; + struct { + u8 sl2vl_table[8]; + } __packed sl2vl_tbl_change_info; } params; } __packed port_mgmt_change; struct { @@@ -1372,8 -1382,6 +1383,8 @@@ int mlx4_SET_PORT_fcs_check(struct mlx4 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --combined include/rdma/ib_verbs.h index 9e935655fccb,d3fba0a56e17..5ad43a487745 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@@ -261,6 -261,16 +261,16 @@@ struct ib_odp_caps } per_transport_caps; }; + struct ib_rss_caps { + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + u32 supported_qpts; + u32 max_rwq_indirection_tables; + u32 max_rwq_indirection_table_size; + }; + enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, @@@ -318,6 -328,8 +328,8 @@@ struct ib_device_attr struct ib_odp_caps odp_caps; uint64_t timestamp_mask; uint64_t hca_core_clock; /* in KHZ */ + struct ib_rss_caps rss_caps; + u32 max_wq_type_rq; }; enum ib_mtu { @@@ -525,9 -537,11 +537,11 @@@ enum ib_device_modify_flags IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; + #define IB_DEVICE_NODE_DESC_MAX 64 + struct ib_device_modify { u64 sys_image_guid; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; }; enum ib_port_modify_flags { @@@ -1370,10 -1384,17 +1384,17 @@@ struct ib_udata struct ib_pd { u32 local_dma_lkey; + u32 flags; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ - struct ib_mr *local_mr; + + u32 unsafe_global_rkey; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct ib_mr *__internal_mr; }; struct ib_xrcd { @@@ -1604,6 -1625,8 +1625,8 @@@ struct ib_flow_eth_filter u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_eth { @@@ -1616,6 -1639,8 +1639,8 @@@ struct ib_flow_ib_filter { __be16 dlid; __u8 sl; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ib { @@@ -1625,9 -1650,22 +1650,22 @@@ struct ib_flow_ib_filter mask; }; + /* IPv4 header flags */ + enum ib_ipv4_flags { + IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */ + IB_IPV4_MORE_FRAG = 0X4 /* For All fragmented packets except the + last have this flag set */ + }; + struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; + u8 proto; + u8 tos; + u8 ttl; + u8 flags; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv4 { @@@ -1640,6 -1678,12 +1678,12 @@@ struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; + __be32 flow_label; + u8 next_hdr; + u8 traffic_class; + u8 hop_limit; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv6 { @@@ -1652,6 -1696,8 +1696,8 @@@ struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_tcp_udp { @@@ -1739,14 -1785,6 +1785,14 @@@ struct ib_dma_mapping_ops void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); + int (*map_sg_attrs)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs); + void (*unmap_sg_attrs)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, @@@ -2041,7 -2079,7 +2087,7 @@@ struct ib_device u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; @@@ -2123,17 -2161,22 +2169,17 @@@ static inline bool ib_is_udata_cleared( size_t len) { const void __user *p = udata->inbuf + offset; - bool ret = false; + bool ret; u8 *buf; if (len > USHRT_MAX) return false; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) + buf = memdup_user(p, len); + if (IS_ERR(buf)) return false; - if (copy_from_user(buf, p, len)) - goto free; - ret = !memchr_inv(buf, 0, len); - -free: kfree(buf); return ret; } @@@ -2505,8 -2548,23 +2551,23 @@@ int ib_find_gid(struct ib_device *devic int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); - struct ib_pd *ib_alloc_pd(struct ib_device *device); + enum ib_pd_flags { + /* + * Create a memory registration for all memory in the system and place + * the rkey for it into pd->unsafe_global_rkey. This can be used by + * ULPs to avoid the overhead of dynamic MRs. + * + * This flag is generally considered unsafe and must only be used in + * extremly trusted environments. Every use of it will log a warning + * in the kernel log. + */ + IB_PD_UNSAFE_GLOBAL_RKEY = 0x01, + }; + struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller); + #define ib_alloc_pd(device, flags) \ + __ib_alloc_pd((device), (flags), __func__) void ib_dealloc_pd(struct ib_pd *pd); /** @@@ -2859,18 -2917,6 +2920,6 @@@ static inline int ib_req_ncomp_notif(st -ENOSYS; } - /** - * ib_get_dma_mr - Returns a memory region for system memory that is - * usable for DMA. - * @pd: The protection domain associated with the memory region. - * @mr_access_flags: Specifies the memory access rights. - * - * Note that the ib_dma_*() functions defined below must be used - * to create/destroy addresses used with the Lkey or Rkey returned - * by ib_get_dma_mr(). - */ - struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@@ -3008,12 -3054,8 +3057,12 @@@ static inline int ib_dma_map_sg_attrs(s enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); + if (dev->dma_ops) + return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction, + dma_attrs); + else + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@@ -3021,12 -3063,7 +3070,12 @@@ enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + if (dev->dma_ops) + return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction, + dma_attrs); + else + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry diff --combined net/sunrpc/xprtrdma/verbs.c index 799cce6cbe45,6561d4a35acb..be3178e5e2d2 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@@ -51,7 -51,6 +51,7 @@@ #include #include #include +#include #include #include /* try_module_get()/module_put() */ @@@ -387,7 -386,7 +387,7 @@@ rpcrdma_ia_open(struct rpcrdma_xprt *xp } ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_device); + ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); @@@ -924,7 -923,7 +924,7 @@@ rpcrdma_buffer_create(struct rpcrdma_xp } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@@ -1019,7 -1018,6 +1019,7 @@@ rpcrdma_buffer_destroy(struct rpcrdma_b rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(ia, rep); } + buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@@ -1034,7 -1032,6 +1034,7 @@@ spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); + buf->rb_recv_count = 0; rpcrdma_destroy_mrs(buf); } @@@ -1077,27 -1074,8 +1077,27 @@@ rpcrdma_put_mw(struct rpcrdma_xprt *r_x spin_unlock(&buf->rb_mwlock); } +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) +{ + /* If an RPC previously completed without a reply (say, a + * credential problem or a soft timeout occurs) then hold off + * on supplying more Receive buffers until the number of new + * pending RPCs catches up to the number of posted Receives. + */ + if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) + return NULL; + + if (unlikely(list_empty(&buffers->rb_recv_bufs))) + return NULL; + buffers->rb_recv_count++; + return rpcrdma_buffer_get_rep_locked(buffers); +} + /* * Get a set of request/reply buffers. + * + * Reply buffer (if available) is attached to send buffer upon return. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@@ -1107,15 -1085,21 +1107,15 @@@ spin_lock(&buffers->rb_lock); if (list_empty(&buffers->rb_send_bufs)) goto out_reqbuf; + buffers->rb_send_count++; req = rpcrdma_buffer_get_req_locked(buffers); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_repbuf; - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); return req; out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of request buffers (%p)\n", buffers); - return NULL; -out_repbuf: - list_add(&req->rl_free, &buffers->rb_send_bufs); - spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers); + pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; } @@@ -1133,12 -1117,9 +1133,12 @@@ rpcrdma_buffer_put(struct rpcrdma_req * req->rl_reply = NULL; spin_lock(&buffers->rb_lock); + buffers->rb_send_count--; list_add_tail(&req->rl_free, &buffers->rb_send_bufs); - if (rep) + if (rep) { + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + } spin_unlock(&buffers->rb_lock); } @@@ -1152,7 -1133,8 +1152,7 @@@ rpcrdma_recv_buffer_get(struct rpcrdma_ struct rpcrdma_buffer *buffers = req->rl_buffer; spin_lock(&buffers->rb_lock); - if (!list_empty(&buffers->rb_recv_bufs)) - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); } @@@ -1166,7 -1148,6 +1166,7 @@@ rpcrdma_recv_buffer_put(struct rpcrdma_ struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; spin_lock(&buffers->rb_lock); + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); spin_unlock(&buffers->rb_lock); }