Merge branch 'staging-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Aug 2011 20:03:12 +0000 (13:03 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Aug 2011 20:03:12 +0000 (13:03 -0700)
* 'staging-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging-2.6:
  staging: tidspbridge: fix compilation on dsp clock functions
  staging: octeon-ethernet: Add missing #includes.
  Staging: zcache: signedness bug in tmem_get()
  staging: zcache: fix crash on high memory swap
  staging: brcm80211: SPARC build error fix
  staging: brcm80211: fix compile error on non-x86 archs since 3.0 kernel

303 files changed:
Documentation/networking/00-INDEX
Documentation/networking/ip-sysctl.txt
Documentation/networking/scaling.txt
MAINTAINERS
arch/alpha/include/asm/sysinfo.h
arch/alpha/include/asm/thread_info.h
arch/alpha/kernel/osf_sys.c
arch/cris/include/asm/serial.h [new file with mode: 0644]
arch/m68k/include/asm/page_mm.h
arch/powerpc/sysdev/fsl_rio.c
arch/s390/kernel/early.c
arch/s390/kernel/ipl.c
arch/sparc/include/asm/sigcontext.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/signal32.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/signal_64.c
arch/sparc/kernel/sigutil.h [new file with mode: 0644]
arch/sparc/kernel/sigutil_32.c [new file with mode: 0644]
arch/sparc/kernel/sigutil_64.c [new file with mode: 0644]
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/entry_32.S
arch/x86/platform/mrst/mrst.c
arch/x86/platform/olpc/olpc.c
arch/x86/vdso/vdso32/sysenter.S
drivers/base/firmware_class.c
drivers/char/msm_smd_pkt.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_test.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/hid/Kconfig
drivers/hid/hid-apple.c
drivers/hid/hid-core.c
drivers/hid/hid-ids.h
drivers/hid/hid-wiimote.c
drivers/hid/usbhid/hid-quirks.c
drivers/hwmon/i5k_amb.c
drivers/hwmon/ntc_thermistor.c
drivers/i2c/busses/i2c-nomadik.c
drivers/i2c/busses/i2c-omap.c
drivers/input/joystick/analog.c
drivers/input/keyboard/ep93xx_keypad.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/misc/ad714x-i2c.c
drivers/input/misc/ad714x-spi.c
drivers/input/misc/ad714x.c
drivers/input/misc/ad714x.h
drivers/input/misc/mma8450.c
drivers/input/misc/mpu3050.c
drivers/input/mouse/bcm5974.c
drivers/input/tablet/wacom_sys.c
drivers/input/tablet/wacom_wac.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/input/touchscreen/max11801_ts.c
drivers/input/touchscreen/tnetv107x-ts.c
drivers/leds/leds-ams-delta.c
drivers/leds/leds-bd2802.c
drivers/leds/leds-hp6xx.c
drivers/misc/ab8500-pwm.c
drivers/misc/fsa9480.c
drivers/misc/pti.c
drivers/net/bonding/bond_main.c
drivers/net/can/sja1000/plx_pci.c
drivers/net/e1000e/e1000.h
drivers/net/e1000e/ich8lan.c
drivers/net/e1000e/netdev.c
drivers/net/forcedeth.c
drivers/net/gianfar.c
drivers/net/gianfar_ethtool.c
drivers/net/ixgbe/ixgbe_main.c
drivers/net/rionet.c
drivers/net/sh_eth.c
drivers/net/usb/cdc_ncm.c
drivers/net/via-velocity.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/wireless/iwlwifi/iwl-pci.c
drivers/net/wireless/rt2x00/rt2800usb.c
drivers/net/wireless/rt2x00/rt2x00usb.c
drivers/net/wireless/wl12xx/acx.c
drivers/net/wireless/wl12xx/testmode.c
drivers/power/max8997_charger.c
drivers/power/max8998_charger.c
drivers/power/s3c_adc_battery.c
drivers/rapidio/rio-scan.c
drivers/rtc/rtc-s3c.c
drivers/s390/block/dasd_ioctl.c
drivers/s390/char/sclp_cmd.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/iscsi/iscsi_target_erl1.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_parameters.c
drivers/target/iscsi/iscsi_target_util.c
drivers/target/target_core_cdb.c
drivers/target/target_core_device.c
drivers/target/target_core_fabric_configfs.c
drivers/target/target_core_pr.c
drivers/target/target_core_rd.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
drivers/target/tcm_fc/tfc_conf.c
drivers/video/backlight/adp8870_bl.c
drivers/video/backlight/ep93xx_bl.c
drivers/video/backlight/pwm_bl.c
drivers/w1/masters/ds2490.c
drivers/w1/masters/matrox_w1.c
drivers/w1/slaves/w1_ds2408.c
drivers/w1/slaves/w1_smem.c
drivers/w1/slaves/w1_therm.c
drivers/w1/w1.c
drivers/w1/w1.h
drivers/w1/w1_family.c
drivers/w1/w1_family.h
drivers/w1/w1_int.c
drivers/w1/w1_int.h
drivers/w1/w1_io.c
drivers/w1/w1_log.h
drivers/w1/w1_netlink.c
drivers/w1/w1_netlink.h
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/xfs/Makefile
fs/xfs/kmem.c [new file with mode: 0644]
fs/xfs/kmem.h [new file with mode: 0644]
fs/xfs/linux-2.6/kmem.c [deleted file]
fs/xfs/linux-2.6/kmem.h [deleted file]
fs/xfs/linux-2.6/mrlock.h [deleted file]
fs/xfs/linux-2.6/time.h [deleted file]
fs/xfs/linux-2.6/xfs_acl.c [deleted file]
fs/xfs/linux-2.6/xfs_aops.c [deleted file]
fs/xfs/linux-2.6/xfs_aops.h [deleted file]
fs/xfs/linux-2.6/xfs_buf.c [deleted file]
fs/xfs/linux-2.6/xfs_buf.h [deleted file]
fs/xfs/linux-2.6/xfs_discard.c [deleted file]
fs/xfs/linux-2.6/xfs_discard.h [deleted file]
fs/xfs/linux-2.6/xfs_export.c [deleted file]
fs/xfs/linux-2.6/xfs_export.h [deleted file]
fs/xfs/linux-2.6/xfs_file.c [deleted file]
fs/xfs/linux-2.6/xfs_fs_subr.c [deleted file]
fs/xfs/linux-2.6/xfs_globals.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl.h [deleted file]
fs/xfs/linux-2.6/xfs_ioctl32.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl32.h [deleted file]
fs/xfs/linux-2.6/xfs_iops.c [deleted file]
fs/xfs/linux-2.6/xfs_iops.h [deleted file]
fs/xfs/linux-2.6/xfs_linux.h [deleted file]
fs/xfs/linux-2.6/xfs_message.c [deleted file]
fs/xfs/linux-2.6/xfs_message.h [deleted file]
fs/xfs/linux-2.6/xfs_quotaops.c [deleted file]
fs/xfs/linux-2.6/xfs_stats.c [deleted file]
fs/xfs/linux-2.6/xfs_stats.h [deleted file]
fs/xfs/linux-2.6/xfs_super.c [deleted file]
fs/xfs/linux-2.6/xfs_super.h [deleted file]
fs/xfs/linux-2.6/xfs_sync.c [deleted file]
fs/xfs/linux-2.6/xfs_sync.h [deleted file]
fs/xfs/linux-2.6/xfs_sysctl.c [deleted file]
fs/xfs/linux-2.6/xfs_sysctl.h [deleted file]
fs/xfs/linux-2.6/xfs_trace.c [deleted file]
fs/xfs/linux-2.6/xfs_trace.h [deleted file]
fs/xfs/linux-2.6/xfs_vnode.h [deleted file]
fs/xfs/linux-2.6/xfs_xattr.c [deleted file]
fs/xfs/mrlock.h [new file with mode: 0644]
fs/xfs/quota/xfs_dquot.c [deleted file]
fs/xfs/quota/xfs_dquot.h [deleted file]
fs/xfs/quota/xfs_dquot_item.c [deleted file]
fs/xfs/quota/xfs_dquot_item.h [deleted file]
fs/xfs/quota/xfs_qm.c [deleted file]
fs/xfs/quota/xfs_qm.h [deleted file]
fs/xfs/quota/xfs_qm_bhv.c [deleted file]
fs/xfs/quota/xfs_qm_stats.c [deleted file]
fs/xfs/quota/xfs_qm_stats.h [deleted file]
fs/xfs/quota/xfs_qm_syscalls.c [deleted file]
fs/xfs/quota/xfs_quota_priv.h [deleted file]
fs/xfs/quota/xfs_trans_dquot.c [deleted file]
fs/xfs/support/uuid.c [deleted file]
fs/xfs/support/uuid.h [deleted file]
fs/xfs/time.h [new file with mode: 0644]
fs/xfs/uuid.c [new file with mode: 0644]
fs/xfs/uuid.h [new file with mode: 0644]
fs/xfs/xfs.h
fs/xfs/xfs_acl.c [new file with mode: 0644]
fs/xfs/xfs_aops.c [new file with mode: 0644]
fs/xfs/xfs_aops.h [new file with mode: 0644]
fs/xfs/xfs_buf.c [new file with mode: 0644]
fs/xfs/xfs_buf.h [new file with mode: 0644]
fs/xfs/xfs_discard.c [new file with mode: 0644]
fs/xfs/xfs_discard.h [new file with mode: 0644]
fs/xfs/xfs_dquot.c [new file with mode: 0644]
fs/xfs/xfs_dquot.h [new file with mode: 0644]
fs/xfs/xfs_dquot_item.c [new file with mode: 0644]
fs/xfs/xfs_dquot_item.h [new file with mode: 0644]
fs/xfs/xfs_export.c [new file with mode: 0644]
fs/xfs/xfs_export.h [new file with mode: 0644]
fs/xfs/xfs_file.c [new file with mode: 0644]
fs/xfs/xfs_fs_subr.c [new file with mode: 0644]
fs/xfs/xfs_globals.c [new file with mode: 0644]
fs/xfs/xfs_ioctl.c [new file with mode: 0644]
fs/xfs/xfs_ioctl.h [new file with mode: 0644]
fs/xfs/xfs_ioctl32.c [new file with mode: 0644]
fs/xfs/xfs_ioctl32.h [new file with mode: 0644]
fs/xfs/xfs_iops.c [new file with mode: 0644]
fs/xfs/xfs_iops.h [new file with mode: 0644]
fs/xfs/xfs_linux.h [new file with mode: 0644]
fs/xfs/xfs_message.c [new file with mode: 0644]
fs/xfs/xfs_message.h [new file with mode: 0644]
fs/xfs/xfs_qm.c [new file with mode: 0644]
fs/xfs/xfs_qm.h [new file with mode: 0644]
fs/xfs/xfs_qm_bhv.c [new file with mode: 0644]
fs/xfs/xfs_qm_stats.c [new file with mode: 0644]
fs/xfs/xfs_qm_stats.h [new file with mode: 0644]
fs/xfs/xfs_qm_syscalls.c [new file with mode: 0644]
fs/xfs/xfs_quota_priv.h [new file with mode: 0644]
fs/xfs/xfs_quotaops.c [new file with mode: 0644]
fs/xfs/xfs_stats.c [new file with mode: 0644]
fs/xfs/xfs_stats.h [new file with mode: 0644]
fs/xfs/xfs_super.c [new file with mode: 0644]
fs/xfs/xfs_super.h [new file with mode: 0644]
fs/xfs/xfs_sync.c [new file with mode: 0644]
fs/xfs/xfs_sync.h [new file with mode: 0644]
fs/xfs/xfs_sysctl.c [new file with mode: 0644]
fs/xfs/xfs_sysctl.h [new file with mode: 0644]
fs/xfs/xfs_trace.c [new file with mode: 0644]
fs/xfs/xfs_trace.h [new file with mode: 0644]
fs/xfs/xfs_trans_dquot.c [new file with mode: 0644]
fs/xfs/xfs_vnode.h [new file with mode: 0644]
fs/xfs/xfs_xattr.c [new file with mode: 0644]
include/linux/connector.h
include/linux/fs.h
include/linux/fuse.h
include/linux/personality.h
include/linux/pwm_backlight.h
include/linux/rio_regs.h
include/linux/writeback.h
include/target/target_core_fabric_ops.h
kernel/irq/manage.c
kernel/printk.c
kernel/sys.c
kernel/sysctl_binary.c
kernel/sysctl_check.c
mm/memcontrol.c
mm/page-writeback.c
mm/vmscan.c
net/8021q/vlan_core.c
net/atm/br2684.c
net/bridge/br_if.c
net/ipv6/ipv6_sockglue.c
net/ipv6/sit.c
net/sched/act_mirred.c
scripts/checkpatch.pl
scripts/get_maintainer.pl
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/soc/blackfin/bf5xx-ad193x.c
sound/soc/codecs/ad193x.c
sound/soc/codecs/ad193x.h
sound/soc/codecs/sta32x.c
sound/soc/codecs/wm8962.c
sound/soc/codecs/wm8996.c
sound/soc/ep93xx/ep93xx-i2s.c
sound/soc/fsl/fsl_dma.c
sound/soc/fsl/mpc8610_hpcd.c
sound/soc/fsl/p1022_ds.c
sound/soc/kirkwood/kirkwood-i2s.c
sound/soc/omap/ams-delta.c
sound/soc/samsung/Kconfig
sound/soc/samsung/h1940_uda1380.c
sound/soc/samsung/rx1950_uda1380.c
sound/soc/samsung/speyside_wm8962.c
sound/soc/soc-core.c
sound/soc/soc-io.c
sound/soc/soc-jack.c
sound/soc/soc-pcm.c
sound/soc/tegra/tegra_wm8903.c
tools/power/cpupower/Makefile
tools/power/cpupower/debug/x86_64/Makefile
tools/power/cpupower/debug/x86_64/centrino-decode.c [deleted symlink]
tools/power/cpupower/debug/x86_64/powernow-k8-decode.c [deleted symlink]
tools/power/cpupower/man/cpupower-frequency-info.1
tools/power/cpupower/man/cpupower-frequency-set.1
tools/power/cpupower/man/cpupower.1
tools/power/cpupower/utils/builtin.h
tools/power/cpupower/utils/cpufreq-info.c
tools/power/cpupower/utils/cpufreq-set.c
tools/power/cpupower/utils/cpuidle-info.c
tools/power/cpupower/utils/cpupower-info.c
tools/power/cpupower/utils/cpupower-set.c
tools/power/cpupower/utils/cpupower.c
tools/power/cpupower/utils/helpers/helpers.h
tools/power/cpupower/utils/helpers/sysfs.c
tools/power/cpupower/utils/helpers/sysfs.h
tools/power/cpupower/utils/helpers/topology.c
tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
tools/power/cpupower/utils/idle_monitor/mperf_monitor.c

index 4edd78d..bbce121 100644 (file)
@@ -1,13 +1,21 @@
 00-INDEX
        - this file
+3c359.txt
+       - information on the 3Com TokenLink Velocity XL (3c5359) driver.
 3c505.txt
        - information on the 3Com EtherLink Plus (3c505) driver.
+3c509.txt
+       - information on the 3Com Etherlink III Series Ethernet cards.
 6pack.txt
        - info on the 6pack protocol, an alternative to KISS for AX.25
 DLINK.txt
        - info on the D-Link DE-600/DE-620 parallel port pocket adapters
 PLIP.txt
        - PLIP: The Parallel Line Internet Protocol device driver
+README.ipw2100
+       - README for the Intel PRO/Wireless 2100 driver.
+README.ipw2200
+       - README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
 README.sb1000
        - info on General Instrument/NextLevel SURFboard1000 cable modem.
 alias.txt
@@ -20,8 +28,12 @@ atm.txt
        - info on where to get ATM programs and support for Linux.
 ax25.txt
        - info on using AX.25 and NET/ROM code for Linux
+batman-adv.txt
+       - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
 baycom.txt
        - info on the driver for Baycom style amateur radio modems
+bonding.txt
+       - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
 bridge.txt
        - where to get user space programs for ethernet bridging with Linux.
 can.txt
@@ -34,32 +46,60 @@ cxacru.txt
        - Conexant AccessRunner USB ADSL Modem
 cxacru-cf.py
        - Conexant AccessRunner USB ADSL Modem configuration file parser
+cxgb.txt
+       - Release Notes for the Chelsio N210 Linux device driver.
+dccp.txt
+       - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42).
 de4x5.txt
        - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver
 decnet.txt
        - info on using the DECnet networking layer in Linux.
 depca.txt
        - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
+dl2k.txt
+       - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko).
+dm9000.txt
+       - README for the Simtec DM9000 Network driver.
 dmfe.txt
        - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
+dns_resolver.txt
+       - The DNS resolver module allows kernel servies to make DNS queries.
+driver.txt
+       - Softnet driver issues.
 e100.txt
        - info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
        - info on Intel's E1000 line of gigabit ethernet boards
+e1000e.txt
+       - README for the Intel Gigabit Ethernet Driver (e1000e).
 eql.txt
        - serial IP load balancing
 ewrk3.txt
        - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
+fib_trie.txt
+       - Level Compressed Trie (LC-trie) notes: a structure for routing.
 filter.txt
        - Linux Socket Filtering
 fore200e.txt
        - FORE Systems PCA-200E/SBA-200E ATM NIC driver info.
 framerelay.txt
        - info on using Frame Relay/Data Link Connection Identifier (DLCI).
+gen_stats.txt
+       - Generic networking statistics for netlink users.
+generic_hdlc.txt
+       - The generic High Level Data Link Control (HDLC) layer.
 generic_netlink.txt
        - info on Generic Netlink
+gianfar.txt
+       - Gianfar Ethernet Driver.
 ieee802154.txt
        - Linux IEEE 802.15.4 implementation, API and drivers
+ifenslave.c
+       - Configure network interfaces for parallel routing (bonding).
+igb.txt
+       - README for the Intel Gigabit Ethernet Driver (igb).
+igbvf.txt
+       - README for the Intel Gigabit Ethernet Driver (igbvf).
 ip-sysctl.txt
        - /proc/sys/net/ipv4/* variables
 ip_dynaddr.txt
@@ -68,41 +108,117 @@ ipddp.txt
        - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
 iphase.txt
        - Interphase PCI ATM (i)Chip IA Linux driver info.
+ipv6.txt
+       - Options to the ipv6 kernel module.
+ipvs-sysctl.txt
+       - Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
 irda.txt
        - where to get IrDA (infrared) utilities and info for Linux.
+ixgb.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgb).
+ixgbe.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
+ixgbevf.txt
+       - README for the Intel Virtual Function (VF) Driver (ixgbevf).
+l2tp.txt
+       - User guide to the L2TP tunnel protocol.
 lapb-module.txt
        - programming information of the LAPB module.
 ltpc.txt
        - the Apple or Farallon LocalTalk PC card driver
+mac80211-injection.txt
+       - HOWTO use packet injection with mac80211
 multicast.txt
        - Behaviour of cards under Multicast
+multiqueue.txt
+       - HOWTO for multiqueue network device support.
+netconsole.txt
+       - The network console module netconsole.ko: configuration and notes.
+netdev-features.txt
+       - Network interface features API description.
 netdevices.txt
        - info on network device driver functions exported to the kernel.
+netif-msg.txt
+       - Design of the network interface message level setting (NETIF_MSG_*).
+nfc.txt
+       - The Linux Near Field Communication (NFS) subsystem.
 olympic.txt
        - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+operstates.txt
+       - Overview of network interface operational states.
+packet_mmap.txt
+       - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING).
+phonet.txt
+       - The Phonet packet protocol used in Nokia cellular modems.
+phy.txt
+       - The PHY abstraction layer.
+pktgen.txt
+       - User guide to the kernel packet generator (pktgen.ko).
 policy-routing.txt
        - IP policy-based routing
+ppp_generic.txt
+       - Information about the generic PPP driver.
+proc_net_tcp.txt
+       - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces.
+radiotap-headers.txt
+       - Background on radiotap headers.
 ray_cs.txt
        - Raylink Wireless LAN card driver info.
+rds.txt
+       - Background on the reliable, ordered datagram delivery method RDS.
+regulatory.txt
+       - Overview of the Linux wireless regulatory infrastructure.
+rxrpc.txt
+       - Guide to the RxRPC protocol.
+s2io.txt
+       - Release notes for Neterion Xframe I/II 10GbE driver.
+scaling.txt
+       - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS.
+sctp.txt
+       - Notes on the Linux kernel implementation of the SCTP protocol.
+secid.txt
+       - Explanation of the secid member in flow structures.
 skfp.txt
        - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
 smc9.txt
        - the driver for SMC's 9000 series of Ethernet cards
 smctr.txt
        - SMC TokenCard TokenRing Linux driver info.
+spider-net.txt
+       - README for the Spidernet Driver (as found in PS3 / Cell BE).
+stmmac.txt
+       - README for the STMicro Synopsys Ethernet driver.
+tc-actions-env-rules.txt
+       - rules for traffic control (tc) actions.
+timestamping.txt
+       - overview of network packet timestamping variants.
 tcp.txt
        - short blurb on how TCP output takes place.
+tcp-thin.txt
+       - kernel tuning options for low rate 'thin' TCP streams.
 tlan.txt
        - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info.
 tms380tr.txt
        - SysKonnect Token Ring ISA/PCI adapter driver info.
+tproxy.txt
+       - Transparent proxy support user guide.
 tuntap.txt
        - TUN/TAP device driver, allowing user space Rx/Tx of packets.
+udplite.txt
+       - UDP-Lite protocol (RFC 3828) introduction.
 vortex.txt
        - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
+vxge.txt
+       - README for the Neterion X3100 PCIe Server Adapter.
 x25.txt
        - general info on X.25 development.
 x25-iface.txt
        - description of the X.25 Packet Layer to LAPB device interface.
+xfrm_proc.txt
+       - description of the statistics package for XFRM.
+xfrm_sync.txt
+       - sync patches for XFRM enable migration of an SA between hosts.
+xfrm_sysctl.txt
+       - description of the XFRM configuration options.
 z8530drv.txt
        - info about Linux driver for Z8530 based HDLC cards for AX.25
index db2a406..8154699 100644 (file)
@@ -992,7 +992,7 @@ bindv6only - BOOLEAN
                TRUE: disable IPv4-mapped address feature
                FALSE: enable IPv4-mapped address feature
 
-       Default: FALSE (as specified in RFC2553bis)
+       Default: FALSE (as specified in RFC3493)
 
 IPv6 Fragmentation:
 
index 7254b4b..58fd741 100644 (file)
@@ -52,7 +52,8 @@ module parameter for specifying the number of hardware queues to
 configure. In the bnx2x driver, for instance, this parameter is called
 num_queues. A typical RSS configuration would be to have one receive queue
 for each CPU if the device supports enough queues, or otherwise at least
-one for each cache domain at a particular cache level (L1, L2, etc.).
+one for each memory domain, where a memory domain is a set of CPUs that
+share a particular memory level (L1, L2, NUMA node, etc.).
 
 The indirection table of an RSS device, which resolves a queue by masked
 hash, is usually programmed by the driver at initialization. The
@@ -82,11 +83,17 @@ RSS should be enabled when latency is a concern or whenever receive
 interrupt processing forms a bottleneck. Spreading load between CPUs
 decreases queue length. For low latency networking, the optimal setting
 is to allocate as many queues as there are CPUs in the system (or the
-NIC maximum, if lower). Because the aggregate number of interrupts grows
-with each additional queue, the most efficient high-rate configuration
+NIC maximum, if lower). The most efficient high-rate configuration
 is likely the one with the smallest number of receive queues where no
-CPU that processes receive interrupts reaches 100% utilization. Per-cpu
-load can be observed using the mpstat utility.
+receive queue overflows due to a saturated CPU, because in default
+mode with interrupt coalescing enabled, the aggregate number of
+interrupts (and thus work) grows with each additional queue.
+
+Per-cpu load can be observed using the mpstat utility, but note that on
+processors with hyperthreading (HT), each hyperthread is represented as
+a separate CPU. For interrupt handling, HT has shown no benefit in
+initial tests, so limit the number of queues to the number of CPU cores
+in the system.
 
 
 RPS: Receive Packet Steering
@@ -145,7 +152,7 @@ the bitmap.
 == Suggested Configuration
 
 For a single queue device, a typical RPS configuration would be to set
-the rps_cpus to the CPUs in the same cache domain of the interrupting
+the rps_cpus to the CPUs in the same memory domain of the interrupting
 CPU. If NUMA locality is not an issue, this could also be all CPUs in
 the system. At high interrupt rate, it might be wise to exclude the
 interrupting CPU from the map since that already performs much work.
@@ -154,7 +161,7 @@ For a multi-queue system, if RSS is configured so that a hardware
 receive queue is mapped to each CPU, then RPS is probably redundant
 and unnecessary. If there are fewer hardware queues than CPUs, then
 RPS might be beneficial if the rps_cpus for each queue are the ones that
-share the same cache domain as the interrupting CPU for that queue.
+share the same memory domain as the interrupting CPU for that queue.
 
 
 RFS: Receive Flow Steering
@@ -326,7 +333,7 @@ The queue chosen for transmitting a particular flow is saved in the
 corresponding socket structure for the flow (e.g. a TCP connection).
 This transmit queue is used for subsequent packets sent on the flow to
 prevent out of order (ooo) packets. The choice also amortizes the cost
-of calling get_xps_queues() over all packets in the connection. To avoid
+of calling get_xps_queues() over all packets in the flow. To avoid
 ooo packets, the queue for a flow can subsequently only be changed if
 skb->ooo_okay is set for a packet in the flow. This flag indicates that
 there are no outstanding packets in the flow, so the transmit queue can
index 069ee3b..1a8cc60 100644 (file)
@@ -1883,7 +1883,7 @@ S:        Maintained
 F:     drivers/connector/
 
 CONTROL GROUPS (CGROUPS)
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
 M:     Li Zefan <lizf@cn.fujitsu.com>
 L:     containers@lists.linux-foundation.org
 S:     Maintained
@@ -1932,7 +1932,7 @@ S:        Maintained
 F:     tools/power/cpupower
 
 CPUSETS
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
 W:     http://www.bullopensource.org/cpuset/
 W:     http://oss.sgi.com/projects/cpusets/
 S:     Supported
@@ -5532,6 +5532,7 @@ F:        include/media/*7146*
 
 SAMSUNG AUDIO (ASoC) DRIVERS
 M:     Jassi Brar <jassisinghbrar@gmail.com>
+M:     Sangbeom Kim <sbkim73@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/samsung
@@ -7087,7 +7088,7 @@ S:        Supported
 F:     drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
-M:     Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+M:     Evgeniy Polyakov <zbr@ioremap.net>
 S:     Maintained
 F:     Documentation/w1/
 F:     drivers/w1/
index 086aba2..e77d77c 100644 (file)
 #define UAC_NOFIX                      2
 #define UAC_SIGBUS                     4
 
-
-#ifdef __KERNEL__
-
-/* This is the shift that is applied to the UAC bits as stored in the
-   per-thread flags.  See thread_info.h.  */
-#define UAC_SHIFT                      6
-
-#endif
-
 #endif /* __ASM_ALPHA_SYSINFO_H */
index 6f32f9c..ff73db0 100644 (file)
@@ -74,9 +74,9 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_NEED_RESCHED       3       /* rescheduling necessary */
 #define TIF_POLLING_NRFLAG     8       /* poll_idle is polling NEED_RESCHED */
 #define TIF_DIE_IF_KERNEL      9       /* dik recursion lock */
-#define TIF_UAC_NOPRINT                10      /* see sysinfo.h */
-#define TIF_UAC_NOFIX          11
-#define TIF_UAC_SIGBUS         12
+#define TIF_UAC_NOPRINT                10      /* ! Preserve sequence of following */
+#define TIF_UAC_NOFIX          11      /* ! flags as they match            */
+#define TIF_UAC_SIGBUS         12      /* ! userspace part of 'osf_sysinfo' */
 #define TIF_MEMDIE             13      /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK    14      /* restore signal mask in do_signal */
 #define TIF_FREEZE             16      /* is freezing for suspend */
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK         \
                                 | _TIF_SYSCALL_TRACE)
 
-#define ALPHA_UAC_SHIFT                10
+#define ALPHA_UAC_SHIFT                TIF_UAC_NOPRINT
 #define ALPHA_UAC_MASK         (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
                                 1 << TIF_UAC_SIGBUS)
 
index 326f0a2..01e8715 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/sysinfo.h>
+#include <asm/thread_info.h>
 #include <asm/hwrpb.h>
 #include <asm/processor.h>
 
@@ -633,9 +634,10 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
        case GSI_UACPROC:
                if (nbytes < sizeof(unsigned int))
                        return -EINVAL;
-               w = (current_thread_info()->flags >> UAC_SHIFT) & UAC_BITMASK;
-               if (put_user(w, (unsigned int __user *)buffer))
-                       return -EFAULT;
+               w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) &
+                       UAC_BITMASK;
+               if (put_user(w, (unsigned int __user *)buffer))
+                       return -EFAULT;
                return 1;
 
        case GSI_PROC_TYPE:
@@ -756,8 +758,8 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                        case SSIN_UACPROC:
                        again:
                                old = current_thread_info()->flags;
-                               new = old & ~(UAC_BITMASK << UAC_SHIFT);
-                               new = new | (w & UAC_BITMASK) << UAC_SHIFT;
+                               new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT);
+                               new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT;
                                if (cmpxchg(&current_thread_info()->flags,
                                            old, new) != old)
                                        goto again;
diff --git a/arch/cris/include/asm/serial.h b/arch/cris/include/asm/serial.h
new file mode 100644 (file)
index 0000000..af7535a
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _ASM_SERIAL_H
+#define _ASM_SERIAL_H
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ */
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* _ASM_SERIAL_H */
index 31d5570..89f2014 100644 (file)
@@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
        pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);          \
 })
 #define page_to_pfn(_page) ({                                          \
-       struct page *__p = (_page);                                     \
+       const struct page *__p = (_page);                               \
        struct pglist_data *pgdat;                                      \
        pgdat = &pg_data_map[page_to_nid(__p)];                         \
        ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;          \
index 2de8551..c65f75a 100644 (file)
@@ -54,6 +54,7 @@
 #define ODSR_CLEAR             0x1c00
 #define LTLEECSR_ENABLE_ALL    0xFFC000FC
 #define ESCSR_CLEAR            0x07120204
+#define IECSR_CLEAR            0x80000000
 
 #define RIO_PORT1_EDCSR                0x0640
 #define RIO_PORT2_EDCSR                0x0680
@@ -1089,11 +1090,11 @@ static void port_error_handler(struct rio_mport *port, int offset)
 
        if (offset == 0) {
                out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
                out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
        } else {
                out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
                out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
        }
 }
index 068f846..f297456 100644 (file)
@@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void)
 static __init void rescue_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
+       unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
        /*
-        * Move the initrd right behind the bss section in case it starts
-        * within the bss section. So we don't overwrite it when the bss
-        * section gets cleared.
+        * Just like in case of IPL from VM reader we make sure there is a
+        * gap of 4MB between end of kernel and start of initrd.
+        * That way we can also be sure that saving an NSS will succeed,
+        * which however only requires different segments.
         */
        if (!INITRD_START || !INITRD_SIZE)
                return;
-       if (INITRD_START >= (unsigned long) __bss_stop)
+       if (INITRD_START >= min_initrd_addr)
                return;
-       memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
-       INITRD_START = (unsigned long) __bss_stop;
+       memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+       INITRD_START = min_initrd_addr;
 #endif
 }
 
index 04361d5..48c7102 100644 (file)
@@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void)
        /* sysfs: create fcp kset for mixing attr group and bin attrs */
        reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
                                             &reipl_kset->kobj);
-       if (!reipl_kset) {
+       if (!reipl_fcp_kset) {
                free_page((unsigned long) reipl_block_fcp);
                return -ENOMEM;
        }
@@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
 
 static void stop_run(struct shutdown_trigger *trigger)
 {
-       if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+       if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+           strcmp(trigger->name, ON_RESTART_STR) == 0)
                disabled_wait((unsigned long) __builtin_return_address(0));
        while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
                cpu_relax();
@@ -1717,7 +1718,7 @@ static void do_panic(void)
 /* on restart */
 
 static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-       &reipl_action};
+       &stop_action};
 
 static ssize_t on_restart_show(struct kobject *kobj,
                               struct kobj_attribute *attr, char *page)
index a1607d1..69914d7 100644 (file)
@@ -45,6 +45,19 @@ typedef struct {
        int                     si_mask;
 } __siginfo32_t;
 
+#define __SIGC_MAXWIN  7
+
+typedef struct {
+       unsigned long locals[8];
+       unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+       int                     wsaved;
+       __siginfo_reg_window    reg_window[__SIGC_MAXWIN];
+       unsigned long           rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
 #ifdef CONFIG_SPARC64
 typedef struct {
        unsigned   int si_float_regs [64];
@@ -73,6 +86,7 @@ struct sigcontext {
                unsigned long   ss_size;
        }                       sigc_stack;
        unsigned long           sigc_mask;
+       __siginfo_rwin_t *      sigc_rwin_save;
 };
 
 #else
index b90b4a1..cb85458 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4c_irq.o sun4d_irq.o
 
 obj-y                   += process_$(BITS).o
 obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
 obj-$(CONFIG_SPARC32)   += ioport.o
 obj-y                   += setup_$(BITS).o
 obj-y                   += idprom.o
index 75fad42..1ba95af 100644 (file)
@@ -29,6 +29,8 @@
 #include <asm/visasm.h>
 #include <asm/compat_signal.h>
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 /* This magic should be in g_upper[0] for all upper parts
@@ -44,14 +46,14 @@ typedef struct {
 struct signal_frame32 {
        struct sparc_stackf32   ss;
        __siginfo32_t           info;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
        unsigned int            insns[2];
        unsigned int            extramask[_COMPAT_NSIG_WORDS - 1];
        unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
        /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
        siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 typedef struct compat_siginfo{
        int si_signo;
@@ -110,18 +112,14 @@ struct rt_signal_frame32 {
        compat_siginfo_t        info;
        struct pt_regs32        regs;
        compat_sigset_t         mask;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
        unsigned int            insns[2];
        stack_t32               stack;
        unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
        /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
        siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
-
-/* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
@@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
        return 0;
 }
 
-static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-       
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
 void do_sigreturn32(struct pt_regs *regs)
 {
        struct signal_frame32 __user *sf;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
        unsigned int psr;
-       unsigned pc, npc, fpu_save;
+       unsigned pc, npc;
        sigset_t set;
        unsigned seta[_COMPAT_NSIG_WORDS];
        int err, i;
@@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
        err |= __get_user(seta[0], &sf->info.si_mask);
        err |= copy_from_user(seta+1, &sf->extramask,
                              (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
@@ -300,7 +286,9 @@ segv:
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
        struct rt_signal_frame32 __user *sf;
-       unsigned int psr, pc, npc, fpu_save, u_ss_sp;
+       unsigned int psr, pc, npc, u_ss_sp;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
        mm_segment_t old_fs;
        sigset_t set;
        compat_sigset_t seta;
@@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
        err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t));
        err |= __get_user(u_ss_sp, &sf->stack.ss_sp);
        st.ss_sp = compat_ptr(u_ss_sp);
@@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
        do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf);
        set_fs(old_fs);
        
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
+
        switch (_NSIG_WORDS) {
                case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32);
                case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32);
@@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
        return (void __user *) sp;
 }
 
-static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
 /* The I-cache flush instruction only works in the primary ASI, which
  * right now is the nucleus, aka. kernel space.
  *
@@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                         int signo, sigset_t *oldset)
 {
        struct signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
        int sigframe_size;
        u32 psr;
-       int i, err;
        unsigned int seta[_COMPAT_NSIG_WORDS];
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct signal_frame32 __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
 
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
 
        /* 2. Save the current process state */
        if (test_thread_flag(TIF_32BIT)) {
@@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                          &sf->v8plus.asi);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        switch (_NSIG_WORDS) {
        case 4: seta[7] = (oldset->sig[3] >> 32);
@@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __copy_to_user(sf->extramask, seta + 1,
                              (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
 
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
-       
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }       
        if (err)
                goto sigsegv;
 
@@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
                if (err)
                        goto sigsegv;
-
                flush_signal_insns(address);
        }
        return 0;
@@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                            siginfo_t *info)
 {
        struct rt_signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
        int sigframe_size;
        u32 psr;
-       int i, err;
        compat_sigset_t seta;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct rt_signal_frame32 __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
 
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
 
        /* 2. Save the current process state */
        if (test_thread_flag(TIF_32BIT)) {
@@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                          &sf->v8plus.asi);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        /* Update the siginfo structure.  */
        err |= copy_siginfo_to_user32(&sf->info, info);
@@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        }
        err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t));
 
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }
        if (err)
                goto sigsegv;
        
index 5e5c5fd..04ede8f 100644 (file)
@@ -26,6 +26,8 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>    /* flush_sig_insns */
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
@@ -39,8 +41,8 @@ struct signal_frame {
        unsigned long           insns[2] __attribute__ ((aligned (8)));
        unsigned int            extramask[_NSIG_WORDS - 1];
        unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
 
 struct rt_signal_frame {
        struct sparc_stackf     ss;
@@ -51,8 +53,8 @@ struct rt_signal_frame {
        unsigned int            insns[2];
        stack_t                 stack;
        unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
 
 /* Align macros */
 #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
@@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set)
        return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU))
-               regs->psr &= ~PSR_EF;
-#else
-       if (current == last_task_used_math) {
-               last_task_used_math = NULL;
-               regs->psr &= ~PSR_EF;
-       }
-#endif
-       set_used_math();
-       clear_tsk_thread_flag(current, TIF_USEDFPU);
-
-       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
-               return -EFAULT;
-
-       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
-                              (sizeof(unsigned long) * 32));
-       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_from_user(&current->thread.fpqueue[0],
-                                       &fpu->si_fpqueue[0],
-                                       ((sizeof(unsigned long) +
-                                       (sizeof(unsigned long *)))*16));
-       return err;
-}
-
 asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
        struct signal_frame __user *sf;
        unsigned long up_psr, pc, npc;
        sigset_t set;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        int err;
 
        /* Always make any pending restarted system calls return -EINTR */
@@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-
        if (fpu_save)
                err |= restore_fpu_state(regs, fpu_save);
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (rwin_save)
+               err |= restore_rwin_state(rwin_save);
 
        /* This is pretty much atomic, no amount locking would prevent
         * the races which exist anyways.
@@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        struct rt_signal_frame __user *sf;
        unsigned int psr, pc, npc;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        mm_segment_t old_fs;
        sigset_t set;
        stack_t st;
@@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-
-       if (fpu_save)
+       if (!err && fpu_save)
                err |= restore_fpu_state(regs, fpu_save);
        err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
        
@@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf);
        set_fs(old_fs);
 
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
        sigdelsetmask(&set, ~_BLOCKABLE);
        spin_lock_irq(&current->sighand->siglock);
        current->blocked = set;
@@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
        return (void __user *) sp;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err = 0;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               regs->psr &= ~(PSR_EF);
-               clear_tsk_thread_flag(current, TIF_USEDFPU);
-       }
-#else
-       if (current == last_task_used_math) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               last_task_used_math = NULL;
-               regs->psr &= ~(PSR_EF);
-       }
-#endif
-       err |= __copy_to_user(&fpu->si_float_regs[0],
-                             &current->thread.float_regs[0],
-                             (sizeof(unsigned long) * 32));
-       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_to_user(&fpu->si_fpqueue[0],
-                                     &current->thread.fpqueue[0],
-                                     ((sizeof(unsigned long) +
-                                     (sizeof(unsigned long *)))*16));
-       clear_used_math();
-       return err;
-}
-
 static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
                       int signo, sigset_t *oldset)
 {
        struct signal_frame __user *sf;
-       int sigframe_size, err;
+       int sigframe_size, err, wsaved;
+       void __user *tail;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
 
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct signal_frame __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill_and_return;
 
-       if (current_thread_info()->w_saved != 0)
-               goto sigill_and_return;
+       tail = sf + 1;
 
        /* 2. Save the current process state */
        err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
@@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(0, &sf->extra_size);
 
        if (used_math()) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        err |= __put_user(oldset->sig[0], &sf->info.si_mask);
        err |= __copy_to_user(sf->extramask, &oldset->sig[1],
                              (_NSIG_WORDS - 1) * sizeof(unsigned int));
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
        if (err)
                goto sigsegv;
        
@@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
                          int signo, sigset_t *oldset, siginfo_t *info)
 {
        struct rt_signal_frame __user *sf;
-       int sigframe_size;
+       int sigframe_size, wsaved;
+       void __user *tail;
        unsigned int psr;
        int err;
 
        synchronize_user_stack();
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
        sf = (struct rt_signal_frame __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
-       if (current_thread_info()->w_saved != 0)
-               goto sigill;
 
+       tail = sf + 1;
        err  = __put_user(regs->pc, &sf->regs.pc);
        err |= __put_user(regs->npc, &sf->regs.npc);
        err |= __put_user(regs->y, &sf->regs.y);
@@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(0, &sf->extra_size);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
        err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
        
        /* Setup sigaltstack */
@@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
        err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
        
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
 
        err |= copy_siginfo_to_user(&sf->info, info);
 
index 006fe45..47509df 100644 (file)
@@ -34,6 +34,7 @@
 
 #include "entry.h"
 #include "systbls.h"
+#include "sigutil.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -236,7 +237,7 @@ struct rt_signal_frame {
        __siginfo_fpu_t __user  *fpu_save;
        stack_t                 stack;
        sigset_t                mask;
-       __siginfo_fpu_t         fpu_state;
+       __siginfo_rwin_t        *rwin_save;
 };
 
 static long _sigpause_common(old_sigset_t set)
@@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set)
        return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
-                              (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
-                              (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
 void do_rt_sigreturn(struct pt_regs *regs)
 {
        struct rt_signal_frame __user *sf;
        unsigned long tpc, tnpc, tstate;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        sigset_t set;
        int err;
 
@@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs)
        regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, fpu_save);
 
        err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
        err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
@@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs)
        if (err)
                goto segv;
 
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
        regs->tpc = tpc;
        regs->tnpc = tnpc;
 
@@ -351,34 +337,13 @@ segv:
 }
 
 /* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
+static int invalid_frame_pointer(void __user *fp)
 {
        if (((unsigned long) fp) & 15)
                return 1;
        return 0;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
 static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
 {
        unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
@@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
               int signo, sigset_t *oldset, siginfo_t *info)
 {
        struct rt_signal_frame __user *sf;
-       int sigframe_size, err;
+       int wsaved, err, sf_size;
+       void __user *tail;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = sizeof(struct rt_signal_frame);
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
 
+       sf_size = sizeof(struct rt_signal_frame);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sf_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sf_size += sizeof(__siginfo_rwin_t);
        sf = (struct rt_signal_frame __user *)
-               get_sigframe(ka, regs, sigframe_size);
-       
-       if (invalid_frame_pointer (sf, sigframe_size))
-               goto sigill;
+               get_sigframe(ka, regs, sf_size);
 
-       if (get_thread_wsaved() != 0)
+       if (invalid_frame_pointer (sf))
                goto sigill;
 
+       tail = (sf + 1);
+
        /* 2. Save the current process state */
        err = copy_to_user(&sf->regs, regs, sizeof (*regs));
 
        if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fpu_save = tail;
+               tail += sizeof(__siginfo_fpu_t);
+               err |= save_fpu_state(regs, fpu_save);
+               err |= __put_user((u64)fpu_save, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwin_save = tail;
+               tail += sizeof(__siginfo_rwin_t);
+               err |= save_rwin_state(wsaved, rwin_save);
+               err |= __put_user((u64)rwin_save, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
        
        /* Setup sigaltstack */
        err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
@@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 
        err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
 
-       err |= copy_in_user((u64 __user *)sf,
-                           (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
-                           sizeof(struct reg_window));
+       if (!wsaved) {
+               err |= copy_in_user((u64 __user *)sf,
+                                   (u64 __user *)(regs->u_regs[UREG_FP] +
+                                                  STACK_BIAS),
+                                   sizeof(struct reg_window));
+       } else {
+               struct reg_window *rp;
 
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+       }
        if (info)
                err |= copy_siginfo_to_user(&sf->info, info);
        else {
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h
new file mode 100644 (file)
index 0000000..d223aa4
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
new file mode 100644 (file)
index 0000000..35c7897
--- /dev/null
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err = 0;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               regs->psr &= ~(PSR_EF);
+               clear_tsk_thread_flag(current, TIF_USEDFPU);
+       }
+#else
+       if (current == last_task_used_math) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               last_task_used_math = NULL;
+               regs->psr &= ~(PSR_EF);
+       }
+#endif
+       err |= __copy_to_user(&fpu->si_float_regs[0],
+                             &current->thread.float_regs[0],
+                             (sizeof(unsigned long) * 32));
+       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_to_user(&fpu->si_fpqueue[0],
+                                     &current->thread.fpqueue[0],
+                                     ((sizeof(unsigned long) +
+                                     (sizeof(unsigned long *)))*16));
+       clear_used_math();
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU))
+               regs->psr &= ~PSR_EF;
+#else
+       if (current == last_task_used_math) {
+               last_task_used_math = NULL;
+               regs->psr &= ~PSR_EF;
+       }
+#endif
+       set_used_math();
+       clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+               return -EFAULT;
+
+       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+                              (sizeof(unsigned long) * 32));
+       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_from_user(&current->thread.fpqueue[0],
+                                       &fpu->si_fpqueue[0],
+                                       ((sizeof(unsigned long) +
+                                       (sizeof(unsigned long *)))*16));
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window32 *rp;
+               unsigned long fp;
+
+               rp = &current_thread_info()->reg_window[i];
+               fp = current_thread_info()->rwbuf_stkptrs[i];
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window32));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window32));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       t->w_saved = wsaved;
+       synchronize_user_stack();
+       if (t->w_saved)
+               return -EFAULT;
+       return 0;
+
+}
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
new file mode 100644 (file)
index 0000000..e7dc508
--- /dev/null
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err = 0;
+       
+       fprs = current_thread_info()->fpsaved[0];
+       if (fprs & FPRS_DL)
+               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+                                   (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+                                   (sizeof(unsigned int) * 32));
+       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       err |= __put_user(fprs, &fpu->si_fprs);
+
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err;
+
+       err = __get_user(fprs, &fpu->si_fprs);
+       fprs_write(0);
+       regs->tstate &= ~TSTATE_PEF;
+       if (fprs & FPRS_DL)
+               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+                              (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+                              (sizeof(unsigned int) * 32));
+       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       current_thread_info()->fpsaved[0] |= fprs;
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window *rp = &current_thread_info()->reg_window[i];
+               unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       set_thread_wsaved(wsaved);
+       synchronize_user_stack();
+       if (get_thread_wsaved())
+               return -EFAULT;
+       return 0;
+}
index adc66c3..34b1859 100644 (file)
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
            ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
            APIC_DM_INIT;
        uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-       mdelay(10);
 
        val = (1UL << UVH_IPI_INT_SEND_SHFT) |
            (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
index 08119a3..6b96110 100644 (file)
@@ -149,7 +149,6 @@ struct set_mtrr_data {
  */
 static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
        struct set_mtrr_data *data = info;
 
        /*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
        } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
                mtrr_if->set_all();
        }
-#endif
        return 0;
 }
 
index 5c1a919..f3f6f53 100644 (file)
@@ -54,6 +54,7 @@
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
 661:   pushl_cfi $do_general_protection
 662:
 .section .altinstructions,"a"
-       .balign 4
-       .long 661b
-       .long 663f
-       .word X86_FEATURE_XMM
-       .byte 662b-661b
-       .byte 664f-663f
+       altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 .previous
 .section .altinstr_replacement,"ax"
 663:   pushl $do_simd_coprocessor_error
index 7000e74..58425ad 100644 (file)
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
                        irq_attr.trigger = 1;
                        irq_attr.polarity = 1;
                        io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
-               }
+               } else
+                       pentry->irq = 0; /* No irq */
+
                switch (pentry->type) {
                case SFI_DEV_TYPE_IPC:
                        /* ID as IRQ is a hack that will go away */
index 8b9940e..7cce722 100644 (file)
@@ -161,13 +161,13 @@ restart:
        if (inbuf && inlen) {
                /* write data to EC */
                for (i = 0; i < inlen; i++) {
+                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
+                       outb(inbuf[i], 0x68);
                        if (wait_on_ibf(0x6c, 0)) {
                                printk(KERN_ERR "olpc-ec:  timeout waiting for"
                                                " EC accept data!\n");
                                goto err;
                        }
-                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
-                       outb(inbuf[i], 0x68);
                }
        }
        if (outbuf && outlen) {
index e2800af..e354bce 100644 (file)
@@ -43,7 +43,7 @@ __kernel_vsyscall:
        .space 7,0x90
 
        /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-       jmp .Lenter_kernel
+       int $0x80
        /* 16: System call normal return point is here! */
 VDSO32_SYSENTER_RETURN:        /* Symbol used by sysenter.c via vdso32-syms.h */
        pop %ebp
index bbb03e6..06ed6b4 100644 (file)
@@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p,
        if (!firmware_p)
                return -EINVAL;
 
-       if (WARN_ON(usermodehelper_is_disabled())) {
-               dev_err(device, "firmware: %s will not be loaded\n", name);
-               return -EBUSY;
-       }
-
        *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
        if (!firmware) {
                dev_err(device, "%s: kmalloc(struct firmware) failed\n",
@@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p,
                return 0;
        }
 
+       if (WARN_ON(usermodehelper_is_disabled())) {
+               dev_err(device, "firmware: %s will not be loaded\n", name);
+               retval = -EBUSY;
+               goto out;
+       }
+
        if (uevent)
                dev_dbg(device, "firmware: requesting %s\n", name);
 
index b6f8a65..8eca55d 100644 (file)
@@ -379,9 +379,8 @@ static int __init smd_pkt_init(void)
        for (i = 0; i < NUM_SMD_PKT_PORTS; ++i) {
                smd_pkt_devp[i] = kzalloc(sizeof(struct smd_pkt_dev),
                                          GFP_KERNEL);
-               if (IS_ERR(smd_pkt_devp[i])) {
-                       r = PTR_ERR(smd_pkt_devp[i]);
-                       pr_err("kmalloc() failed %d\n", r);
+               if (!smd_pkt_devp[i]) {
+                       pr_err("kmalloc() failed\n");
                        goto clean_cdevs;
                }
 
index 7f65940..4f0c1ec 100644 (file)
@@ -466,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev,
                    (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
                        return true;
        }
+       /* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100
+        * (RS690M) sends data to i2c bus for a HDMI connector that
+        * is not implemented */
+       if ((dev->pdev->device == 0x791f) &&
+           (dev->pdev->subsystem_vendor == 0x1179) &&
+           (dev->pdev->subsystem_device == 0xff68)) {
+               if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+                   (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
+                       return true;
+       }
 
        /* Default: no EDID header probe required for DDC probing */
        return false;
index a3b011b..b51e157 100644 (file)
@@ -301,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64
                mc->mc_vram_size = mc->aper_size;
        }
        mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+       if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size)
+               mc->real_vram_size = radeon_vram_limit;
        dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
                        mc->mc_vram_size >> 20, mc->vram_start,
                        mc->vram_end, mc->real_vram_size >> 20);
index dee4a0c..602fa35 100644 (file)
@@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev)
        size = 1024 * 1024;
 
        /* Number of tests =
-        * (Total GTT - IB pool - writeback page - ring buffer) / test size
+        * (Total GTT - IB pool - writeback page - ring buffers) / test size
         */
-       n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE -
-            rdev->cp.ring_size)) / size;
+       n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size;
+       if (rdev->wb.wb_obj)
+               n -= RADEON_GPU_PAGE_SIZE;
+       if (rdev->ih.ring_obj)
+               n -= rdev->ih.ring_size;
+       n /= size;
 
        gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
        if (!gtt_obj) {
@@ -132,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                     gtt_start++, vram_start++) {
                        if (*vram_start != gtt_start) {
                                DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
-                                         "expected 0x%p (GTT map 0x%p-0x%p)\n",
-                                         i, *vram_start, gtt_start, gtt_map,
-                                         gtt_end);
+                                         "expected 0x%p (GTT/VRAM offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *vram_start, gtt_start,
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)gtt_start - gtt_map),
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)gtt_start - gtt_map));
                                radeon_bo_kunmap(vram_obj);
                                goto out_cleanup;
                        }
@@ -175,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                     gtt_start++, vram_start++) {
                        if (*gtt_start != vram_start) {
                                DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
-                                         "expected 0x%p (VRAM map 0x%p-0x%p)\n",
-                                         i, *gtt_start, vram_start, vram_map,
-                                         vram_end);
+                                         "expected 0x%p (VRAM/GTT offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *gtt_start, vram_start,
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)vram_start - vram_map),
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)vram_start - vram_map));
                                radeon_bo_kunmap(gtt_obj[i]);
                                goto out_cleanup;
                        }
index 60125dd..9b86fb0 100644 (file)
@@ -450,6 +450,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
                        return -EINVAL;
                mem->bus.base = rdev->mc.aper_base;
                mem->bus.is_iomem = true;
+#ifdef __alpha__
+               /*
+                * Alpha: use bus.addr to hold the ioremap() return,
+                * so we can modify bus.base below.
+                */
+               if (mem->placement & TTM_PL_FLAG_WC)
+                       mem->bus.addr =
+                               ioremap_wc(mem->bus.base + mem->bus.offset,
+                                          mem->bus.size);
+               else
+                       mem->bus.addr =
+                               ioremap_nocache(mem->bus.base + mem->bus.offset,
+                                               mem->bus.size);
+
+               /*
+                * Alpha: Use just the bus offset plus
+                * the hose/domain memory base for bus.base.
+                * It then can be used to build PTEs for VRAM
+                * access, as done in ttm_bo_vm_fault().
+                */
+               mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
+                       rdev->ddev->hose->dense_mem_base;
+#endif
                break;
        default:
                return -EINVAL;
index 56619f6..a4d38d8 100644 (file)
@@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 
                ret = ttm_tt_set_user(bo->ttm, current,
                                      bo->buffer_start, bo->num_pages);
-               if (unlikely(ret != 0))
+               if (unlikely(ret != 0)) {
                        ttm_tt_destroy(bo->ttm);
+                       bo->ttm = NULL;
+               }
                break;
        default:
                printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
@@ -390,10 +392,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
         * Create and bind a ttm if required.
         */
 
-       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
-               ret = ttm_bo_add_ttm(bo, false);
-               if (ret)
-                       goto out_err;
+       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+               if (bo->ttm == NULL) {
+                       ret = ttm_bo_add_ttm(bo, false);
+                       if (ret)
+                               goto out_err;
+               }
 
                ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
                if (ret)
index 77dbf40..ae3c6f5 100644 (file)
@@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
                if (ret)
                        return ret;
 
-               ttm_bo_free_old_node(bo);
                if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
                    (bo->ttm != NULL)) {
                        ttm_tt_unbind(bo->ttm);
                        ttm_tt_destroy(bo->ttm);
                        bo->ttm = NULL;
                }
+               ttm_bo_free_old_node(bo);
        } else {
                /**
                 * This should help pipeline ordinary buffer moves.
index 306b15f..1130a89 100644 (file)
@@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY
 config HID_WIIMOTE
        tristate "Nintendo Wii Remote support"
        depends on BT_HIDP
+       depends on LEDS_CLASS
        ---help---
        Support for the Nintendo Wii Remote bluetooth device.
 
index b85744f..18b3bc6 100644 (file)
@@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
                        APPLE_RDESC_JIS },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+               .driver_data = APPLE_HAS_FN },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
+               .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS),
+               .driver_data = APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
index 1a5cf0c..242353d 100644 (file)
@@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
index db63ccf..7d27d2b 100644 (file)
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI      0x024f
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO       0x0250
+#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS       0x0251
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI  0x0239
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO   0x023a
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS   0x023b
 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE        0x0001
 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE       0x0600
 
+#define USB_VENDOR_ID_SIGMA_MICRO      0x1c4f
+#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD     0x0002
+
 #define USB_VENDOR_ID_SKYCABLE                 0x1223
 #define        USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER       0x3F07
 
index a594383..85a02e5 100644 (file)
  * any later version.
  */
 
-#include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/input.h>
+#include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include "hid-ids.h"
@@ -33,9 +33,9 @@ struct wiimote_state {
 };
 
 struct wiimote_data {
-       atomic_t ready;
        struct hid_device *hdev;
        struct input_dev *input;
+       struct led_classdev *leds[4];
 
        spinlock_t qlock;
        __u8 head;
@@ -53,8 +53,15 @@ struct wiimote_data {
 #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \
                                        WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4)
 
+/* return flag for led \num */
+#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1))
+
 enum wiiproto_reqs {
+       WIIPROTO_REQ_NULL = 0x0,
        WIIPROTO_REQ_LED = 0x11,
+       WIIPROTO_REQ_DRM = 0x12,
+       WIIPROTO_REQ_STATUS = 0x20,
+       WIIPROTO_REQ_RETURN = 0x22,
        WIIPROTO_REQ_DRM_K = 0x30,
 };
 
@@ -87,9 +94,6 @@ static __u16 wiiproto_keymap[] = {
        BTN_MODE,       /* WIIPROTO_KEY_HOME */
 };
 
-#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \
-                                                                       dev))
-
 static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer,
                                                                size_t count)
 {
@@ -192,66 +196,96 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds)
        wiimote_queue(wdata, cmd, sizeof(cmd));
 }
 
-#define wiifs_led_show_set(num)                                                \
-static ssize_t wiifs_led_show_##num(struct device *dev,                        \
-                       struct device_attribute *attr, char *buf)       \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       unsigned long flags;                                            \
-       int state;                                                      \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-       state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num);        \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return sprintf(buf, "%d\n", state);                             \
-}                                                                      \
-static ssize_t wiifs_led_set_##num(struct device *dev,                 \
-       struct device_attribute *attr, const char *buf, size_t count)   \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       int tmp = simple_strtoul(buf, NULL, 10);                        \
-       unsigned long flags;                                            \
-       __u8 state;                                                     \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-                                                                       \
-       state = wdata->state.flags;                                     \
-                                                                       \
-       if (tmp)                                                        \
-               wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\
-       else                                                            \
-               wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\
-                                                                       \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return count;                                                   \
-}                                                                      \
-static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num,  \
-                                               wiifs_led_set_##num)
-
-wiifs_led_show_set(1);
-wiifs_led_show_set(2);
-wiifs_led_show_set(3);
-wiifs_led_show_set(4);
+/*
+ * Check what peripherals of the wiimote are currently
+ * active and select a proper DRM that supports all of
+ * the requested data inputs.
+ */
+static __u8 select_drm(struct wiimote_data *wdata)
+{
+       return WIIPROTO_REQ_DRM_K;
+}
+
+static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm)
+{
+       __u8 cmd[3];
+
+       if (drm == WIIPROTO_REQ_NULL)
+               drm = select_drm(wdata);
+
+       cmd[0] = WIIPROTO_REQ_DRM;
+       cmd[1] = 0;
+       cmd[2] = drm;
+
+       wiimote_queue(wdata, cmd, sizeof(cmd));
+}
+
+static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       bool value = false;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+
+       return value ? LED_FULL : LED_OFF;
+}
+
+static void wiimote_leds_set(struct led_classdev *led_dev,
+                                               enum led_brightness value)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       __u8 state, flag;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       flag = WIIPROTO_FLAG_LED(i + 1);
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       state = wdata->state.flags;
+                       if (value == LED_OFF)
+                               wiiproto_req_leds(wdata, state & ~flag);
+                       else
+                               wiiproto_req_leds(wdata, state | flag);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+}
 
 static int wiimote_input_event(struct input_dev *dev, unsigned int type,
                                                unsigned int code, int value)
+{
+       return 0;
+}
+
+static int wiimote_input_open(struct input_dev *dev)
 {
        struct wiimote_data *wdata = input_get_drvdata(dev);
 
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
+       return hid_hw_open(wdata->hdev);
+}
 
-       return 0;
+static void wiimote_input_close(struct input_dev *dev)
+{
+       struct wiimote_data *wdata = input_get_drvdata(dev);
+
+       hid_hw_close(wdata->hdev);
 }
 
 static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
@@ -281,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
        input_sync(wdata->input);
 }
 
+static void handler_status(struct wiimote_data *wdata, const __u8 *payload)
+{
+       handler_keys(wdata, payload);
+
+       /* on status reports the drm is reset so we need to resend the drm */
+       wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL);
+}
+
+static void handler_return(struct wiimote_data *wdata, const __u8 *payload)
+{
+       __u8 err = payload[3];
+       __u8 cmd = payload[2];
+
+       handler_keys(wdata, payload);
+
+       if (err)
+               hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err,
+                                                                       cmd);
+}
+
 struct wiiproto_handler {
        __u8 id;
        size_t size;
@@ -288,6 +342,8 @@ struct wiiproto_handler {
 };
 
 static struct wiiproto_handler handlers[] = {
+       { .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status },
+       { .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return },
        { .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys },
        { .id = 0 }
 };
@@ -300,11 +356,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
        int i;
        unsigned long flags;
 
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
-
        if (size < 1)
                return -EINVAL;
 
@@ -321,6 +372,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
        return 0;
 }
 
+static void wiimote_leds_destroy(struct wiimote_data *wdata)
+{
+       int i;
+       struct led_classdev *led;
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i]) {
+                       led = wdata->leds[i];
+                       wdata->leds[i] = NULL;
+                       led_classdev_unregister(led);
+                       kfree(led);
+               }
+       }
+}
+
+static int wiimote_leds_create(struct wiimote_data *wdata)
+{
+       int i, ret;
+       struct device *dev = &wdata->hdev->dev;
+       size_t namesz = strlen(dev_name(dev)) + 9;
+       struct led_classdev *led;
+       char *name;
+
+       for (i = 0; i < 4; ++i) {
+               led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL);
+               if (!led) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               name = (void*)&led[1];
+               snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i);
+               led->name = name;
+               led->brightness = 0;
+               led->max_brightness = 1;
+               led->brightness_get = wiimote_leds_get;
+               led->brightness_set = wiimote_leds_set;
+
+               ret = led_classdev_register(dev, led);
+               if (ret) {
+                       kfree(led);
+                       goto err;
+               }
+               wdata->leds[i] = led;
+       }
+
+       return 0;
+
+err:
+       wiimote_leds_destroy(wdata);
+       return ret;
+}
+
 static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 {
        struct wiimote_data *wdata;
@@ -341,6 +444,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
        input_set_drvdata(wdata->input, wdata);
        wdata->input->event = wiimote_input_event;
+       wdata->input->open = wiimote_input_open;
+       wdata->input->close = wiimote_input_close;
        wdata->input->dev.parent = &wdata->hdev->dev;
        wdata->input->id.bustype = wdata->hdev->bus;
        wdata->input->id.vendor = wdata->hdev->vendor;
@@ -362,6 +467,12 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
 static void wiimote_destroy(struct wiimote_data *wdata)
 {
+       wiimote_leds_destroy(wdata);
+
+       input_unregister_device(wdata->input);
+       cancel_work_sync(&wdata->worker);
+       hid_hw_stop(wdata->hdev);
+
        kfree(wdata);
 }
 
@@ -377,19 +488,6 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                return -ENOMEM;
        }
 
-       ret = device_create_file(&hdev->dev, &dev_attr_led1);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led2);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led3);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led4);
-       if (ret)
-               goto err;
-
        ret = hid_parse(hdev);
        if (ret) {
                hid_err(hdev, "HID parse failed\n");
@@ -408,9 +506,10 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                goto err_stop;
        }
 
-       /* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */
-       smp_wmb();
-       atomic_set(&wdata->ready, 1);
+       ret = wiimote_leds_create(wdata);
+       if (ret)
+               goto err_free;
+
        hid_info(hdev, "New device registered\n");
 
        /* by default set led1 after device initialization */
@@ -420,15 +519,15 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 
        return 0;
 
+err_free:
+       wiimote_destroy(wdata);
+       return ret;
+
 err_stop:
        hid_hw_stop(hdev);
 err:
        input_free_device(wdata->input);
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-       wiimote_destroy(wdata);
+       kfree(wdata);
        return ret;
 }
 
@@ -437,16 +536,6 @@ static void wiimote_hid_remove(struct hid_device *hdev)
        struct wiimote_data *wdata = hid_get_drvdata(hdev);
 
        hid_info(hdev, "Device removed\n");
-
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-
-       hid_hw_stop(hdev);
-       input_unregister_device(wdata->input);
-
-       cancel_work_sync(&wdata->worker);
        wiimote_destroy(wdata);
 }
 
index 621959d..4bdb5d4 100644 (file)
@@ -89,6 +89,7 @@ static const struct hid_blacklist {
 
        { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT },
        { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT },
+       { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS },
        { 0, 0 }
 };
 
index c4c40be..d22f241 100644 (file)
@@ -114,7 +114,6 @@ struct i5k_amb_data {
        void __iomem *amb_mmio;
        struct i5k_device_attribute *attrs;
        unsigned int num_attrs;
-       unsigned long chipset_id;
 };
 
 static ssize_t show_name(struct device *dev, struct device_attribute *devattr,
@@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data,
                goto out;
        }
 
-       data->chipset_id = devid;
-
        res = 0;
 out:
        pci_dev_put(pcidev);
@@ -478,23 +475,13 @@ out:
        return res;
 }
 
-static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data,
-                                       unsigned long channel)
-{
-       switch (data->chipset_id) {
-       case PCI_DEVICE_ID_INTEL_5000_ERR:
-               return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel;
-       case PCI_DEVICE_ID_INTEL_5400_ERR:
-               return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel;
-       default:
-               BUG();
-       }
-}
-
-static unsigned long chipset_ids[] = {
-       PCI_DEVICE_ID_INTEL_5000_ERR,
-       PCI_DEVICE_ID_INTEL_5400_ERR,
-       0
+static struct {
+       unsigned long err;
+       unsigned long fbd0;
+} chipset_ids[] __devinitdata  = {
+       { PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 },
+       { PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 },
+       { 0, 0 }
 };
 
 #ifdef MODULE
@@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
 {
        struct i5k_amb_data *data;
        struct resource *reso;
-       int i;
-       int res = -ENODEV;
+       int i, res;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
        /* Figure out where the AMB registers live */
        i = 0;
        do {
-               res = i5k_find_amb_registers(data, chipset_ids[i]);
+               res = i5k_find_amb_registers(data, chipset_ids[i].err);
+               if (res == 0)
+                       break;
                i++;
-       } while (res && chipset_ids[i]);
+       } while (chipset_ids[i].err);
 
        if (res)
                goto err;
 
        /* Copy the DIMM presence map for the first two channels */
-       res = i5k_channel_probe(&data->amb_present[0],
-                               i5k_channel_pci_id(data, 0));
+       res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0);
        if (res)
                goto err;
 
        /* Copy the DIMM presence map for the optional second two channels */
-       i5k_channel_probe(&data->amb_present[2],
-                         i5k_channel_pci_id(data, 1));
+       i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1);
 
        /* Set up resource regions */
        reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME);
index d7926f4..eab1161 100644 (file)
@@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data,
        if (data->comp[mid].ohm <= ohm) {
                *i_low = mid;
                *i_high = mid - 1;
-       }
-       if (data->comp[mid].ohm > ohm) {
+       } else {
                *i_low = mid + 1;
                *i_high = mid;
        }
index 0c731ca..b228e09 100644 (file)
@@ -146,6 +146,7 @@ struct i2c_nmk_client {
  * @stop: stop condition
  * @xfer_complete: acknowledge completion for a I2C message
  * @result: controller propogated result
+ * @regulator: pointer to i2c regulator
  * @busy: Busy doing transfer
  */
 struct nmk_i2c_dev {
@@ -417,12 +418,12 @@ static int read_i2c(struct nmk_i2c_dev *dev)
        writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                        dev->virtbase + I2C_IMSCR);
 
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                &dev->xfer_complete, dev->adap.timeout);
 
        if (timeout < 0) {
                dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout"
                        "returned %d waiting for event\n", timeout);
                status = timeout;
        }
@@ -504,12 +505,12 @@ static int write_i2c(struct nmk_i2c_dev *dev)
        writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                        dev->virtbase + I2C_IMSCR);
 
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                &dev->xfer_complete, dev->adap.timeout);
 
        if (timeout < 0) {
                dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout "
                        "returned %d waiting for event\n", timeout);
                status = timeout;
        }
index 1a766cf..2dfb631 100644 (file)
@@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_SUSPEND
-static int omap_i2c_suspend(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
-                       dev->bus->pm->runtime_suspend(dev);
-
-       return 0;
-}
-
-static int omap_i2c_resume(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
-                       dev->bus->pm->runtime_resume(dev);
-
-       return 0;
-}
-
-static struct dev_pm_ops omap_i2c_pm_ops = {
-       .suspend = omap_i2c_suspend,
-       .resume = omap_i2c_resume,
-};
-#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
-#else
-#define OMAP_I2C_PM_OPS NULL
-#endif
-
 static struct platform_driver omap_i2c_driver = {
        .probe          = omap_i2c_probe,
        .remove         = omap_i2c_remove,
        .driver         = {
                .name   = "omap_i2c",
                .owner  = THIS_MODULE,
-               .pm     = OMAP_I2C_PM_OPS,
        },
 };
 
index 9882971..358cd7e 100644 (file)
@@ -139,7 +139,7 @@ struct analog_port {
 #include <linux/i8253.h>
 
 #define GET_TIME(x)    do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0)))
+#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
 #define TIME_NAME      (cpu_has_tsc?"TSC":"PIT")
 static unsigned int get_time_pit(void)
 {
index c8242dd..aa17e02 100644 (file)
@@ -20,6 +20,7 @@
  * flag.
  */
 
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
index f270447..a5a7791 100644 (file)
@@ -702,7 +702,7 @@ err_iounmap:
 err_free_mem_region:
        release_mem_region(res->start, resource_size(res));
 err_free_mem:
-       input_free_device(kbc->idev);
+       input_free_device(input_dev);
        kfree(kbc);
 
        return err;
index e21deb1..025417d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (I2C bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -27,54 +27,49 @@ static int ad714x_i2c_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume);
 
-static int ad714x_i2c_write(struct device *dev, unsigned short reg,
-                               unsigned short data)
+static int ad714x_i2c_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)&data;
-
-       u8 tx[4] = {
-               _reg[1],
-               _reg[0],
-               _data[1],
-               _data[0]
-       };
-
-       ret = i2c_master_send(client, tx, 4);
-       if (ret < 0)
-               dev_err(&client->dev, "I2C write error\n");
-
-       return ret;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C write error: %d\n", error);
+               return error;
+       }
+
+       return 0;
 }
 
-static int ad714x_i2c_read(struct device *dev, unsigned short reg,
-                               unsigned short *data)
+static int ad714x_i2c_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)data;
-
-       u8 tx[2] = {
-               _reg[1],
-               _reg[0]
-       };
-       u8 rx[2];
-
-       ret = i2c_master_send(client, tx, 2);
-       if (ret >= 0)
-               ret = i2c_master_recv(client, rx, 2);
-
-       if (unlikely(ret < 0)) {
-               dev_err(&client->dev, "I2C read error\n");
-       } else {
-               _data[0] = rx[1];
-               _data[1] = rx[0];
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int i;
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               sizeof(*chip->xfer_buf));
+       if (error >= 0)
+               error = i2c_master_recv(client, (u8 *)chip->xfer_buf,
+                                       len * sizeof(*chip->xfer_buf));
+
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C read error: %d\n", error);
+               return error;
        }
 
-       return ret;
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i]);
+
+       return 0;
 }
 
 static int __devinit ad714x_i2c_probe(struct i2c_client *client,
index 4120dd5..875b508 100644 (file)
@@ -1,12 +1,12 @@
 /*
  * AD714X CapTouch Programmable Controller driver (SPI bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
 
-#include <linux/input.h>       /* BUS_I2C */
+#include <linux/input.h>       /* BUS_SPI */
 #include <linux/module.h>
 #include <linux/spi/spi.h>
 #include <linux/pm.h>
@@ -30,30 +30,68 @@ static int ad714x_spi_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
-static int ad714x_spi_read(struct device *dev, unsigned short reg,
-               unsigned short *data)
+static int ad714x_spi_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
 {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg;
+       struct spi_device *spi = to_spi_device(chip->dev);
+       struct spi_message message;
+       struct spi_transfer xfer[2];
+       int i;
+       int error;
+
+       spi_message_init(&message);
+       memset(xfer, 0, sizeof(xfer));
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
+                                       AD714x_SPI_READ | reg);
+       xfer[0].tx_buf = &chip->xfer_buf[0];
+       xfer[0].len = sizeof(chip->xfer_buf[0]);
+       spi_message_add_tail(&xfer[0], &message);
+
+       xfer[1].rx_buf = &chip->xfer_buf[1];
+       xfer[1].len = sizeof(chip->xfer_buf[1]) * len;
+       spi_message_add_tail(&xfer[1], &message);
+
+       error = spi_sync(spi, &message);
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI read error: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i + 1]);
 
-       return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2);
+       return 0;
 }
 
-static int ad714x_spi_write(struct device *dev, unsigned short reg,
-               unsigned short data)
+static int ad714x_spi_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
 {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx[2] = {
-               AD714x_SPI_CMD_PREFIX | reg,
-               data
-       };
+       struct spi_device *spi = to_spi_device(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = spi_write(spi, (u8 *)chip->xfer_buf,
+                         2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI write error: %d\n", error);
+               return error;
+       }
 
-       return spi_write(spi, (u8 *)tx, 4);
+       return 0;
 }
 
 static int __devinit ad714x_spi_probe(struct spi_device *spi)
 {
        struct ad714x_chip *chip;
+       int err;
+
+       spi->bits_per_word = 8;
+       err = spi_setup(spi);
+       if (err < 0)
+               return err;
 
        chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq,
                            ad714x_spi_read, ad714x_spi_write);
index c3a62c4..ca42c7d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -59,7 +59,6 @@
 #define STAGE11_AMBIENT                0x27D
 
 #define PER_STAGE_REG_NUM      36
-#define STAGE_NUM              12
 #define STAGE_CFGREG_NUM       8
 #define SYS_CFGREG_NUM         8
 
@@ -124,27 +123,6 @@ struct ad714x_driver_data {
  * information to integrate all things which will be private data
  * of spi/i2c device
  */
-struct ad714x_chip {
-       unsigned short h_state;
-       unsigned short l_state;
-       unsigned short c_state;
-       unsigned short adc_reg[STAGE_NUM];
-       unsigned short amb_reg[STAGE_NUM];
-       unsigned short sensor_val[STAGE_NUM];
-
-       struct ad714x_platform_data *hw;
-       struct ad714x_driver_data *sw;
-
-       int irq;
-       struct device *dev;
-       ad714x_read_t read;
-       ad714x_write_t write;
-
-       struct mutex mutex;
-
-       unsigned product;
-       unsigned version;
-};
 
 static void ad714x_use_com_int(struct ad714x_chip *ad714x,
                                int start_stage, int end_stage)
@@ -154,13 +132,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 
        mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
        data |= 1 << end_stage;
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
        data &= ~mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
@@ -171,13 +149,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
 
        mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
        data &= ~(1 << end_stage);
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
        data |= mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x,
@@ -273,15 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
        for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
-
-               ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
+
+               ad714x->sensor_val[i] =
+                       abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]);
        }
 }
 
@@ -444,15 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
        for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                else
                        ad714x->sensor_val[i] = 0;
        }
@@ -597,15 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage,
+                       &ad714x->adc_reg[hw->x_start_stage],
+                       hw->x_end_stage - hw->x_start_stage + 1);
+
        for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                               &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                else
                        ad714x->sensor_val[i] = 0;
        }
@@ -891,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x)
 {
        unsigned short data;
 
-       ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data);
+       ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1);
        switch (data & 0xFFF0) {
        case AD7142_PARTID:
                ad714x->product = 0x7142;
@@ -940,23 +921,20 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x)
        for (i = 0; i < STAGE_NUM; i++) {
                reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM;
                for (j = 0; j < STAGE_CFGREG_NUM; j++)
-                       ad714x->write(ad714x->dev, reg_base + j,
+                       ad714x->write(ad714x, reg_base + j,
                                        ad714x->hw->stage_cfg_reg[i][j]);
        }
 
        for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i,
+               ad714x->write(ad714x, AD714X_SYSCFG_REG + i,
                        ad714x->hw->sys_cfg_reg[i]);
        for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i,
-                       &data);
+               ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1);
 
-       ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF);
+       ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF);
 
        /* clear all interrupts */
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 }
 
 static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
@@ -966,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
 
        mutex_lock(&ad714x->mutex);
 
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
        for (i = 0; i < ad714x->hw->button_num; i++)
                ad714x_button_state_machine(ad714x, i);
@@ -1245,7 +1221,7 @@ int ad714x_disable(struct ad714x_chip *ad714x)
        mutex_lock(&ad714x->mutex);
 
        data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3;
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data);
+       ad714x->write(ad714x, AD714X_PWR_CTRL, data);
 
        mutex_unlock(&ad714x->mutex);
 
@@ -1255,24 +1231,20 @@ EXPORT_SYMBOL(ad714x_disable);
 
 int ad714x_enable(struct ad714x_chip *ad714x)
 {
-       unsigned short data;
-
        dev_dbg(ad714x->dev, "%s enter\n", __func__);
 
        mutex_lock(&ad714x->mutex);
 
        /* resume to non-shutdown mode */
 
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL,
+       ad714x->write(ad714x, AD714X_PWR_CTRL,
                        ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]);
 
        /* make sure the interrupt output line is not low level after resume,
         * otherwise we will get no chance to enter falling-edge irq again
         */
 
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
        mutex_unlock(&ad714x->mutex);
 
index 45c54fb..3c85455 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (bus interfaces)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
 
 #include <linux/types.h>
 
+#define STAGE_NUM              12
+
 struct device;
+struct ad714x_platform_data;
+struct ad714x_driver_data;
 struct ad714x_chip;
 
-typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *);
-typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short);
+typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t);
+typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short);
+
+struct ad714x_chip {
+       unsigned short l_state;
+       unsigned short h_state;
+       unsigned short c_state;
+       unsigned short adc_reg[STAGE_NUM];
+       unsigned short amb_reg[STAGE_NUM];
+       unsigned short sensor_val[STAGE_NUM];
+
+       struct ad714x_platform_data *hw;
+       struct ad714x_driver_data *sw;
+
+       int irq;
+       struct device *dev;
+       ad714x_read_t read;
+       ad714x_write_t write;
+
+       struct mutex mutex;
+
+       unsigned product;
+       unsigned version;
+
+       __be16 xfer_buf[16] ____cacheline_aligned;
+
+};
 
 int ad714x_disable(struct ad714x_chip *ad714x);
 int ad714x_enable(struct ad714x_chip *ad714x);
index 6c76cf7..0794778 100644 (file)
@@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = {
        { .compatible = "fsl,mma8450", },
        { /* sentinel */ }
 };
-MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+MODULE_DEVICE_TABLE(of, mma8450_dt_ids);
 
 static struct i2c_driver mma8450_driver = {
        .driver = {
index b95fac1..f71dc72 100644 (file)
@@ -282,7 +282,7 @@ err_free_irq:
 err_pm_set_suspended:
        pm_runtime_set_suspended(&client->dev);
 err_free_mem:
-       input_unregister_device(idev);
+       input_free_device(idev);
        kfree(sensor);
        return error;
 }
index 3126983..da28018 100644 (file)
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+/* MacbookAir4,2 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI   0x024c
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO    0x024d
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS    0x024e
+/* Macbook8,2 (unibody) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI  0x0252
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO   0x0253
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS   0x0254
 
 #define BCM5974_DEVICE(prod) {                                 \
        .match_flags = (USB_DEVICE_ID_MATCH_DEVICE |            \
@@ -104,6 +112,14 @@ static const struct usb_device_id bcm5974_table[] = {
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
+       /* MacbookAir4,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
+       /* MacbookPro8,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
        /* Terminating entry */
        {}
 };
@@ -294,6 +310,30 @@ static const struct bcm5974_config bcm5974_config_table[] = {
                { DIM_X, DIM_X / SN_COORD, -4415, 5050 },
                { DIM_Y, DIM_Y / SN_COORD, -55, 6680 }
        },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+       },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4750, 5280 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6730 }
+       },
        {}
 };
 
index 449c0a4..d27c9d9 100644 (file)
@@ -49,6 +49,7 @@ struct hid_descriptor {
 #define USB_REQ_GET_REPORT     0x01
 #define USB_REQ_SET_REPORT     0x09
 #define WAC_HID_FEATURE_REPORT 0x03
+#define WAC_MSG_RETRIES                5
 
 static int usb_get_report(struct usb_interface *intf, unsigned char type,
                                unsigned char id, void *buf, int size)
@@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
                        report,
                        hid_desc->wDescriptorLength,
                        5000); /* 5 secs */
-       } while (result < 0 && limit++ < 5);
+       } while (result < 0 && limit++ < WAC_MSG_RETRIES);
 
        /* No need to parse the Descriptor. It isn't an error though */
        if (result < 0)
@@ -319,24 +320,26 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
        int limit = 0, report_id = 2;
        int error = -ENOMEM;
 
-       rep_data = kmalloc(2, GFP_KERNEL);
+       rep_data = kmalloc(4, GFP_KERNEL);
        if (!rep_data)
                return error;
 
-       /* ask to report tablet data if it is 2FGT Tablet PC or
+       /* ask to report tablet data if it is MT Tablet PC or
         * not a Tablet PC */
        if (features->type == TABLETPC2FG) {
                do {
                        rep_data[0] = 3;
                        rep_data[1] = 4;
+                       rep_data[2] = 0;
+                       rep_data[3] = 0;
                        report_id = 3;
                        error = usb_set_report(intf, WAC_HID_FEATURE_REPORT,
-                               report_id, rep_data, 2);
+                               report_id, rep_data, 4);
                        if (error >= 0)
                                error = usb_get_report(intf,
                                        WAC_HID_FEATURE_REPORT, report_id,
-                                       rep_data, 3);
-               } while ((error < 0 || rep_data[1] != 4) && limit++ < 5);
+                                       rep_data, 4);
+               } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES);
        } else if (features->type != TABLETPC) {
                do {
                        rep_data[0] = 2;
@@ -347,7 +350,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
                                error = usb_get_report(intf,
                                        WAC_HID_FEATURE_REPORT, report_id,
                                        rep_data, 2);
-               } while ((error < 0 || rep_data[1] != 2) && limit++ < 5);
+               } while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES);
        }
 
        kfree(rep_data);
index 03ebcc8..c1c2f7b 100644 (file)
@@ -1460,6 +1460,9 @@ static const struct wacom_features wacom_features_0xD3 =
 static const struct wacom_features wacom_features_0xD4 =
        { "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0xD5 =
+       { "Wacom Bamboo Pen 6x8",     WACOM_PKGLEN_BBFUN, 21648, 13530, 1023,
+         63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD6 =
        { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1564,6 +1567,7 @@ const struct usb_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0xD2) },
        { USB_DEVICE_WACOM(0xD3) },
        { USB_DEVICE_WACOM(0xD4) },
+       { USB_DEVICE_WACOM(0xD5) },
        { USB_DEVICE_WACOM(0xD6) },
        { USB_DEVICE_WACOM(0xD7) },
        { USB_DEVICE_WACOM(0xD8) },
index ae00604..f5d6685 100644 (file)
@@ -244,6 +244,7 @@ struct mxt_finger {
        int x;
        int y;
        int area;
+       int pressure;
 };
 
 /* Each client has this additional data */
@@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
                                        finger[id].x);
                        input_report_abs(input_dev, ABS_MT_POSITION_Y,
                                        finger[id].y);
+                       input_report_abs(input_dev, ABS_MT_PRESSURE,
+                                       finger[id].pressure);
                } else {
                        finger[id].status = 0;
                }
@@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
        if (status != MXT_RELEASE) {
                input_report_abs(input_dev, ABS_X, finger[single_id].x);
                input_report_abs(input_dev, ABS_Y, finger[single_id].y);
+               input_report_abs(input_dev,
+                                ABS_PRESSURE, finger[single_id].pressure);
        }
 
        input_sync(input_dev);
@@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
        int x;
        int y;
        int area;
+       int pressure;
 
        /* Check the touch is present on the screen */
        if (!(status & MXT_DETECT)) {
@@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
                y = y >> 2;
 
        area = message->message[4];
+       pressure = message->message[5];
 
        dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id,
                status & MXT_MOVE ? "moved" : "pressed",
@@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
        finger[id].x = x;
        finger[id].y = y;
        finger[id].area = area;
+       finger[id].pressure = pressure;
 
        mxt_input_report(data, id);
 }
@@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                             0, data->max_x, 0, 0);
        input_set_abs_params(input_dev, ABS_Y,
                             0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_PRESSURE,
+                            0, 255, 0, 0);
 
        /* For multi touch */
        input_mt_init_slots(input_dev, MXT_MAX_FINGER);
@@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                             0, data->max_x, 0, 0);
        input_set_abs_params(input_dev, ABS_MT_POSITION_Y,
                             0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_MT_PRESSURE,
+                            0, 255, 0, 0);
 
        input_set_drvdata(input_dev, data);
        i2c_set_clientdata(client, data);
index 4f2713d..4627fe5 100644 (file)
@@ -9,7 +9,8 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  */
 
 /*
index 089b0a0..0e8f63e 100644 (file)
@@ -13,6 +13,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/errno.h>
index b982603..8c00937 100644 (file)
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
index 3ebe382..ea21855 100644 (file)
@@ -662,6 +662,11 @@ failed_unregister_led1_R:
 static void bd2802_unregister_led_classdev(struct bd2802_led *led)
 {
        cancel_work_sync(&led->work);
+       led_classdev_unregister(&led->cdev_led2b);
+       led_classdev_unregister(&led->cdev_led2g);
+       led_classdev_unregister(&led->cdev_led2r);
+       led_classdev_unregister(&led->cdev_led1b);
+       led_classdev_unregister(&led->cdev_led1g);
        led_classdev_unregister(&led->cdev_led1r);
 }
 
index e4ce1fd..bcfbd3a 100644 (file)
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
index 54e3d05..3590315 100644 (file)
@@ -164,5 +164,5 @@ subsys_initcall(ab8500_pwm_init);
 module_exit(ab8500_pwm_exit);
 MODULE_AUTHOR("Arun MURTHY <arun.murthy@stericsson.com>");
 MODULE_DESCRIPTION("AB8500 Pulse Width Modulation Driver");
-MODULE_ALIAS("AB8500 PWM driver");
+MODULE_ALIAS("platform:ab8500-pwm");
 MODULE_LICENSE("GPL v2");
index 5325a7e..27dc0d2 100644 (file)
@@ -455,7 +455,7 @@ static int __devinit fsa9480_probe(struct i2c_client *client,
 
 fail2:
        if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
 fail1:
        i2c_set_clientdata(client, NULL);
        kfree(usbsw);
@@ -466,7 +466,7 @@ static int __devexit fsa9480_remove(struct i2c_client *client)
 {
        struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
        if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
        i2c_set_clientdata(client, NULL);
 
        sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
index 8653bd0..06df187 100644 (file)
@@ -33,6 +33,8 @@
 #include <linux/mutex.h>
 #include <linux/miscdevice.h>
 #include <linux/pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #define DRIVERNAME             "pti"
 #define PCINAME                        "pciPTI"
index 38a83ac..43f2ea5 100644 (file)
@@ -3419,9 +3419,27 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
 static int bond_open(struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+       int i;
 
        bond->kill_timers = 0;
 
+       /* reset slave->backup and slave->inactive */
+       read_lock(&bond->lock);
+       if (bond->slave_cnt > 0) {
+               read_lock(&bond->curr_slave_lock);
+               bond_for_each_slave(bond, slave, i) {
+                       if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+                               && (slave != bond->curr_active_slave)) {
+                               bond_set_slave_inactive_flags(slave);
+                       } else {
+                               bond_set_slave_active_flags(slave);
+                       }
+               }
+               read_unlock(&bond->curr_slave_lock);
+       }
+       read_unlock(&bond->lock);
+
        INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
 
        if (bond_is_lb(bond)) {
index 231385b..c7f3d4e 100644 (file)
@@ -408,7 +408,7 @@ static void plx_pci_del_card(struct pci_dev *pdev)
        struct sja1000_priv *priv;
        int i = 0;
 
-       for (i = 0; i < card->channels; i++) {
+       for (i = 0; i < PLX_PCI_MAX_CHAN; i++) {
                dev = card->net_dev[i];
                if (!dev)
                        continue;
@@ -536,7 +536,6 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                        if (err) {
                                dev_err(&pdev->dev, "Registering device failed "
                                        "(err=%d)\n", err);
-                               free_sja1000dev(dev);
                                goto failure_cleanup;
                        }
 
@@ -549,6 +548,7 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                        dev_err(&pdev->dev, "Channel #%d not detected\n",
                                i + 1);
                        free_sja1000dev(dev);
+                       card->net_dev[i] = NULL;
                }
        }
 
index 35916f4..8533ad7 100644 (file)
@@ -155,6 +155,9 @@ struct e1000_info;
 #define HV_M_STATUS_SPEED_1000            0x0200
 #define HV_M_STATUS_LINK_UP               0x0040
 
+#define E1000_ICH_FWSM_PCIM2PCI                0x01000000 /* ME PCIm-to-PCI active */
+#define E1000_ICH_FWSM_PCIM2PCI_COUNT  2000
+
 /* Time to wait before putting the device into D3 if there's no link (in ms). */
 #define LINK_TIMEOUT           100
 
@@ -454,6 +457,7 @@ struct e1000_info {
 #define FLAG2_DISABLE_AIM                 (1 << 8)
 #define FLAG2_CHECK_PHY_HANG              (1 << 9)
 #define FLAG2_NO_DISABLE_RX               (1 << 10)
+#define FLAG2_PCIM2PCI_ARBITER_WA         (1 << 11)
 
 #define E1000_RX_DESC_PS(R, i)     \
        (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
index 4e36978..54add27 100644 (file)
 #define HV_PM_CTRL             PHY_REG(770, 17)
 
 /* PHY Low Power Idle Control */
-#define I82579_LPI_CTRL                        PHY_REG(772, 20)
-#define I82579_LPI_CTRL_ENABLE_MASK    0x6000
+#define I82579_LPI_CTRL                                PHY_REG(772, 20)
+#define I82579_LPI_CTRL_ENABLE_MASK            0x6000
+#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT   0x80
 
 /* EMI Registers */
 #define I82579_EMI_ADDR         0x10
 #define HV_KMRN_MODE_CTRL      PHY_REG(769, 16)
 #define HV_KMRN_MDIO_SLOW      0x0400
 
+/* KMRN FIFO Control and Status */
+#define HV_KMRN_FIFO_CTRLSTA                  PHY_REG(770, 16)
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK    0x7000
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT   12
+
 /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */
 /* Offset 04h HSFSTS */
 union ich8_hws_flash_status {
@@ -657,6 +663,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
        struct e1000_mac_info *mac = &hw->mac;
        s32 ret_val;
        bool link;
+       u16 phy_reg;
 
        /*
         * We only want to go out to the PHY registers to see if Auto-Neg
@@ -689,16 +696,35 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 
        mac->get_link_status = false;
 
-       if (hw->phy.type == e1000_phy_82578) {
-               ret_val = e1000_link_stall_workaround_hv(hw);
-               if (ret_val)
-                       goto out;
-       }
-
-       if (hw->mac.type == e1000_pch2lan) {
+       switch (hw->mac.type) {
+       case e1000_pch2lan:
                ret_val = e1000_k1_workaround_lv(hw);
                if (ret_val)
                        goto out;
+               /* fall-thru */
+       case e1000_pchlan:
+               if (hw->phy.type == e1000_phy_82578) {
+                       ret_val = e1000_link_stall_workaround_hv(hw);
+                       if (ret_val)
+                               goto out;
+               }
+
+               /*
+                * Workaround for PCHx parts in half-duplex:
+                * Set the number of preambles removed from the packet
+                * when it is passed from the PHY to the MAC to prevent
+                * the MAC from misinterpreting the packet type.
+                */
+               e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg);
+               phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK;
+
+               if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD)
+                       phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT);
+
+               e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg);
+               break;
+       default:
+               break;
        }
 
        /*
@@ -788,6 +814,11 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
            (adapter->hw.phy.type == e1000_phy_igp_3))
                adapter->flags |= FLAG_LSC_GIG_SPEED_DROP;
 
+       /* Enable workaround for 82579 w/ ME enabled */
+       if ((adapter->hw.mac.type == e1000_pch2lan) &&
+           (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+               adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
+
        /* Disable EEE by default until IEEE802.3az spec is finalized */
        if (adapter->flags2 & FLAG2_HAS_EEE)
                adapter->hw.dev_spec.ich8lan.eee_disable = true;
@@ -1355,7 +1386,7 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
                        return ret_val;
 
                /* Preamble tuning for SSC */
-               ret_val = e1e_wphy(hw, PHY_REG(770, 16), 0xA204);
+               ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204);
                if (ret_val)
                        return ret_val;
        }
@@ -1645,6 +1676,7 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
        s32 ret_val = 0;
        u16 status_reg = 0;
        u32 mac_reg;
+       u16 phy_reg;
 
        if (hw->mac.type != e1000_pch2lan)
                goto out;
@@ -1659,12 +1691,19 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
                mac_reg = er32(FEXTNVM4);
                mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
 
-               if (status_reg & HV_M_STATUS_SPEED_1000)
+               ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg);
+               if (ret_val)
+                       goto out;
+
+               if (status_reg & HV_M_STATUS_SPEED_1000) {
                        mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
-               else
+                       phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               } else {
                        mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
-
+                       phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               }
                ew32(FEXTNVM4, mac_reg);
+               ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg);
        }
 
 out:
index 362f703..2198e61 100644 (file)
@@ -518,6 +518,63 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
        adapter->hw_csum_good++;
 }
 
+/**
+ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa()
+ * @hw: pointer to the HW structure
+ * @tail: address of tail descriptor register
+ * @i: value to write to tail descriptor register
+ *
+ * When updating the tail register, the ME could be accessing Host CSR
+ * registers at the same time.  Normally, this is handled in h/w by an
+ * arbiter but on some parts there is a bug that acknowledges Host accesses
+ * later than it should which could result in the descriptor register to
+ * have an incorrect value.  Workaround this by checking the FWSM register
+ * which has bit 24 set while ME is accessing Host CSR registers, wait
+ * if it is set and try again a number of times.
+ **/
+static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail,
+                                       unsigned int i)
+{
+       unsigned int j = 0;
+
+       while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) &&
+              (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI))
+               udelay(50);
+
+       writel(i, tail);
+
+       if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail)))
+               return E1000_ERR_SWFW_SYNC;
+
+       return 0;
+}
+
+static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 rctl = er32(RCTL);
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
+               e_err("ME firmware caused invalid RDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
+static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 tctl = er32(TCTL);
+               ew32(TCTL, tctl & ~E1000_TCTL_EN);
+               e_err("ME firmware caused invalid TDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
 /**
  * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
  * @adapter: address of board private structure
@@ -573,7 +630,10 @@ map_skb:
                         * such as IA-64).
                         */
                        wmb();
-                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i);
+                       else
+                               writel(i, adapter->hw.hw_addr + rx_ring->tail);
                }
                i++;
                if (i == rx_ring->count)
@@ -673,7 +733,11 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
                         * such as IA-64).
                         */
                        wmb();
-                       writel(i << 1, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i << 1);
+                       else
+                               writel(i << 1,
+                                      adapter->hw.hw_addr + rx_ring->tail);
                }
 
                i++;
@@ -756,7 +820,10 @@ check_page:
                 * applicable for weak-ordered memory model archs,
                 * such as IA-64). */
                wmb();
-               writel(i, adapter->hw.hw_addr + rx_ring->tail);
+               if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                       e1000e_update_rdt_wa(adapter, i);
+               else
+                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
        }
 }
 
@@ -4689,7 +4756,12 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
        wmb();
 
        tx_ring->next_to_use = i;
-       writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
+       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+               e1000e_update_tdt_wa(adapter, i);
+       else
+               writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
        /*
         * we need this if more than one processor can write to our tail
         * at a time, it synchronizes IO on IA64/Altix systems
index e55df30..6d5fbd4 100644 (file)
@@ -5615,7 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                goto out_error;
        }
 
-       nv_vlan_mode(dev, dev->features);
+       if (id->driver_data & DEV_HAS_VLAN)
+               nv_vlan_mode(dev, dev->features);
 
        netif_carrier_off(dev);
 
index 2659daa..31d5c57 100644 (file)
@@ -2710,8 +2710,13 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
        /* Tell the skb what kind of packet this is */
        skb->protocol = eth_type_trans(skb, dev);
 
-       /* Set vlan tag */
-       if (fcb->flags & RXFCB_VLN)
+       /*
+        * There's need to check for NETIF_F_HW_VLAN_RX here.
+        * Even if vlan rx accel is disabled, on some chips
+        * RXFCB_VLN is pseudo randomly set.
+        */
+       if (dev->features & NETIF_F_HW_VLAN_RX &&
+           fcb->flags & RXFCB_VLN)
                __vlan_hwaccel_put_tag(skb, fcb->vlctl);
 
        /* Send the packet up the stack */
index 6e35069..25a8c2a 100644 (file)
@@ -686,10 +686,21 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 {
        unsigned int last_rule_idx = priv->cur_filer_idx;
        unsigned int cmp_rqfpr;
-       unsigned int local_rqfpr[MAX_FILER_IDX + 1];
-       unsigned int local_rqfcr[MAX_FILER_IDX + 1];
+       unsigned int *local_rqfpr;
+       unsigned int *local_rqfcr;
        int i = 0x0, k = 0x0;
        int j = MAX_FILER_IDX, l = 0x0;
+       int ret = 1;
+
+       local_rqfpr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       local_rqfcr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       if (!local_rqfpr || !local_rqfcr) {
+               pr_err("Out of memory\n");
+               ret = 0;
+               goto err;
+       }
 
        switch (class) {
        case TCP_V4_FLOW:
@@ -706,7 +717,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                break;
        default:
                pr_err("Right now this class is not supported\n");
-               return 0;
+               ret = 0;
+               goto err;
        }
 
        for (i = 0; i < MAX_FILER_IDX + 1; i++) {
@@ -721,7 +733,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 
        if (i == MAX_FILER_IDX + 1) {
                pr_err("No parse rule found, can't create hash rules\n");
-               return 0;
+               ret = 0;
+               goto err;
        }
 
        /* If a match was found, then it begins the starting of a cluster rule
@@ -765,7 +778,10 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                priv->cur_filer_idx = priv->cur_filer_idx - 1;
        }
 
-       return 1;
+err:
+       kfree(local_rqfcr);
+       kfree(local_rqfpr);
+       return ret;
 }
 
 static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd)
index e86297b..2279039 100644 (file)
@@ -1459,8 +1459,10 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                if (ixgbe_rx_is_fcoe(adapter, rx_desc)) {
                        ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb,
                                                   staterr);
-                       if (!ddp_bytes)
+                       if (!ddp_bytes) {
+                               dev_kfree_skb_any(skb);
                                goto next_desc;
+                       }
                }
 #endif /* IXGBE_FCOE */
                ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc);
index 86ac38c..3bb1311 100644 (file)
@@ -80,13 +80,13 @@ static int rionet_capable = 1;
  */
 static struct rio_dev **rionet_active;
 
-#define is_rionet_capable(pef, src_ops, dst_ops)               \
-                       ((pef & RIO_PEF_INB_MBOX) &&            \
-                        (pef & RIO_PEF_INB_DOORBELL) &&        \
+#define is_rionet_capable(src_ops, dst_ops)                    \
+                       ((src_ops & RIO_SRC_OPS_DATA_MSG) &&    \
+                        (dst_ops & RIO_DST_OPS_DATA_MSG) &&    \
                         (src_ops & RIO_SRC_OPS_DOORBELL) &&    \
                         (dst_ops & RIO_DST_OPS_DOORBELL))
 #define dev_rionet_capable(dev) \
-       is_rionet_capable(dev->pef, dev->src_ops, dev->dst_ops)
+       is_rionet_capable(dev->src_ops, dev->dst_ops)
 
 #define RIONET_MAC_MATCH(x)    (*(u32 *)x == 0x00010001)
 #define RIONET_GET_DESTID(x)   (*(u16 *)(x + 4))
@@ -282,7 +282,6 @@ static int rionet_open(struct net_device *ndev)
 {
        int i, rc = 0;
        struct rionet_peer *peer, *tmp;
-       u32 pwdcsr;
        struct rionet_private *rnet = netdev_priv(ndev);
 
        if (netif_msg_ifup(rnet))
@@ -332,13 +331,8 @@ static int rionet_open(struct net_device *ndev)
                        continue;
                }
 
-               /*
-                * If device has initialized inbound doorbells,
-                * send a join message
-                */
-               rio_read_config_32(peer->rdev, RIO_WRITE_PORT_CSR, &pwdcsr);
-               if (pwdcsr & RIO_DOORBELL_AVAIL)
-                       rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
+               /* Send a join message */
+               rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
        }
 
       out:
@@ -492,7 +486,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
 {
        int rc = -ENODEV;
-       u32 lpef, lsrc_ops, ldst_ops;
+       u32 lsrc_ops, ldst_ops;
        struct rionet_peer *peer;
        struct net_device *ndev = NULL;
 
@@ -515,12 +509,11 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
         * on later probes
         */
        if (!rionet_check) {
-               rio_local_read_config_32(rdev->net->hport, RIO_PEF_CAR, &lpef);
                rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
                                         &lsrc_ops);
                rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
                                         &ldst_ops);
-               if (!is_rionet_capable(lpef, lsrc_ops, ldst_ops)) {
+               if (!is_rionet_capable(lsrc_ops, ldst_ops)) {
                        printk(KERN_ERR
                               "%s: local device is not network capable\n",
                               DRV_NAME);
index ad35c21..190f619 100644 (file)
@@ -21,6 +21,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/delay.h>
index a03336e..f06fb78 100644 (file)
@@ -228,23 +228,40 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
        if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
 
                if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) {
-                       struct usb_cdc_ncm_ndp_input_size ndp_in_sz;
+                       struct usb_cdc_ncm_ndp_input_size *ndp_in_sz;
+
+                       ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL);
+                       if (!ndp_in_sz) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+
                        err = usb_control_msg(ctx->udev,
                                        usb_sndctrlpipe(ctx->udev, 0),
                                        USB_CDC_SET_NTB_INPUT_SIZE,
                                        USB_TYPE_CLASS | USB_DIR_OUT
                                         | USB_RECIP_INTERFACE,
-                                       0, iface_no, &ndp_in_sz, 8, 1000);
+                                       0, iface_no, ndp_in_sz, 8, 1000);
+                       kfree(ndp_in_sz);
                } else {
-                       __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+                       __le32 *dwNtbInMaxSize;
+                       dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize),
+                                       GFP_KERNEL);
+                       if (!dwNtbInMaxSize) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+                       *dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+
                        err = usb_control_msg(ctx->udev,
                                        usb_sndctrlpipe(ctx->udev, 0),
                                        USB_CDC_SET_NTB_INPUT_SIZE,
                                        USB_TYPE_CLASS | USB_DIR_OUT
                                         | USB_RECIP_INTERFACE,
-                                       0, iface_no, &dwNtbInMaxSize, 4, 1000);
+                                       0, iface_no, dwNtbInMaxSize, 4, 1000);
+                       kfree(dwNtbInMaxSize);
                }
-
+size_err:
                if (err < 0)
                        pr_debug("Setting NTB Input Size failed\n");
        }
@@ -325,19 +342,29 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 
        /* set Max Datagram Size (MTU) */
        if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) {
-               __le16 max_datagram_size;
+               __le16 *max_datagram_size;
                u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+
+               max_datagram_size = kzalloc(sizeof(*max_datagram_size),
+                               GFP_KERNEL);
+               if (!max_datagram_size) {
+                       err = -ENOMEM;
+                       goto max_dgram_err;
+               }
+
                err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0),
                                USB_CDC_GET_MAX_DATAGRAM_SIZE,
                                USB_TYPE_CLASS | USB_DIR_IN
                                 | USB_RECIP_INTERFACE,
-                               0, iface_no, &max_datagram_size,
+                               0, iface_no, max_datagram_size,
                                2, 1000);
                if (err < 0) {
                        pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n",
                                                CDC_NCM_MIN_DATAGRAM_SIZE);
+                       kfree(max_datagram_size);
                } else {
-                       ctx->max_datagram_size = le16_to_cpu(max_datagram_size);
+                       ctx->max_datagram_size =
+                               le16_to_cpu(*max_datagram_size);
                        /* Check Eth descriptor value */
                        if (eth_max_sz < CDC_NCM_MAX_DATAGRAM_SIZE) {
                                if (ctx->max_datagram_size > eth_max_sz)
@@ -360,8 +387,10 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
                                                USB_TYPE_CLASS | USB_DIR_OUT
                                                 | USB_RECIP_INTERFACE,
                                                0,
-                                               iface_no, &max_datagram_size,
+                                               iface_no, max_datagram_size,
                                                2, 1000);
+                       kfree(max_datagram_size);
+max_dgram_err:
                        if (err < 0)
                                pr_debug("SET_MAX_DATAGRAM_SIZE failed\n");
                }
index deb1eca..7c5336c 100644 (file)
@@ -515,10 +515,6 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
        mac_set_cam_mask(regs, vptr->mCAMmask);
 
        /* Enable VCAMs */
-
-       if (test_bit(0, vptr->active_vlans))
-               WORD_REG_BITS_ON(MCFG_RTGOPT, &regs->MCFG);
-
        for_each_set_bit(vid, vptr->active_vlans, VLAN_N_VID) {
                mac_set_vlan_cam(regs, i, (u8 *) &vid);
                vptr->vCAMmask[i / 8] |= 0x1 << (i % 8);
index 1cbacb3..0959583 100644 (file)
@@ -1929,14 +1929,17 @@ static void
 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
 
-       VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
 
        set_bit(vid, adapter->active_vlans);
 }
@@ -1946,14 +1949,17 @@ static void
 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
 
-       VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
 
        clear_bit(vid, adapter->active_vlans);
 }
index 69d4ec4..2fdbffa 100644 (file)
@@ -478,27 +478,22 @@ out_no_pci:
        return err;
 }
 
-static void iwl_pci_down(struct iwl_bus *bus)
-{
-       struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific;
-
-       pci_disable_msi(pci_bus->pci_dev);
-       pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base);
-       pci_release_regions(pci_bus->pci_dev);
-       pci_disable_device(pci_bus->pci_dev);
-       pci_set_drvdata(pci_bus->pci_dev, NULL);
-
-       kfree(bus);
-}
-
 static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 {
        struct iwl_priv *priv = pci_get_drvdata(pdev);
-       void *bus_specific = priv->bus->bus_specific;
+       struct iwl_bus *bus = priv->bus;
+       struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus);
+       struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus);
 
        iwl_remove(priv);
 
-       iwl_pci_down(bus_specific);
+       pci_disable_msi(pci_dev);
+       pci_iounmap(pci_dev, pci_bus->hw_base);
+       pci_release_regions(pci_dev);
+       pci_disable_device(pci_dev);
+       pci_set_drvdata(pci_dev, NULL);
+
+       kfree(bus);
 }
 
 #ifdef CONFIG_PM
index 9395631..dbf501c 100644 (file)
@@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
        int wcid, ack, pid;
        int tx_wcid, tx_ack, tx_pid;
 
+       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+               WARNING(entry->queue->rt2x00dev,
+                       "Data pending for entry %u in queue %u\n",
+                       entry->entry_idx, entry->queue->qid);
+               cond_resched();
+               return false;
+       }
+
        wcid    = rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
        ack     = rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
        pid     = rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
@@ -529,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
                        if (rt2800usb_txdone_entry_check(entry, reg))
                                break;
+                       entry = NULL;
                }
 
-               if (!entry || rt2x00queue_empty(queue))
-                       break;
-
-               rt2800_txdone_entry(entry, reg);
+               if (entry)
+                       rt2800_txdone_entry(entry, reg);
        }
 }
 
@@ -558,8 +566,10 @@ static void rt2800usb_work_txdone(struct work_struct *work)
                while (!rt2x00queue_empty(queue)) {
                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
 
-                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags))
                                break;
+
                        if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags))
                                rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE);
                        else if (rt2x00queue_status_timeout(entry))
index b6b4542..7fbb55c 100644 (file)
@@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
        struct queue_entry *entry = (struct queue_entry *)urb->context;
        struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 
-       if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+       if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
                return;
-
-       if (rt2x00dev->ops->lib->tx_dma_done)
-               rt2x00dev->ops->lib->tx_dma_done(entry);
-
-       /*
-        * Report the frame as DMA done
-        */
-       rt2x00lib_dmadone(entry);
-
        /*
         * Check if the frame was correctly uploaded
         */
        if (urb->status)
                set_bit(ENTRY_DATA_IO_FAILED, &entry->flags);
+       /*
+        * Report the frame as DMA done
+        */
+       rt2x00lib_dmadone(entry);
 
+       if (rt2x00dev->ops->lib->tx_dma_done)
+               rt2x00dev->ops->lib->tx_dma_done(entry);
        /*
         * Schedule the delayed work for reading the TX status
         * from the device.
index 7e33f1f..34f6ab5 100644 (file)
@@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth)
        auth->sleep_auth = sleep_auth;
 
        ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth));
-       if (ret < 0)
-               return ret;
 
 out:
        kfree(auth);
@@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl)
 
        ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD,
                                   detection, sizeof(*detection));
-       if (ret < 0) {
+       if (ret < 0)
                wl1271_warning("failed to set cca threshold: %d", ret);
-               return ret;
-       }
 
 out:
        kfree(detection);
index 5d5e1ef..88add68 100644 (file)
@@ -139,12 +139,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
 
        if (ret < 0) {
                wl1271_warning("testmode cmd interrogate failed: %d", ret);
+               kfree(cmd);
                return ret;
        }
 
        skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd));
-       if (!skb)
+       if (!skb) {
+               kfree(cmd);
                return -ENOMEM;
+       }
 
        NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd);
 
index 7106b49..ffc5033 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
index cc21fa2..ef8efad 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
index a675e31..d32d0d7 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/s3c_adc_battery.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <plat/adc.h>
 
index ee89358..ebe77dd 100644 (file)
@@ -505,8 +505,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
        rdev->dev.dma_mask = &rdev->dma_mask;
        rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 
-       if ((rdev->pef & RIO_PEF_INB_DOORBELL) &&
-           (rdev->dst_ops & RIO_DST_OPS_DOORBELL))
+       if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
                rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
                                   0, 0xffff);
 
index 9329dbb..4e7c04e 100644 (file)
@@ -152,10 +152,6 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
                goto retry_get_time;
        }
 
-       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
-                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
-
        rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
        rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
        rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
@@ -164,6 +160,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
        rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        rtc_tm->tm_year += 100;
+
+       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
+                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+
        rtc_tm->tm_mon -= 1;
 
        clk_disable(rtc_clk);
@@ -269,10 +270,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
        clk_enable(rtc_clk);
        pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
                 alrm->enabled,
-                1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+                1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
                 tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-
        alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
        writeb(0x00, base + S3C2410_RTCALM);
 
@@ -319,49 +319,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
        return 0;
 }
 
-static int s3c_rtc_open(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-       int ret;
-
-       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
-                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
-               return ret;
-       }
-
-       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
-                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
-               goto tick_err;
-       }
-
-       return ret;
-
- tick_err:
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       return ret;
-}
-
-static void s3c_rtc_release(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-
-       /* do not clear AIE here, it may be needed for wake */
-
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       free_irq(s3c_rtc_tickno, rtc_dev);
-}
-
 static const struct rtc_class_ops s3c_rtcops = {
-       .open           = s3c_rtc_open,
-       .release        = s3c_rtc_release,
        .read_time      = s3c_rtc_gettime,
        .set_time       = s3c_rtc_settime,
        .read_alarm     = s3c_rtc_getalarm,
@@ -425,6 +383,9 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
 {
        struct rtc_device *rtc = platform_get_drvdata(dev);
 
+       free_irq(s3c_rtc_alarmno, rtc);
+       free_irq(s3c_rtc_tickno, rtc);
+
        platform_set_drvdata(dev, NULL);
        rtc_device_unregister(rtc);
 
@@ -548,10 +509,32 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev)
 
        s3c_rtc_setfreq(&pdev->dev, 1);
 
+       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
+                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
+               goto err_alarm_irq;
+       }
+
+       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
+                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
+               free_irq(s3c_rtc_alarmno, rtc);
+               goto err_tick_irq;
+       }
+
        clk_disable(rtc_clk);
 
        return 0;
 
+ err_tick_irq:
+       free_irq(s3c_rtc_alarmno, rtc);
+
+ err_alarm_irq:
+       platform_set_drvdata(pdev, NULL);
+       rtc_device_unregister(rtc);
+
  err_nortc:
        s3c_rtc_enable(pdev, 0);
        clk_disable(rtc_clk);
index eb4e034..f1a2016 100644 (file)
@@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block)
 static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
 {
        struct dasd_profile_info_t *data;
+       int rc = 0;
 
        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
                spin_unlock_bh(&block->profile.lock);
        } else {
                spin_unlock_bh(&block->profile.lock);
-               return -EIO;
+               rc = -EIO;
+               goto out;
        }
        if (copy_to_user(argp, data, sizeof(*data)))
-               return -EFAULT;
-       return 0;
+               rc = -EFAULT;
+out:
+       kfree(data);
+       return rc;
 }
 #else
 static int dasd_ioctl_reset_profile(struct dasd_block *block)
index be55fb2..837e010 100644 (file)
@@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id)
        switch (sccb->header.response_code) {
        case 0x0020:
                set_bit(id, sclp_storage_ids);
-               for (i = 0; i < sccb->assigned; i++)
-                       sclp_unassign_storage(sccb->entries[i] >> 16);
+               for (i = 0; i < sccb->assigned; i++) {
+                       if (sccb->entries[i])
+                               sclp_unassign_storage(sccb->entries[i] >> 16);
+               }
                break;
        default:
                rc = -EIO;
index c24fb10..6a4ea29 100644 (file)
@@ -2243,7 +2243,6 @@ static int iscsit_handle_snack(
        case 0:
                return iscsit_handle_recovery_datain_or_r2t(conn, buf,
                        hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
-               return 0;
        case ISCSI_FLAG_SNACK_TYPE_STATUS:
                return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
                        hdr->begrun, hdr->runlength);
index f095e65..f1643db 100644 (file)
@@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
                                ISCSI_TCP);
        if (IS_ERR(tpg_np)) {
                iscsit_put_tpg(tpg);
-               return ERR_PTR(PTR_ERR(tpg_np));
+               return ERR_CAST(tpg_np);
        }
        pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
 
@@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn(
 
        tiqn = iscsit_add_tiqn((unsigned char *)name);
        if (IS_ERR(tiqn))
-               return ERR_PTR(PTR_ERR(tiqn));
+               return ERR_CAST(tiqn);
        /*
         * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
         */
index 9806507..c4c68da 100644 (file)
@@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn(
                         */
                        list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
                                                ooo_list) {
-                               while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+                               if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
                                        continue;
 
                                list_add(&ooo_cmdsn->ooo_list,
index bcaf82f..daad362 100644 (file)
@@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
                                        ISCSI_LOGIN_STATUS_TARGET_ERROR);
                        goto new_sess_out;
                }
-#if 0
-               if (!iscsi_ntop6((const unsigned char *)
-                               &sock_in6.sin6_addr.in6_u,
-                               (char *)&conn->ipv6_login_ip[0],
-                               IPV6_ADDRESS_SPACE)) {
-                       pr_err("iscsi_ntop6() failed\n");
-                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
-                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
-                       goto new_sess_out;
-               }
-#else
-               pr_debug("Skipping iscsi_ntop6()\n");
-#endif
+               snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
+                               &sock_in6.sin6_addr.in6_u);
+               conn->login_port = ntohs(sock_in6.sin6_port);
        } else {
                memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
index 252e246..497b2e7 100644 (file)
@@ -545,13 +545,13 @@ int iscsi_copy_param_list(
        struct iscsi_param_list *src_param_list,
        int leading)
 {
-       struct iscsi_param *new_param = NULL, *param = NULL;
+       struct iscsi_param *param = NULL;
+       struct iscsi_param *new_param = NULL;
        struct iscsi_param_list *param_list = NULL;
 
        param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
        if (!param_list) {
-               pr_err("Unable to allocate memory for"
-                               " struct iscsi_param_list.\n");
+               pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
                goto err_out;
        }
        INIT_LIST_HEAD(&param_list->param_list);
@@ -567,8 +567,17 @@ int iscsi_copy_param_list(
 
                new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
                if (!new_param) {
-                       pr_err("Unable to allocate memory for"
-                               " struct iscsi_param.\n");
+                       pr_err("Unable to allocate memory for struct iscsi_param.\n");
+                       goto err_out;
+               }
+
+               new_param->name = kstrdup(param->name, GFP_KERNEL);
+               new_param->value = kstrdup(param->value, GFP_KERNEL);
+               if (!new_param->value || !new_param->name) {
+                       kfree(new_param->value);
+                       kfree(new_param->name);
+                       kfree(new_param);
+                       pr_err("Unable to allocate memory for parameter name/value.\n");
                        goto err_out;
                }
 
@@ -580,32 +589,12 @@ int iscsi_copy_param_list(
                new_param->use = param->use;
                new_param->type_range = param->type_range;
 
-               new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
-               if (!new_param->name) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter name.\n");
-                       goto err_out;
-               }
-
-               new_param->value = kzalloc(strlen(param->value) + 1,
-                               GFP_KERNEL);
-               if (!new_param->value) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter value.\n");
-                       goto err_out;
-               }
-
-               memcpy(new_param->name, param->name, strlen(param->name));
-               new_param->name[strlen(param->name)] = '\0';
-               memcpy(new_param->value, param->value, strlen(param->value));
-               new_param->value[strlen(param->value)] = '\0';
-
                list_add_tail(&new_param->p_list, &param_list->param_list);
        }
 
-       if (!list_empty(&param_list->param_list))
+       if (!list_empty(&param_list->param_list)) {
                *dst_param_list = param_list;
-       else {
+       else {
                pr_err("No parameters allocated.\n");
                goto err_out;
        }
index a1acb01..a0d23bc 100644 (file)
@@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
        if (!cmd->tmr_req) {
                pr_err("Unable to allocate memory for"
                        " Task Management command!\n");
-               return NULL;
+               goto out;
        }
        /*
         * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
        return cmd;
 out:
        iscsit_release_cmd(cmd);
-       if (se_cmd)
-               transport_free_se_cmd(se_cmd);
        return NULL;
 }
 
index 8ae09a1..89ae923 100644 (file)
@@ -67,6 +67,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 {
        struct se_lun *lun = cmd->se_lun;
        struct se_device *dev = cmd->se_dev;
+       struct se_portal_group *tpg = lun->lun_sep->sep_tpg;
        unsigned char *buf;
 
        /*
@@ -81,9 +82,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 
        buf = transport_kmap_first_data_page(cmd);
 
-       buf[0] = dev->transport->get_device_type(dev);
-       if (buf[0] == TYPE_TAPE)
-               buf[1] = 0x80;
+       if (dev == tpg->tpg_virt_lun0.lun_se_dev) {
+               buf[0] = 0x3f; /* Not connected */
+       } else {
+               buf[0] = dev->transport->get_device_type(dev);
+               if (buf[0] == TYPE_TAPE)
+                       buf[1] = 0x80;
+       }
        buf[2] = dev->transport->get_device_rev(dev);
 
        /*
@@ -915,8 +920,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten)
                length += target_modesense_control(dev, &buf[offset+length]);
                break;
        default:
-               pr_err("Got Unknown Mode Page: 0x%02x\n",
-                               cdb[2] & 0x3f);
+               pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n",
+                      cdb[2] & 0x3f, cdb[3]);
                return PYX_TRANSPORT_UNKNOWN_MODE_PAGE;
        }
        offset += length;
@@ -1072,8 +1077,6 @@ target_emulate_unmap(struct se_task *task)
                size -= 16;
        }
 
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
 err:
        transport_kunmap_first_data_page(cmd);
 
@@ -1085,24 +1088,17 @@ err:
  * Note this is not used for TCM/pSCSI passthrough
  */
 static int
-target_emulate_write_same(struct se_task *task, int write_same32)
+target_emulate_write_same(struct se_task *task, u32 num_blocks)
 {
        struct se_cmd *cmd = task->task_se_cmd;
        struct se_device *dev = cmd->se_dev;
        sector_t range;
        sector_t lba = cmd->t_task_lba;
-       unsigned int num_blocks;
        int ret;
        /*
-        * Extract num_blocks from the WRITE_SAME_* CDB.  Then use the explict
-        * range when non zero is supplied, otherwise calculate the remaining
-        * range based on ->get_blocks() - starting LBA.
+        * Use the explicit range when non zero is supplied, otherwise calculate
+        * the remaining range based on ->get_blocks() - starting LBA.
         */
-       if (write_same32)
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]);
-       else
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]);
-
        if (num_blocks != 0)
                range = num_blocks;
        else
@@ -1117,8 +1113,6 @@ target_emulate_write_same(struct se_task *task, int write_same32)
                return ret;
        }
 
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
        return 0;
 }
 
@@ -1165,13 +1159,23 @@ transport_emulate_control_cdb(struct se_task *task)
                }
                ret = target_emulate_unmap(task);
                break;
+       case WRITE_SAME:
+               if (!dev->transport->do_discard) {
+                       pr_err("WRITE_SAME emulation not supported"
+                                       " for: %s\n", dev->transport->name);
+                       return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
+               }
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be16(&cmd->t_task_cdb[7]));
+               break;
        case WRITE_SAME_16:
                if (!dev->transport->do_discard) {
                        pr_err("WRITE_SAME_16 emulation not supported"
                                        " for: %s\n", dev->transport->name);
                        return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                }
-               ret = target_emulate_write_same(task, 0);
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[10]));
                break;
        case VARIABLE_LENGTH_CMD:
                service_action =
@@ -1184,7 +1188,8 @@ transport_emulate_control_cdb(struct se_task *task)
                                        dev->transport->name);
                                return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                        }
-                       ret = target_emulate_write_same(task, 1);
+                       ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[28]));
                        break;
                default:
                        pr_err("Unsupported VARIABLE_LENGTH_CMD SA:"
@@ -1219,8 +1224,14 @@ transport_emulate_control_cdb(struct se_task *task)
 
        if (ret < 0)
                return ret;
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
+       /*
+        * Handle the successful completion here unless a caller
+        * has explictly requested an asychronous completion.
+        */
+       if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) {
+               task->task_scsi_status = GOOD;
+               transport_complete_task(task, 1);
+       }
 
        return PYX_TRANSPORT_SENT_TO_TRANSPORT;
 }
index b38b6c9..ca6e4a4 100644 (file)
@@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
        struct se_dev_entry *deve;
        u32 i;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
 
                spin_lock_irq(&nacl->device_list_lock);
                for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
@@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
                }
                spin_unlock_irq(&nacl->device_list_lock);
 
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 }
 
 static struct se_port *core_alloc_port(struct se_device *dev)
@@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev)
        return ret;
 }
 
+u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size)
+{
+       u32 tmp, aligned_max_sectors;
+       /*
+        * Limit max_sectors to a PAGE_SIZE aligned value for modern
+        * transport_allocate_data_tasks() operation.
+        */
+       tmp = rounddown((max_sectors * block_size), PAGE_SIZE);
+       aligned_max_sectors = (tmp / block_size);
+       if (max_sectors != aligned_max_sectors) {
+               printk(KERN_INFO "Rounding down aligned max_sectors from %u"
+                               " to %u\n", max_sectors, aligned_max_sectors);
+               return aligned_max_sectors;
+       }
+
+       return max_sectors;
+}
+
 void se_dev_set_default_attribs(
        struct se_device *dev,
        struct se_dev_limits *dev_limits)
@@ -878,6 +896,11 @@ void se_dev_set_default_attribs(
         * max_sectors is based on subsystem plugin dependent requirements.
         */
        dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors;
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors,
+                                               limits->logical_block_size);
        dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors;
        /*
         * Set optimal_sectors from max_sectors, which can be lowered via
@@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors)
                        return -EINVAL;
                }
        }
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       max_sectors = se_dev_align_max_sectors(max_sectors,
+                               dev->se_sub_dev->se_dev_attrib.block_size);
 
        dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors;
        pr_debug("dev[%p]: SE Device max_sectors changed to %u\n",
@@ -1344,15 +1372,17 @@ struct se_lun *core_dev_add_lun(
         */
        if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) {
                struct se_node_acl *acl;
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
-                       if (acl->dynamic_node_acl) {
-                               spin_unlock_bh(&tpg->acl_node_lock);
+                       if (acl->dynamic_node_acl &&
+                           (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only ||
+                            !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) {
+                               spin_unlock_irq(&tpg->acl_node_lock);
                                core_tpg_add_node_to_devs(acl, tpg);
-                               spin_lock_bh(&tpg->acl_node_lock);
+                               spin_lock_irq(&tpg->acl_node_lock);
                        }
                }
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
        }
 
        return lun_p;
index f165469..55bbe08 100644 (file)
@@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl(
 
        se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name);
        if (IS_ERR(se_nacl))
-               return ERR_PTR(PTR_ERR(se_nacl));
+               return ERR_CAST(se_nacl);
 
        nacl_cg = &se_nacl->acl_group;
        nacl_cg->default_groups = se_nacl->acl_default_groups;
index 1c1b849..7fd3a16 100644 (file)
@@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port(
                         * from the decoded fabric module specific TransportID
                         * at *i_str.
                         */
-                       spin_lock_bh(&tmp_tpg->acl_node_lock);
+                       spin_lock_irq(&tmp_tpg->acl_node_lock);
                        dest_node_acl = __core_tpg_get_initiator_node_acl(
                                                tmp_tpg, i_str);
                        if (dest_node_acl) {
                                atomic_inc(&dest_node_acl->acl_pr_ref_count);
                                smp_mb__after_atomic_inc();
                        }
-                       spin_unlock_bh(&tmp_tpg->acl_node_lock);
+                       spin_unlock_irq(&tmp_tpg->acl_node_lock);
 
                        if (!dest_node_acl) {
                                core_scsi3_tpg_undepend_item(tmp_tpg);
@@ -3496,14 +3496,14 @@ after_iport_check:
        /*
         * Locate the destination struct se_node_acl from the received Transport ID
         */
-       spin_lock_bh(&dest_se_tpg->acl_node_lock);
+       spin_lock_irq(&dest_se_tpg->acl_node_lock);
        dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
                                initiator_str);
        if (dest_node_acl) {
                atomic_inc(&dest_node_acl->acl_pr_ref_count);
                smp_mb__after_atomic_inc();
        }
-       spin_unlock_bh(&dest_se_tpg->acl_node_lock);
+       spin_unlock_irq(&dest_se_tpg->acl_node_lock);
 
        if (!dest_node_acl) {
                pr_err("Unable to locate %s dest_node_acl for"
index 3dd81d2..e567e12 100644 (file)
@@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                length = req->rd_size;
 
                        dst = sg_virt(&sg_d[i++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        src = sg_virt(&sg_s[j]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        dst_offset = 0;
                        src_offset = length;
@@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                length = req->rd_size;
 
                        dst = sg_virt(&sg_d[i]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        if (sg_d[i].length == length) {
                                i++;
@@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                dst_offset = length;
 
                        src = sg_virt(&sg_s[j++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        src_offset = 0;
                        page_end = 1;
@@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                length = req->rd_size;
 
                        src = sg_virt(&sg_s[i++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        dst = sg_virt(&sg_d[j]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        src_offset = 0;
                        dst_offset = length;
@@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                length = req->rd_size;
 
                        src = sg_virt(&sg_s[i]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        if (sg_s[i].length == length) {
                                i++;
@@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                src_offset = length;
 
                        dst = sg_virt(&sg_d[j++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        dst_offset = 0;
                        page_end = 1;
index 4f1ba4c..162b736 100644 (file)
@@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(
 {
        struct se_node_acl *acl;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
                if (!strcmp(acl->initiatorname, initiatorname) &&
                    !acl->dynamic_node_acl) {
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        return acl;
                }
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        return NULL;
 }
@@ -298,13 +298,21 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
                tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl);
                return NULL;
        }
+       /*
+        * Here we only create demo-mode MappedLUNs from the active
+        * TPG LUNs if the fabric is not explictly asking for
+        * tpg_check_demo_mode_login_only() == 1.
+        */
+       if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) &&
+           (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1))
+               do { ; } while (0);
+       else
+               core_tpg_add_node_to_devs(acl, tpg);
 
-       core_tpg_add_node_to_devs(acl, tpg);
-
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_add_tail(&acl->acl_list, &tpg->acl_node_list);
        tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s"
                " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
@@ -354,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 {
        struct se_node_acl *acl = NULL;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
        if (acl) {
                if (acl->dynamic_node_acl) {
@@ -362,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                        pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
                                " for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
                                tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        /*
                         * Release the locally allocated struct se_node_acl
                         * because * core_tpg_add_initiator_node_acl() returned
@@ -378,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                        " Node %s already exists for TPG %u, ignoring"
                        " request.\n",  tpg->se_tpg_tfo->get_fabric_name(),
                        initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return ERR_PTR(-EEXIST);
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        if (!se_nacl) {
                pr_err("struct se_node_acl pointer is NULL\n");
@@ -418,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                return ERR_PTR(-EINVAL);
        }
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_add_tail(&acl->acl_list, &tpg->acl_node_list);
        tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
 done:
        pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s"
@@ -445,14 +453,14 @@ int core_tpg_del_initiator_node_acl(
        struct se_session *sess, *sess_tmp;
        int dynamic_acl = 0;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        if (acl->dynamic_node_acl) {
                acl->dynamic_node_acl = 0;
                dynamic_acl = 1;
        }
        list_del(&acl->acl_list);
        tpg->num_node_acls--;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        spin_lock_bh(&tpg->session_lock);
        list_for_each_entry_safe(sess, sess_tmp,
@@ -503,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth(
        struct se_node_acl *acl;
        int dynamic_acl = 0;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
        if (!acl) {
                pr_err("Access Control List entry for %s Initiator"
                        " Node %s does not exists for TPG %hu, ignoring"
                        " request.\n", tpg->se_tpg_tfo->get_fabric_name(),
                        initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return -ENODEV;
        }
        if (acl->dynamic_node_acl) {
                acl->dynamic_node_acl = 0;
                dynamic_acl = 1;
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        spin_lock_bh(&tpg->session_lock);
        list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) {
@@ -533,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth(
                                tpg->se_tpg_tfo->get_fabric_name(), initiatorname);
                        spin_unlock_bh(&tpg->session_lock);
 
-                       spin_lock_bh(&tpg->acl_node_lock);
+                       spin_lock_irq(&tpg->acl_node_lock);
                        if (dynamic_acl)
                                acl->dynamic_node_acl = 1;
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        return -EEXIST;
                }
                /*
@@ -571,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth(
                if (init_sess)
                        tpg->se_tpg_tfo->close_session(init_sess);
 
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                if (dynamic_acl)
                        acl->dynamic_node_acl = 1;
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return -EINVAL;
        }
        spin_unlock_bh(&tpg->session_lock);
@@ -590,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth(
                initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
                tpg->se_tpg_tfo->tpg_get_tag(tpg));
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        if (dynamic_acl)
                acl->dynamic_node_acl = 1;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        return 0;
 }
@@ -717,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
         * not been released because of TFO->tpg_check_demo_mode_cache() == 1
         * in transport_deregister_session().
         */
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
        list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
                        acl_list) {
                list_del(&nacl->acl_list);
                se_tpg->num_node_acls--;
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irq(&se_tpg->acl_node_lock);
 
                core_tpg_wait_for_nacl_pr_ref(nacl);
                core_free_device_list_for_node(nacl, se_tpg);
                se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl);
 
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irq(&se_tpg->acl_node_lock);
        }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
 
        if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
                core_tpg_release_virtual_lun0(se_tpg);
index 8976032..8d0c58e 100644 (file)
@@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess)
 {
        struct se_portal_group *se_tpg = se_sess->se_tpg;
        struct se_node_acl *se_nacl;
+       unsigned long flags;
 
        if (!se_tpg) {
                transport_free_session(se_sess);
                return;
        }
 
-       spin_lock_bh(&se_tpg->session_lock);
+       spin_lock_irqsave(&se_tpg->session_lock, flags);
        list_del(&se_sess->sess_list);
        se_sess->se_tpg = NULL;
        se_sess->fabric_sess_ptr = NULL;
-       spin_unlock_bh(&se_tpg->session_lock);
+       spin_unlock_irqrestore(&se_tpg->session_lock, flags);
 
        /*
         * Determine if we need to do extra work for this initiator node's
@@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess)
         */
        se_nacl = se_sess->se_node_acl;
        if (se_nacl) {
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                if (se_nacl->dynamic_node_acl) {
                        if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache(
                                        se_tpg)) {
                                list_del(&se_nacl->acl_list);
                                se_tpg->num_node_acls--;
-                               spin_unlock_bh(&se_tpg->acl_node_lock);
+                               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 
                                core_tpg_wait_for_nacl_pr_ref(se_nacl);
                                core_free_device_list_for_node(se_nacl, se_tpg);
                                se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg,
                                                se_nacl);
-                               spin_lock_bh(&se_tpg->acl_node_lock);
+                               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                        }
                }
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
        }
 
        transport_free_session(se_sess);
@@ -2053,8 +2054,14 @@ static void transport_generic_request_failure(
                cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE;
                break;
        }
-
-       if (!sc)
+       /*
+        * If a fabric does not define a cmd->se_tfo->new_cmd_map caller,
+        * make the call to transport_send_check_condition_and_sense()
+        * directly.  Otherwise expect the fabric to make the call to
+        * transport_send_check_condition_and_sense() after handling
+        * possible unsoliticied write data payloads.
+        */
+       if (!sc && !cmd->se_tfo->new_cmd_map)
                transport_new_cmd_failure(cmd);
        else {
                ret = transport_send_check_condition_and_sense(cmd,
@@ -2847,12 +2854,42 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd)
                        " transport_dev_end_lba(): %llu\n",
                        cmd->t_task_lba, sectors,
                        transport_dev_end_lba(dev));
-               pr_err("  We should return CHECK_CONDITION"
-                      " but we don't yet\n");
-               return 0;
+               return -EINVAL;
        }
 
-       return sectors;
+       return 0;
+}
+
+static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev)
+{
+       /*
+        * Determine if the received WRITE_SAME is used to for direct
+        * passthrough into Linux/SCSI with struct request via TCM/pSCSI
+        * or we are signaling the use of internal WRITE_SAME + UNMAP=1
+        * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK code.
+        */
+       int passthrough = (dev->transport->transport_type ==
+                               TRANSPORT_PLUGIN_PHBA_PDEV);
+
+       if (!passthrough) {
+               if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+                       pr_err("WRITE_SAME PBDATA and LBDATA"
+                               " bits not supported for Block Discard"
+                               " Emulation\n");
+                       return -ENOSYS;
+               }
+               /*
+                * Currently for the emulated case we only accept
+                * tpws with the UNMAP=1 bit set.
+                */
+               if (!(flags[0] & 0x08)) {
+                       pr_err("WRITE_SAME w/o UNMAP bit not"
+                               " supported for Block Discard Emulation\n");
+                       return -ENOSYS;
+               }
+       }
+
+       return 0;
 }
 
 /*     transport_generic_cmd_sequencer():
@@ -3065,7 +3102,7 @@ static int transport_generic_cmd_sequencer(
                                goto out_unsupported_cdb;
 
                        if (sectors)
-                               size = transport_get_size(sectors, cdb, cmd);
+                               size = transport_get_size(1, cdb, cmd);
                        else {
                                pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not"
                                       " supported\n");
@@ -3075,27 +3112,9 @@ static int transport_generic_cmd_sequencer(
                        cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
                        cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
 
-                       /*
-                        * Skip the remaining assignments for TCM/PSCSI passthrough
-                        */
-                       if (passthrough)
-                               break;
-
-                       if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
+                       if (target_check_write_same_discard(&cdb[10], dev) < 0)
                                goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[10] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not"
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+
                        break;
                default:
                        pr_err("VARIABLE_LENGTH_CMD service action"
@@ -3330,10 +3349,12 @@ static int transport_generic_cmd_sequencer(
                cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC;
                /*
                 * Check to ensure that LBA + Range does not exceed past end of
-                * device.
+                * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
                 */
-               if (!transport_cmd_get_valid_sectors(cmd))
-                       goto out_invalid_cdb_field;
+               if ((cmd->t_task_lba != 0) || (sectors != 0)) {
+                       if (transport_cmd_get_valid_sectors(cmd) < 0)
+                               goto out_invalid_cdb_field;
+               }
                break;
        case UNMAP:
                size = get_unaligned_be16(&cdb[7]);
@@ -3345,40 +3366,38 @@ static int transport_generic_cmd_sequencer(
                        goto out_unsupported_cdb;
 
                if (sectors)
-                       size = transport_get_size(sectors, cdb, cmd);
+                       size = transport_get_size(1, cdb, cmd);
                else {
                        pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
                        goto out_invalid_cdb_field;
                }
 
                cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
-               passthrough = (dev->transport->transport_type ==
-                               TRANSPORT_PLUGIN_PHBA_PDEV);
-               /*
-                * Determine if the received WRITE_SAME_16 is used to for direct
-                * passthrough into Linux/SCSI with struct request via TCM/pSCSI
-                * or we are signaling the use of internal WRITE_SAME + UNMAP=1
-                * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and
-                * TCM/FILEIO subsystem plugin backstores.
-                */
-               if (!passthrough) {
-                       if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[1] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not "
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+               cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
+               break;
+       case WRITE_SAME:
+               sectors = transport_get_sectors_10(cdb, cmd, &sector_ret);
+               if (sector_ret)
+                       goto out_unsupported_cdb;
+
+               if (sectors)
+                       size = transport_get_size(1, cdb, cmd);
+               else {
+                       pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
+                       goto out_invalid_cdb_field;
                }
+
+               cmd->t_task_lba = get_unaligned_be32(&cdb[2]);
                cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+               /*
+                * Follow sbcr26 with WRITE_SAME (10) and check for the existence
+                * of byte 1 bit 3 UNMAP instead of original reserved field
+                */
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
                break;
        case ALLOW_MEDIUM_REMOVAL:
        case GPCMD_CLOSE_TRACK:
@@ -3873,9 +3892,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd);
 static int transport_new_cmd_obj(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
-       u32 task_cdbs;
-       u32 rc;
-       int set_counts = 1;
+       int set_counts = 1, rc, task_cdbs;
 
        /*
         * Setup any BIDI READ tasks and memory from
@@ -3893,7 +3910,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                        cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                        cmd->scsi_sense_reason =
                                TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-                       return PYX_TRANSPORT_LU_COMM_FAILURE;
+                       return -EINVAL;
                }
                atomic_inc(&cmd->t_fe_count);
                atomic_inc(&cmd->t_se_count);
@@ -3912,7 +3929,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                cmd->scsi_sense_reason =
                        TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-               return PYX_TRANSPORT_LU_COMM_FAILURE;
+               return -EINVAL;
        }
 
        if (set_counts) {
@@ -4028,8 +4045,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                if (!task->task_sg)
                        continue;
 
-               BUG_ON(!task->task_padded_sg);
-
                if (!sg_first) {
                        sg_first = task->task_sg;
                        chained_nents = task->task_sg_nents;
@@ -4037,9 +4052,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                        sg_chain(sg_prev, sg_prev_nents, task->task_sg);
                        chained_nents += task->task_sg_nents;
                }
+               /*
+                * For the padded tasks, use the extra SGL vector allocated
+                * in transport_allocate_data_tasks() for the sg_prev_nents
+                * offset into sg_chain() above..  The last task of a
+                * multi-task list, or a single task will not have
+                * task->task_sg_padded set..
+                */
+               if (task->task_padded_sg)
+                       sg_prev_nents = (task->task_sg_nents + 1);
+               else
+                       sg_prev_nents = task->task_sg_nents;
 
                sg_prev = task->task_sg;
-               sg_prev_nents = task->task_sg_nents;
        }
        /*
         * Setup the starting pointer and total t_tasks_sg_linked_no including
@@ -4091,7 +4116,7 @@ static int transport_allocate_data_tasks(
        
        cmd_sg = sgl;
        for (i = 0; i < task_count; i++) {
-               unsigned int task_size;
+               unsigned int task_size, task_sg_nents_padded;
                int count;
 
                task = transport_generic_get_task(cmd, data_direction);
@@ -4110,30 +4135,33 @@ static int transport_allocate_data_tasks(
 
                /* Update new cdb with updated lba/sectors */
                cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb);
-
+               /*
+                * This now assumes that passed sg_ents are in PAGE_SIZE chunks
+                * in order to calculate the number per task SGL entries
+                */
+               task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE);
                /*
                 * Check if the fabric module driver is requesting that all
                 * struct se_task->task_sg[] be chained together..  If so,
                 * then allocate an extra padding SG entry for linking and
-                * marking the end of the chained SGL.
-                * Possibly over-allocate task sgl size by using cmd sgl size.
-                * It's so much easier and only a waste when task_count > 1.
-                * That is extremely rare.
+                * marking the end of the chained SGL for every task except
+                * the last one for (task_count > 1) operation, or skipping
+                * the extra padding for the (task_count == 1) case.
                 */
-               task->task_sg_nents = sgl_nents;
-               if (cmd->se_tfo->task_sg_chaining) {
-                       task->task_sg_nents++;
+               if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) {
+                       task_sg_nents_padded = (task->task_sg_nents + 1);
                        task->task_padded_sg = 1;
-               }
+               } else
+                       task_sg_nents_padded = task->task_sg_nents;
 
                task->task_sg = kmalloc(sizeof(struct scatterlist) *
-                                       task->task_sg_nents, GFP_KERNEL);
+                                       task_sg_nents_padded, GFP_KERNEL);
                if (!task->task_sg) {
                        cmd->se_dev->transport->free_task(task);
                        return -ENOMEM;
                }
 
-               sg_init_table(task->task_sg, task->task_sg_nents);
+               sg_init_table(task->task_sg, task_sg_nents_padded);
 
                task_size = task->task_size;
 
@@ -4230,10 +4258,13 @@ static u32 transport_allocate_tasks(
        struct scatterlist *sgl,
        unsigned int sgl_nents)
 {
-       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)
+       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) {
+               if (transport_cmd_get_valid_sectors(cmd) < 0)
+                       return -EINVAL;
+
                return transport_allocate_data_tasks(cmd, lba, data_direction,
                                                     sgl, sgl_nents);
-       else
+       else
                return transport_allocate_control_task(cmd);
 
 }
@@ -4726,6 +4757,13 @@ int transport_send_check_condition_and_sense(
         */
        switch (reason) {
        case TCM_NON_EXISTENT_LUN:
+               /* CURRENT ERROR */
+               buffer[offset] = 0x70;
+               /* ILLEGAL REQUEST */
+               buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
+               /* LOGICAL UNIT NOT SUPPORTED */
+               buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25;
+               break;
        case TCM_UNSUPPORTED_SCSI_OPCODE:
        case TCM_SECTOR_COUNT_TOO_MANY:
                /* CURRENT ERROR */
index 8781d1e..b15879d 100644 (file)
@@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
        struct se_portal_group *se_tpg = &tpg->se_tpg;
        struct se_node_acl *se_acl;
 
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
        list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) {
                acl = container_of(se_acl, struct ft_node_acl, se_node_acl);
                pr_debug("acl %p port_name %llx\n",
@@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
                        break;
                }
        }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
        return found;
 }
 
@@ -655,9 +655,7 @@ static void __exit ft_exit(void)
        synchronize_rcu();
 }
 
-#ifdef MODULE
 MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION);
 MODULE_LICENSE("GPL");
 module_init(ft_init);
 module_exit(ft_exit);
-#endif /* MODULE */
index 05a8832..d06886a 100644 (file)
@@ -1009,4 +1009,4 @@ module_exit(adp8870_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("ADP8870 Backlight driver");
-MODULE_ALIAS("platform:adp8870-backlight");
+MODULE_ALIAS("i2c:adp8870-backlight");
index 9f1e389..b058291 100644 (file)
@@ -11,7 +11,7 @@
  * BRIGHT, on the Cirrus EP9307, EP9312, and EP9315 processors.
  */
 
-
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/fb.h>
index b8f38ec..8b5b2a4 100644 (file)
@@ -28,6 +28,8 @@ struct pwm_bl_data {
        unsigned int            lth_brightness;
        int                     (*notify)(struct device *,
                                          int brightness);
+       void                    (*notify_after)(struct device *,
+                                       int brightness);
        int                     (*check_fb)(struct device *, struct fb_info *);
 };
 
@@ -55,6 +57,10 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
                pwm_config(pb->pwm, brightness, pb->period);
                pwm_enable(pb->pwm);
        }
+
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, brightness);
+
        return 0;
 }
 
@@ -105,6 +111,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->period = data->pwm_period_ns;
        pb->notify = data->notify;
+       pb->notify_after = data->notify_after;
        pb->check_fb = data->check_fb;
        pb->lth_brightness = data->lth_brightness *
                (data->pwm_period_ns / data->max_brightness);
@@ -172,6 +179,8 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
                pb->notify(pb->dev, 0);
        pwm_config(pb->pwm, 0, pb->period);
        pwm_disable(pb->pwm);
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, 0);
        return 0;
 }
 
index 02bf7bf..b5abaae 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     dscore.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -1024,5 +1024,5 @@ module_init(ds_init);
 module_exit(ds_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("DS2490 USB <-> W1 bus master driver (DS9490*)");
index 334d1cc..f667c26 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     matrox_w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@
 #include "../w1_log.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for transport(Dallas 1-wire prtocol) over VGA DDC(matrox gpio).");
 
 static struct pci_device_id matrox_w1_tbl[] = {
index c377818..7c8cdb8 100644 (file)
@@ -373,7 +373,7 @@ static int w1_f29_add_slave(struct w1_slave *sl)
 static void w1_f29_remove_slave(struct w1_slave *sl)
 {
        int i;
-       for (i = NB_SYSFS_BIN_FILES; i <= 0; --i)
+       for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i)
                sysfs_remove_bin_file(&sl->dev.kobj,
                        &(w1_f29_sysfs_bin_files[i]));
 }
index cc8c02e..8465562 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_smem.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, 64bit memory family.");
 
 static struct w1_family w1_smem_family_01 = {
index 402928b..a1ef9b5 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_therm.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, temperature family.");
 
 /* Allow the strong pullup to be disabled, but default to enabled.
index 6c136c1..c374978 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@
 #include "w1_netlink.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol.");
 
 static int w1_timeout = 10;
index 1ce23fc..4d012ca 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 4a09904..6335979 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_family.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 98a1ac0..490cda2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_family.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index b50be3f..d220bce 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_int.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 4274082..2ad7d44 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_int.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 8e8b64c..765b37b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_io.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index e6ab7cf..9c7bd62 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_log.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 55aabd9..40788c9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.c
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 27e950f..b0922dc 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.h
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 640fc22..168a80f 100644 (file)
@@ -1358,6 +1358,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;
 
+       err = -EINVAL;
+       if (size != sizeof(outarg) + outarg.namelen + 1)
+               goto err;
+
        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
index d480d9a..594f07a 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/compat.h>
+#include <linux/swap.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -245,6 +246,12 @@ void fuse_release_common(struct file *file, int opcode)
        req = ff->reserved_req;
        fuse_prepare_release(ff, file->f_flags, opcode);
 
+       if (ff->flock) {
+               struct fuse_release_in *inarg = &req->misc.release.in;
+               inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+               inarg->lock_owner = fuse_lock_owner_id(ff->fc,
+                                                      (fl_owner_t) file);
+       }
        /* Hold vfsmount and dentry until release is finished */
        path_get(&file->f_path);
        req->misc.release.path = file->f_path;
@@ -755,18 +762,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
        return req->misc.write.out.size;
 }
 
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned flags,
-                       struct page **pagep, void **fsdata)
-{
-       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-
-       *pagep = grab_cache_page_write_begin(mapping, index, flags);
-       if (!*pagep)
-               return -ENOMEM;
-       return 0;
-}
-
 void fuse_write_update_size(struct inode *inode, loff_t pos)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -779,62 +774,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos)
        spin_unlock(&fc->lock);
 }
 
-static int fuse_buffered_write(struct file *file, struct inode *inode,
-                              loff_t pos, unsigned count, struct page *page)
-{
-       int err;
-       size_t nres;
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-       struct fuse_req *req;
-
-       if (is_bad_inode(inode))
-               return -EIO;
-
-       /*
-        * Make sure writepages on the same page are not mixed up with
-        * plain writes.
-        */
-       fuse_wait_on_page_writeback(inode, page->index);
-
-       req = fuse_get_req(fc);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->in.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_offset = offset;
-       nres = fuse_send_write(req, file, pos, count, NULL);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (!err && !nres)
-               err = -EIO;
-       if (!err) {
-               pos += nres;
-               fuse_write_update_size(inode, pos);
-               if (count == PAGE_CACHE_SIZE)
-                       SetPageUptodate(page);
-       }
-       fuse_invalidate_attr(inode);
-       return err ? err : nres;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned copied,
-                       struct page *page, void *fsdata)
-{
-       struct inode *inode = mapping->host;
-       int res = 0;
-
-       if (copied)
-               res = fuse_buffered_write(file, inode, pos, copied, page);
-
-       unlock_page(page);
-       page_cache_release(page);
-       return res;
-}
-
 static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
                                    struct inode *inode, loff_t pos,
                                    size_t count)
@@ -908,6 +847,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                pagefault_enable();
                flush_dcache_page(page);
 
+               mark_page_accessed(page);
+
                if (!tmp) {
                        unlock_page(page);
                        page_cache_release(page);
@@ -1559,11 +1500,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
        struct fuse_conn *fc = get_fuse_conn(inode);
        int err;
 
-       if (fc->no_lock) {
+       if (fc->no_flock) {
                err = flock_lock_file_wait(file, fl);
        } else {
+               struct fuse_file *ff = file->private_data;
+
                /* emulate flock with POSIX locks */
                fl->fl_owner = (fl_owner_t) file;
+               ff->flock = true;
                err = fuse_setlk(file, fl, 1);
        }
 
@@ -2201,8 +2145,6 @@ static const struct address_space_operations fuse_file_aops  = {
        .readpage       = fuse_readpage,
        .writepage      = fuse_writepage,
        .launder_page   = fuse_launder_page,
-       .write_begin    = fuse_write_begin,
-       .write_end      = fuse_write_end,
        .readpages      = fuse_readpages,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .bmap           = fuse_bmap,
index c6aa2d4..cf6db0a 100644 (file)
@@ -135,6 +135,9 @@ struct fuse_file {
 
        /** Wait queue head for poll */
        wait_queue_head_t poll_wait;
+
+       /** Has flock been performed on this file? */
+       bool flock:1;
 };
 
 /** One input argument of a request */
@@ -448,7 +451,7 @@ struct fuse_conn {
        /** Is removexattr not implemented by fs? */
        unsigned no_removexattr:1;
 
-       /** Are file locking primitives not implemented by fs? */
+       /** Are posix file locking primitives not implemented by fs? */
        unsigned no_lock:1;
 
        /** Is access not implemented by fs? */
@@ -472,6 +475,9 @@ struct fuse_conn {
        /** Don't apply umask to creation modes */
        unsigned dont_mask:1;
 
+       /** Are BSD file locking primitives not implemented by fs? */
+       unsigned no_flock:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
index 38f84cd..12b5029 100644 (file)
@@ -71,7 +71,7 @@ struct fuse_mount_data {
        unsigned blksize;
 };
 
-struct fuse_forget_link *fuse_alloc_forget()
+struct fuse_forget_link *fuse_alloc_forget(void)
 {
        return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
 }
@@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->async_read = 1;
                        if (!(arg->flags & FUSE_POSIX_LOCKS))
                                fc->no_lock = 1;
+                       if (arg->minor >= 17) {
+                               if (!(arg->flags & FUSE_FLOCK_LOCKS))
+                                       fc->no_flock = 1;
+                       }
                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                fc->atomic_o_trunc = 1;
                        if (arg->minor >= 9) {
@@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                } else {
                        ra_pages = fc->max_read / PAGE_CACHE_SIZE;
                        fc->no_lock = 1;
+                       fc->no_flock = 1;
                }
 
                fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
@@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
        arg->minor = FUSE_KERNEL_MINOR_VERSION;
        arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
        arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+               FUSE_FLOCK_LOCKS;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
index 87b6e04..ec88953 100644 (file)
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
                        inode->i_op = &page_symlink_inode_operations;
                        break;
                }
+               lockdep_annotate_inode_mutex_key(inode);
        }
        return inode;
 }
index 73920d5..ec79246 100644 (file)
@@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb)
 }
 EXPORT_SYMBOL(new_inode);
 
-/**
- * unlock_new_inode - clear the I_NEW state and wake up any waiters
- * @inode:     new inode to unlock
- *
- * Called when the inode is fully initialised to clear the new state of the
- * inode and wake up anyone waiting for the inode to finish initialisation.
- */
-void unlock_new_inode(struct inode *inode)
-{
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
        if (S_ISDIR(inode->i_mode)) {
                struct file_system_type *type = inode->i_sb->s_type;
 
@@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode)
                                          &type->i_mutex_dir_key);
                }
        }
+}
+EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
 #endif
+
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:     new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
+void unlock_new_inode(struct inode *inode)
+{
+       lockdep_annotate_inode_mutex_key(inode);
        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
index 75bb316..427a4e8 100644 (file)
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-ccflags-y := -I$(src) -I$(src)/linux-2.6
-ccflags-$(CONFIG_XFS_DEBUG) += -g
+ccflags-y += -I$(src)                  # needed for trace events
 
-XFS_LINUX := linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 obj-$(CONFIG_XFS_FS)           += xfs.o
 
-xfs-y                          += linux-2.6/xfs_trace.o
-
-xfs-$(CONFIG_XFS_QUOTA)                += $(addprefix quota/, \
-                                  xfs_dquot.o \
-                                  xfs_dquot_item.o \
-                                  xfs_trans_dquot.o \
-                                  xfs_qm_syscalls.o \
-                                  xfs_qm_bhv.o \
-                                  xfs_qm.o)
-xfs-$(CONFIG_XFS_QUOTA)                += linux-2.6/xfs_quotaops.o
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)          += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)    += $(XFS_LINUX)/xfs_acl.o
-xfs-$(CONFIG_PROC_FS)          += $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)           += $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)           += $(XFS_LINUX)/xfs_ioctl32.o
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y                          += xfs_trace.o
 
+# highlevel code
+xfs-y                          += xfs_aops.o \
+                                  xfs_bit.o \
+                                  xfs_buf.o \
+                                  xfs_dfrag.o \
+                                  xfs_discard.o \
+                                  xfs_error.o \
+                                  xfs_export.o \
+                                  xfs_file.o \
+                                  xfs_filestream.o \
+                                  xfs_fsops.o \
+                                  xfs_fs_subr.o \
+                                  xfs_globals.o \
+                                  xfs_iget.o \
+                                  xfs_ioctl.o \
+                                  xfs_iomap.o \
+                                  xfs_iops.o \
+                                  xfs_itable.o \
+                                  xfs_message.o \
+                                  xfs_mru_cache.o \
+                                  xfs_super.o \
+                                  xfs_sync.o \
+                                  xfs_xattr.o \
+                                  xfs_rename.o \
+                                  xfs_rw.o \
+                                  xfs_utils.o \
+                                  xfs_vnodeops.o \
+                                  kmem.o \
+                                  uuid.o
 
+# code shared with libxfs
 xfs-y                          += xfs_alloc.o \
                                   xfs_alloc_btree.o \
                                   xfs_attr.o \
                                   xfs_attr_leaf.o \
-                                  xfs_bit.o \
                                   xfs_bmap.o \
                                   xfs_bmap_btree.o \
                                   xfs_btree.o \
-                                  xfs_buf_item.o \
                                   xfs_da_btree.o \
                                   xfs_dir2.o \
                                   xfs_dir2_block.o \
@@ -61,49 +70,37 @@ xfs-y                               += xfs_alloc.o \
                                   xfs_dir2_leaf.o \
                                   xfs_dir2_node.o \
                                   xfs_dir2_sf.o \
-                                  xfs_error.o \
-                                  xfs_extfree_item.o \
-                                  xfs_filestream.o \
-                                  xfs_fsops.o \
                                   xfs_ialloc.o \
                                   xfs_ialloc_btree.o \
-                                  xfs_iget.o \
                                   xfs_inode.o \
-                                  xfs_inode_item.o \
-                                  xfs_iomap.o \
-                                  xfs_itable.o \
-                                  xfs_dfrag.o \
-                                  xfs_log.o \
-                                  xfs_log_cil.o \
                                   xfs_log_recover.o \
                                   xfs_mount.o \
-                                  xfs_mru_cache.o \
-                                  xfs_rename.o \
-                                  xfs_trans.o \
+                                  xfs_trans.o
+
+# low-level transaction/log code
+xfs-y                          += xfs_log.o \
+                                  xfs_log_cil.o \
+                                  xfs_buf_item.o \
+                                  xfs_extfree_item.o \
+                                  xfs_inode_item.o \
                                   xfs_trans_ail.o \
                                   xfs_trans_buf.o \
                                   xfs_trans_extfree.o \
                                   xfs_trans_inode.o \
-                                  xfs_utils.o \
-                                  xfs_vnodeops.o \
-                                  xfs_rw.o
-
-# Objects in linux/
-xfs-y                          += $(addprefix $(XFS_LINUX)/, \
-                                  kmem.o \
-                                  xfs_aops.o \
-                                  xfs_buf.o \
-                                  xfs_discard.o \
-                                  xfs_export.o \
-                                  xfs_file.o \
-                                  xfs_fs_subr.o \
-                                  xfs_globals.o \
-                                  xfs_ioctl.o \
-                                  xfs_iops.o \
-                                  xfs_message.o \
-                                  xfs_super.o \
-                                  xfs_sync.o \
-                                  xfs_xattr.o)
 
-# Objects in support/
-xfs-y                          += support/uuid.o
+# optional features
+xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
+                                  xfs_dquot_item.o \
+                                  xfs_trans_dquot.o \
+                                  xfs_qm_syscalls.o \
+                                  xfs_qm_bhv.o \
+                                  xfs_qm.o \
+                                  xfs_quotaops.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)          += xfs_qm_stats.o
+endif
+xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
+xfs-$(CONFIG_PROC_FS)          += xfs_stats.o
+xfs-$(CONFIG_SYSCTL)           += xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)           += xfs_ioctl32.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
new file mode 100644 (file)
index 0000000..a907de5
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include "time.h"
+#include "kmem.h"
+#include "xfs_message.h"
+
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+       void            *ptr;
+       size_t          kmsize = maxsize;
+
+       while (!(ptr = kmem_zalloc_large(kmsize))) {
+               if ((kmsize >>= 1) <= minsize)
+                       kmsize = minsize;
+       }
+       if (ptr)
+               *size = kmsize;
+       return ptr;
+}
+
+void *
+kmem_alloc(size_t size, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmalloc(size, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zalloc(size_t size, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_alloc(size, flags);
+       if (ptr)
+               memset((char *)ptr, 0, (int)size);
+       return ptr;
+}
+
+void
+kmem_free(const void *ptr)
+{
+       if (!is_vmalloc_addr(ptr)) {
+               kfree(ptr);
+       } else {
+               vfree(ptr);
+       }
+}
+
+void *
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
+            unsigned int __nocast flags)
+{
+       void    *new;
+
+       new = kmem_alloc(newsize, flags);
+       if (ptr) {
+               if (new)
+                       memcpy(new, ptr,
+                               ((oldsize < newsize) ? oldsize : newsize));
+               kmem_free(ptr);
+       }
+       return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmem_cache_alloc(zone, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_zone_alloc(zone, flags);
+       if (ptr)
+               memset((char *)ptr, 0, kmem_cache_size(zone));
+       return ptr;
+}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
new file mode 100644 (file)
index 0000000..f7c8f7a
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+/*
+ * General memory allocation interfaces
+ */
+
+#define KM_SLEEP       0x0001u
+#define KM_NOSLEEP     0x0002u
+#define KM_NOFS                0x0004u
+#define KM_MAYFAIL     0x0008u
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions.  We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(unsigned int __nocast flags)
+{
+       gfp_t   lflags;
+
+       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+
+       if (flags & KM_NOSLEEP) {
+               lflags = GFP_ATOMIC | __GFP_NOWARN;
+       } else {
+               lflags = GFP_KERNEL | __GFP_NOWARN;
+               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+                       lflags &= ~__GFP_FS;
+       }
+       return lflags;
+}
+
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void  kmem_free(const void *);
+
+static inline void *kmem_zalloc_large(size_t size)
+{
+       void *ptr;
+
+       ptr = vmalloc(size);
+       if (ptr)
+               memset(ptr, 0, size);
+       return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+       vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+
+#define kmem_zone      kmem_cache
+#define kmem_zone_t    struct kmem_cache
+
+static inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+       return kmem_cache_create(zone_name, size, 0, 0, NULL);
+}
+
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+                    void (*construct)(void *))
+{
+       return kmem_cache_create(zone_name, size, 0, flags, construct);
+}
+
+static inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+       kmem_cache_free(zone, ptr);
+}
+
+static inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+       if (zone)
+               kmem_cache_destroy(zone);
+}
+
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+
+static inline int
+kmem_shake_allow(gfp_t gfp_mask)
+{
+       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
deleted file mode 100644 (file)
index a907de5..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include "time.h"
-#include "kmem.h"
-#include "xfs_message.h"
-
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-       void            *ptr;
-       size_t          kmsize = maxsize;
-
-       while (!(ptr = kmem_zalloc_large(kmsize))) {
-               if ((kmsize >>= 1) <= minsize)
-                       kmsize = minsize;
-       }
-       if (ptr)
-               *size = kmsize;
-       return ptr;
-}
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmalloc(size, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_alloc(size, flags);
-       if (ptr)
-               memset((char *)ptr, 0, (int)size);
-       return ptr;
-}
-
-void
-kmem_free(const void *ptr)
-{
-       if (!is_vmalloc_addr(ptr)) {
-               kfree(ptr);
-       } else {
-               vfree(ptr);
-       }
-}
-
-void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
-            unsigned int __nocast flags)
-{
-       void    *new;
-
-       new = kmem_alloc(newsize, flags);
-       if (ptr) {
-               if (new)
-                       memcpy(new, ptr,
-                               ((oldsize < newsize) ? oldsize : newsize));
-               kmem_free(ptr);
-       }
-       return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmem_cache_alloc(zone, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_zone_alloc(zone, flags);
-       if (ptr)
-               memset((char *)ptr, 0, kmem_cache_size(zone));
-       return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
deleted file mode 100644 (file)
index f7c8f7a..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-#define KM_SLEEP       0x0001u
-#define KM_NOSLEEP     0x0002u
-#define KM_NOFS                0x0004u
-#define KM_MAYFAIL     0x0008u
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions.  We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(unsigned int __nocast flags)
-{
-       gfp_t   lflags;
-
-       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
-
-       if (flags & KM_NOSLEEP) {
-               lflags = GFP_ATOMIC | __GFP_NOWARN;
-       } else {
-               lflags = GFP_KERNEL | __GFP_NOWARN;
-               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
-                       lflags &= ~__GFP_FS;
-       }
-       return lflags;
-}
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(const void *);
-
-static inline void *kmem_zalloc_large(size_t size)
-{
-       void *ptr;
-
-       ptr = vmalloc(size);
-       if (ptr)
-               memset(ptr, 0, size);
-       return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
-       vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
-/*
- * Zone interfaces
- */
-
-#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
-
-#define kmem_zone      kmem_cache
-#define kmem_zone_t    struct kmem_cache
-
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
-       return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-                    void (*construct)(void *))
-{
-       return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
-       kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
-       if (zone)
-               kmem_cache_destroy(zone);
-}
-
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
deleted file mode 100644 (file)
index ff6a198..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_MRLOCK_H__
-#define __XFS_SUPPORT_MRLOCK_H__
-
-#include <linux/rwsem.h>
-
-typedef struct {
-       struct rw_semaphore     mr_lock;
-#ifdef DEBUG
-       int                     mr_writer;
-#endif
-} mrlock_t;
-
-#ifdef DEBUG
-#define mrinit(mrp, name)      \
-       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
-#else
-#define mrinit(mrp, name)      \
-       do { init_rwsem(&(mrp)->mr_lock); } while (0)
-#endif
-
-#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
-#define mrfree(mrp)            do { } while (0)
-
-static inline void mraccess_nested(mrlock_t *mrp, int subclass)
-{
-       down_read_nested(&mrp->mr_lock, subclass);
-}
-
-static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
-{
-       down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-}
-
-static inline int mrtryaccess(mrlock_t *mrp)
-{
-       return down_read_trylock(&mrp->mr_lock);
-}
-
-static inline int mrtryupdate(mrlock_t *mrp)
-{
-       if (!down_write_trylock(&mrp->mr_lock))
-               return 0;
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-       return 1;
-}
-
-static inline void mrunlock_excl(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       up_write(&mrp->mr_lock);
-}
-
-static inline void mrunlock_shared(mrlock_t *mrp)
-{
-       up_read(&mrp->mr_lock);
-}
-
-static inline void mrdemote(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       downgrade_write(&mrp->mr_lock);
-}
-
-#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
deleted file mode 100644 (file)
index 387e695..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-       schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-       *tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
deleted file mode 100644 (file)
index b6c4b37..0000000
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-
-
-/*
- * Locking scheme:
- *  - all ACL updates are protected by inode->i_mutex, which is taken before
- *    calling into this file.
- */
-
-STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
-{
-       struct posix_acl_entry *acl_e;
-       struct posix_acl *acl;
-       struct xfs_acl_entry *ace;
-       int count, i;
-
-       count = be32_to_cpu(aclp->acl_cnt);
-
-       acl = posix_acl_alloc(count, GFP_KERNEL);
-       if (!acl)
-               return ERR_PTR(-ENOMEM);
-
-       for (i = 0; i < count; i++) {
-               acl_e = &acl->a_entries[i];
-               ace = &aclp->acl_entry[i];
-
-               /*
-                * The tag is 32 bits on disk and 16 bits in core.
-                *
-                * Because every access to it goes through the core
-                * format first this is not a problem.
-                */
-               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
-               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
-
-               switch (acl_e->e_tag) {
-               case ACL_USER:
-               case ACL_GROUP:
-                       acl_e->e_id = be32_to_cpu(ace->ae_id);
-                       break;
-               case ACL_USER_OBJ:
-               case ACL_GROUP_OBJ:
-               case ACL_MASK:
-               case ACL_OTHER:
-                       acl_e->e_id = ACL_UNDEFINED_ID;
-                       break;
-               default:
-                       goto fail;
-               }
-       }
-       return acl;
-
-fail:
-       posix_acl_release(acl);
-       return ERR_PTR(-EINVAL);
-}
-
-STATIC void
-xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
-{
-       const struct posix_acl_entry *acl_e;
-       struct xfs_acl_entry *ace;
-       int i;
-
-       aclp->acl_cnt = cpu_to_be32(acl->a_count);
-       for (i = 0; i < acl->a_count; i++) {
-               ace = &aclp->acl_entry[i];
-               acl_e = &acl->a_entries[i];
-
-               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
-               ace->ae_id = cpu_to_be32(acl_e->e_id);
-               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
-       }
-}
-
-struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       struct posix_acl *acl;
-       struct xfs_acl *xfs_acl;
-       int len = sizeof(struct xfs_acl);
-       unsigned char *ea_name;
-       int error;
-
-       acl = get_cached_acl(inode, type);
-       if (acl != ACL_NOT_CACHED)
-               return acl;
-
-       trace_xfs_get_acl(ip);
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               BUG();
-       }
-
-       /*
-        * If we have a cached ACLs value just return it, not need to
-        * go out to the disk.
-        */
-
-       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-       if (!xfs_acl)
-               return ERR_PTR(-ENOMEM);
-
-       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
-                                                       &len, ATTR_ROOT);
-       if (error) {
-               /*
-                * If the attribute doesn't exist make sure we have a negative
-                * cache entry, for any other error assume it is transient and
-                * leave the cache entry as ACL_NOT_CACHED.
-                */
-               if (error == -ENOATTR) {
-                       acl = NULL;
-                       goto out_update_cache;
-               }
-               goto out;
-       }
-
-       acl = xfs_acl_from_disk(xfs_acl);
-       if (IS_ERR(acl))
-               goto out;
-
- out_update_cache:
-       set_cached_acl(inode, type, acl);
- out:
-       kfree(xfs_acl);
-       return acl;
-}
-
-STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       unsigned char *ea_name;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               if (!S_ISDIR(inode->i_mode))
-                       return acl ? -EACCES : 0;
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (acl) {
-               struct xfs_acl *xfs_acl;
-               int len;
-
-               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-               if (!xfs_acl)
-                       return -ENOMEM;
-
-               xfs_acl_to_disk(xfs_acl, acl);
-               len = sizeof(struct xfs_acl) -
-                       (sizeof(struct xfs_acl_entry) *
-                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
-
-               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
-                               len, ATTR_ROOT);
-
-               kfree(xfs_acl);
-       } else {
-               /*
-                * A NULL ACL argument means we want to remove the ACL.
-                */
-               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
-
-               /*
-                * If the attribute didn't exist to start with that's fine.
-                */
-               if (error == -ENOATTR)
-                       error = 0;
-       }
-
-       if (!error)
-               set_cached_acl(inode, type, acl);
-       return error;
-}
-
-static int
-xfs_set_mode(struct inode *inode, umode_t mode)
-{
-       int error = 0;
-
-       if (mode != inode->i_mode) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-               iattr.ia_mode = mode;
-               iattr.ia_ctime = current_fs_time(inode->i_sb);
-
-               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-       }
-
-       return error;
-}
-
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
-       int len = sizeof(struct xfs_acl);
-
-       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
-                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
-       return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
-       if (!S_ISDIR(inode->i_mode))
-               return 0;
-       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
-int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
-{
-       umode_t mode = inode->i_mode;
-       int error = 0, inherit = 0;
-
-       if (S_ISDIR(inode->i_mode)) {
-               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
-               if (error)
-                       goto out;
-       }
-
-       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
-       if (error < 0)
-               return error;
-
-       /*
-        * If posix_acl_create returns a positive value we need to
-        * inherit a permission that can't be represented using the Unix
-        * mode bits and we actually need to set an ACL.
-        */
-       if (error > 0)
-               inherit = 1;
-
-       error = xfs_set_mode(inode, mode);
-       if (error)
-               goto out;
-
-       if (inherit)
-               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-
-out:
-       posix_acl_release(acl);
-       return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
-       struct posix_acl *acl;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
-       if (IS_ERR(acl) || !acl)
-               return PTR_ERR(acl);
-
-       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
-       if (error)
-               return error;
-
-       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-       posix_acl_release(acl);
-       return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int type)
-{
-       struct posix_acl *acl;
-       int error;
-
-       acl = xfs_get_acl(dentry->d_inode, type);
-       if (IS_ERR(acl))
-               return PTR_ERR(acl);
-       if (acl == NULL)
-               return -ENODATA;
-
-       error = posix_acl_to_xattr(acl, value, size);
-       posix_acl_release(acl);
-
-       return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags, int type)
-{
-       struct inode *inode = dentry->d_inode;
-       struct posix_acl *acl = NULL;
-       int error = 0;
-
-       if (flags & XATTR_CREATE)
-               return -EINVAL;
-       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-               return value ? -EACCES : 0;
-       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
-               return -EPERM;
-
-       if (!value)
-               goto set_acl;
-
-       acl = posix_acl_from_xattr(value, size);
-       if (!acl) {
-               /*
-                * acl_set_file(3) may request that we set default ACLs with
-                * zero length -- defend (gracefully) against that here.
-                */
-               goto out;
-       }
-       if (IS_ERR(acl)) {
-               error = PTR_ERR(acl);
-               goto out;
-       }
-
-       error = posix_acl_valid(acl);
-       if (error)
-               goto out_release;
-
-       error = -EINVAL;
-       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
-               goto out_release;
-
-       if (type == ACL_TYPE_ACCESS) {
-               umode_t mode = inode->i_mode;
-               error = posix_acl_equiv_mode(acl, &mode);
-
-               if (error <= 0) {
-                       posix_acl_release(acl);
-                       acl = NULL;
-
-                       if (error < 0)
-                               return error;
-               }
-
-               error = xfs_set_mode(inode, mode);
-               if (error)
-                       goto out_release;
-       }
-
- set_acl:
-       error = xfs_set_acl(inode, type, acl);
- out_release:
-       posix_acl_release(acl);
- out:
-       return error;
-}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
-       .prefix = POSIX_ACL_XATTR_ACCESS,
-       .flags  = ACL_TYPE_ACCESS,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
-       .prefix = POSIX_ACL_XATTR_DEFAULT,
-       .flags  = ACL_TYPE_DEFAULT,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
deleted file mode 100644 (file)
index 63e971e..0000000
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_bmap.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-
-/*
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC         37
-#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t xfs_ioend_wq[NVSYNC];
-
-void __init
-xfs_ioend_init(void)
-{
-       int i;
-
-       for (i = 0; i < NVSYNC; i++)
-               init_waitqueue_head(&xfs_ioend_wq[i]);
-}
-
-void
-xfs_ioend_wait(
-       xfs_inode_t     *ip)
-{
-       wait_queue_head_t *wq = to_ioend_wq(ip);
-
-       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-STATIC void
-xfs_ioend_wake(
-       xfs_inode_t     *ip)
-{
-       if (atomic_dec_and_test(&ip->i_iocount))
-               wake_up(to_ioend_wq(ip));
-}
-
-void
-xfs_count_page_state(
-       struct page             *page,
-       int                     *delalloc,
-       int                     *unwritten)
-{
-       struct buffer_head      *bh, *head;
-
-       *delalloc = *unwritten = 0;
-
-       bh = head = page_buffers(page);
-       do {
-               if (buffer_unwritten(bh))
-                       (*unwritten) = 1;
-               else if (buffer_delay(bh))
-                       (*delalloc) = 1;
-       } while ((bh = bh->b_this_page) != head);
-}
-
-STATIC struct block_device *
-xfs_find_bdev_for_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (XFS_IS_REALTIME_INODE(ip))
-               return mp->m_rtdev_targp->bt_bdev;
-       else
-               return mp->m_ddev_targp->bt_bdev;
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory.  Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
-       xfs_ioend_t             *ioend)
-{
-       struct buffer_head      *bh, *next;
-       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
-
-       for (bh = ioend->io_buffer_head; bh; bh = next) {
-               next = bh->b_private;
-               bh->b_end_io(bh, !ioend->io_error);
-       }
-
-       /*
-        * Volume managers supporting multiple paths can send back ENODEV
-        * when the final path disappears.  In this case continuing to fill
-        * the page cache with dirty data which cannot be written out is
-        * evil, so prevent that.
-        */
-       if (unlikely(ioend->io_error == -ENODEV)) {
-               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
-                                     __FILE__, __LINE__);
-       }
-
-       xfs_ioend_wake(ip);
-       mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-       xfs_fsize_t             bsize;
-
-       bsize = ioend->io_offset + ioend->io_size;
-       isize = MAX(ip->i_size, ip->i_new_size);
-       isize = MIN(isize, bsize);
-       return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
- * Update on-disk file size now that data has been written to disk.  The
- * current in-memory file size is i_size.  If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated.  If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
- */
-STATIC int
-xfs_setfilesize(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-
-       if (unlikely(ioend->io_error))
-               return 0;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-               return EAGAIN;
-
-       isize = xfs_ioend_new_eof(ioend);
-       if (isize) {
-               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
-               ip->i_d.di_size = isize;
-               xfs_mark_inode_dirty(ip);
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-/*
- * Schedule IO completion handling on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               if (ioend->io_type == IO_UNWRITTEN)
-                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
-               else
-                       queue_work(xfsdatad_workqueue, &ioend->io_work);
-       }
-}
-
-/*
- * IO write completion.
- */
-STATIC void
-xfs_end_io(
-       struct work_struct *work)
-{
-       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
-       struct xfs_inode *ip = XFS_I(ioend->io_inode);
-       int             error = 0;
-
-       /*
-        * For unwritten extents we need to issue transactions to convert a
-        * range to normal written extens after the data I/O has finished.
-        */
-       if (ioend->io_type == IO_UNWRITTEN &&
-           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-
-               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-                                                ioend->io_size);
-               if (error)
-                       ioend->io_error = error;
-       }
-
-       /*
-        * We might have to update the on-disk file size after extending
-        * writes.
-        */
-       error = xfs_setfilesize(ioend);
-       ASSERT(!error || error == EAGAIN);
-
-       /*
-        * If we didn't complete processing of the ioend, requeue it to the
-        * tail of the workqueue for another attempt later. Otherwise destroy
-        * it.
-        */
-       if (error == EAGAIN) {
-               atomic_inc(&ioend->io_remaining);
-               xfs_finish_ioend(ioend);
-               /* ensure we don't spin on blocked ioends */
-               delay(1);
-       } else {
-               if (ioend->io_iocb)
-                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
-               xfs_destroy_ioend(ioend);
-       }
-}
-
-/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining))
-               xfs_end_io(&ioend->io_work);
-}
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-       struct inode            *inode,
-       unsigned int            type)
-{
-       xfs_ioend_t             *ioend;
-
-       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
-       /*
-        * Set the count to 1 initially, which will prevent an I/O
-        * completion callback from happening before we have started
-        * all the I/O from calling the completion routine too early.
-        */
-       atomic_set(&ioend->io_remaining, 1);
-       ioend->io_error = 0;
-       ioend->io_list = NULL;
-       ioend->io_type = type;
-       ioend->io_inode = inode;
-       ioend->io_buffer_head = NULL;
-       ioend->io_buffer_tail = NULL;
-       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
-       ioend->io_offset = 0;
-       ioend->io_size = 0;
-       ioend->io_iocb = NULL;
-       ioend->io_result = 0;
-
-       INIT_WORK(&ioend->io_work, xfs_end_io);
-       return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
-       struct inode            *inode,
-       loff_t                  offset,
-       struct xfs_bmbt_irec    *imap,
-       int                     type,
-       int                     nonblocking)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 count = 1 << inode->i_blkbits;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
-       int                     nimaps = 1;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (type == IO_UNWRITTEN)
-               bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-               if (nonblocking)
-                       return -XFS_ERROR(EAGAIN);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-       }
-
-       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
-              (ip->i_df.if_flags & XFS_IFEXTENTS));
-       ASSERT(offset <= mp->m_maxioffset);
-
-       if (offset + count > mp->m_maxioffset)
-               count = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (error)
-               return -XFS_ERROR(error);
-
-       if (type == IO_DELALLOC &&
-           (!nimaps || isnullstartblock(imap->br_startblock))) {
-               error = xfs_iomap_write_allocate(ip, offset, count, imap);
-               if (!error)
-                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-               return -XFS_ERROR(error);
-       }
-
-#ifdef DEBUG
-       if (type == IO_UNWRITTEN) {
-               ASSERT(nimaps);
-               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-       }
-#endif
-       if (nimaps)
-               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
-       return 0;
-}
-
-STATIC int
-xfs_imap_valid(
-       struct inode            *inode,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       offset >>= inode->i_blkbits;
-
-       return offset >= imap->br_startoff &&
-               offset < imap->br_startoff + imap->br_blockcount;
-}
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_ioend_t             *ioend = bio->bi_private;
-
-       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
-       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
-
-       /* Toss bio and pass work off to an xfsdatad thread */
-       bio->bi_private = NULL;
-       bio->bi_end_io = NULL;
-       bio_put(bio);
-
-       xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
-       struct bio              *bio)
-{
-       atomic_inc(&ioend->io_remaining);
-       bio->bi_private = ioend;
-       bio->bi_end_io = xfs_end_bio;
-
-       /*
-        * If the I/O is beyond EOF we mark the inode dirty immediately
-        * but don't update the inode size until I/O completion.
-        */
-       if (xfs_ioend_new_eof(ioend))
-               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-       struct buffer_head      *bh)
-{
-       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
-       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
-
-       ASSERT(bio->bi_private == NULL);
-       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-       bio->bi_bdev = bh->b_bdev;
-       return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
-       struct buffer_head      *bh)
-{
-       ASSERT(buffer_mapped(bh));
-       ASSERT(buffer_locked(bh));
-       ASSERT(!buffer_delay(bh));
-       ASSERT(!buffer_unwritten(bh));
-
-       mark_buffer_async_write(bh);
-       set_buffer_uptodate(bh);
-       clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
-       struct page             *page,
-       int                     clear_dirty,
-       int                     buffers)
-{
-       ASSERT(PageLocked(page));
-       ASSERT(!PageWriteback(page));
-       if (clear_dirty)
-               clear_page_dirty_for_io(page);
-       set_page_writeback(page);
-       unlock_page(page);
-       /* If no buffers on the page are to be written, finish it here */
-       if (!buffers)
-               end_page_writeback(page);
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
-       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
- */
-STATIC void
-xfs_submit_ioend(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *head = ioend;
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh;
-       struct bio              *bio;
-       sector_t                lastblock = 0;
-
-       /* Pass 1 - start writeback */
-       do {
-               next = ioend->io_list;
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-                       xfs_start_buffer_writeback(bh);
-       } while ((ioend = next) != NULL);
-
-       /* Pass 2 - submit I/O */
-       ioend = head;
-       do {
-               next = ioend->io_list;
-               bio = NULL;
-
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-                       if (!bio) {
- retry:
-                               bio = xfs_alloc_ioend_bio(bh);
-                       } else if (bh->b_blocknr != lastblock + 1) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       if (bio_add_buffer(bio, bh) != bh->b_size) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       lastblock = bh->b_blocknr;
-               }
-               if (bio)
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-               xfs_finish_ioend(ioend);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh, *next_bh;
-
-       do {
-               next = ioend->io_list;
-               bh = ioend->io_buffer_head;
-               do {
-                       next_bh = bh->b_private;
-                       clear_buffer_async_write(bh);
-                       unlock_buffer(bh);
-               } while ((bh = next_bh) != NULL);
-
-               xfs_ioend_wake(XFS_I(ioend->io_inode));
-               mempool_free(ioend, xfs_ioend_pool);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
- */
-STATIC void
-xfs_add_to_ioend(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       xfs_off_t               offset,
-       unsigned int            type,
-       xfs_ioend_t             **result,
-       int                     need_ioend)
-{
-       xfs_ioend_t             *ioend = *result;
-
-       if (!ioend || need_ioend || type != ioend->io_type) {
-               xfs_ioend_t     *previous = *result;
-
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_buffer_head = bh;
-               ioend->io_buffer_tail = bh;
-               if (previous)
-                       previous->io_list = ioend;
-               *result = ioend;
-       } else {
-               ioend->io_buffer_tail->b_private = bh;
-               ioend->io_buffer_tail = bh;
-       }
-
-       bh->b_private = NULL;
-       ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_buffer(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       sector_t                bn;
-       struct xfs_mount        *m = XFS_I(inode)->i_mount;
-       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
-       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-             ((offset - iomap_offset) >> inode->i_blkbits);
-
-       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
-       bh->b_blocknr = bn;
-       set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       xfs_map_buffer(inode, bh, imap, offset);
-       set_buffer_mapped(bh);
-       clear_buffer_delay(bh);
-       clear_buffer_unwritten(bh);
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
-       struct page             *page,
-       unsigned int            type)
-{
-       if (PageWriteback(page))
-               return 0;
-
-       if (page->mapping && page_has_buffers(page)) {
-               struct buffer_head      *bh, *head;
-               int                     acceptable = 0;
-
-               bh = head = page_buffers(page);
-               do {
-                       if (buffer_unwritten(bh))
-                               acceptable = (type == IO_UNWRITTEN);
-                       else if (buffer_delay(bh))
-                               acceptable = (type == IO_DELALLOC);
-                       else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable = (type == IO_OVERWRITE);
-                       else
-                               break;
-               } while ((bh = bh->b_this_page) != head);
-
-               if (acceptable)
-                       return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-       struct inode            *inode,
-       struct page             *page,
-       loff_t                  tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc)
-{
-       struct buffer_head      *bh, *head;
-       xfs_off_t               end_offset;
-       unsigned long           p_offset;
-       unsigned int            type;
-       int                     len, page_dirty;
-       int                     count = 0, done = 0, uptodate = 1;
-       xfs_off_t               offset = page_offset(page);
-
-       if (page->index != tindex)
-               goto fail;
-       if (!trylock_page(page))
-               goto fail;
-       if (PageWriteback(page))
-               goto fail_unlock_page;
-       if (page->mapping != inode->i_mapping)
-               goto fail_unlock_page;
-       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
-               goto fail_unlock_page;
-
-       /*
-        * page_dirty is initially a count of buffers on the page before
-        * EOF and is decremented as we move each into a cleanable state.
-        *
-        * Derivation:
-        *
-        * End offset is the highest offset that this page should represent.
-        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-        * hence give us the correct page_dirty count. On any other page,
-        * it will be zero and in that case we need page_dirty to be the
-        * count of buffers on the page.
-        */
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       i_size_read(inode));
-
-       len = 1 << inode->i_blkbits;
-       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                                       PAGE_CACHE_SIZE);
-       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-       page_dirty = p_offset / len;
-
-       bh = head = page_buffers(page);
-       do {
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-                       done = 1;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                   buffer_mapped(bh)) {
-                       if (buffer_unwritten(bh))
-                               type = IO_UNWRITTEN;
-                       else if (buffer_delay(bh))
-                               type = IO_DELALLOC;
-                       else
-                               type = IO_OVERWRITE;
-
-                       if (!xfs_imap_valid(inode, imap, offset)) {
-                               done = 1;
-                               continue;
-                       }
-
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type,
-                                        ioendp, done);
-
-                       page_dirty--;
-                       count++;
-               } else {
-                       done = 1;
-               }
-       } while (offset += len, (bh = bh->b_this_page) != head);
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       if (count) {
-               if (--wbc->nr_to_write <= 0 &&
-                   wbc->sync_mode == WB_SYNC_NONE)
-                       done = 1;
-       }
-       xfs_start_page_writeback(page, !page_dirty, count);
-
-       return done;
- fail_unlock_page:
-       unlock_page(page);
- fail:
-       return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-       struct inode            *inode,
-       pgoff_t                 tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc,
-       pgoff_t                 tlast)
-{
-       struct pagevec          pvec;
-       int                     done = 0, i;
-
-       pagevec_init(&pvec, 0);
-       while (!done && tindex <= tlast) {
-               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                       break;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       imap, ioendp, wbc);
-                       if (done)
-                               break;
-               }
-
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-}
-
-STATIC void
-xfs_vm_invalidatepage(
-       struct page             *page,
-       unsigned long           offset)
-{
-       trace_xfs_invalidatepage(page->mapping->host, page, offset);
-       block_invalidatepage(page, offset);
-}
-
-/*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
- *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
- * reservation (typically why we see a ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
- */
-STATIC void
-xfs_aops_discard_page(
-       struct page             *page)
-{
-       struct inode            *inode = page->mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct buffer_head      *bh, *head;
-       loff_t                  offset = page_offset(page);
-
-       if (!xfs_is_delayed_page(page, IO_DELALLOC))
-               goto out_invalidate;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               goto out_invalidate;
-
-       xfs_alert(ip->i_mount,
-               "page discard on page %p, inode 0x%llx, offset %llu.",
-                       page, ip->i_ino, offset);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       bh = head = page_buffers(page);
-       do {
-               int             error;
-               xfs_fileoff_t   start_fsb;
-
-               if (!buffer_delay(bh))
-                       goto next_buffer;
-
-               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "page discard unable to remove delalloc mapping.");
-                       }
-                       break;
-               }
-next_buffer:
-               offset += 1 << inode->i_blkbits;
-
-       } while ((bh = bh->b_this_page) != head);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_invalidate:
-       xfs_vm_invalidatepage(page, 0);
-       return;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
- */
-STATIC int
-xfs_vm_writepage(
-       struct page             *page,
-       struct writeback_control *wbc)
-{
-       struct inode            *inode = page->mapping->host;
-       struct buffer_head      *bh, *head;
-       struct xfs_bmbt_irec    imap;
-       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
-       loff_t                  offset;
-       unsigned int            type;
-       __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index;
-       ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
-       int                     count = 0;
-       int                     nonblocking = 0;
-
-       trace_xfs_writepage(inode, page, 0);
-
-       ASSERT(page_has_buffers(page));
-
-       /*
-        * Refuse to write the page out if we are called from reclaim context.
-        *
-        * This avoids stack overflows when called from deeply used stacks in
-        * random callers for direct reclaim or memcg reclaim.  We explicitly
-        * allow reclaim from kswapd as the stack usage there is relatively low.
-        *
-        * This should really be done by the core VM, but until that happens
-        * filesystems like XFS, btrfs and ext4 have to take care of this
-        * by themselves.
-        */
-       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-               goto redirty;
-
-       /*
-        * Given that we do not allow direct reclaim to call us, we should
-        * never be called while in a filesystem transaction.
-        */
-       if (WARN_ON(current->flags & PF_FSTRANS))
-               goto redirty;
-
-       /* Is this page beyond the end of the file? */
-       offset = i_size_read(inode);
-       end_index = offset >> PAGE_CACHE_SHIFT;
-       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index >= end_index) {
-               if ((page->index >= end_index + 1) ||
-                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-                       unlock_page(page);
-                       return 0;
-               }
-       }
-
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       offset);
-       len = 1 << inode->i_blkbits;
-
-       bh = head = page_buffers(page);
-       offset = page_offset(page);
-       type = IO_OVERWRITE;
-
-       if (wbc->sync_mode == WB_SYNC_NONE)
-               nonblocking = 1;
-
-       do {
-               int new_ioend = 0;
-
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-
-               /*
-                * set_page_dirty dirties all buffers in a page, independent
-                * of their state.  The dirty state however is entirely
-                * meaningless for holes (!mapped && uptodate), so skip
-                * buffers covering holes here.
-                */
-               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh)) {
-                       if (type != IO_UNWRITTEN) {
-                               type = IO_UNWRITTEN;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (type != IO_DELALLOC) {
-                               type = IO_DELALLOC;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (type != IO_OVERWRITE) {
-                               type = IO_OVERWRITE;
-                               imap_valid = 0;
-                       }
-               } else {
-                       if (PageUptodate(page)) {
-                               ASSERT(buffer_mapped(bh));
-                               imap_valid = 0;
-                       }
-                       continue;
-               }
-
-               if (imap_valid)
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               if (!imap_valid) {
-                       /*
-                        * If we didn't have a valid mapping then we need to
-                        * put the new mapping into a separate ioend structure.
-                        * This ensures non-contiguous extents always have
-                        * separate ioends, which is particularly important
-                        * for unwritten extent conversion at I/O completion
-                        * time.
-                        */
-                       new_ioend = 1;
-                       err = xfs_map_blocks(inode, offset, &imap, type,
-                                            nonblocking);
-                       if (err)
-                               goto error;
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               }
-               if (imap_valid) {
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, &imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-                                        new_ioend);
-                       count++;
-               }
-
-               if (!iohead)
-                       iohead = ioend;
-
-       } while (offset += len, ((bh = bh->b_this_page) != head));
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       xfs_start_page_writeback(page, 1, count);
-
-       if (ioend && imap_valid) {
-               xfs_off_t               end_index;
-
-               end_index = imap.br_startoff + imap.br_blockcount;
-
-               /* to bytes */
-               end_index <<= inode->i_blkbits;
-
-               /* to pages */
-               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-               /* check against file size */
-               if (end_index > last_index)
-                       end_index = last_index;
-
-               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                 wbc, end_index);
-       }
-
-       if (iohead)
-               xfs_submit_ioend(wbc, iohead);
-
-       return 0;
-
-error:
-       if (iohead)
-               xfs_cancel_ioend(iohead);
-
-       if (err == -EAGAIN)
-               goto redirty;
-
-       xfs_aops_discard_page(page);
-       ClearPageUptodate(page);
-       unlock_page(page);
-       return err;
-
-redirty:
-       redirty_page_for_writepage(wbc, page);
-       unlock_page(page);
-       return 0;
-}
-
-STATIC int
-xfs_vm_writepages(
-       struct address_space    *mapping,
-       struct writeback_control *wbc)
-{
-       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-       return generic_writepages(mapping, wbc);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
-STATIC int
-xfs_vm_releasepage(
-       struct page             *page,
-       gfp_t                   gfp_mask)
-{
-       int                     delalloc, unwritten;
-
-       trace_xfs_releasepage(page->mapping->host, page, 0);
-
-       xfs_count_page_state(page, &delalloc, &unwritten);
-
-       if (WARN_ON(delalloc))
-               return 0;
-       if (WARN_ON(unwritten))
-               return 0;
-
-       return try_to_free_buffers(page);
-}
-
-STATIC int
-__xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create,
-       int                     direct)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     lockmode = 0;
-       struct xfs_bmbt_irec    imap;
-       int                     nimaps = 1;
-       xfs_off_t               offset;
-       ssize_t                 size;
-       int                     new = 0;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       offset = (xfs_off_t)iblock << inode->i_blkbits;
-       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
-       size = bh_result->b_size;
-
-       if (!create && direct && offset >= i_size_read(inode))
-               return 0;
-
-       if (create) {
-               lockmode = XFS_ILOCK_EXCL;
-               xfs_ilock(ip, lockmode);
-       } else {
-               lockmode = xfs_ilock_map_shared(ip);
-       }
-
-       ASSERT(offset <= mp->m_maxioffset);
-       if (offset + size > mp->m_maxioffset)
-               size = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
-       if (error)
-               goto out_unlock;
-
-       if (create &&
-           (!nimaps ||
-            (imap.br_startblock == HOLESTARTBLOCK ||
-             imap.br_startblock == DELAYSTARTBLOCK))) {
-               if (direct) {
-                       error = xfs_iomap_write_direct(ip, offset, size,
-                                                      &imap, nimaps);
-               } else {
-                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
-               }
-               if (error)
-                       goto out_unlock;
-
-               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
-       } else if (nimaps) {
-               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
-       } else {
-               trace_xfs_get_blocks_notfound(ip, offset, size);
-               goto out_unlock;
-       }
-       xfs_iunlock(ip, lockmode);
-
-       if (imap.br_startblock != HOLESTARTBLOCK &&
-           imap.br_startblock != DELAYSTARTBLOCK) {
-               /*
-                * For unwritten extents do not report a disk address on
-                * the read case (treat as if we're reading into a hole).
-                */
-               if (create || !ISUNWRITTEN(&imap))
-                       xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (create && ISUNWRITTEN(&imap)) {
-                       if (direct)
-                               bh_result->b_private = inode;
-                       set_buffer_unwritten(bh_result);
-               }
-       }
-
-       /*
-        * If this is a realtime file, data may be on a different device.
-        * to that pointed to from the buffer_head b_bdev currently.
-        */
-       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-       /*
-        * If we previously allocated a block out beyond eof and we are now
-        * coming back to use it then we will need to flag it as new even if it
-        * has a disk address.
-        *
-        * With sub-block writes into unwritten extents we also need to mark
-        * the buffer as new so that the unwritten parts of the buffer gets
-        * correctly zeroed.
-        */
-       if (create &&
-           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-            (offset >= i_size_read(inode)) ||
-            (new || ISUNWRITTEN(&imap))))
-               set_buffer_new(bh_result);
-
-       if (imap.br_startblock == DELAYSTARTBLOCK) {
-               BUG_ON(direct);
-               if (create) {
-                       set_buffer_uptodate(bh_result);
-                       set_buffer_mapped(bh_result);
-                       set_buffer_delay(bh_result);
-               }
-       }
-
-       /*
-        * If this is O_DIRECT or the mpage code calling tell them how large
-        * the mapping is, so that we can avoid repeated get_blocks calls.
-        */
-       if (direct || size > (1 << inode->i_blkbits)) {
-               xfs_off_t               mapping_size;
-
-               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-               mapping_size <<= inode->i_blkbits;
-
-               ASSERT(mapping_size > 0);
-               if (mapping_size > size)
-                       mapping_size = size;
-               if (mapping_size > LONG_MAX)
-                       mapping_size = LONG_MAX;
-
-               bh_result->b_size = mapping_size;
-       }
-
-       return 0;
-
-out_unlock:
-       xfs_iunlock(ip, lockmode);
-       return -error;
-}
-
-int
-xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
-}
-
-STATIC int
-xfs_get_blocks_direct(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions.  In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private,
-       int                     ret,
-       bool                    is_async)
-{
-       struct xfs_ioend        *ioend = iocb->private;
-
-       /*
-        * blockdev_direct_IO can return an error even after the I/O
-        * completion handler was called.  Thus we need to protect
-        * against double-freeing.
-        */
-       iocb->private = NULL;
-
-       ioend->io_offset = offset;
-       ioend->io_size = size;
-       if (private && size > 0)
-               ioend->io_type = IO_UNWRITTEN;
-
-       if (is_async) {
-               /*
-                * If we are converting an unwritten extent we need to delay
-                * the AIO completion until after the unwrittent extent
-                * conversion has completed, otherwise do it ASAP.
-                */
-               if (ioend->io_type == IO_UNWRITTEN) {
-                       ioend->io_iocb = iocb;
-                       ioend->io_result = ret;
-               } else {
-                       aio_complete(iocb, ret, 0);
-               }
-               xfs_finish_ioend(ioend);
-       } else {
-               xfs_finish_ioend_sync(ioend);
-       }
-
-       /* XXX: probably should move into the real I/O completion handler */
-       inode_dio_done(ioend->io_inode);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-       int                     rw,
-       struct kiocb            *iocb,
-       const struct iovec      *iov,
-       loff_t                  offset,
-       unsigned long           nr_segs)
-{
-       struct inode            *inode = iocb->ki_filp->f_mapping->host;
-       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
-       ssize_t                 ret;
-
-       if (rw & WRITE) {
-               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
-
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL, 0);
-               if (ret != -EIOCBQUEUED && iocb->private)
-                       xfs_destroy_ioend(iocb->private);
-       } else {
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           NULL, NULL, 0);
-       }
-
-       return ret;
-}
-
-STATIC void
-xfs_vm_write_failed(
-       struct address_space    *mapping,
-       loff_t                  to)
-{
-       struct inode            *inode = mapping->host;
-
-       if (to > inode->i_size) {
-               /*
-                * punch out the delalloc blocks we have already allocated. We
-                * don't call xfs_setattr() to do this as we may be in the
-                * middle of a multi-iovec write and so the vfs inode->i_size
-                * will not match the xfs ip->i_size and so it will zero too
-                * much. Hence we jus truncate the page cache to zero what is
-                * necessary and punch the delalloc blocks directly.
-                */
-               struct xfs_inode        *ip = XFS_I(inode);
-               xfs_fileoff_t           start_fsb;
-               xfs_fileoff_t           end_fsb;
-               int                     error;
-
-               truncate_pagecache(inode, to, inode->i_size);
-
-               /*
-                * Check if there are any blocks that are outside of i_size
-                * that need to be trimmed back.
-                */
-               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-               if (end_fsb <= start_fsb)
-                       return;
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                                                       end_fsb - start_fsb);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "xfs_vm_write_failed: unable to clean up ino %lld",
-                                               ip->i_ino);
-                       }
-               }
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-STATIC int
-xfs_vm_write_begin(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                flags,
-       struct page             **pagep,
-       void                    **fsdata)
-{
-       int                     ret;
-
-       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-                               pagep, xfs_get_blocks);
-       if (unlikely(ret))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC int
-xfs_vm_write_end(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                copied,
-       struct page             *page,
-       void                    *fsdata)
-{
-       int                     ret;
-
-       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-       if (unlikely(ret < len))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC sector_t
-xfs_vm_bmap(
-       struct address_space    *mapping,
-       sector_t                block)
-{
-       struct inode            *inode = (struct inode *)mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_vm_bmap(XFS_I(inode));
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return generic_block_bmap(mapping, block, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpage(
-       struct file             *unused,
-       struct page             *page)
-{
-       return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
-       struct file             *unused,
-       struct address_space    *mapping,
-       struct list_head        *pages,
-       unsigned                nr_pages)
-{
-       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-const struct address_space_operations xfs_address_space_operations = {
-       .readpage               = xfs_vm_readpage,
-       .readpages              = xfs_vm_readpages,
-       .writepage              = xfs_vm_writepage,
-       .writepages             = xfs_vm_writepages,
-       .releasepage            = xfs_vm_releasepage,
-       .invalidatepage         = xfs_vm_invalidatepage,
-       .write_begin            = xfs_vm_write_begin,
-       .write_end              = xfs_vm_write_end,
-       .bmap                   = xfs_vm_bmap,
-       .direct_IO              = xfs_vm_direct_IO,
-       .migratepage            = buffer_migrate_page,
-       .is_partially_uptodate  = block_is_partially_uptodate,
-       .error_remove_page      = generic_error_remove_page,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
deleted file mode 100644 (file)
index 71f721e..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_AOPS_H__
-#define __XFS_AOPS_H__
-
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
-extern mempool_t *xfs_ioend_pool;
-
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-       IO_DIRECT = 0,  /* special case for direct I/O ioends */
-       IO_DELALLOC,    /* mapping covers delalloc region */
-       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
-       IO_OVERWRITE,   /* mapping covers already allocated extent */
-};
-
-#define XFS_IO_TYPES \
-       { 0,                    "" }, \
-       { IO_DELALLOC,          "delalloc" }, \
-       { IO_UNWRITTEN,         "unwritten" }, \
-       { IO_OVERWRITE,         "overwrite" }
-
-/*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
- */
-typedef struct xfs_ioend {
-       struct xfs_ioend        *io_list;       /* next ioend in chain */
-       unsigned int            io_type;        /* delalloc / unwritten */
-       int                     io_error;       /* I/O error code */
-       atomic_t                io_remaining;   /* hold count */
-       struct inode            *io_inode;      /* file being written to */
-       struct buffer_head      *io_buffer_head;/* buffer linked list head */
-       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
-       size_t                  io_size;        /* size of the extent */
-       xfs_off_t               io_offset;      /* offset in the file */
-       struct work_struct      io_work;        /* xfsdatad work queue */
-       struct kiocb            *io_iocb;
-       int                     io_result;
-} xfs_ioend_t;
-
-extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
-
-extern void xfs_ioend_init(void);
-extern void xfs_ioend_wait(struct xfs_inode *);
-
-extern void xfs_count_page_state(struct page *, int *, int *);
-
-#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
deleted file mode 100644 (file)
index c57836d..0000000
+++ /dev/null
@@ -1,1876 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
-#include <linux/backing-dev.h>
-#include <linux/freezer.h>
-
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_trace.h"
-
-static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp)      do { } while (0)
-# define XB_CLEAR_OWNER(bp)    do { } while (0)
-# define XB_GET_OWNER(bp)      do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
-       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
-        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
-       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
-       kmem_zone_free(xfs_buf_zone, (bp));
-
-static inline int
-xfs_buf_is_vmapped(
-       struct xfs_buf  *bp)
-{
-       /*
-        * Return true if the buffer is vmapped.
-        *
-        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
-        * code is clever enough to know it doesn't have to map a single page,
-        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
-        */
-       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
-}
-
-static inline int
-xfs_buf_vmap_len(
-       struct xfs_buf  *bp)
-{
-       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
-}
-
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (list_empty(&bp->b_lru)) {
-               atomic_inc(&bp->b_hold);
-               list_add_tail(&bp->b_lru, &btp->bt_lru);
-               btp->bt_lru_nr++;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are not
- * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
- * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
- * bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       if (list_empty(&bp->b_lru))
-               return;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (!list_empty(&bp->b_lru)) {
-               list_del_init(&bp->b_lru);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * When we mark a buffer stale, we remove the buffer from the LRU and clear the
- * b_lru_ref count so that the buffer is freed immediately when the buffer
- * reference count falls to zero. If the buffer is already on the LRU, we need
- * to remove the reference that LRU holds on the buffer.
- *
- * This prevents build-up of stale buffers on the LRU.
- */
-void
-xfs_buf_stale(
-       struct xfs_buf  *bp)
-{
-       bp->b_flags |= XBF_STALE;
-       atomic_set(&(bp)->b_lru_ref, 0);
-       if (!list_empty(&bp->b_lru)) {
-               struct xfs_buftarg *btp = bp->b_target;
-
-               spin_lock(&btp->bt_lru_lock);
-               if (!list_empty(&bp->b_lru)) {
-                       list_del_init(&bp->b_lru);
-                       btp->bt_lru_nr--;
-                       atomic_dec(&bp->b_hold);
-               }
-               spin_unlock(&btp->bt_lru_lock);
-       }
-       ASSERT(atomic_read(&bp->b_hold) >= 1);
-}
-
-STATIC void
-_xfs_buf_initialize(
-       xfs_buf_t               *bp,
-       xfs_buftarg_t           *target,
-       xfs_off_t               range_base,
-       size_t                  range_length,
-       xfs_buf_flags_t         flags)
-{
-       /*
-        * We don't want certain flags to appear in b_flags.
-        */
-       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
-       memset(bp, 0, sizeof(xfs_buf_t));
-       atomic_set(&bp->b_hold, 1);
-       atomic_set(&bp->b_lru_ref, 1);
-       init_completion(&bp->b_iowait);
-       INIT_LIST_HEAD(&bp->b_lru);
-       INIT_LIST_HEAD(&bp->b_list);
-       RB_CLEAR_NODE(&bp->b_rbnode);
-       sema_init(&bp->b_sema, 0); /* held, no waiters */
-       XB_SET_OWNER(bp);
-       bp->b_target = target;
-       bp->b_file_offset = range_base;
-       /*
-        * Set buffer_length and count_desired to the same value initially.
-        * I/O routines should use count_desired, which will be the same in
-        * most cases but may be reset (e.g. XFS recovery).
-        */
-       bp->b_buffer_length = bp->b_count_desired = range_length;
-       bp->b_flags = flags;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       atomic_set(&bp->b_pin_count, 0);
-       init_waitqueue_head(&bp->b_waiters);
-
-       XFS_STATS_INC(xb_create);
-
-       trace_xfs_buf_init(bp, _RET_IP_);
-}
-
-/*
- *     Allocate a page array capable of holding a specified number
- *     of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-       xfs_buf_t               *bp,
-       int                     page_count,
-       xfs_buf_flags_t         flags)
-{
-       /* Make sure that we have a page list */
-       if (bp->b_pages == NULL) {
-               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-               bp->b_page_count = page_count;
-               if (page_count <= XB_PAGES) {
-                       bp->b_pages = bp->b_page_array;
-               } else {
-                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
-                                       page_count, xb_to_km(flags));
-                       if (bp->b_pages == NULL)
-                               return -ENOMEM;
-               }
-               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-       }
-       return 0;
-}
-
-/*
- *     Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-       xfs_buf_t       *bp)
-{
-       if (bp->b_pages != bp->b_page_array) {
-               kmem_free(bp->b_pages);
-               bp->b_pages = NULL;
-       }
-}
-
-/*
- *     Releases the specified buffer.
- *
- *     The modification state of any associated pages is left unchanged.
- *     The buffer most not be on any hash - use xfs_buf_rele instead for
- *     hashed and refcounted buffers
- */
-void
-xfs_buf_free(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_free(bp, _RET_IP_);
-
-       ASSERT(list_empty(&bp->b_lru));
-
-       if (bp->b_flags & _XBF_PAGES) {
-               uint            i;
-
-               if (xfs_buf_is_vmapped(bp))
-                       vm_unmap_ram(bp->b_addr - bp->b_offset,
-                                       bp->b_page_count);
-
-               for (i = 0; i < bp->b_page_count; i++) {
-                       struct page     *page = bp->b_pages[i];
-
-                       __free_page(page);
-               }
-       } else if (bp->b_flags & _XBF_KMEM)
-               kmem_free(bp->b_addr);
-       _xfs_buf_free_pages(bp);
-       xfs_buf_deallocate(bp);
-}
-
-/*
- * Allocates all the pages for buffer in question and builds it's page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       size_t                  size = bp->b_count_desired;
-       size_t                  nbytes, offset;
-       gfp_t                   gfp_mask = xb_to_gfp(flags);
-       unsigned short          page_count, i;
-       xfs_off_t               end;
-       int                     error;
-
-       /*
-        * for buffers that are contained within a single page, just allocate
-        * the memory from the heap - there's no need for the complexity of
-        * page arrays to keep allocation down to order 0.
-        */
-       if (bp->b_buffer_length < PAGE_SIZE) {
-               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
-               if (!bp->b_addr) {
-                       /* low memory - use alloc_page loop instead */
-                       goto use_alloc_page;
-               }
-
-               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
-                                                               PAGE_MASK) !=
-                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
-                       /* b_addr spans two pages - use alloc_page instead */
-                       kmem_free(bp->b_addr);
-                       bp->b_addr = NULL;
-                       goto use_alloc_page;
-               }
-               bp->b_offset = offset_in_page(bp->b_addr);
-               bp->b_pages = bp->b_page_array;
-               bp->b_pages[0] = virt_to_page(bp->b_addr);
-               bp->b_page_count = 1;
-               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
-               return 0;
-       }
-
-use_alloc_page:
-       end = bp->b_file_offset + bp->b_buffer_length;
-       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-       error = _xfs_buf_get_pages(bp, page_count, flags);
-       if (unlikely(error))
-               return error;
-
-       offset = bp->b_offset;
-       bp->b_flags |= _XBF_PAGES;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               struct page     *page;
-               uint            retries = 0;
-retry:
-               page = alloc_page(gfp_mask);
-               if (unlikely(page == NULL)) {
-                       if (flags & XBF_READ_AHEAD) {
-                               bp->b_page_count = i;
-                               error = ENOMEM;
-                               goto out_free_pages;
-                       }
-
-                       /*
-                        * This could deadlock.
-                        *
-                        * But until all the XFS lowlevel code is revamped to
-                        * handle buffer allocation failures we can't do much.
-                        */
-                       if (!(++retries % 100))
-                               xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, gfp_mask);
-
-                       XFS_STATS_INC(xb_page_retries);
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
-                       goto retry;
-               }
-
-               XFS_STATS_INC(xb_page_found);
-
-               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-               size -= nbytes;
-               bp->b_pages[i] = page;
-               offset = 0;
-       }
-       return 0;
-
-out_free_pages:
-       for (i = 0; i < bp->b_page_count; i++)
-               __free_page(bp->b_pages[i]);
-       return error;
-}
-
-/*
- *     Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       ASSERT(bp->b_flags & _XBF_PAGES);
-       if (bp->b_page_count == 1) {
-               /* A single page buffer is always mappable */
-               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       } else if (flags & XBF_MAPPED) {
-               int retried = 0;
-
-               do {
-                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-                                               -1, PAGE_KERNEL);
-                       if (bp->b_addr)
-                               break;
-                       vm_unmap_aliases();
-               } while (retried++ <= 1);
-
-               if (!bp->b_addr)
-                       return -ENOMEM;
-               bp->b_addr += bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       }
-
-       return 0;
-}
-
-/*
- *     Finding and Reading Buffers
- */
-
-/*
- *     Look up, and creates if absent, a lockable buffer for
- *     a given range of an inode.  The buffer is returned
- *     locked.  If other overlapping buffers exist, they are
- *     released before the new buffer is created and locked,
- *     which may imply that this call will block until those buffers
- *     are unlocked.  No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
-       xfs_buftarg_t           *btp,   /* block device target          */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags,
-       xfs_buf_t               *new_bp)
-{
-       xfs_off_t               range_base;
-       size_t                  range_length;
-       struct xfs_perag        *pag;
-       struct rb_node          **rbp;
-       struct rb_node          *parent;
-       xfs_buf_t               *bp;
-
-       range_base = (ioff << BBSHIFT);
-       range_length = (isize << BBSHIFT);
-
-       /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(range_length < (1 << btp->bt_sshift)));
-       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
-       /* get tree root */
-       pag = xfs_perag_get(btp->bt_mount,
-                               xfs_daddr_to_agno(btp->bt_mount, ioff));
-
-       /* walk tree */
-       spin_lock(&pag->pag_buf_lock);
-       rbp = &pag->pag_buf_tree.rb_node;
-       parent = NULL;
-       bp = NULL;
-       while (*rbp) {
-               parent = *rbp;
-               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
-               if (range_base < bp->b_file_offset)
-                       rbp = &(*rbp)->rb_left;
-               else if (range_base > bp->b_file_offset)
-                       rbp = &(*rbp)->rb_right;
-               else {
-                       /*
-                        * found a block offset match. If the range doesn't
-                        * match, the only way this is allowed is if the buffer
-                        * in the cache is stale and the transaction that made
-                        * it stale has not yet committed. i.e. we are
-                        * reallocating a busy extent. Skip this buffer and
-                        * continue searching to the right for an exact match.
-                        */
-                       if (bp->b_buffer_length != range_length) {
-                               ASSERT(bp->b_flags & XBF_STALE);
-                               rbp = &(*rbp)->rb_right;
-                               continue;
-                       }
-                       atomic_inc(&bp->b_hold);
-                       goto found;
-               }
-       }
-
-       /* No match found */
-       if (new_bp) {
-               _xfs_buf_initialize(new_bp, btp, range_base,
-                               range_length, flags);
-               rb_link_node(&new_bp->b_rbnode, parent, rbp);
-               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
-               /* the buffer keeps the perag reference until it is freed */
-               new_bp->b_pag = pag;
-               spin_unlock(&pag->pag_buf_lock);
-       } else {
-               XFS_STATS_INC(xb_miss_locked);
-               spin_unlock(&pag->pag_buf_lock);
-               xfs_perag_put(pag);
-       }
-       return new_bp;
-
-found:
-       spin_unlock(&pag->pag_buf_lock);
-       xfs_perag_put(pag);
-
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK) {
-                       xfs_buf_rele(bp);
-                       XFS_STATS_INC(xb_busy_locked);
-                       return NULL;
-               }
-               xfs_buf_lock(bp);
-               XFS_STATS_INC(xb_get_locked_waited);
-       }
-
-       /*
-        * if the buffer is stale, clear all the external state associated with
-        * it. We need to keep flags such as how we allocated the buffer memory
-        * intact here.
-        */
-       if (bp->b_flags & XBF_STALE) {
-               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
-       }
-
-       trace_xfs_buf_find(bp, flags, _RET_IP_);
-       XFS_STATS_INC(xb_get_locked);
-       return bp;
-}
-
-/*
- *     Assembles a buffer covering the specified range.
- *     Storage in memory for all portions of the buffer will be allocated,
- *     although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get(
-       xfs_buftarg_t           *target,/* target for buffer            */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp, *new_bp;
-       int                     error = 0;
-
-       new_bp = xfs_buf_allocate(flags);
-       if (unlikely(!new_bp))
-               return NULL;
-
-       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-       if (bp == new_bp) {
-               error = xfs_buf_allocate_memory(bp, flags);
-               if (error)
-                       goto no_buffer;
-       } else {
-               xfs_buf_deallocate(new_bp);
-               if (unlikely(bp == NULL))
-                       return NULL;
-       }
-
-       if (!(bp->b_flags & XBF_MAPPED)) {
-               error = _xfs_buf_map_pages(bp, flags);
-               if (unlikely(error)) {
-                       xfs_warn(target->bt_mount,
-                               "%s: failed to map pages\n", __func__);
-                       goto no_buffer;
-               }
-       }
-
-       XFS_STATS_INC(xb_get);
-
-       /*
-        * Always fill in the block number now, the mapped cases can do
-        * their own overlay of this later.
-        */
-       bp->b_bn = ioff;
-       bp->b_count_desired = bp->b_buffer_length;
-
-       trace_xfs_buf_get(bp, flags, _RET_IP_);
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-STATIC int
-_xfs_buf_read(
-       xfs_buf_t               *bp,
-       xfs_buf_flags_t         flags)
-{
-       int                     status;
-
-       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
-       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-
-       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
-       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
-       status = xfs_buf_iorequest(bp);
-       if (status || bp->b_error || (flags & XBF_ASYNC))
-               return status;
-       return xfs_buf_iowait(bp);
-}
-
-xfs_buf_t *
-xfs_buf_read(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp;
-
-       flags |= XBF_READ;
-
-       bp = xfs_buf_get(target, ioff, isize, flags);
-       if (bp) {
-               trace_xfs_buf_read(bp, flags, _RET_IP_);
-
-               if (!XFS_BUF_ISDONE(bp)) {
-                       XFS_STATS_INC(xb_get_read);
-                       _xfs_buf_read(bp, flags);
-               } else if (flags & XBF_ASYNC) {
-                       /*
-                        * Read ahead call which is already satisfied,
-                        * drop the buffer
-                        */
-                       goto no_buffer;
-               } else {
-                       /* We do not want read in the flags */
-                       bp->b_flags &= ~XBF_READ;
-               }
-       }
-
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-/*
- *     If we are not low on memory then do the readahead in a deadlock
- *     safe manner.
- */
-void
-xfs_buf_readahead(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize)
-{
-       if (bdi_read_congested(target->bt_bdi))
-               return;
-
-       xfs_buf_read(target, ioff, isize,
-                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
-}
-
-/*
- * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
- */
-struct xfs_buf *
-xfs_buf_read_uncached(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             daddr,
-       size_t                  length,
-       int                     flags)
-{
-       xfs_buf_t               *bp;
-       int                     error;
-
-       bp = xfs_buf_get_uncached(target, length, flags);
-       if (!bp)
-               return NULL;
-
-       /* set up the buffer for a read IO */
-       XFS_BUF_SET_ADDR(bp, daddr);
-       XFS_BUF_READ(bp);
-
-       xfsbdstrat(mp, bp);
-       error = xfs_buf_iowait(bp);
-       if (error || bp->b_error) {
-               xfs_buf_relse(bp);
-               return NULL;
-       }
-       return bp;
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
-       size_t                  len,
-       xfs_buftarg_t           *target)
-{
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (bp)
-               _xfs_buf_initialize(bp, target, 0, len, 0);
-       return bp;
-}
-
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
-       struct xfs_buf          *bp,
-       size_t                  len)
-{
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_page_count = 0;
-       bp->b_addr = NULL;
-       bp->b_file_offset = 0;
-       bp->b_buffer_length = bp->b_count_desired = len;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       bp->b_flags &= ~XBF_MAPPED;
-}
-
-static inline struct page *
-mem_to_page(
-       void                    *addr)
-{
-       if ((!is_vmalloc_addr(addr))) {
-               return virt_to_page(addr);
-       } else {
-               return vmalloc_to_page(addr);
-       }
-}
-
-int
-xfs_buf_associate_memory(
-       xfs_buf_t               *bp,
-       void                    *mem,
-       size_t                  len)
-{
-       int                     rval;
-       int                     i = 0;
-       unsigned long           pageaddr;
-       unsigned long           offset;
-       size_t                  buflen;
-       int                     page_count;
-
-       pageaddr = (unsigned long)mem & PAGE_MASK;
-       offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_SHIFT;
-
-       /* Free any previous set of page pointers */
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_addr = mem;
-
-       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
-       if (rval)
-               return rval;
-
-       bp->b_offset = offset;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_SIZE;
-       }
-
-       bp->b_count_desired = len;
-       bp->b_buffer_length = buflen;
-       bp->b_flags |= XBF_MAPPED;
-
-       return 0;
-}
-
-xfs_buf_t *
-xfs_buf_get_uncached(
-       struct xfs_buftarg      *target,
-       size_t                  len,
-       int                     flags)
-{
-       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
-       int                     error, i;
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (unlikely(bp == NULL))
-               goto fail;
-       _xfs_buf_initialize(bp, target, 0, len, 0);
-
-       error = _xfs_buf_get_pages(bp, page_count, 0);
-       if (error)
-               goto fail_free_buf;
-
-       for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-               if (!bp->b_pages[i])
-                       goto fail_free_mem;
-       }
-       bp->b_flags |= _XBF_PAGES;
-
-       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
-       if (unlikely(error)) {
-               xfs_warn(target->bt_mount,
-                       "%s: failed to map pages\n", __func__);
-               goto fail_free_mem;
-       }
-
-       trace_xfs_buf_get_uncached(bp, _RET_IP_);
-       return bp;
-
- fail_free_mem:
-       while (--i >= 0)
-               __free_page(bp->b_pages[i]);
-       _xfs_buf_free_pages(bp);
- fail_free_buf:
-       xfs_buf_deallocate(bp);
- fail:
-       return NULL;
-}
-
-/*
- *     Increment reference count on buffer, to hold the buffer concurrently
- *     with another thread which may release (free) the buffer asynchronously.
- *     Must hold the buffer already to call this function.
- */
-void
-xfs_buf_hold(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_hold(bp, _RET_IP_);
-       atomic_inc(&bp->b_hold);
-}
-
-/*
- *     Releases a hold on the specified buffer.  If the
- *     the hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
-       xfs_buf_t               *bp)
-{
-       struct xfs_perag        *pag = bp->b_pag;
-
-       trace_xfs_buf_rele(bp, _RET_IP_);
-
-       if (!pag) {
-               ASSERT(list_empty(&bp->b_lru));
-               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
-               if (atomic_dec_and_test(&bp->b_hold))
-                       xfs_buf_free(bp);
-               return;
-       }
-
-       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
-       ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-               if (!(bp->b_flags & XBF_STALE) &&
-                          atomic_read(&bp->b_lru_ref)) {
-                       xfs_buf_lru_add(bp);
-                       spin_unlock(&pag->pag_buf_lock);
-               } else {
-                       xfs_buf_lru_del(bp);
-                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
-                       spin_unlock(&pag->pag_buf_lock);
-                       xfs_perag_put(pag);
-                       xfs_buf_free(bp);
-               }
-       }
-}
-
-
-/*
- *     Lock a buffer object, if it is not already locked.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we are
- *     being asked to lock a buffer that has been reallocated. Because it is
- *     pinned, we know that the log has not been pushed to disk and hence it
- *     will still be locked.  Rather than continuing to have trylock attempts
- *     fail until someone else pushes the log, push it ourselves before
- *     returning.  This means that the xfsaild will not get stuck trying
- *     to push on stale inode buffers.
- */
-int
-xfs_buf_trylock(
-       struct xfs_buf          *bp)
-{
-       int                     locked;
-
-       locked = down_trylock(&bp->b_sema) == 0;
-       if (locked)
-               XB_SET_OWNER(bp);
-       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-
-       trace_xfs_buf_trylock(bp, _RET_IP_);
-       return locked;
-}
-
-/*
- *     Lock a buffer object.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we
- *     are being asked to lock a buffer that has been reallocated. Because
- *     it is pinned, we know that the log has not been pushed to disk and
- *     hence it will still be locked. Rather than sleeping until someone
- *     else pushes the log, push it ourselves before trying to get the lock.
- */
-void
-xfs_buf_lock(
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_lock(bp, _RET_IP_);
-
-       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-       down(&bp->b_sema);
-       XB_SET_OWNER(bp);
-
-       trace_xfs_buf_lock_done(bp, _RET_IP_);
-}
-
-/*
- *     Releases the lock on the buffer object.
- *     If the buffer is marked delwri but is not queued, do so before we
- *     unlock the buffer as we need to set flags correctly.  We also need to
- *     take a reference for the delwri queue because the unlocker is going to
- *     drop their's and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
-       struct xfs_buf          *bp)
-{
-       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-               atomic_inc(&bp->b_hold);
-               bp->b_flags |= XBF_ASYNC;
-               xfs_buf_delwri_queue(bp, 0);
-       }
-
-       XB_CLEAR_OWNER(bp);
-       up(&bp->b_sema);
-
-       trace_xfs_buf_unlock(bp, _RET_IP_);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
-       xfs_buf_t               *bp)
-{
-       DECLARE_WAITQUEUE       (wait, current);
-
-       if (atomic_read(&bp->b_pin_count) == 0)
-               return;
-
-       add_wait_queue(&bp->b_waiters, &wait);
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (atomic_read(&bp->b_pin_count) == 0)
-                       break;
-               io_schedule();
-       }
-       remove_wait_queue(&bp->b_waiters, &wait);
-       set_current_state(TASK_RUNNING);
-}
-
-/*
- *     Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
-       struct work_struct      *work)
-{
-       xfs_buf_t               *bp =
-               container_of(work, xfs_buf_t, b_iodone_work);
-
-       if (bp->b_iodone)
-               (*(bp->b_iodone))(bp);
-       else if (bp->b_flags & XBF_ASYNC)
-               xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       trace_xfs_buf_iodone(bp, _RET_IP_);
-
-       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-       if (bp->b_error == 0)
-               bp->b_flags |= XBF_DONE;
-
-       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
-               if (schedule) {
-                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-               } else {
-                       xfs_buf_iodone_work(&bp->b_iodone_work);
-               }
-       } else {
-               complete(&bp->b_iowait);
-       }
-}
-
-void
-xfs_buf_ioerror(
-       xfs_buf_t               *bp,
-       int                     error)
-{
-       ASSERT(error >= 0 && error <= 0xffff);
-       bp->b_error = (unsigned short)error;
-       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
-}
-
-int
-xfs_bwrite(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       int                     error;
-
-       bp->b_flags |= XBF_WRITE;
-       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
-
-       xfs_buf_delwri_dequeue(bp);
-       xfs_bdstrat_cb(bp);
-
-       error = xfs_buf_iowait(bp);
-       if (error)
-               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-       xfs_buf_relse(bp);
-       return error;
-}
-
-void
-xfs_bdwrite(
-       void                    *mp,
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_bdwrite(bp, _RET_IP_);
-
-       bp->b_flags &= ~XBF_READ;
-       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-
-       xfs_buf_delwri_queue(bp, 1);
-}
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-       xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-       /*
-        * No need to wait until the buffer is unpinned, we aren't flushing it.
-        */
-       xfs_buf_ioerror(bp, EIO);
-
-       /*
-        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_UNDONE(bp);
-       XFS_BUF_STALE(bp);
-
-       xfs_buf_ioend(bp, 0);
-
-       return EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-STATIC int
-xfs_bioerror_relse(
-       struct xfs_buf  *bp)
-{
-       int64_t         fl = bp->b_flags;
-       /*
-        * No need to wait until the buffer is unpinned.
-        * We aren't flushing it.
-        *
-        * chunkhold expects B_DONE to be set, whether
-        * we actually finish the I/O or not. We don't want to
-        * change that interface.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_DONE(bp);
-       XFS_BUF_STALE(bp);
-       bp->b_iodone = NULL;
-       if (!(fl & XBF_ASYNC)) {
-               /*
-                * Mark b_error and B_ERROR _both_.
-                * Lot's of chunkcache code assumes that.
-                * There's no reason to mark error for
-                * ASYNC buffers.
-                */
-               xfs_buf_ioerror(bp, EIO);
-               XFS_BUF_FINISH_IOWAIT(bp);
-       } else {
-               xfs_buf_relse(bp);
-       }
-
-       return EIO;
-}
-
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
-xfs_bdstrat_cb(
-       struct xfs_buf  *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               /*
-                * Metadata write that didn't get logged but
-                * written delayed anyway. These aren't associated
-                * with a transaction, and can be ignored.
-                */
-               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-                       return xfs_bioerror_relse(bp);
-               else
-                       return xfs_bioerror(bp);
-       }
-
-       xfs_buf_iorequest(bp);
-       return 0;
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               xfs_bioerror_relse(bp);
-               return;
-       }
-
-       xfs_buf_iorequest(bp);
-}
-
-STATIC void
-_xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-               xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
-xfs_buf_bio_end_io(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-
-       xfs_buf_ioerror(bp, -error);
-
-       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
-               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
-
-       _xfs_buf_ioend(bp, 1);
-       bio_put(bio);
-}
-
-STATIC void
-_xfs_buf_ioapply(
-       xfs_buf_t               *bp)
-{
-       int                     rw, map_i, total_nr_pages, nr_pages;
-       struct bio              *bio;
-       int                     offset = bp->b_offset;
-       int                     size = bp->b_count_desired;
-       sector_t                sector = bp->b_bn;
-
-       total_nr_pages = bp->b_page_count;
-       map_i = 0;
-
-       if (bp->b_flags & XBF_WRITE) {
-               if (bp->b_flags & XBF_SYNCIO)
-                       rw = WRITE_SYNC;
-               else
-                       rw = WRITE;
-               if (bp->b_flags & XBF_FUA)
-                       rw |= REQ_FUA;
-               if (bp->b_flags & XBF_FLUSH)
-                       rw |= REQ_FLUSH;
-       } else if (bp->b_flags & XBF_READ_AHEAD) {
-               rw = READA;
-       } else {
-               rw = READ;
-       }
-
-       /* we only use the buffer cache for meta-data */
-       rw |= REQ_META;
-
-next_chunk:
-       atomic_inc(&bp->b_io_remaining);
-       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
-       if (nr_pages > total_nr_pages)
-               nr_pages = total_nr_pages;
-
-       bio = bio_alloc(GFP_NOIO, nr_pages);
-       bio->bi_bdev = bp->b_target->bt_bdev;
-       bio->bi_sector = sector;
-       bio->bi_end_io = xfs_buf_bio_end_io;
-       bio->bi_private = bp;
-
-
-       for (; size && nr_pages; nr_pages--, map_i++) {
-               int     rbytes, nbytes = PAGE_SIZE - offset;
-
-               if (nbytes > size)
-                       nbytes = size;
-
-               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-               if (rbytes < nbytes)
-                       break;
-
-               offset = 0;
-               sector += nbytes >> BBSHIFT;
-               size -= nbytes;
-               total_nr_pages--;
-       }
-
-       if (likely(bio->bi_size)) {
-               if (xfs_buf_is_vmapped(bp)) {
-                       flush_kernel_vmap_range(bp->b_addr,
-                                               xfs_buf_vmap_len(bp));
-               }
-               submit_bio(rw, bio);
-               if (size)
-                       goto next_chunk;
-       } else {
-               xfs_buf_ioerror(bp, EIO);
-               bio_put(bio);
-       }
-}
-
-int
-xfs_buf_iorequest(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iorequest(bp, _RET_IP_);
-
-       if (bp->b_flags & XBF_DELWRI) {
-               xfs_buf_delwri_queue(bp, 1);
-               return 0;
-       }
-
-       if (bp->b_flags & XBF_WRITE) {
-               xfs_buf_wait_unpin(bp);
-       }
-
-       xfs_buf_hold(bp);
-
-       /* Set the count to 1 initially, this will stop an I/O
-        * completion callout which happens before we have started
-        * all the I/O from calling xfs_buf_ioend too early.
-        */
-       atomic_set(&bp->b_io_remaining, 1);
-       _xfs_buf_ioapply(bp);
-       _xfs_buf_ioend(bp, 0);
-
-       xfs_buf_rele(bp);
-       return 0;
-}
-
-/*
- *     Waits for I/O to complete on the buffer supplied.
- *     It returns immediately if no I/O is pending.
- *     It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iowait(bp, _RET_IP_);
-
-       wait_for_completion(&bp->b_iowait);
-
-       trace_xfs_buf_iowait_done(bp, _RET_IP_);
-       return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
-       xfs_buf_t               *bp,
-       size_t                  offset)
-{
-       struct page             *page;
-
-       if (bp->b_flags & XBF_MAPPED)
-               return bp->b_addr + offset;
-
-       offset += bp->b_offset;
-       page = bp->b_pages[offset >> PAGE_SHIFT];
-       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
-}
-
-/*
- *     Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
-       xfs_buf_t               *bp,    /* buffer to process            */
-       size_t                  boff,   /* starting buffer offset       */
-       size_t                  bsize,  /* length to copy               */
-       void                    *data,  /* data address                 */
-       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
-{
-       size_t                  bend, cpoff, csize;
-       struct page             *page;
-
-       bend = boff + bsize;
-       while (boff < bend) {
-               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-               cpoff = xfs_buf_poff(boff + bp->b_offset);
-               csize = min_t(size_t,
-                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
-
-               ASSERT(((csize + cpoff) <= PAGE_SIZE));
-
-               switch (mode) {
-               case XBRW_ZERO:
-                       memset(page_address(page) + cpoff, 0, csize);
-                       break;
-               case XBRW_READ:
-                       memcpy(data, page_address(page) + cpoff, csize);
-                       break;
-               case XBRW_WRITE:
-                       memcpy(page_address(page) + cpoff, data, csize);
-               }
-
-               boff += csize;
-               data += csize;
-       }
-}
-
-/*
- *     Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but have not yet
- * returned. These buffers will have an elevated hold count, so wait on those
- * while freeing all the buffers only held by the LRU.
- */
-void
-xfs_wait_buftarg(
-       struct xfs_buftarg      *btp)
-{
-       struct xfs_buf          *bp;
-
-restart:
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-               if (atomic_read(&bp->b_hold) > 1) {
-                       spin_unlock(&btp->bt_lru_lock);
-                       delay(100);
-                       goto restart;
-               }
-               /*
-                * clear the LRU reference count so the bufer doesn't get
-                * ignored in xfs_buf_rele().
-                */
-               atomic_set(&bp->b_lru_ref, 0);
-               spin_unlock(&btp->bt_lru_lock);
-               xfs_buf_rele(bp);
-               spin_lock(&btp->bt_lru_lock);
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-int
-xfs_buftarg_shrink(
-       struct shrinker         *shrink,
-       struct shrink_control   *sc)
-{
-       struct xfs_buftarg      *btp = container_of(shrink,
-                                       struct xfs_buftarg, bt_shrinker);
-       struct xfs_buf          *bp;
-       int nr_to_scan = sc->nr_to_scan;
-       LIST_HEAD(dispose);
-
-       if (!nr_to_scan)
-               return btp->bt_lru_nr;
-
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               if (nr_to_scan-- <= 0)
-                       break;
-
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
-               /*
-                * Decrement the b_lru_ref count unless the value is already
-                * zero. If the value is already zero, we need to reclaim the
-                * buffer, otherwise it gets another trip through the LRU.
-                */
-               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-                       list_move_tail(&bp->b_lru, &btp->bt_lru);
-                       continue;
-               }
-
-               /*
-                * remove the buffer from the LRU now to avoid needing another
-                * lock round trip inside xfs_buf_rele().
-                */
-               list_move(&bp->b_lru, &dispose);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-
-       while (!list_empty(&dispose)) {
-               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
-               list_del_init(&bp->b_lru);
-               xfs_buf_rele(bp);
-       }
-
-       return btp->bt_lru_nr;
-}
-
-void
-xfs_free_buftarg(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *btp)
-{
-       unregister_shrinker(&btp->bt_shrinker);
-
-       xfs_flush_buftarg(btp, 1);
-       if (mp->m_flags & XFS_MOUNT_BARRIER)
-               xfs_blkdev_issue_flush(btp);
-
-       kthread_stop(btp->bt_task);
-       kmem_free(btp);
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize,
-       int                     verbose)
-{
-       btp->bt_bsize = blocksize;
-       btp->bt_sshift = ffs(sectorsize) - 1;
-       btp->bt_smask = sectorsize - 1;
-
-       if (set_blocksize(btp->bt_bdev, sectorsize)) {
-               xfs_warn(btp->bt_mount,
-                       "Cannot set_blocksize to %u on device %s\n",
-                       sectorsize, xfs_buf_target_name(btp));
-               return EINVAL;
-       }
-
-       return 0;
-}
-
-/*
- *     When allocating the initial buffer target we have not yet
- *     read in the superblock, so don't know what sized sectors
- *     are being used is at this early stage.  Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
-       xfs_buftarg_t           *btp,
-       struct block_device     *bdev)
-{
-       return xfs_setsize_buftarg_flags(btp,
-                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize)
-{
-       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
-       xfs_buftarg_t           *btp,
-       const char              *fsname)
-{
-       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-       spin_lock_init(&btp->bt_delwrite_lock);
-       btp->bt_flags = 0;
-       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-       if (IS_ERR(btp->bt_task))
-               return PTR_ERR(btp->bt_task);
-       return 0;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
-       struct xfs_mount        *mp,
-       struct block_device     *bdev,
-       int                     external,
-       const char              *fsname)
-{
-       xfs_buftarg_t           *btp;
-
-       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
-       btp->bt_mount = mp;
-       btp->bt_dev =  bdev->bd_dev;
-       btp->bt_bdev = bdev;
-       btp->bt_bdi = blk_get_backing_dev_info(bdev);
-       if (!btp->bt_bdi)
-               goto error;
-
-       INIT_LIST_HEAD(&btp->bt_lru);
-       spin_lock_init(&btp->bt_lru_lock);
-       if (xfs_setsize_buftarg_early(btp, bdev))
-               goto error;
-       if (xfs_alloc_delwrite_queue(btp, fsname))
-               goto error;
-       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
-       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
-       register_shrinker(&btp->bt_shrinker);
-       return btp;
-
-error:
-       kmem_free(btp);
-       return NULL;
-}
-
-
-/*
- *     Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
-       xfs_buf_t               *bp,
-       int                     unlock)
-{
-       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-
-       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
-       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
-       spin_lock(dwlk);
-       /* If already in the queue, dequeue and place at tail */
-       if (!list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               if (unlock)
-                       atomic_dec(&bp->b_hold);
-               list_del(&bp->b_list);
-       }
-
-       if (list_empty(dwq)) {
-               /* start xfsbufd as it is about to have something to do */
-               wake_up_process(bp->b_target->bt_task);
-       }
-
-       bp->b_flags |= _XBF_DELWRI_Q;
-       list_add_tail(&bp->b_list, dwq);
-       bp->b_queuetime = jiffies;
-       spin_unlock(dwlk);
-
-       if (unlock)
-               xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
-       xfs_buf_t               *bp)
-{
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-       int                     dequeued = 0;
-
-       spin_lock(dwlk);
-       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               list_del_init(&bp->b_list);
-               dequeued = 1;
-       }
-       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-       spin_unlock(dwlk);
-
-       if (dequeued)
-               xfs_buf_rele(bp);
-
-       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time the
- * xfsbufd sees it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
-       ASSERT(bp->b_flags & XBF_DELWRI);
-       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
-       /*
-        * Check the buffer age before locking the delayed write queue as we
-        * don't need to promote buffers that are already past the flush age.
-        */
-       if (bp->b_queuetime < jiffies - age)
-               return;
-       bp->b_queuetime = jiffies - age;
-       spin_lock(&btp->bt_delwrite_lock);
-       list_move(&bp->b_list, &btp->bt_delwrite_queue);
-       spin_unlock(&btp->bt_delwrite_lock);
-}
-
-STATIC void
-xfs_buf_runall_queues(
-       struct workqueue_struct *queue)
-{
-       flush_workqueue(queue);
-}
-
-/*
- * Move as many buffers as specified to the supplied list
- * idicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-       xfs_buftarg_t   *target,
-       struct list_head *list,
-       unsigned long   age)
-{
-       xfs_buf_t       *bp, *n;
-       struct list_head *dwq = &target->bt_delwrite_queue;
-       spinlock_t      *dwlk = &target->bt_delwrite_lock;
-       int             skipped = 0;
-       int             force;
-
-       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       INIT_LIST_HEAD(list);
-       spin_lock(dwlk);
-       list_for_each_entry_safe(bp, n, dwq, b_list) {
-               ASSERT(bp->b_flags & XBF_DELWRI);
-
-               if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
-                       if (!force &&
-                           time_before(jiffies, bp->b_queuetime + age)) {
-                               xfs_buf_unlock(bp);
-                               break;
-                       }
-
-                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-                       bp->b_flags |= XBF_WRITE;
-                       list_move_tail(&bp->b_list, list);
-                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
-               } else
-                       skipped++;
-       }
-       spin_unlock(dwlk);
-
-       return skipped;
-
-}
-
-/*
- * Compare function is more complex than it needs to be because
- * the return value is only 32 bits and we are doing comparisons
- * on 64 bit values
- */
-static int
-xfs_buf_cmp(
-       void            *priv,
-       struct list_head *a,
-       struct list_head *b)
-{
-       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
-       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
-       xfs_daddr_t             diff;
-
-       diff = ap->b_bn - bp->b_bn;
-       if (diff < 0)
-               return -1;
-       if (diff > 0)
-               return 1;
-       return 0;
-}
-
-STATIC int
-xfsbufd(
-       void            *data)
-{
-       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-
-       current->flags |= PF_MEMALLOC;
-
-       set_freezable();
-
-       do {
-               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-               struct list_head tmp;
-               struct blk_plug plug;
-
-               if (unlikely(freezing(current))) {
-                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-                       refrigerator();
-               } else {
-                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-               }
-
-               /* sleep for a long time if there is nothing to do. */
-               if (list_empty(&target->bt_delwrite_queue))
-                       tout = MAX_SCHEDULE_TIMEOUT;
-               schedule_timeout_interruptible(tout);
-
-               xfs_buf_delwri_split(target, &tmp, age);
-               list_sort(NULL, &tmp, xfs_buf_cmp);
-
-               blk_start_plug(&plug);
-               while (!list_empty(&tmp)) {
-                       struct xfs_buf *bp;
-                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
-                       list_del_init(&bp->b_list);
-                       xfs_bdstrat_cb(bp);
-               }
-               blk_finish_plug(&plug);
-       } while (!kthread_should_stop());
-
-       return 0;
-}
-
-/*
- *     Go through all incore buffers, and release buffers if they belong to
- *     the given device. This is used in filesystem error handling to
- *     preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
-       xfs_buftarg_t   *target,
-       int             wait)
-{
-       xfs_buf_t       *bp;
-       int             pincount = 0;
-       LIST_HEAD(tmp_list);
-       LIST_HEAD(wait_list);
-       struct blk_plug plug;
-
-       xfs_buf_runall_queues(xfsconvertd_workqueue);
-       xfs_buf_runall_queues(xfsdatad_workqueue);
-       xfs_buf_runall_queues(xfslogd_workqueue);
-
-       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
-
-       /*
-        * Dropped the delayed write list lock, now walk the temporary list.
-        * All I/O is issued async and then if we need to wait for completion
-        * we do that after issuing all the IO.
-        */
-       list_sort(NULL, &tmp_list, xfs_buf_cmp);
-
-       blk_start_plug(&plug);
-       while (!list_empty(&tmp_list)) {
-               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-               ASSERT(target == bp->b_target);
-               list_del_init(&bp->b_list);
-               if (wait) {
-                       bp->b_flags &= ~XBF_ASYNC;
-                       list_add(&bp->b_list, &wait_list);
-               }
-               xfs_bdstrat_cb(bp);
-       }
-       blk_finish_plug(&plug);
-
-       if (wait) {
-               /* Wait for IO to complete. */
-               while (!list_empty(&wait_list)) {
-                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-                       list_del_init(&bp->b_list);
-                       xfs_buf_iowait(bp);
-                       xfs_buf_relse(bp);
-               }
-       }
-
-       return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
-       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-                                               KM_ZONE_HWALIGN, NULL);
-       if (!xfs_buf_zone)
-               goto out;
-
-       xfslogd_workqueue = alloc_workqueue("xfslogd",
-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
-       if (!xfslogd_workqueue)
-               goto out_free_buf_zone;
-
-       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-       if (!xfsdatad_workqueue)
-               goto out_destroy_xfslogd_workqueue;
-
-       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-                                               WQ_MEM_RECLAIM, 1);
-       if (!xfsconvertd_workqueue)
-               goto out_destroy_xfsdatad_workqueue;
-
-       return 0;
-
- out_destroy_xfsdatad_workqueue:
-       destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-       destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
-       kmem_zone_destroy(xfs_buf_zone);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
-       destroy_workqueue(xfsconvertd_workqueue);
-       destroy_workqueue(xfsdatad_workqueue);
-       destroy_workqueue(xfslogd_workqueue);
-       kmem_zone_destroy(xfs_buf_zone);
-}
-
-#ifdef CONFIG_KDB_MODULES
-struct list_head *
-xfs_get_buftarg_list(void)
-{
-       return &xfs_buftarg_list;
-}
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
deleted file mode 100644 (file)
index 620972b..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- *     Base types
- */
-
-#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
-       XBRW_READ = 1,                  /* transfer into target memory */
-       XBRW_WRITE = 2,                 /* transfer from target memory */
-       XBRW_ZERO = 3,                  /* Zero target memory */
-} xfs_buf_rw_t;
-
-#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
-#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
-#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
-#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA                (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
-
-/* flags used only as arguments to access routines */
-#define XBF_LOCK       (1 << 15)/* lock requested */
-#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
-
-/* flags used only internally */
-#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
-
-typedef unsigned int xfs_buf_flags_t;
-
-#define XFS_BUF_FLAGS \
-       { XBF_READ,             "READ" }, \
-       { XBF_WRITE,            "WRITE" }, \
-       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
-       { XBF_MAPPED,           "MAPPED" }, \
-       { XBF_ASYNC,            "ASYNC" }, \
-       { XBF_DONE,             "DONE" }, \
-       { XBF_DELWRI,           "DELWRI" }, \
-       { XBF_STALE,            "STALE" }, \
-       { XBF_SYNCIO,           "SYNCIO" }, \
-       { XBF_FUA,              "FUA" }, \
-       { XBF_FLUSH,            "FLUSH" }, \
-       { XBF_LOCK,             "LOCK" },       /* should never be set */\
-       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
-       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
-       { _XBF_PAGES,           "PAGES" }, \
-       { _XBF_KMEM,            "KMEM" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }
-
-typedef enum {
-       XBT_FORCE_SLEEP = 0,
-       XBT_FORCE_FLUSH = 1,
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_buftarg {
-       dev_t                   bt_dev;
-       struct block_device     *bt_bdev;
-       struct backing_dev_info *bt_bdi;
-       struct xfs_mount        *bt_mount;
-       unsigned int            bt_bsize;
-       unsigned int            bt_sshift;
-       size_t                  bt_smask;
-
-       /* per device delwri queue */
-       struct task_struct      *bt_task;
-       struct list_head        bt_delwrite_queue;
-       spinlock_t              bt_delwrite_lock;
-       unsigned long           bt_flags;
-
-       /* LRU control structures */
-       struct shrinker         bt_shrinker;
-       struct list_head        bt_lru;
-       spinlock_t              bt_lru_lock;
-       unsigned int            bt_lru_nr;
-} xfs_buftarg_t;
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-
-#define XB_PAGES       2
-
-typedef struct xfs_buf {
-       /*
-        * first cacheline holds all the fields needed for an uncontended cache
-        * hit to be fully processed. The semaphore straddles the cacheline
-        * boundary, but the counter and lock sits on the first cacheline,
-        * which is the only bit that is touched if we hit the semaphore
-        * fast-path on locking.
-        */
-       struct rb_node          b_rbnode;       /* rbtree node */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
-       atomic_t                b_hold;         /* reference count */
-       atomic_t                b_lru_ref;      /* lru reclaim ref count */
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct semaphore        b_sema;         /* semaphore for lockables */
-
-       struct list_head        b_lru;          /* lru list */
-       wait_queue_head_t       b_waiters;      /* unpin waiters */
-       struct list_head        b_list;
-       struct xfs_perag        *b_pag;         /* contains rbtree root */
-       xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       xfs_daddr_t             b_bn;           /* block number for I/O */
-       size_t                  b_count_desired;/* desired transfer size */
-       void                    *b_addr;        /* virtual address of buffer */
-       struct work_struct      b_iodone_work;
-       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
-       struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_fspriv;
-       struct xfs_trans        *b_transp;
-       struct page             **b_pages;      /* array of page pointers */
-       struct page             *b_page_array[XB_PAGES]; /* inline pages */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
-       unsigned short          b_error;        /* error code on I/O */
-#ifdef XFS_BUF_LOCK_TRACKING
-       int                     b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-       _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
-extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
-                               struct xfs_buftarg *target,
-                               xfs_daddr_t daddr, size_t length, int flags);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-#define xfs_buf_islocked(bp) \
-       ((bp)->b_sema.count <= 0)
-
-/* Buffer Read and Write Routines */
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern void xfs_buf_ioend(xfs_buf_t *, int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-                               xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
-       return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void xfs_buf_delwri_promote(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-static inline const char *
-xfs_buf_target_name(struct xfs_buftarg *target)
-{
-       static char __b[BDEVNAME_SIZE];
-
-       return bdevname(target->bt_bdev, __b);
-}
-
-
-#define XFS_BUF_ZEROFLAGS(bp) \
-       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
-                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
-#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
-#define XFS_BUF_SUPER_STALE(bp)        do {                            \
-                                       XFS_BUF_STALE(bp);      \
-                                       xfs_buf_delwri_dequeue(bp);     \
-                                       XFS_BUF_DONE(bp);       \
-                               } while (0)
-
-#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
-
-static inline void
-xfs_buf_set_ref(
-       struct xfs_buf  *bp,
-       int             lru_ref)
-{
-       atomic_set(&bp->b_lru_ref, lru_ref);
-}
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
-
-static inline int xfs_buf_ispinned(struct xfs_buf *bp)
-{
-       return atomic_read(&bp->b_pin_count);
-}
-
-#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
-       xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-}
-
-/*
- *     Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-                       struct block_device *, int, const char *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#ifdef CONFIG_KDB_MODULES
-extern struct list_head *xfs_get_buftarg_list(void);
-#endif
-
-#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
-
-#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
-
-#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
deleted file mode 100644 (file)
index 244e797..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_discard.h"
-#include "xfs_trace.h"
-
-STATIC int
-xfs_trim_extents(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          agno,
-       xfs_fsblock_t           start,
-       xfs_fsblock_t           len,
-       xfs_fsblock_t           minlen,
-       __uint64_t              *blocks_trimmed)
-{
-       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
-       struct xfs_btree_cur    *cur;
-       struct xfs_buf          *agbp;
-       struct xfs_perag        *pag;
-       int                     error;
-       int                     i;
-
-       pag = xfs_perag_get(mp, agno);
-
-       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-       if (error || !agbp)
-               goto out_put_perag;
-
-       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
-       /*
-        * Force out the log.  This means any transactions that might have freed
-        * space before we took the AGF buffer lock are now on disk, and the
-        * volatile disk cache is flushed.
-        */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /*
-        * Look up the longest btree in the AGF and start with it.
-        */
-       error = xfs_alloc_lookup_le(cur, 0,
-                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
-       if (error)
-               goto out_del_cursor;
-
-       /*
-        * Loop until we are done with all extents that are large
-        * enough to be worth discarding.
-        */
-       while (i) {
-               xfs_agblock_t fbno;
-               xfs_extlen_t flen;
-
-               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
-               if (error)
-                       goto out_del_cursor;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
-               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
-
-               /*
-                * Too small?  Give up.
-                */
-               if (flen < minlen) {
-                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-                       goto out_del_cursor;
-               }
-
-               /*
-                * If the extent is entirely outside of the range we are
-                * supposed to discard skip it.  Do not bother to trim
-                * down partially overlapping ranges for now.
-                */
-               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
-                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
-                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               /*
-                * If any blocks in the range are still busy, skip the
-                * discard and try again the next time.
-                */
-               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
-                       trace_xfs_discard_busy(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               trace_xfs_discard_extent(mp, agno, fbno, flen);
-               error = -blkdev_issue_discard(bdev,
-                               XFS_AGB_TO_DADDR(mp, agno, fbno),
-                               XFS_FSB_TO_BB(mp, flen),
-                               GFP_NOFS, 0);
-               if (error)
-                       goto out_del_cursor;
-               *blocks_trimmed += flen;
-
-next_extent:
-               error = xfs_btree_decrement(cur, 0, &i);
-               if (error)
-                       goto out_del_cursor;
-       }
-
-out_del_cursor:
-       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       xfs_buf_relse(agbp);
-out_put_perag:
-       xfs_perag_put(pag);
-       return error;
-}
-
-int
-xfs_ioc_trim(
-       struct xfs_mount                *mp,
-       struct fstrim_range __user      *urange)
-{
-       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
-       unsigned int            granularity = q->limits.discard_granularity;
-       struct fstrim_range     range;
-       xfs_fsblock_t           start, len, minlen;
-       xfs_agnumber_t          start_agno, end_agno, agno;
-       __uint64_t              blocks_trimmed = 0;
-       int                     error, last_error = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (!blk_queue_discard(q))
-               return -XFS_ERROR(EOPNOTSUPP);
-       if (copy_from_user(&range, urange, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-
-       /*
-        * Truncating down the len isn't actually quite correct, but using
-        * XFS_B_TO_FSB would mean we trivially get overflows for values
-        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
-        * used by the fstrim application.  In the end it really doesn't
-        * matter as trimming blocks is an advisory interface.
-        */
-       start = XFS_B_TO_FSBT(mp, range.start);
-       len = XFS_B_TO_FSBT(mp, range.len);
-       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
-
-       start_agno = XFS_FSB_TO_AGNO(mp, start);
-       if (start_agno >= mp->m_sb.sb_agcount)
-               return -XFS_ERROR(EINVAL);
-
-       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
-       if (end_agno >= mp->m_sb.sb_agcount)
-               end_agno = mp->m_sb.sb_agcount - 1;
-
-       for (agno = start_agno; agno <= end_agno; agno++) {
-               error = -xfs_trim_extents(mp, agno, start, len, minlen,
-                                         &blocks_trimmed);
-               if (error)
-                       last_error = error;
-       }
-
-       if (last_error)
-               return last_error;
-
-       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
-       if (copy_to_user(urange, &range, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-int
-xfs_discard_extents(
-       struct xfs_mount        *mp,
-       struct list_head        *list)
-{
-       struct xfs_busy_extent  *busyp;
-       int                     error = 0;
-
-       list_for_each_entry(busyp, list, list) {
-               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
-                                        busyp->length);
-
-               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
-                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
-                               XFS_FSB_TO_BB(mp, busyp->length),
-                               GFP_NOFS, 0);
-               if (error && error != EOPNOTSUPP) {
-                       xfs_info(mp,
-        "discard failed for extent [0x%llu,%u], error %d",
-                                (unsigned long long)busyp->bno,
-                                busyp->length,
-                                error);
-                       return error;
-               }
-       }
-
-       return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
deleted file mode 100644 (file)
index 344879a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef XFS_DISCARD_H
-#define XFS_DISCARD_H 1
-
-struct fstrim_range;
-struct list_head;
-
-extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
-
-#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
deleted file mode 100644 (file)
index 75e5d32..0000000
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-/*
- * Note that we only accept fileids which are long enough rather than allow
- * the parent generation number to default to zero.  XFS considers zero a
- * valid generation number not an invalid/wildcard value.
- */
-static int xfs_fileid_length(int fileid_type)
-{
-       switch (fileid_type) {
-       case FILEID_INO32_GEN:
-               return 2;
-       case FILEID_INO32_GEN_PARENT:
-               return 4;
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               return 3;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               return 6;
-       }
-       return 255; /* invalid */
-}
-
-STATIC int
-xfs_fs_encode_fh(
-       struct dentry           *dentry,
-       __u32                   *fh,
-       int                     *max_len,
-       int                     connectable)
-{
-       struct fid              *fid = (struct fid *)fh;
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
-       struct inode            *inode = dentry->d_inode;
-       int                     fileid_type;
-       int                     len;
-
-       /* Directories don't need their parent encoded, they have ".." */
-       if (S_ISDIR(inode->i_mode) || !connectable)
-               fileid_type = FILEID_INO32_GEN;
-       else
-               fileid_type = FILEID_INO32_GEN_PARENT;
-
-       /*
-        * If the the filesystem may contain 64bit inode numbers, we need
-        * to use larger file handles that can represent them.
-        *
-        * While we only allocate inodes that do not fit into 32 bits any
-        * large enough filesystem may contain them, thus the slightly
-        * confusing looking conditional below.
-        */
-       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
-           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
-               fileid_type |= XFS_FILEID_TYPE_64FLAG;
-
-       /*
-        * Only encode if there is enough space given.  In practice
-        * this means we can't export a filesystem with 64bit inodes
-        * over NFSv2 with the subtree_check export option; the other
-        * seven combinations work.  The real answer is "don't use v2".
-        */
-       len = xfs_fileid_length(fileid_type);
-       if (*max_len < len) {
-               *max_len = len;
-               return 255;
-       }
-       *max_len = len;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               spin_lock(&dentry->d_lock);
-               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN:
-               fid->i32.ino = inode->i_ino;
-               fid->i32.gen = inode->i_generation;
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               spin_lock(&dentry->d_lock);
-               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               fid64->ino = inode->i_ino;
-               fid64->gen = inode->i_generation;
-               break;
-       }
-
-       return fileid_type;
-}
-
-STATIC struct inode *
-xfs_nfs_get_inode(
-       struct super_block      *sb,
-       u64                     ino,
-       u32                     generation)
- {
-       xfs_mount_t             *mp = XFS_M(sb);
-       xfs_inode_t             *ip;
-       int                     error;
-
-       /*
-        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
-        */
-       if (ino == 0)
-               return ERR_PTR(-ESTALE);
-
-       /*
-        * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
-        * fine and not an indication of a corrupted filesystem as clients can
-        * send invalid file handles and we have to handle it gracefully..
-        */
-       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
-       if (error) {
-               /*
-                * EINVAL means the inode cluster doesn't exist anymore.
-                * This implies the filehandle is stale, so we should
-                * translate it here.
-                * We don't use ESTALE directly down the chain to not
-                * confuse applications using bulkstat that expect EINVAL.
-                */
-               if (error == EINVAL || error == ENOENT)
-                       error = ESTALE;
-               return ERR_PTR(-error);
-       }
-
-       if (ip->i_d.di_gen != generation) {
-               IRELE(ip);
-               return ERR_PTR(-ESTALE);
-       }
-
-       return VFS_I(ip);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       if (fh_len < xfs_fileid_length(fileid_type))
-               return NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-       case FILEID_INO32_GEN:
-               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
-                                             fid->i32.parent_gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
-                                             fid64->parent_gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_get_parent(
-       struct dentry           *child)
-{
-       int                     error;
-       struct xfs_inode        *cip;
-
-       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
-       if (unlikely(error))
-               return ERR_PTR(-error);
-
-       return d_obtain_alias(VFS_I(cip));
-}
-
-STATIC int
-xfs_fs_nfs_commit_metadata(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_ipincount(ip)) {
-               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
-                               XFS_LOG_SYNC, NULL);
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       return error;
-}
-
-const struct export_operations xfs_export_operations = {
-       .encode_fh              = xfs_fs_encode_fh,
-       .fh_to_dentry           = xfs_fs_fh_to_dentry,
-       .fh_to_parent           = xfs_fs_fh_to_parent,
-       .get_parent             = xfs_fs_get_parent,
-       .commit_metadata        = xfs_fs_nfs_commit_metadata,
-};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
deleted file mode 100644 (file)
index 3272b6a..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_EXPORT_H__
-#define __XFS_EXPORT_H__
-
-/*
- * Common defines for code related to exporting XFS filesystems over NFS.
- *
- * The NFS fileid goes out on the wire as an array of
- * 32bit unsigned ints in host order.  There are 5 possible
- * formats.
- *
- * (1) fileid_type=0x00
- *     (no fileid data; handled by the generic code)
- *
- * (2) fileid_type=0x01
- *     inode-num
- *     generation
- *
- * (3) fileid_type=0x02
- *     inode-num
- *     generation
- *     parent-inode-num
- *     parent-generation
- *
- * (4) fileid_type=0x81
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *
- * (5) fileid_type=0x82
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *     parent-inode-num-lo32
- *     parent-inode-num-hi32
- *     parent-generation
- *
- * Note, the NFS filehandle also includes an fsid portion which
- * may have an inode number in it.  That number is hardcoded to
- * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesystem with 64bit
- * inodes you should either export the mountpoint (rather than
- * a subdirectory) or use the "fsid" export option.
- */
-
-struct xfs_fid64 {
-       u64 ino;
-       u32 gen;
-       u64 parent_ino;
-       u32 parent_gen;
-} __attribute__((packed));
-
-/* This flag goes on the wire.  Don't play with it. */
-#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
-
-#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
deleted file mode 100644 (file)
index 7f7b424..0000000
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_da_btree.h"
-#include "xfs_ioctl.h"
-#include "xfs_trace.h"
-
-#include <linux/dcache.h>
-#include <linux/falloc.h>
-
-static const struct vm_operations_struct xfs_file_vm_ops;
-
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
-       xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_iunlock(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-static inline void
-xfs_rw_ilock_demote(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_ilock_demote(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-/*
- *     xfs_iozero
- *
- *     xfs_iozero clears the specified range of buffer supplied,
- *     and marks all the affected blocks as valid and modified.  If
- *     an affected block is not allocated, it will be allocated.  If
- *     an affected block is not completely overwritten, and is not
- *     valid before the operation, it will be read from disk before
- *     being partially zeroed.
- */
-STATIC int
-xfs_iozero(
-       struct xfs_inode        *ip,    /* inode                        */
-       loff_t                  pos,    /* offset in file               */
-       size_t                  count)  /* size of data to zero         */
-{
-       struct page             *page;
-       struct address_space    *mapping;
-       int                     status;
-
-       mapping = VFS_I(ip)->i_mapping;
-       do {
-               unsigned offset, bytes;
-               void *fsdata;
-
-               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-               bytes = PAGE_CACHE_SIZE - offset;
-               if (bytes > count)
-                       bytes = count;
-
-               status = pagecache_write_begin(NULL, mapping, pos, bytes,
-                                       AOP_FLAG_UNINTERRUPTIBLE,
-                                       &page, &fsdata);
-               if (status)
-                       break;
-
-               zero_user(page, offset, bytes);
-
-               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
-                                       page, fsdata);
-               WARN_ON(status <= 0); /* can't return less than zero! */
-               pos += bytes;
-               count -= bytes;
-               status = 0;
-       } while (count);
-
-       return (-status);
-}
-
-STATIC int
-xfs_file_fsync(
-       struct file             *file,
-       loff_t                  start,
-       loff_t                  end,
-       int                     datasync)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error = 0;
-       int                     log_flushed = 0;
-
-       trace_xfs_file_fsync(ip);
-
-       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       if (error)
-               return error;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_ioend_wait(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-       if (mp->m_flags & XFS_MOUNT_BARRIER) {
-               /*
-                * If we have an RT and/or log subvolume we need to make sure
-                * to flush the write cache the device used for file data
-                * first.  This is to ensure newly written file data make
-                * it to disk before logging the new inode size in case of
-                * an extending write.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-               else if (mp->m_logdev_targp != mp->m_ddev_targp)
-                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
-       }
-
-       /*
-        * We always need to make sure that the required inode state is safe on
-        * disk.  The inode might be clean but we still might need to force the
-        * log because of committed transactions that haven't hit the disk yet.
-        * Likewise, there could be unflushed non-transactional changes to the
-        * inode core that have to go to disk and this requires us to issue
-        * a synchronous transaction to capture these changes correctly.
-        *
-        * This code relies on the assumption that if the i_update_core field
-        * of the inode is clear and the inode is unpinned then it is clean
-        * and no action is required.
-        */
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-       /*
-        * First check if the VFS inode is marked dirty.  All the dirtying
-        * of non-transactional updates no goes through mark_inode_dirty*,
-        * which allows us to distinguish beteeen pure timestamp updates
-        * and i_size updates which need to be caught for fdatasync.
-        * After that also theck for the dirty state in the XFS inode, which
-        * might gets cleared when the inode gets written out via the AIL
-        * or xfs_iflush_cluster.
-        */
-       if (((inode->i_state & I_DIRTY_DATASYNC) ||
-           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-           ip->i_update_core) {
-               /*
-                * Kick off a transaction to log the inode core to get the
-                * updates.  The sync transaction will also force the log.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, 0,
-                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return -error;
-               }
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * Note - it's possible that we might have pushed ourselves out
-                * of the way during trans_reserve which would flush the inode.
-                * But there's no guarantee that the inode buffer has actually
-                * gone out yet (it's delwri).  Plus the buffer could be pinned
-                * anyway if it's part of an inode in another recent
-                * transaction.  So we play it safe and fire off the
-                * transaction anyway.
-                */
-               xfs_trans_ijoin(tp, ip);
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               xfs_trans_set_sync(tp);
-               error = _xfs_trans_commit(tp, 0, &log_flushed);
-
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       } else {
-               /*
-                * Timestamps/size haven't changed since last inode flush or
-                * inode transaction commit.  That means either nothing got
-                * written or a transaction committed which caught the updates.
-                * If the latter happened and the transaction hasn't hit the
-                * disk yet, the inode will be still be pinned.  If it is,
-                * force the log.
-                */
-               if (xfs_ipincount(ip)) {
-                       error = _xfs_log_force_lsn(mp,
-                                       ip->i_itemp->ili_last_lsn,
-                                       XFS_LOG_SYNC, &log_flushed);
-               }
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       }
-
-       /*
-        * If we only have a single device, and the log force about was
-        * a no-op we might have to flush the data device cache here.
-        * This can only happen for fdatasync/O_DSYNC if we were overwriting
-        * an already allocated file and thus do not have any metadata to
-        * commit.
-        */
-       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-           mp->m_logdev_targp == mp->m_ddev_targp &&
-           !XFS_IS_REALTIME_INODE(ip) &&
-           !log_flushed)
-               xfs_blkdev_issue_flush(mp->m_ddev_targp);
-
-       return -error;
-}
-
-STATIC ssize_t
-xfs_file_aio_read(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       size_t                  size = 0;
-       ssize_t                 ret = 0;
-       int                     ioflags = 0;
-       xfs_fsize_t             n;
-       unsigned long           seg;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ioflags |= IO_ISDIRECT;
-       if (file->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       /* START copy & waste from filemap.c */
-       for (seg = 0; seg < nr_segs; seg++) {
-               const struct iovec *iv = &iovp[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               size += iv->iov_len;
-               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-                       return XFS_ERROR(-EINVAL);
-       }
-       /* END copy & waste from filemap.c */
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-               if ((iocb->ki_pos & target->bt_smask) ||
-                   (size & target->bt_smask)) {
-                       if (iocb->ki_pos == ip->i_size)
-                               return 0;
-                       return -XFS_ERROR(EINVAL);
-               }
-       }
-
-       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
-       if (n <= 0 || size == 0)
-               return 0;
-
-       if (n < size)
-               size = n;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
-               if (inode->i_mapping->nrpages) {
-                       ret = -xfs_flushinval_pages(ip,
-                                       (iocb->ki_pos & PAGE_CACHE_MASK),
-                                       -1, FI_REMAPF_LOCKED);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-       } else
-               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
-       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_splice_read(
-       struct file             *infilp,
-       loff_t                  *ppos,
-       struct pipe_inode_info  *pipe,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       if (infilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC void
-xfs_aio_write_isize_update(
-       struct inode    *inode,
-       loff_t          *ppos,
-       ssize_t         bytes_written)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             isize = i_size_read(inode);
-
-       if (bytes_written > 0)
-               XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
-       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
-                                       *ppos > isize))
-               *ppos = isize;
-
-       if (*ppos > ip->i_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               if (*ppos > ip->i_size)
-                       ip->i_size = *ppos;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred.  In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
-       struct xfs_inode        *ip)
-{
-       if (ip->i_new_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               ip->i_new_size = 0;
-               if (ip->i_d.di_size > ip->i_size)
-                       ip->i_d.di_size = ip->i_size;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-       struct pipe_inode_info  *pipe,
-       struct file             *outfilp,
-       loff_t                  *ppos,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct inode            *inode = outfilp->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       if (outfilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       new_size = *ppos + count;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-
-       xfs_aio_write_isize_update(inode, ppos, ret);
-       xfs_aio_write_newsize_update(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF.  We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int                             /* error (positive) */
-xfs_zero_last_block(
-       xfs_inode_t     *ip,
-       xfs_fsize_t     offset,
-       xfs_fsize_t     isize)
-{
-       xfs_fileoff_t   last_fsb;
-       xfs_mount_t     *mp = ip->i_mount;
-       int             nimaps;
-       int             zero_offset;
-       int             zero_len;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-       if (zero_offset == 0) {
-               /*
-                * There are no extra bytes in the last block on disk to
-                * zero, so return.
-                */
-               return 0;
-       }
-
-       last_fsb = XFS_B_TO_FSBT(mp, isize);
-       nimaps = 1;
-       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
-                         &nimaps, NULL);
-       if (error) {
-               return error;
-       }
-       ASSERT(nimaps > 0);
-       /*
-        * If the block underlying isize is just a hole, then there
-        * is nothing to zero.
-        */
-       if (imap.br_startblock == HOLESTARTBLOCK) {
-               return 0;
-       }
-       /*
-        * Zero the part of the last block beyond the EOF, and write it
-        * out sync.  We need to drop the ilock while we do this so we
-        * don't deadlock when the buffer cache calls back to us.
-        */
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       zero_len = mp->m_sb.sb_blocksize - zero_offset;
-       if (isize + zero_len > offset)
-               zero_len = offset - isize;
-       error = xfs_iozero(ip, isize, zero_len);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF.  This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file.  This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated.  If fill is set,
- * then any holes in the range are filled and zeroed.  If not, the holes
- * are left alone as holes.
- */
-
-int                                    /* error (positive) */
-xfs_zero_eof(
-       xfs_inode_t     *ip,
-       xfs_off_t       offset,         /* starting I/O offset */
-       xfs_fsize_t     isize)          /* current inode size */
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       xfs_fileoff_t   start_zero_fsb;
-       xfs_fileoff_t   end_zero_fsb;
-       xfs_fileoff_t   zero_count_fsb;
-       xfs_fileoff_t   last_fsb;
-       xfs_fileoff_t   zero_off;
-       xfs_fsize_t     zero_len;
-       int             nimaps;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-       ASSERT(offset > isize);
-
-       /*
-        * First handle zeroing the block on which isize resides.
-        * We only zero a part of that block so it is handled specially.
-        */
-       error = xfs_zero_last_block(ip, offset, isize);
-       if (error) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-               return error;
-       }
-
-       /*
-        * Calculate the range between the new size and the old
-        * where blocks needing to be zeroed may exist.  To get the
-        * block where the last byte in the file currently resides,
-        * we need to subtract one from the size and truncate back
-        * to a block boundary.  We subtract 1 in case the size is
-        * exactly on a block boundary.
-        */
-       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-       if (last_fsb == end_zero_fsb) {
-               /*
-                * The size was only incremented on its last block.
-                * We took care of that above, so just return.
-                */
-               return 0;
-       }
-
-       ASSERT(start_zero_fsb <= end_zero_fsb);
-       while (start_zero_fsb <= end_zero_fsb) {
-               nimaps = 1;
-               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
-                                 0, NULL, 0, &imap, &nimaps, NULL);
-               if (error) {
-                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-                       return error;
-               }
-               ASSERT(nimaps > 0);
-
-               if (imap.br_state == XFS_EXT_UNWRITTEN ||
-                   imap.br_startblock == HOLESTARTBLOCK) {
-                       /*
-                        * This loop handles initializing pages that were
-                        * partially initialized by the code below this
-                        * loop. It basically zeroes the part of the page
-                        * that sits on a hole and sets the page as P_HOLE
-                        * and calls remapf if it is a mapped file.
-                        */
-                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-                       continue;
-               }
-
-               /*
-                * There are blocks we need to zero.
-                * Drop the inode lock while we're doing the I/O.
-                * We'll still have the iolock to protect us.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-               if ((zero_off + zero_len) > offset)
-                       zero_len = offset - zero_off;
-
-               error = xfs_iozero(ip, zero_off, zero_len);
-               if (error) {
-                       goto out_lock;
-               }
-
-               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-       }
-
-       return 0;
-
-out_lock:
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Common pre-write limit and setup checks.
- *
- * Returns with iolock held according to @iolock.
- */
-STATIC ssize_t
-xfs_file_aio_write_checks(
-       struct file             *file,
-       loff_t                  *pos,
-       size_t                  *count,
-       int                     *iolock)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     error = 0;
-
-       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
-       if (error) {
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
-               *iolock = 0;
-               return error;
-       }
-
-       new_size = *pos + *count;
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-
-       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-               file_update_time(file);
-
-       /*
-        * If the offset is beyond the size of the file, we need to zero any
-        * blocks that fall between the existing EOF and the start of this
-        * write.
-        */
-       if (*pos > ip->i_size)
-               error = -xfs_zero_eof(ip, *pos, ip->i_size);
-
-       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       if (error)
-               return error;
-
-       /*
-        * If we're writing the file then make sure to clear the setuid and
-        * setgid bits if the process is not being run by root.  This keeps
-        * people from modifying setuid and setgid binaries.
-        */
-       return file_remove_suid(file);
-
-}
-
-/*
- * xfs_file_dio_aio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the tricky to
- * follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. xfs_ioend_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
- */
-STATIC ssize_t
-xfs_file_dio_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 ret = 0;
-       size_t                  count = ocount;
-       int                     unaligned_io = 0;
-       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
-                                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-       *iolock = 0;
-       if ((pos & target->bt_smask) || (count & target->bt_smask))
-               return -XFS_ERROR(EINVAL);
-
-       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
-       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
-               *iolock = XFS_IOLOCK_EXCL;
-       else
-               *iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       if (mapping->nrpages) {
-               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
-               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
-                                                       FI_REMAPF_LOCKED);
-               if (ret)
-                       return ret;
-       }
-
-       /*
-        * If we are doing unaligned IO, wait for all other IO to drain,
-        * otherwise demote the lock if we had to flush cached pages
-        */
-       if (unaligned_io)
-               xfs_ioend_wait(ip);
-       else if (*iolock == XFS_IOLOCK_EXCL) {
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-               *iolock = XFS_IOLOCK_SHARED;
-       }
-
-       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-       /* No fallback to buffered IO on errors for XFS. */
-       ASSERT(ret < 0 || ret == count);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_buffered_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     enospc = 0;
-       size_t                  count = ocount;
-
-       *iolock = XFS_IOLOCK_EXCL;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       /* We can write back this queue in page reclaim */
-       current->backing_dev_info = mapping->backing_dev_info;
-
-write_retry:
-       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-                       pos, &iocb->ki_pos, count, ret);
-       /*
-        * if we just got an ENOSPC, flush the inode now we aren't holding any
-        * page locks and retry *once*
-        */
-       if (ret == -ENOSPC && !enospc) {
-               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
-               if (ret)
-                       return ret;
-               enospc = 1;
-               goto write_retry;
-       }
-       current->backing_dev_info = NULL;
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     iolock;
-       size_t                  ocount = 0;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-       if (ret)
-               return ret;
-
-       if (ocount == 0)
-               return 0;
-
-       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-       else
-               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-
-       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
-
-       if (ret <= 0)
-               goto out_unlock;
-
-       /* Handle various SYNC-type writes */
-       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-               loff_t end = pos + ret - 1;
-               int error;
-
-               xfs_rw_iunlock(ip, iolock);
-               error = xfs_file_fsync(file, pos, end,
-                                     (file->f_flags & __O_SYNC) ? 0 : 1);
-               xfs_rw_ilock(ip, iolock);
-               if (error)
-                       ret = error;
-       }
-
-out_unlock:
-       xfs_aio_write_newsize_update(ip);
-       xfs_rw_iunlock(ip, iolock);
-       return ret;
-}
-
-STATIC long
-xfs_file_fallocate(
-       struct file     *file,
-       int             mode,
-       loff_t          offset,
-       loff_t          len)
-{
-       struct inode    *inode = file->f_path.dentry->d_inode;
-       long            error;
-       loff_t          new_size = 0;
-       xfs_flock64_t   bf;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             cmd = XFS_IOC_RESVSP;
-       int             attr_flags = XFS_ATTR_NOLOCK;
-
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-               return -EOPNOTSUPP;
-
-       bf.l_whence = 0;
-       bf.l_start = offset;
-       bf.l_len = len;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       if (mode & FALLOC_FL_PUNCH_HOLE)
-               cmd = XFS_IOC_UNRESVSP;
-
-       /* check the new inode size is valid before allocating */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           offset + len > i_size_read(inode)) {
-               new_size = offset + len;
-               error = inode_newsize_ok(inode, new_size);
-               if (error)
-                       goto out_unlock;
-       }
-
-       if (file->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
-       if (error)
-               goto out_unlock;
-
-       /* Change file size if needed */
-       if (new_size) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = new_size;
-               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
-       }
-
-out_unlock:
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return error;
-}
-
-
-STATIC int
-xfs_file_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
-               return -EFBIG;
-       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
-               return -EIO;
-       return 0;
-}
-
-STATIC int
-xfs_dir_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       int             mode;
-       int             error;
-
-       error = xfs_file_open(inode, file);
-       if (error)
-               return error;
-
-       /*
-        * If there are any blocks, read-ahead block 0 as we're almost
-        * certain to have the next operation be a read there.
-        */
-       mode = xfs_ilock_map_shared(ip);
-       if (ip->i_d.di_nextents > 0)
-               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
-       xfs_iunlock(ip, mode);
-       return 0;
-}
-
-STATIC int
-xfs_file_release(
-       struct inode    *inode,
-       struct file     *filp)
-{
-       return -xfs_release(XFS_I(inode));
-}
-
-STATIC int
-xfs_file_readdir(
-       struct file     *filp,
-       void            *dirent,
-       filldir_t       filldir)
-{
-       struct inode    *inode = filp->f_path.dentry->d_inode;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             error;
-       size_t          bufsize;
-
-       /*
-        * The Linux API doesn't pass down the total size of the buffer
-        * we read into down to the filesystem.  With the filldir concept
-        * it's not needed for correct information, but the XFS dir2 leaf
-        * code wants an estimate of the buffer size to calculate it's
-        * readahead window and size the buffers used for mapping to
-        * physical blocks.
-        *
-        * Try to give it an estimate that's good enough, maybe at some
-        * point we can change the ->readdir prototype to include the
-        * buffer size.  For now we use the current glibc buffer size.
-        */
-       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
-
-       error = xfs_readdir(ip, dirent, bufsize,
-                               (xfs_off_t *)&filp->f_pos, filldir);
-       if (error)
-               return -error;
-       return 0;
-}
-
-STATIC int
-xfs_file_mmap(
-       struct file     *filp,
-       struct vm_area_struct *vma)
-{
-       vma->vm_ops = &xfs_file_vm_ops;
-       vma->vm_flags |= VM_CAN_NONLINEAR;
-
-       file_accessed(filp);
-       return 0;
-}
-
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-const struct file_operations xfs_file_operations = {
-       .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = xfs_file_aio_read,
-       .aio_write      = xfs_file_aio_write,
-       .splice_read    = xfs_file_splice_read,
-       .splice_write   = xfs_file_splice_write,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .mmap           = xfs_file_mmap,
-       .open           = xfs_file_open,
-       .release        = xfs_file_release,
-       .fsync          = xfs_file_fsync,
-       .fallocate      = xfs_file_fallocate,
-};
-
-const struct file_operations xfs_dir_file_operations = {
-       .open           = xfs_dir_open,
-       .read           = generic_read_dir,
-       .readdir        = xfs_file_readdir,
-       .llseek         = generic_file_llseek,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .fsync          = xfs_file_fsync,
-};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
-};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
deleted file mode 100644 (file)
index ed88ed1..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trace.h"
-
-/*
- * note: all filemap functions return negative error codes. These
- * need to be inverted before returning to the xfs core functions.
- */
-void
-xfs_tosspages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       /* can't toss partial tail pages, so mask them out */
-       last &= ~(PAGE_SIZE - 1);
-       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-}
-
-int
-xfs_flushinval_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-
-       trace_xfs_pagecache_inval(ip, first, last);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = filemap_write_and_wait_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (!ret)
-               truncate_inode_pages_range(mapping, first, last);
-       return -ret;
-}
-
-int
-xfs_flush_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       uint64_t        flags,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-       int             ret2;
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = -filemap_fdatawrite_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (flags & XBF_ASYNC)
-               return ret;
-       ret2 = xfs_wait_on_pages(ip, first, last);
-       if (!ret)
-               ret = ret2;
-       return ret;
-}
-
-int
-xfs_wait_on_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-
-       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
-               return -filemap_fdatawait_range(mapping, first,
-                                       last == -1 ? ip->i_size - 1 : last);
-       }
-       return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
deleted file mode 100644 (file)
index 76e81cf..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sysctl.h"
-
-/*
- * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
- * other XFS code uses these values.  Times are measured in centisecs (i.e.
- * 100ths of a second).
- */
-xfs_param_t xfs_params = {
-                         /*    MIN             DFLT            MAX     */
-       .sgid_inherit   = {     0,              0,              1       },
-       .symlink_mode   = {     0,              0,              1       },
-       .panic_mask     = {     0,              0,              255     },
-       .error_level    = {     0,              3,              11      },
-       .syncd_timer    = {     1*100,          30*100,         7200*100},
-       .stats_clear    = {     0,              0,              1       },
-       .inherit_sync   = {     0,              1,              1       },
-       .inherit_nodump = {     0,              1,              1       },
-       .inherit_noatim = {     0,              1,              1       },
-       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
-       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
-       .inherit_nosym  = {     0,              0,              1       },
-       .rotorstep      = {     1,              1,              255     },
-       .inherit_nodfrg = {     0,              1,              1       },
-       .fstrm_timer    = {     1,              30*100,         3600*100},
-};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
deleted file mode 100644 (file)
index f7ce7de..0000000
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
-#include "xfs_discard.h"
-#include "xfs_quota.h"
-#include "xfs_inode_item.h"
-#include "xfs_export.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
-
-/*
- * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
- * a file or fs handle.
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- *    returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- *    returns full handle for a FD opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- *    returns full handle for a path
- */
-int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       int                     hsize;
-       xfs_handle_t            handle;
-       struct inode            *inode;
-       struct file             *file = NULL;
-       struct path             path;
-       int                     error;
-       struct xfs_inode        *ip;
-
-       if (cmd == XFS_IOC_FD_TO_HANDLE) {
-               file = fget(hreq->fd);
-               if (!file)
-                       return -EBADF;
-               inode = file->f_path.dentry->d_inode;
-       } else {
-               error = user_lpath((const char __user *)hreq->path, &path);
-               if (error)
-                       return error;
-               inode = path.dentry->d_inode;
-       }
-       ip = XFS_I(inode);
-
-       /*
-        * We can only generate handles for inodes residing on a XFS filesystem,
-        * and only for regular files, directories or symbolic links.
-        */
-       error = -EINVAL;
-       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
-               goto out_put;
-
-       error = -EBADF;
-       if (!S_ISREG(inode->i_mode) &&
-           !S_ISDIR(inode->i_mode) &&
-           !S_ISLNK(inode->i_mode))
-               goto out_put;
-
-
-       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
-
-       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
-               /*
-                * This handle only contains an fsid, zero the rest.
-                */
-               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
-               hsize = sizeof(xfs_fsid_t);
-       } else {
-               int             lock_mode;
-
-               lock_mode = xfs_ilock_map_shared(ip);
-               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
-                                       sizeof(handle.ha_fid.fid_len);
-               handle.ha_fid.fid_pad = 0;
-               handle.ha_fid.fid_gen = ip->i_d.di_gen;
-               handle.ha_fid.fid_ino = ip->i_ino;
-               xfs_iunlock_map_shared(ip, lock_mode);
-
-               hsize = XFS_HSIZE(handle);
-       }
-
-       error = -EFAULT;
-       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
-               goto out_put;
-
-       error = 0;
-
- out_put:
-       if (cmd == XFS_IOC_FD_TO_HANDLE)
-               fput(file);
-       else
-               path_put(&path);
-       return error;
-}
-
-/*
- * No need to do permission checks on the various pathname components
- * as the handle operations are privileged.
- */
-STATIC int
-xfs_handle_acceptable(
-       void                    *context,
-       struct dentry           *dentry)
-{
-       return 1;
-}
-
-/*
- * Convert userspace handle data into a dentry.
- */
-struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen)
-{
-       xfs_handle_t            handle;
-       struct xfs_fid64        fid;
-
-       /*
-        * Only allow handle opens under a directory.
-        */
-       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
-               return ERR_PTR(-ENOTDIR);
-
-       if (hlen != sizeof(xfs_handle_t))
-               return ERR_PTR(-EINVAL);
-       if (copy_from_user(&handle, uhandle, hlen))
-               return ERR_PTR(-EFAULT);
-       if (handle.ha_fid.fid_len !=
-           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
-               return ERR_PTR(-EINVAL);
-
-       memset(&fid, 0, sizeof(struct fid));
-       fid.ino = handle.ha_fid.fid_ino;
-       fid.gen = handle.ha_fid.fid_gen;
-
-       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
-                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
-                       xfs_handle_acceptable, NULL);
-}
-
-STATIC struct dentry *
-xfs_handlereq_to_dentry(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
-}
-
-int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       const struct cred       *cred = current_cred();
-       int                     error;
-       int                     fd;
-       int                     permflag;
-       struct file             *filp;
-       struct inode            *inode;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       inode = dentry->d_inode;
-
-       /* Restrict xfs_open_by_handle to directories & regular files. */
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-#if BITS_PER_LONG != 32
-       hreq->oflags |= O_LARGEFILE;
-#endif
-
-       /* Put open permission in namei format. */
-       permflag = hreq->oflags;
-       if ((permflag+1) & O_ACCMODE)
-               permflag++;
-       if (permflag & O_TRUNC)
-               permflag |= 2;
-
-       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
-           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
-               error = -XFS_ERROR(EACCES);
-               goto out_dput;
-       }
-
-       /* Can't write directories. */
-       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
-               error = -XFS_ERROR(EISDIR);
-               goto out_dput;
-       }
-
-       fd = get_unused_fd();
-       if (fd < 0) {
-               error = fd;
-               goto out_dput;
-       }
-
-       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
-                          hreq->oflags, cred);
-       if (IS_ERR(filp)) {
-               put_unused_fd(fd);
-               return PTR_ERR(filp);
-       }
-
-       if (S_ISREG(inode->i_mode)) {
-               filp->f_flags |= O_NOATIME;
-               filp->f_mode |= FMODE_NOCMTIME;
-       }
-
-       fd_install(fd, filp);
-       return fd;
-
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-       char __user             *buffer,
-       int                     buflen,
-       const char              *link)
-{
-        int len;
-
-       len = PTR_ERR(link);
-       if (IS_ERR(link))
-               goto out;
-
-       len = strlen(link);
-       if (len > (unsigned) buflen)
-               len = buflen;
-       if (copy_to_user(buffer, link, len))
-               len = -EFAULT;
- out:
-       return len;
-}
-
-
-int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       struct dentry           *dentry;
-       __u32                   olen;
-       void                    *link;
-       int                     error;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       /* Restrict this handle operation to symlinks only. */
-       if (!S_ISLNK(dentry->d_inode->i_mode)) {
-               error = -XFS_ERROR(EINVAL);
-               goto out_dput;
-       }
-
-       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out_dput;
-       }
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link) {
-               error = -XFS_ERROR(ENOMEM);
-               goto out_dput;
-       }
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (error)
-               goto out_kfree;
-       error = do_readlink(hreq->ohandle, olen, link);
-       if (error)
-               goto out_kfree;
-
- out_kfree:
-       kfree(link);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
- out:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error = -ENOMEM;
-       attrlist_cursor_kern_t  *cursor;
-       xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags, only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (*len > XATTR_SIZE_MAX)
-               return EINVAL;
-       kbuf = kmalloc(*len, GFP_KERNEL);
-       if (!kbuf)
-               return ENOMEM;
-
-       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(ubuf, kbuf, *len))
-               error = EFAULT;
-
- out_kfree:
-       kfree(kbuf);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       if (len > XATTR_SIZE_MAX)
-               return EINVAL;
-
-       kbuf = memdup_user(ubuf, len);
-       if (IS_ERR(kbuf))
-               return PTR_ERR(kbuf);
-
-       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
-       return error;
-}
-
-int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags)
-{
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       return xfs_attr_remove(XFS_I(inode), name, flags);
-}
-
-STATIC int
-xfs_attrmulti_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       xfs_attr_multiop_t      *ops;
-       xfs_fsop_attrmulti_handlereq_t am_hreq;
-       struct dentry           *dentry;
-       unsigned int            i, size;
-       unsigned char           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(am_hreq.ops, size);
-       if (IS_ERR(ops)) {
-               error = PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               ops[i].am_attrname, MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = -ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, &ops[i].am_length,
-                                       ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, ops[i].am_length,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(am_hreq.ops, ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf)
-{
-       int                     attr_flags = 0;
-       int                     error;
-
-       /*
-        * Only allow the sys admin to reserve space unless
-        * unwritten extents are enabled.
-        */
-       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
-           !capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
-               return -XFS_ERROR(EPERM);
-
-       if (!(filp->f_mode & FMODE_WRITE))
-               return -XFS_ERROR(EBADF);
-
-       if (!S_ISREG(inode->i_mode))
-               return -XFS_ERROR(EINVAL);
-
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               attr_flags |= XFS_ATTR_NONBLOCK;
-
-       if (filp->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       if (ioflags & IO_INVIS)
-               attr_flags |= XFS_ATTR_DMI;
-
-       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
-       return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
-       xfs_mount_t             *mp,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /* done = 1 if there are more stats to get and if bulkstat */
-       /* should be called again (unused here, but used in dmapi) */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS)
-               error = xfs_inumbers(mp, &inlast, &count,
-                                       bulkreq.ubuffer, xfs_inumbers_fmt);
-       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-               error = xfs_bulkstat_single(mp, &inlast,
-                                               bulkreq.ubuffer, &done);
-       else    /* XFS_IOC_FSBULKSTAT */
-               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
-                                    &done);
-
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-
-       /*
-        * Caller should have passed an argument of type
-        * xfs_fsop_geom_v1_t.  This is a proper subset of the
-        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
-        */
-       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 4);
-       if (error)
-               return -error;
-
-       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
-       unsigned int    flags,
-       unsigned int    start)
-{
-       unsigned int    xflags = start;
-
-       if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
-       else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
-       if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
-       else
-               xflags &= ~XFS_XFLAG_APPEND;
-       if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
-       else
-               xflags &= ~XFS_XFLAG_SYNC;
-       if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
-       else
-               xflags &= ~XFS_XFLAG_NOATIME;
-       if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
-       else
-               xflags &= ~XFS_XFLAG_NODUMP;
-
-       return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
-       __uint16_t      di_flags)
-{
-       unsigned int    flags = 0;
-
-       if (di_flags & XFS_DIFLAG_IMMUTABLE)
-               flags |= FS_IMMUTABLE_FL;
-       if (di_flags & XFS_DIFLAG_APPEND)
-               flags |= FS_APPEND_FL;
-       if (di_flags & XFS_DIFLAG_SYNC)
-               flags |= FS_SYNC_FL;
-       if (di_flags & XFS_DIFLAG_NOATIME)
-               flags |= FS_NOATIME_FL;
-       if (di_flags & XFS_DIFLAG_NODUMP)
-               flags |= FS_NODUMP_FL;
-       return flags;
-}
-
-STATIC int
-xfs_ioc_fsgetxattr(
-       xfs_inode_t             *ip,
-       int                     attr,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-
-       memset(&fa, 0, sizeof(struct fsxattr));
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       fa.fsx_xflags = xfs_ip2xflags(ip);
-       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-       fa.fsx_projid = xfs_get_projid(ip);
-
-       if (attr) {
-               if (ip->i_afp) {
-                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
-                               fa.fsx_nextents = ip->i_afp->if_bytes /
-                                                       sizeof(xfs_bmbt_rec_t);
-                       else
-                               fa.fsx_nextents = ip->i_d.di_anextents;
-               } else
-                       fa.fsx_nextents = 0;
-       } else {
-               if (ip->i_df.if_flags & XFS_IFEXTENTS)
-                       fa.fsx_nextents = ip->i_df.if_bytes /
-                                               sizeof(xfs_bmbt_rec_t);
-               else
-                       fa.fsx_nextents = ip->i_d.di_nextents;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (copy_to_user(arg, &fa, sizeof(fa)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC void
-xfs_set_diflags(
-       struct xfs_inode        *ip,
-       unsigned int            xflags)
-{
-       unsigned int            di_flags;
-
-       /* can't set PREALLOC this way, just preserve it */
-       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
-               di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_PROJINHERIT)
-               di_flags |= XFS_DIFLAG_PROJINHERIT;
-       if (xflags & XFS_XFLAG_NODEFRAG)
-               di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
-               di_flags |= XFS_DIFLAG_FILESTREAM;
-       if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
-                       di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
-                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
-                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-       } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
-                       di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
-                       di_flags |= XFS_DIFLAG_EXTSIZE;
-       }
-
-       ip->i_d.di_flags = di_flags;
-}
-
-STATIC void
-xfs_diflags_to_linux(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-       unsigned int            xflags = xfs_ip2xflags(ip);
-
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-#define FSX_PROJID     1
-#define FSX_EXTSIZE    2
-#define FSX_XFLAGS     4
-#define FSX_NONBLOCK   8
-
-STATIC int
-xfs_ioctl_setattr(
-       xfs_inode_t             *ip,
-       struct fsxattr          *fa,
-       int                     mask)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       unsigned int            lock_flags = 0;
-       struct xfs_dquot        *udqp = NULL;
-       struct xfs_dquot        *gdqp = NULL;
-       struct xfs_dquot        *olddquot = NULL;
-       int                     code;
-
-       trace_xfs_ioctl_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       /*
-        * Disallow 32bit project ids when projid32bit feature is not enabled.
-        */
-       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
-                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
-               return XFS_ERROR(EINVAL);
-
-       /*
-        * If disk quotas is on, we make sure that the dquots do exist on disk,
-        * before we start any other transactions. Trying to do this later
-        * is messy. We don't care to take a readlock to look at the ids
-        * in inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
-                                        ip->i_d.di_gid, fa->fsx_projid,
-                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
-               if (code)
-                       return code;
-       }
-
-       /*
-        * For the other attributes, we acquire the inode lock and
-        * first do an error checking pass.
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (code)
-               goto error_return;
-
-       lock_flags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * CAP_FOWNER overrides the following restrictions:
-        *
-        * The user ID of the calling process must be equal
-        * to the file owner ID, except in cases where the
-        * CAP_FSETID capability is applicable.
-        */
-       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
-               code = XFS_ERROR(EPERM);
-               goto error_return;
-       }
-
-       /*
-        * Do a quota reservation only if projid is actually going to change.
-        */
-       if (mask & FSX_PROJID) {
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   XFS_IS_PQUOTA_ON(mp) &&
-                   xfs_get_projid(ip) != fa->fsx_projid) {
-                       ASSERT(tp);
-                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (code)       /* out of quota */
-                               goto error_return;
-               }
-       }
-
-       if (mask & FSX_EXTSIZE) {
-               /*
-                * Can't change extent size if any extents are allocated.
-                */
-               if (ip->i_d.di_nextents &&
-                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                    fa->fsx_extsize)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * Extent size must be a multiple of the appropriate block
-                * size, if set at all. It must also be smaller than the
-                * maximum extent size supported by the filesystem.
-                *
-                * Also, for non-realtime files, limit the extent size hint to
-                * half the size of the AGs in the filesystem so alignment
-                * doesn't result in extents larger than an AG.
-                */
-               if (fa->fsx_extsize != 0) {
-                       xfs_extlen_t    size;
-                       xfs_fsblock_t   extsize_fsb;
-
-                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
-                       if (extsize_fsb > MAXEXTLEN) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-
-                       if (XFS_IS_REALTIME_INODE(ip) ||
-                           ((mask & FSX_XFLAGS) &&
-                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
-                               size = mp->m_sb.sb_rextsize <<
-                                      mp->m_sb.sb_blocklog;
-                       } else {
-                               size = mp->m_sb.sb_blocksize;
-                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
-                                       code = XFS_ERROR(EINVAL);
-                                       goto error_return;
-                               }
-                       }
-
-                       if (fa->fsx_extsize % size) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-       }
-
-
-       if (mask & FSX_XFLAGS) {
-               /*
-                * Can't change realtime flag if any extents are allocated.
-                */
-               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-                   (XFS_IS_REALTIME_INODE(ip)) !=
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * If realtime flag is set then must have realtime data.
-                */
-               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       if ((mp->m_sb.sb_rblocks == 0) ||
-                           (mp->m_sb.sb_rextsize == 0) ||
-                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-
-               /*
-                * Can't modify an immutable/append-only file unless
-                * we have appropriate permission.
-                */
-               if ((ip->i_d.di_flags &
-                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-                    (fa->fsx_xflags &
-                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-                   !capable(CAP_LINUX_IMMUTABLE)) {
-                       code = XFS_ERROR(EPERM);
-                       goto error_return;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & FSX_PROJID) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (xfs_get_projid(ip) != fa->fsx_projid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
-                               olddquot = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       xfs_set_projid(ip, fa->fsx_projid);
-
-                       /*
-                        * We may have to rev the inode as well as
-                        * the superblock version number since projids didn't
-                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-                        */
-                       if (ip->i_d.di_version == 1)
-                               xfs_bump_ino_vers2(tp, ip);
-               }
-
-       }
-
-       if (mask & FSX_EXTSIZE)
-               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
-       if (mask & FSX_XFLAGS) {
-               xfs_set_diflags(ip, fa->fsx_xflags);
-               xfs_diflags_to_linux(ip);
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       /*
-        * If this is a synchronous mount, make sure that the
-        * transaction goes to disk before returning to the user.
-        * This is slightly sub-optimal in that truncates require
-        * two sync transactions instead of one for wsync filesystems.
-        * One for the truncate and one for the timestamps since we
-        * don't want to change the timestamps unless we're sure the
-        * truncate worked.  Truncates are less than 1% of the laddis
-        * mix so this probably isn't worth the trouble to optimize.
-        */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       code = xfs_trans_commit(tp, 0);
-       xfs_iunlock(ip, lock_flags);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       return code;
-
- error_return:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       xfs_trans_cancel(tp, 0);
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return code;
-}
-
-STATIC int
-xfs_ioc_fssetxattr(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            mask;
-
-       if (copy_from_user(&fa, arg, sizeof(fa)))
-               return -EFAULT;
-
-       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_ioc_getxflags(
-       xfs_inode_t             *ip,
-       void                    __user *arg)
-{
-       unsigned int            flags;
-
-       flags = xfs_di2lxflags(ip->i_d.di_flags);
-       if (copy_to_user(arg, &flags, sizeof(flags)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC int
-xfs_ioc_setxflags(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            flags;
-       unsigned int            mask;
-
-       if (copy_from_user(&flags, arg, sizeof(flags)))
-               return -EFAULT;
-
-       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-                     FS_NOATIME_FL | FS_NODUMP_FL | \
-                     FS_SYNC_FL))
-               return -EOPNOTSUPP;
-
-       mask = FSX_XFLAGS;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmap __user   *base = *ap;
-
-       /* copy only getbmap portion (not getbmapx) */
-       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmap);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmap(
-       struct xfs_inode        *ip,
-       int                     ioflags,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-       if (ioflags & IO_INVIS)
-               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-                           (struct getbmap *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header - only size of getbmap */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmapx __user  *base = *ap;
-
-       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmapx);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-       struct xfs_inode        *ip,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(bmx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       if (bmx.bmv_iflags & (~BMV_IF_VALID))
-               return -XFS_ERROR(EINVAL);
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-                           (struct getbmapx *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       return 0;
-}
-
-/*
- * Note: some of the ioctl's return positive numbers as a
- * byte count indicating success, such as readlink_by_handle.
- * So we don't "sign flip" like most other routines.  This means
- * true errors need to be returned as a negative value.
- */
-long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_ioctl(ip);
-
-       switch (cmd) {
-       case FITRIM:
-               return xfs_ioc_trim(mp, arg);
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_ZERO_RANGE: {
-               xfs_flock64_t           bf;
-
-               if (copy_from_user(&bf, arg, sizeof(bf)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_DIOINFO: {
-               struct dioattr  da;
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-               if (copy_to_user(arg, &da, sizeof(da)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_FSBULKSTAT_SINGLE:
-       case XFS_IOC_FSBULKSTAT:
-       case XFS_IOC_FSINUMBERS:
-               return xfs_ioc_bulkstat(mp, cmd, arg);
-
-       case XFS_IOC_FSGEOMETRY_V1:
-               return xfs_ioc_fsgeometry_v1(mp, arg);
-
-       case XFS_IOC_FSGEOMETRY:
-               return xfs_ioc_fsgeometry(mp, arg);
-
-       case XFS_IOC_GETVERSION:
-               return put_user(inode->i_generation, (int __user *)arg);
-
-       case XFS_IOC_FSGETXATTR:
-               return xfs_ioc_fsgetxattr(ip, 0, arg);
-       case XFS_IOC_FSGETXATTRA:
-               return xfs_ioc_fsgetxattr(ip, 1, arg);
-       case XFS_IOC_FSSETXATTR:
-               return xfs_ioc_fssetxattr(ip, filp, arg);
-       case XFS_IOC_GETXFLAGS:
-               return xfs_ioc_getxflags(ip, arg);
-       case XFS_IOC_SETXFLAGS:
-               return xfs_ioc_setxflags(ip, filp, arg);
-
-       case XFS_IOC_FSSETDM: {
-               struct fsdmidata        dmi;
-
-               if (copy_from_user(&dmi, arg, sizeof(dmi)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-                               dmi.fsd_dmstate);
-               return -error;
-       }
-
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-       case XFS_IOC_GETBMAPX:
-               return xfs_ioc_getbmapx(ip, arg);
-
-       case XFS_IOC_FD_TO_HANDLE:
-       case XFS_IOC_PATH_TO_HANDLE:
-       case XFS_IOC_PATH_TO_FSHANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(hreq)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_FSSETDM_BY_HANDLE:
-               return xfs_fssetdm_by_handle(filp, arg);
-
-       case XFS_IOC_READLINK_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE:
-               return xfs_attrlist_by_handle(filp, arg);
-
-       case XFS_IOC_ATTRMULTI_BY_HANDLE:
-               return xfs_attrmulti_by_handle(filp, arg);
-
-       case XFS_IOC_SWAPEXT: {
-               struct xfs_swapext      sxp;
-
-               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-
-       case XFS_IOC_FSCOUNTS: {
-               xfs_fsop_counts_t out;
-
-               error = xfs_fs_counts(mp, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_SET_RESBLKS: {
-               xfs_fsop_resblks_t inout;
-               __uint64_t         in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (mp->m_flags & XFS_MOUNT_RDONLY)
-                       return -XFS_ERROR(EROFS);
-
-               if (copy_from_user(&inout, arg, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-
-               /* input parameter is passed in resblks field of structure */
-               in = inout.resblks;
-               error = xfs_reserve_blocks(mp, &in, &inout);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &inout, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_GET_RESBLKS: {
-               xfs_fsop_resblks_t out;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_reserve_blocks(mp, NULL, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-
-               return 0;
-       }
-
-       case XFS_IOC_FSGROWFSDATA: {
-               xfs_growfs_data_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSLOG: {
-               xfs_growfs_log_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_log(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSRT: {
-               xfs_growfs_rt_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_GOINGDOWN: {
-               __uint32_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (get_user(in, (__uint32_t __user *)arg))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_fs_goingdown(mp, in);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_INJECTION: {
-               xfs_error_injection_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_errortag_add(in.errtag, mp);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_CLEARALL:
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_errortag_clearall(mp, 1);
-               return -error;
-
-       default:
-               return -ENOTTY;
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
deleted file mode 100644 (file)
index d56173b..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL_H__
-#define __XFS_IOCTL_H__
-
-extern int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf);
-
-extern int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags);
-
-extern struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen);
-
-extern long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p);
-
-extern long
-xfs_file_compat_ioctl(
-       struct file             *file,
-       unsigned int            cmd,
-       unsigned long           arg);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
deleted file mode 100644 (file)
index 54e623b..0000000
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
-#include <linux/mount.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_vnode.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
-#include "xfs_fsops.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_attr.h"
-#include "xfs_ioctl.h"
-#include "xfs_ioctl32.h"
-#include "xfs_trace.h"
-
-#define  _NATIVE_IOC(cmd, type) \
-         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#ifdef BROKEN_X86_ALIGNMENT
-STATIC int
-xfs_compat_flock64_copyin(
-       xfs_flock64_t           *bf,
-       compat_xfs_flock64_t    __user *arg32)
-{
-       if (get_user(bf->l_type,        &arg32->l_type) ||
-           get_user(bf->l_whence,      &arg32->l_whence) ||
-           get_user(bf->l_start,       &arg32->l_start) ||
-           get_user(bf->l_len,         &arg32->l_len) ||
-           get_user(bf->l_sysid,       &arg32->l_sysid) ||
-           get_user(bf->l_pid,         &arg32->l_pid) ||
-           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_ioc_fsgeometry_v1(
-       struct xfs_mount          *mp,
-       compat_xfs_fsop_geom_v1_t __user *arg32)
-{
-       xfs_fsop_geom_t           fsgeo;
-       int                       error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-       /* The 32-bit variant simply has some padding at the end */
-       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_data_copyin(
-       struct xfs_growfs_data   *in,
-       compat_xfs_growfs_data_t __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->imaxpct,   &arg32->imaxpct))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_rt_copyin(
-       struct xfs_growfs_rt     *in,
-       compat_xfs_growfs_rt_t  __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->extsize,   &arg32->extsize))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_inumbers_fmt_compat(
-       void                    __user *ubuffer,
-       const xfs_inogrp_t      *buffer,
-       long                    count,
-       long                    *written)
-{
-       compat_xfs_inogrp_t     __user *p32 = ubuffer;
-       long                    i;
-
-       for (i = 0; i < count; i++) {
-               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                       return -XFS_ERROR(EFAULT);
-       }
-       *written = count * sizeof(*p32);
-       return 0;
-}
-
-#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#endif /* BROKEN_X86_ALIGNMENT */
-
-STATIC int
-xfs_ioctl32_bstime_copyin(
-       xfs_bstime_t            *bstime,
-       compat_xfs_bstime_t     __user *bstime32)
-{
-       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
-
-       if (get_user(sec32,             &bstime32->tv_sec)      ||
-           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       bstime->tv_sec = sec32;
-       return 0;
-}
-
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
-STATIC int
-xfs_ioctl32_bstat_copyin(
-       xfs_bstat_t             *bstat,
-       compat_xfs_bstat_t      __user *bstat32)
-{
-       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
-           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
-           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
-           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
-           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
-           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
-           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
-           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
-           get_user(bstat->bs_blocks,  &bstat32->bs_size)      ||
-           get_user(bstat->bs_xflags,  &bstat32->bs_size)      ||
-           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
-           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
-           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
-           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
-           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
-           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
-           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
-           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* XFS_IOC_FSBULKSTAT and friends */
-
-STATIC int
-xfs_bstime_store_compat(
-       compat_xfs_bstime_t     __user *p32,
-       const xfs_bstime_t      *p)
-{
-       __s32                   sec32;
-
-       sec32 = p->tv_sec;
-       if (put_user(sec32, &p32->tv_sec) ||
-           put_user(p->tv_nsec, &p32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* Return 0 on success or positive error (to xfs_bulkstat()) */
-STATIC int
-xfs_bulkstat_one_fmt_compat(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
-{
-       compat_xfs_bstat_t      __user *p32 = ubuffer;
-
-       if (ubsize < sizeof(*p32))
-               return XFS_ERROR(ENOMEM);
-
-       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
-           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
-           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
-           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
-           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
-           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
-           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
-           put_user(buffer->bs_size,     &p32->bs_size)        ||
-           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
-           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
-           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
-           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
-           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
-           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
-           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
-           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
-           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
-           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
-           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
-           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
-           put_user(buffer->bs_aextents, &p32->bs_aextents))
-               return XFS_ERROR(EFAULT);
-       if (ubused)
-               *ubused = sizeof(*p32);
-       return 0;
-}
-
-STATIC int
-xfs_bulkstat_one_compat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
-{
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt_compat,
-                                   ubused, stat);
-}
-
-/* copied from xfs_ioctl.c */
-STATIC int
-xfs_compat_ioc_bulkstat(
-       xfs_mount_t               *mp,
-       unsigned int              cmd,
-       compat_xfs_fsop_bulkreq_t __user *p32)
-{
-       u32                     addr;
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /* done = 1 if there are more stats to get and if bulkstat */
-       /* should be called again (unused here, but used in dmapi) */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (get_user(addr, &p32->lastip))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.lastip = compat_ptr(addr);
-       if (get_user(bulkreq.icount, &p32->icount) ||
-           get_user(addr, &p32->ubuffer))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ubuffer = compat_ptr(addr);
-       if (get_user(addr, &p32->ocount))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ocount = compat_ptr(addr);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS_32) {
-               error = xfs_inumbers(mp, &inlast, &count,
-                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-               int res;
-
-               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
-                               sizeof(compat_xfs_bstat_t), 0, &res);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-               error = xfs_bulkstat(mp, &inlast, &count,
-                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
-                       bulkreq.ubuffer, &done);
-       } else
-               error = XFS_ERROR(EINVAL);
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_compat_handlereq_copyin(
-       xfs_fsop_handlereq_t            *hreq,
-       compat_xfs_fsop_handlereq_t     __user *arg32)
-{
-       compat_xfs_fsop_handlereq_t     hreq32;
-
-       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       hreq->fd = hreq32.fd;
-       hreq->path = compat_ptr(hreq32.path);
-       hreq->oflags = hreq32.oflags;
-       hreq->ihandle = compat_ptr(hreq32.ihandle);
-       hreq->ihandlen = hreq32.ihandlen;
-       hreq->ohandle = compat_ptr(hreq32.ohandle);
-       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
-
-       return 0;
-}
-
-STATIC struct dentry *
-xfs_compat_handlereq_to_dentry(
-       struct file             *parfilp,
-       compat_xfs_fsop_handlereq_t *hreq)
-{
-       return xfs_handle_to_dentry(parfilp,
-                       compat_ptr(hreq->ihandle), hreq->ihandlen);
-}
-
-STATIC int
-xfs_compat_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       attrlist_cursor_kern_t  *cursor;
-       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags, only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = -ENOMEM;
-       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_compat_attrmulti_by_handle(
-       struct file                             *parfilp,
-       void                                    __user *arg)
-{
-       int                                     error;
-       compat_xfs_attr_multiop_t               *ops;
-       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
-       struct dentry                           *dentry;
-       unsigned int                            i, size;
-       unsigned char                           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(compat_ptr(am_hreq.ops), size);
-       if (IS_ERR(ops)) {
-               error = PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               compat_ptr(ops[i].am_attrname),
-                               MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = -ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       &ops[i].am_length, ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       ops[i].am_length, ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-STATIC int
-xfs_compat_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       compat_xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg,
-                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
-out:
-       dput(dentry);
-       return error;
-}
-
-long
-xfs_file_compat_ioctl(
-       struct file             *filp,
-       unsigned                cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_compat_ioctl(ip);
-
-       switch (cmd) {
-       /* No size or alignment issues on any arch */
-       case XFS_IOC_DIOINFO:
-       case XFS_IOC_FSGEOMETRY:
-       case XFS_IOC_FSGETXATTR:
-       case XFS_IOC_FSSETXATTR:
-       case XFS_IOC_FSGETXATTRA:
-       case XFS_IOC_FSSETDM:
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-       case XFS_IOC_GETBMAPX:
-       case XFS_IOC_FSCOUNTS:
-       case XFS_IOC_SET_RESBLKS:
-       case XFS_IOC_GET_RESBLKS:
-       case XFS_IOC_FSGROWFSLOG:
-       case XFS_IOC_GOINGDOWN:
-       case XFS_IOC_ERROR_INJECTION:
-       case XFS_IOC_ERROR_CLEARALL:
-               return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
-       /* These are handled fine if no alignment issues */
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_FSGEOMETRY_V1:
-       case XFS_IOC_FSGROWFSDATA:
-       case XFS_IOC_FSGROWFSRT:
-       case XFS_IOC_ZERO_RANGE:
-               return xfs_file_ioctl(filp, cmd, p);
-#else
-       case XFS_IOC_ALLOCSP_32:
-       case XFS_IOC_FREESP_32:
-       case XFS_IOC_ALLOCSP64_32:
-       case XFS_IOC_FREESP64_32:
-       case XFS_IOC_RESVSP_32:
-       case XFS_IOC_UNRESVSP_32:
-       case XFS_IOC_RESVSP64_32:
-       case XFS_IOC_UNRESVSP64_32:
-       case XFS_IOC_ZERO_RANGE_32: {
-               struct xfs_flock64      bf;
-
-               if (xfs_compat_flock64_copyin(&bf, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_FSGEOMETRY_V1_32:
-               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
-       case XFS_IOC_FSGROWFSDATA_32: {
-               struct xfs_growfs_data  in;
-
-               if (xfs_compat_growfs_data_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-       case XFS_IOC_FSGROWFSRT_32: {
-               struct xfs_growfs_rt    in;
-
-               if (xfs_compat_growfs_rt_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-#endif
-       /* long changes size, but xfs only copiese out 32 bits */
-       case XFS_IOC_GETXFLAGS_32:
-       case XFS_IOC_SETXFLAGS_32:
-       case XFS_IOC_GETVERSION_32:
-               cmd = _NATIVE_IOC(cmd, long);
-               return xfs_file_ioctl(filp, cmd, p);
-       case XFS_IOC_SWAPEXT_32: {
-               struct xfs_swapext        sxp;
-               struct compat_xfs_swapext __user *sxu = arg;
-
-               /* Bulk copy in up to the sx_stat field, then copy bstat */
-               if (copy_from_user(&sxp, sxu,
-                                  offsetof(struct xfs_swapext, sx_stat)) ||
-                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-       case XFS_IOC_FSBULKSTAT_32:
-       case XFS_IOC_FSBULKSTAT_SINGLE_32:
-       case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
-       case XFS_IOC_FD_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_FSHANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_READLINK_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
-               return xfs_compat_attrlist_by_handle(filp, arg);
-       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
-               return xfs_compat_attrmulti_by_handle(filp, arg);
-       case XFS_IOC_FSSETDM_BY_HANDLE_32:
-               return xfs_compat_fssetdm_by_handle(filp, arg);
-       default:
-               return -XFS_ERROR(ENOIOCTLCMD);
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
deleted file mode 100644 (file)
index 80f4060..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL32_H__
-#define __XFS_IOCTL32_H__
-
-#include <linux/compat.h>
-
-/*
- * on 32-bit arches, ioctl argument structures may have different sizes
- * and/or alignment.  We define compat structures which match the
- * 32-bit sizes/alignments here, and their associated ioctl numbers.
- *
- * xfs_ioctl32.c contains routines to copy these structures in and out.
- */
-
-/* stock kernel-level ioctls we support */
-#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
-#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
-#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
-
-/*
- * On intel, even if sizes match, alignment and/or padding may differ.
- */
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define __compat_packed __attribute__((packed))
-#else
-#define __compat_packed
-#endif
-
-typedef struct compat_xfs_bstime {
-       compat_time_t   tv_sec;         /* seconds              */
-       __s32           tv_nsec;        /* and nanoseconds      */
-} compat_xfs_bstime_t;
-
-typedef struct compat_xfs_bstat {
-       __u64           bs_ino;         /* inode number                 */
-       __u16           bs_mode;        /* type and mode                */
-       __u16           bs_nlink;       /* number of links              */
-       __u32           bs_uid;         /* user id                      */
-       __u32           bs_gid;         /* group id                     */
-       __u32           bs_rdev;        /* device value                 */
-       __s32           bs_blksize;     /* block size                   */
-       __s64           bs_size;        /* file size                    */
-       compat_xfs_bstime_t bs_atime;   /* access time                  */
-       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
-       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
-       int64_t         bs_blocks;      /* number of blocks             */
-       __u32           bs_xflags;      /* extended flags               */
-       __s32           bs_extsize;     /* extent size                  */
-       __s32           bs_extents;     /* number of extents            */
-       __u32           bs_gen;         /* generation count             */
-       __u16           bs_projid_lo;   /* lower part of project id     */
-#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
-       __u16           bs_projid_hi;   /* high part of project id      */
-       unsigned char   bs_pad[12];     /* pad space, unused            */
-       __u32           bs_dmevmask;    /* DMIG event mask              */
-       __u16           bs_dmstate;     /* DMIG state info              */
-       __u16           bs_aextents;    /* attribute number of extents  */
-} __compat_packed compat_xfs_bstat_t;
-
-typedef struct compat_xfs_fsop_bulkreq {
-       compat_uptr_t   lastip;         /* last inode # pointer         */
-       __s32           icount;         /* count of entries in buffer   */
-       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
-       compat_uptr_t   ocount;         /* output count pointer         */
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
-       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
-       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
-       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
-
-typedef struct compat_xfs_fsop_handlereq {
-       __u32           fd;             /* fd for FD_TO_HANDLE          */
-       compat_uptr_t   path;           /* user pathname                */
-       __u32           oflags;         /* open flags                   */
-       compat_uptr_t   ihandle;        /* user supplied handle         */
-       __u32           ihandlen;       /* user supplied length         */
-       compat_uptr_t   ohandle;        /* user buffer for handle       */
-       compat_uptr_t   ohandlen;       /* user buffer length           */
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
-       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
-       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
-       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
-       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
-       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
-       __int64_t               sx_version;     /* version */
-       __int64_t               sx_fdtarget;    /* fd of target file */
-       __int64_t               sx_fdtmp;       /* fd of tmp file */
-       xfs_off_t               sx_offset;      /* offset into file */
-       xfs_off_t               sx_length;      /* leng from offset */
-       char                    sx_pad[16];     /* pad space, unused */
-       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
-
-#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
-
-typedef struct compat_xfs_fsop_attrlist_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
-       __u32                           flags;  /* which namespace to use */
-       __u32                           buflen; /* length of buffer supplied */
-       compat_uptr_t                   buffer; /* returned names */
-} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
-
-/* Note: actually this is read/write */
-#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
-       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
-
-/* am_opcodes defined in xfs_fs.h */
-typedef struct compat_xfs_attr_multiop {
-       __u32           am_opcode;
-       __s32           am_error;
-       compat_uptr_t   am_attrname;
-       compat_uptr_t   am_attrvalue;
-       __u32           am_length;
-       __u32           am_flags;
-} compat_xfs_attr_multiop_t;
-
-typedef struct compat_xfs_fsop_attrmulti_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       __u32                           opcount;/* count of following multiop */
-       /* ptr to compat_xfs_attr_multiop */
-       compat_uptr_t                   ops; /* attr_multi data */
-} compat_xfs_fsop_attrmulti_handlereq_t;
-
-#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
-       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-
-typedef struct compat_xfs_fsop_setdm_handlereq {
-       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
-       /* ptr to struct fsdmidata */
-       compat_uptr_t                   data;   /* DMAPI data   */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
-       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
-#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
-       __s16           l_type;
-       __s16           l_whence;
-       __s64           l_start __attribute__((packed));
-                       /* len == 0 means until end of file */
-       __s64           l_len __attribute__((packed));
-       __s32           l_sysid;
-       __u32           l_pid;
-       __s32           l_pad[4];       /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
-
-typedef struct compat_xfs_fsop_geom_v1 {
-       __u32           blocksize;      /* filesystem (data) block size */
-       __u32           rtextsize;      /* realtime extent size         */
-       __u32           agblocks;       /* fsblocks in an AG            */
-       __u32           agcount;        /* number of allocation groups  */
-       __u32           logblocks;      /* fsblocks in the log          */
-       __u32           sectsize;       /* (data) sector size, bytes    */
-       __u32           inodesize;      /* inode size in bytes          */
-       __u32           imaxpct;        /* max allowed inode space(%)   */
-       __u64           datablocks;     /* fsblocks in data subvolume   */
-       __u64           rtblocks;       /* fsblocks in realtime subvol  */
-       __u64           rtextents;      /* rt extents in realtime subvol*/
-       __u64           logstart;       /* starting fsblock of the log  */
-       unsigned char   uuid[16];       /* unique id of the filesystem  */
-       __u32           sunit;          /* stripe unit, fsblocks        */
-       __u32           swidth;         /* stripe width, fsblocks       */
-       __s32           version;        /* structure version            */
-       __u32           flags;          /* superblock version flags     */
-       __u32           logsectsize;    /* log sector size, bytes       */
-       __u32           rtsectsize;     /* realtime sector size, bytes  */
-       __u32           dirblocksize;   /* directory block size, bytes  */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32  \
-       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-
-typedef struct compat_xfs_inogrp {
-       __u64           xi_startino;    /* starting inode number        */
-       __s32           xi_alloccount;  /* # bits set in allocmask      */
-       __u64           xi_allocmask;   /* mask of allocated inodes     */
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-/* These growfs input structures have padding on the end, so must translate */
-typedef struct compat_xfs_growfs_data {
-       __u64           newblocks;      /* new data subvol size, fsblocks */
-       __u32           imaxpct;        /* new inode space percentage limit */
-} __attribute__((packed)) compat_xfs_growfs_data_t;
-
-typedef struct compat_xfs_growfs_rt {
-       __u64           newblocks;      /* new realtime size, fsblocks */
-       __u32           extsize;        /* new realtime extent size, fsblocks */
-} __attribute__((packed)) compat_xfs_growfs_rt_t;
-
-#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
-#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
-
-#endif /* BROKEN_X86_ALIGNMENT */
-
-#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
deleted file mode 100644 (file)
index b9c172b..0000000
+++ /dev/null
@@ -1,1210 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_acl.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/posix_acl.h>
-#include <linux/security.h>
-#include <linux/fiemap.h>
-#include <linux/slab.h>
-
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
- */
-void
-xfs_mark_inode_dirty_sync(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty_sync(inode);
-}
-
-void
-xfs_mark_inode_dirty(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty(inode);
-}
-
-/*
- * Hook in SELinux.  This is not quite correct yet, what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-xfs_init_security(
-       struct inode    *inode,
-       struct inode    *dir,
-       const struct qstr *qstr)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       size_t          length;
-       void            *value;
-       unsigned char   *name;
-       int             error;
-
-       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
-                                            &value, &length);
-       if (error) {
-               if (error == -EOPNOTSUPP)
-                       return 0;
-               return -error;
-       }
-
-       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-
-       kfree(name);
-       kfree(value);
-       return error;
-}
-
-static void
-xfs_dentry_to_name(
-       struct xfs_name *namep,
-       struct dentry   *dentry)
-{
-       namep->name = dentry->d_name.name;
-       namep->len = dentry->d_name.len;
-}
-
-STATIC void
-xfs_cleanup_inode(
-       struct inode    *dir,
-       struct inode    *inode,
-       struct dentry   *dentry)
-{
-       struct xfs_name teardown;
-
-       /* Oh, the horror.
-        * If we can't add the ACL or we fail in
-        * xfs_init_security we must back out.
-        * ENOSPC can hit here, among other things.
-        */
-       xfs_dentry_to_name(&teardown, dentry);
-
-       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-       iput(inode);
-}
-
-STATIC int
-xfs_vn_mknod(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       dev_t           rdev)
-{
-       struct inode    *inode;
-       struct xfs_inode *ip = NULL;
-       struct posix_acl *default_acl = NULL;
-       struct xfs_name name;
-       int             error;
-
-       /*
-        * Irix uses Missed'em'V split, but doesn't want to see
-        * the upper 5 bits of (14bit) major.
-        */
-       if (S_ISCHR(mode) || S_ISBLK(mode)) {
-               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
-                       return -EINVAL;
-               rdev = sysv_encode_dev(rdev);
-       } else {
-               rdev = 0;
-       }
-
-       if (IS_POSIXACL(dir)) {
-               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
-               if (IS_ERR(default_acl))
-                       return PTR_ERR(default_acl);
-
-               if (!default_acl)
-                       mode &= ~current_umask();
-       }
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
-       if (unlikely(error))
-               goto out_free_acl;
-
-       inode = VFS_I(ip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       if (default_acl) {
-               error = -xfs_inherit_acl(inode, default_acl);
-               default_acl = NULL;
-               if (unlikely(error))
-                       goto out_cleanup_inode;
-       }
-
-
-       d_instantiate(dentry, inode);
-       return -error;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
-       posix_acl_release(default_acl);
-       return -error;
-}
-
-STATIC int
-xfs_vn_create(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       struct nameidata *nd)
-{
-       return xfs_vn_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-xfs_vn_mkdir(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode)
-{
-       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-xfs_vn_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *cip;
-       struct xfs_name name;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               d_add(dentry, NULL);
-               return NULL;
-       }
-
-       return d_splice_alias(VFS_I(cip), dentry);
-}
-
-STATIC struct dentry *
-xfs_vn_ci_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *ip;
-       struct xfs_name xname;
-       struct xfs_name ci_name;
-       struct qstr     dname;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&xname, dentry);
-       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               /*
-                * call d_add(dentry, NULL) here when d_drop_negative_children
-                * is called in xfs_vn_mknod (ie. allow negative dentries
-                * with CI filesystems).
-                */
-               return NULL;
-       }
-
-       /* if exact match, just splice and exit */
-       if (!ci_name.name)
-               return d_splice_alias(VFS_I(ip), dentry);
-
-       /* else case-insensitive match... */
-       dname.name = ci_name.name;
-       dname.len = ci_name.len;
-       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
-       kmem_free(ci_name.name);
-       return dentry;
-}
-
-STATIC int
-xfs_vn_link(
-       struct dentry   *old_dentry,
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct inode    *inode = old_dentry->d_inode;
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
-       if (unlikely(error))
-               return -error;
-
-       ihold(inode);
-       d_instantiate(dentry, inode);
-       return 0;
-}
-
-STATIC int
-xfs_vn_unlink(
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
-       if (error)
-               return error;
-
-       /*
-        * With unlink, the VFS makes the dentry "negative": no inode,
-        * but still hashed. This is incompatible with case-insensitive
-        * mode, so invalidate (unhash) the dentry in CI-mode.
-        */
-       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-               d_invalidate(dentry);
-       return 0;
-}
-
-STATIC int
-xfs_vn_symlink(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       const char      *symname)
-{
-       struct inode    *inode;
-       struct xfs_inode *cip = NULL;
-       struct xfs_name name;
-       int             error;
-       mode_t          mode;
-
-       mode = S_IFLNK |
-               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
-       if (unlikely(error))
-               goto out;
-
-       inode = VFS_I(cip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       d_instantiate(dentry, inode);
-       return 0;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out:
-       return -error;
-}
-
-STATIC int
-xfs_vn_rename(
-       struct inode    *odir,
-       struct dentry   *odentry,
-       struct inode    *ndir,
-       struct dentry   *ndentry)
-{
-       struct inode    *new_inode = ndentry->d_inode;
-       struct xfs_name oname;
-       struct xfs_name nname;
-
-       xfs_dentry_to_name(&oname, odentry);
-       xfs_dentry_to_name(&nname, ndentry);
-
-       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
-                          XFS_I(ndir), &nname, new_inode ?
-                                               XFS_I(new_inode) : NULL);
-}
-
-/*
- * careful here - this function can get called recursively, so
- * we need to be very careful about how much stack we use.
- * uio is kmalloced for this reason...
- */
-STATIC void *
-xfs_vn_follow_link(
-       struct dentry           *dentry,
-       struct nameidata        *nd)
-{
-       char                    *link;
-       int                     error = -ENOMEM;
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link)
-               goto out_err;
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (unlikely(error))
-               goto out_kfree;
-
-       nd_set_link(nd, link);
-       return NULL;
-
- out_kfree:
-       kfree(link);
- out_err:
-       nd_set_link(nd, ERR_PTR(error));
-       return NULL;
-}
-
-STATIC void
-xfs_vn_put_link(
-       struct dentry   *dentry,
-       struct nameidata *nd,
-       void            *p)
-{
-       char            *s = nd_get_link(nd);
-
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
-STATIC int
-xfs_vn_getattr(
-       struct vfsmount         *mnt,
-       struct dentry           *dentry,
-       struct kstat            *stat)
-{
-       struct inode            *inode = dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       trace_xfs_getattr(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       stat->size = XFS_ISIZE(ip);
-       stat->dev = inode->i_sb->s_dev;
-       stat->mode = ip->i_d.di_mode;
-       stat->nlink = ip->i_d.di_nlink;
-       stat->uid = ip->i_d.di_uid;
-       stat->gid = ip->i_d.di_gid;
-       stat->ino = ip->i_ino;
-       stat->atime = inode->i_atime;
-       stat->mtime = inode->i_mtime;
-       stat->ctime = inode->i_ctime;
-       stat->blocks =
-               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               stat->blksize = BLKDEV_IOSIZE;
-               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                                  sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               if (XFS_IS_REALTIME_INODE(ip)) {
-                       /*
-                        * If the file blocks are being allocated from a
-                        * realtime volume, then return the inode's realtime
-                        * extent size or the realtime volume's extent size.
-                        */
-                       stat->blksize =
-                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-               } else
-                       stat->blksize = xfs_preferred_iosize(mp);
-               stat->rdev = 0;
-               break;
-       }
-
-       return 0;
-}
-
-int
-xfs_setattr_nonsize(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       xfs_trans_t             *tp;
-       int                     error;
-       uid_t                   uid = 0, iuid = 0;
-       gid_t                   gid = 0, igid = 0;
-       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
-       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT((mask & ATTR_SIZE) == 0);
-
-       /*
-        * If disk quotas is on, we make sure that the dquots do exist on disk,
-        * before we start any other transactions. Trying to do this later
-        * is messy. We don't care to take a readlock to look at the ids
-        * in inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-               uint    qflags = 0;
-
-               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-                       uid = iattr->ia_uid;
-                       qflags |= XFS_QMOPT_UQUOTA;
-               } else {
-                       uid = ip->i_d.di_uid;
-               }
-               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-                       gid = iattr->ia_gid;
-                       qflags |= XFS_QMOPT_GQUOTA;
-               }  else {
-                       gid = ip->i_d.di_gid;
-               }
-
-               /*
-                * We take a reference when we initialize udqp and gdqp,
-                * so it is important that we never blindly double trip on
-                * the same variable. See xfs_create() for an example.
-                */
-               ASSERT(udqp == NULL);
-               ASSERT(gdqp == NULL);
-               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-                                        qflags, &udqp, &gdqp);
-               if (error)
-                       return error;
-       }
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (error)
-               goto out_dqrele;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * These IDs could have changed since we last looked at them.
-                * But, we're assured that if the ownership did change
-                * while we didn't have the inode locked, inode's dquot(s)
-                * would have changed also.
-                */
-               iuid = ip->i_d.di_uid;
-               igid = ip->i_d.di_gid;
-               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
-               /*
-                * Do a quota reservation only if uid/gid is actually
-                * going to change.
-                */
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
-                       ASSERT(tp);
-                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (error)      /* out of quota */
-                               goto out_trans_cancel;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (iuid != uid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
-                               ASSERT(mask & ATTR_UID);
-                               ASSERT(udqp);
-                               olddquot1 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_udquot, udqp);
-                       }
-                       ip->i_d.di_uid = uid;
-                       inode->i_uid = uid;
-               }
-               if (igid != gid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
-                               ASSERT(mask & ATTR_GID);
-                               ASSERT(gdqp);
-                               olddquot2 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       ip->i_d.di_gid = gid;
-                       inode->i_gid = gid;
-               }
-       }
-
-       /*
-        * Change file access modes.
-        */
-       if (mask & ATTR_MODE) {
-               umode_t mode = iattr->ia_mode;
-
-               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                       mode &= ~S_ISGID;
-
-               ip->i_d.di_mode &= S_IFMT;
-               ip->i_d.di_mode |= mode & ~S_IFMT;
-
-               inode->i_mode &= S_IFMT;
-               inode->i_mode |= mode & ~S_IFMT;
-       }
-
-       /*
-        * Change file access or modified times.
-        */
-       if (mask & ATTR_ATIME) {
-               inode->i_atime = iattr->ia_atime;
-               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot1);
-       xfs_qm_dqrele(olddquot2);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       if (error)
-               return XFS_ERROR(error);
-
-       /*
-        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-        *           update.  We could avoid this with linked transactions
-        *           and passing down the transaction pointer all the way
-        *           to attr_set.  No previous user of the generic
-        *           Posix ACL code seems to care about this issue either.
-        */
-       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-               error = -xfs_acl_chmod(inode);
-               if (error)
-                       return XFS_ERROR(error);
-       }
-
-       return 0;
-
-out_trans_cancel:
-       xfs_trans_cancel(tp, 0);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       return error;
-}
-
-/*
- * Truncate file.  Must have write permission and not be a directory.
- */
-int
-xfs_setattr_size(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       struct xfs_trans        *tp;
-       int                     error;
-       uint                    lock_flags;
-       uint                    commit_flags = 0;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT(S_ISREG(ip->i_d.di_mode));
-       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
-                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-
-       lock_flags = XFS_ILOCK_EXCL;
-       if (!(flags & XFS_ATTR_NOLOCK))
-               lock_flags |= XFS_IOLOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * Short circuit the truncate case for zero length files.
-        */
-       if (iattr->ia_size == 0 &&
-           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
-               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
-                       goto out_unlock;
-
-               /*
-                * Use the regular setattr path to update the timestamps.
-                */
-               xfs_iunlock(ip, lock_flags);
-               iattr->ia_valid &= ~ATTR_SIZE;
-               return xfs_setattr_nonsize(ip, iattr, 0);
-       }
-
-       /*
-        * Make sure that the dquots are attached to the inode.
-        */
-       error = xfs_qm_dqattach_locked(ip, 0);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Now we can make the changes.  Before we join the inode to the
-        * transaction, take care of the part of the truncation that must be
-        * done without the inode lock.  This needs to be done before joining
-        * the inode to the transaction, because the inode cannot be unlocked
-        * once it is a part of the transaction.
-        */
-       if (iattr->ia_size > ip->i_size) {
-               /*
-                * Do the first part of growing a file: zero any data in the
-                * last block that is beyond the old EOF.  We need to do this
-                * before the inode is joined to the transaction to modify
-                * i_size.
-                */
-               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
-               if (error)
-                       goto out_unlock;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       lock_flags &= ~XFS_ILOCK_EXCL;
-
-       /*
-        * We are going to log the inode size change in this transaction so
-        * any previous writes that are beyond the on disk EOF and the new
-        * EOF that have not been written out need to be written here.  If we
-        * do not write the data out, we expose ourselves to the null files
-        * problem.
-        *
-        * Only flush from the on disk size to the smaller of the in memory
-        * file size or the new size as that's the range we really care about
-        * here and prevents waiting for other data not within the range we
-        * care about here.
-        */
-       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
-               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
-                                       XBF_ASYNC, FI_NONE);
-               if (error)
-                       goto out_unlock;
-       }
-
-       /*
-        * Wait for all I/O to complete.
-        */
-       xfs_ioend_wait(ip);
-
-       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
-                                    xfs_get_blocks);
-       if (error)
-               goto out_unlock;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                XFS_TRANS_PERM_LOG_RES,
-                                XFS_ITRUNCATE_LOG_COUNT);
-       if (error)
-               goto out_trans_cancel;
-
-       truncate_setsize(inode, iattr->ia_size);
-
-       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
-       lock_flags |= XFS_ILOCK_EXCL;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Only change the c/mtime if we are changing the size or we are
-        * explicitly asked to change it.  This handles the semantic difference
-        * between truncate() and ftruncate() as implemented in the VFS.
-        *
-        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
-        * special case where we need to update the times despite not having
-        * these flags set.  For all other operations the VFS set these flags
-        * explicitly if it wants a timestamp update.
-        */
-       if (iattr->ia_size != ip->i_size &&
-           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
-               iattr->ia_ctime = iattr->ia_mtime =
-                       current_fs_time(inode->i_sb);
-               mask |= ATTR_CTIME | ATTR_MTIME;
-       }
-
-       if (iattr->ia_size > ip->i_size) {
-               ip->i_d.di_size = iattr->ia_size;
-               ip->i_size = iattr->ia_size;
-       } else if (iattr->ia_size <= ip->i_size ||
-                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
-               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
-               if (error)
-                       goto out_trans_abort;
-
-               /*
-                * Truncated "down", so we're removing references to old data
-                * here - if we delay flushing for a long time, we expose
-                * ourselves unduly to the notorious NULL files problem.  So,
-                * we mark this inode and flush it when the file is closed,
-                * and do not wait the usual (long) time for writeout.
-                */
-               xfs_iflags_set(ip, XFS_ITRUNCATED);
-       }
-
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-out_unlock:
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return error;
-
-out_trans_abort:
-       commit_flags |= XFS_TRANS_ABORT;
-out_trans_cancel:
-       xfs_trans_cancel(tp, commit_flags);
-       goto out_unlock;
-}
-
-STATIC int
-xfs_vn_setattr(
-       struct dentry   *dentry,
-       struct iattr    *iattr)
-{
-       if (iattr->ia_valid & ATTR_SIZE)
-               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
-       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
-}
-
-#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-       void                    **arg,
-       struct getbmapx         *bmv,
-       int                     *full)
-{
-       int                     error;
-       struct fiemap_extent_info *fieinfo = *arg;
-       u32                     fiemap_flags = 0;
-       u64                     logical, physical, length;
-
-       /* Do nothing for a hole */
-       if (bmv->bmv_block == -1LL)
-               return 0;
-
-       logical = BBTOB(bmv->bmv_offset);
-       physical = BBTOB(bmv->bmv_block);
-       length = BBTOB(bmv->bmv_length);
-
-       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
-               physical = 0;   /* no block yet */
-       }
-       if (bmv->bmv_oflags & BMV_OF_LAST)
-               fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
-                                       length, fiemap_flags);
-       if (error > 0) {
-               error = 0;
-               *full = 1;      /* user array now full */
-       }
-
-       return -error;
-}
-
-STATIC int
-xfs_vn_fiemap(
-       struct inode            *inode,
-       struct fiemap_extent_info *fieinfo,
-       u64                     start,
-       u64                     length)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-       struct getbmapx         bm;
-       int                     error;
-
-       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-       if (error)
-               return error;
-
-       /* Set up bmap header for xfs internal routine */
-       bm.bmv_offset = BTOBB(start);
-       /* Special case for whole file */
-       if (length == FIEMAP_MAX_OFFSET)
-               bm.bmv_length = -1LL;
-       else
-               bm.bmv_length = BTOBB(length);
-
-       /* We add one because in getbmap world count includes the header */
-       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-                                       fieinfo->fi_extents_max + 1;
-       bm.bmv_count = min_t(__s32, bm.bmv_count,
-                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-               bm.bmv_iflags |= BMV_IF_ATTRFORK;
-       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-               bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-       if (error)
-               return -error;
-
-       return 0;
-}
-
-static const struct inode_operations xfs_inode_operations = {
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-       .fiemap                 = xfs_vn_fiemap,
-};
-
-static const struct inode_operations xfs_dir_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtile differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_dir_ci_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_ci_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtile differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_symlink_inode_operations = {
-       .readlink               = generic_readlink,
-       .follow_link            = xfs_vn_follow_link,
-       .put_link               = xfs_vn_put_link,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-STATIC void
-xfs_diflags_to_iflags(
-       struct inode            *inode,
-       struct xfs_inode        *ip)
-{
-       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
- *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
- */
-void
-xfs_setup_inode(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = &ip->i_vnode;
-
-       inode->i_ino = ip->i_ino;
-       inode->i_state = I_NEW;
-
-       inode_sb_list_add(inode);
-       /* make the inode look hashed for the writeback code */
-       hlist_add_fake(&inode->i_hash);
-
-       inode->i_mode   = ip->i_d.di_mode;
-       inode->i_nlink  = ip->i_d.di_nlink;
-       inode->i_uid    = ip->i_d.di_uid;
-       inode->i_gid    = ip->i_d.di_gid;
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               inode->i_rdev =
-                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                             sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               inode->i_rdev = 0;
-               break;
-       }
-
-       inode->i_generation = ip->i_d.di_gen;
-       i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
-       xfs_diflags_to_iflags(inode, ip);
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFREG:
-               inode->i_op = &xfs_inode_operations;
-               inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       case S_IFDIR:
-               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-                       inode->i_op = &xfs_dir_ci_inode_operations;
-               else
-                       inode->i_op = &xfs_dir_inode_operations;
-               inode->i_fop = &xfs_dir_file_operations;
-               break;
-       case S_IFLNK:
-               inode->i_op = &xfs_symlink_inode_operations;
-               if (!(ip->i_df.if_flags & XFS_IFINLINE))
-                       inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       default:
-               inode->i_op = &xfs_inode_operations;
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-               break;
-       }
-
-       /*
-        * If there is no attribute fork no ACL can exist on this inode,
-        * and it can't have any file capabilities attached to it either.
-        */
-       if (!XFS_IFORK_Q(ip)) {
-               inode_has_no_xattr(inode);
-               cache_no_acl(inode);
-       }
-
-       xfs_iflags_clear(ip, XFS_INEW);
-       barrier();
-
-       unlock_new_inode(inode);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
deleted file mode 100644 (file)
index ef41c92..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOPS_H__
-#define __XFS_IOPS_H__
-
-struct xfs_inode;
-
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
-extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-
-extern void xfs_setup_inode(struct xfs_inode *);
-
-#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
deleted file mode 100644 (file)
index d42f814..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
-
-#include <linux/types.h>
-
-/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS        1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS        0
-# define XFS_BIG_INUMS 0
-#endif
-
-#include <xfs_types.h>
-
-#include <kmem.h>
-#include <mrlock.h>
-#include <time.h>
-
-#include <support/uuid.h>
-
-#include <linux/semaphore.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/file.h>
-#include <linux/swap.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/major.h>
-#include <linux/pagemap.h>
-#include <linux/vfs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/proc_fs.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <linux/log2.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/ctype.h>
-#include <linux/writeback.h>
-#include <linux/capability.h>
-#include <linux/list_sort.h>
-
-#include <asm/page.h>
-#include <asm/div64.h>
-#include <asm/param.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <xfs_vnode.h>
-#include <xfs_stats.h>
-#include <xfs_sysctl.h>
-#include <xfs_iops.h>
-#include <xfs_aops.h>
-#include <xfs_super.h>
-#include <xfs_buf.h>
-#include <xfs_message.h>
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
-#define irix_sgid_inherit      xfs_params.sgid_inherit.val
-#define irix_symlink_mode      xfs_params.symlink_mode.val
-#define xfs_panic_mask         xfs_params.panic_mask.val
-#define xfs_error_level                xfs_params.error_level.val
-#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
-#define xfs_stats_clear                xfs_params.stats_clear.val
-#define xfs_inherit_sync       xfs_params.inherit_sync.val
-#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
-#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
-#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
-#define xfs_rotorstep          xfs_params.rotorstep.val
-#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
-#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
-
-#define current_cpu()          (raw_smp_processor_id())
-#define current_pid()          (current->pid)
-#define current_test_flags(f)  (current->flags & (f))
-#define current_set_flags_nested(sp, f)                \
-               (*(sp) = current->flags, current->flags |= (f))
-#define current_clear_flags_nested(sp, f)      \
-               (*(sp) = current->flags, current->flags &= ~(f))
-#define current_restore_flags_nested(sp, f)    \
-               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-
-#define spinlock_destroy(lock)
-
-#define NBBY           8               /* number of bits per byte */
-
-/*
- * Size of block device i/o is parameterized here.
- * Currently the system supports page-sized i/o.
- */
-#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
-#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
-/* number of BB's per block device block */
-#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
-
-#define ENOATTR                ENODATA         /* Attribute not found */
-#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
-#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
-
-#define SYNCHRONIZE()  barrier()
-#define __return_address __builtin_return_address(0)
-
-#define XFS_PROJID_DEFAULT     0
-#define MAXPATHLEN     1024
-
-#define MIN(a,b)       (min(a,b))
-#define MAX(a,b)       (max(a,b))
-#define howmany(x, y)  (((x)+((y)-1))/(y))
-
-/*
- * Various platform dependent calls that don't fit anywhere else
- */
-#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
-#define xfs_stack_trace()      dump_stack()
-
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       *(__u64 *)a = c;
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       mod = do_div(*(__u64 *)a, b);
-                       return mod;
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       __u64   c = *(__u64 *)a;
-                       return do_div(c, b);
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#endif
-
-#undef do_div
-#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
-#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
-
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return(x * y);
-}
-
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return x;
-}
-
-/* ARM old ABI has some weird alignment/padding */
-#if defined(__arm__) && !defined(__ARM_EABI__)
-#define __arch_pack __attribute__((packed))
-#else
-#define __arch_pack
-#endif
-
-#define ASSERT_ALWAYS(expr)    \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-#define ASSERT(expr)   ((void)0)
-
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
-#else /* DEBUG */
-
-#define ASSERT(expr)   \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
-#endif /* DEBUG */
-
-#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
deleted file mode 100644 (file)
index bd672de..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-/*
- * XFS logging functions
- */
-static void
-__xfs_printk(
-       const char              *level,
-       const struct xfs_mount  *mp,
-       struct va_format        *vaf)
-{
-       if (mp && mp->m_fsname) {
-               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-               return;
-       }
-       printk("%sXFS: %pV\n", level, vaf);
-}
-
-#define define_xfs_printk_level(func, kern_level)              \
-void func(const struct xfs_mount *mp, const char *fmt, ...)    \
-{                                                              \
-       struct va_format        vaf;                            \
-       va_list                 args;                           \
-                                                               \
-       va_start(args, fmt);                                    \
-                                                               \
-       vaf.fmt = fmt;                                          \
-       vaf.va = &args;                                         \
-                                                               \
-       __xfs_printk(kern_level, mp, &vaf);                     \
-       va_end(args);                                           \
-}                                                              \
-
-define_xfs_printk_level(xfs_emerg, KERN_EMERG);
-define_xfs_printk_level(xfs_alert, KERN_ALERT);
-define_xfs_printk_level(xfs_crit, KERN_CRIT);
-define_xfs_printk_level(xfs_err, KERN_ERR);
-define_xfs_printk_level(xfs_warn, KERN_WARNING);
-define_xfs_printk_level(xfs_notice, KERN_NOTICE);
-define_xfs_printk_level(xfs_info, KERN_INFO);
-#ifdef DEBUG
-define_xfs_printk_level(xfs_debug, KERN_DEBUG);
-#endif
-
-void
-xfs_alert_tag(
-       const struct xfs_mount  *mp,
-       int                     panic_tag,
-       const char              *fmt, ...)
-{
-       struct va_format        vaf;
-       va_list                 args;
-       int                     do_panic = 0;
-
-       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
-               xfs_alert(mp, "Transforming an alert into a BUG.");
-               do_panic = 1;
-       }
-
-       va_start(args, fmt);
-
-       vaf.fmt = fmt;
-       vaf.va = &args;
-
-       __xfs_printk(KERN_ALERT, mp, &vaf);
-       va_end(args);
-
-       BUG_ON(do_panic);
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
-       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
-               expr, file, line);
-       BUG();
-}
-
-void
-xfs_hex_dump(void *p, int length)
-{
-       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
-}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
deleted file mode 100644 (file)
index 7fb7ea0..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __XFS_MESSAGE_H
-#define __XFS_MESSAGE_H 1
-
-struct xfs_mount;
-
-extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
-                        const char *fmt, ...)
-        __attribute__ ((format (printf, 3, 4)));
-extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-
-#ifdef DEBUG
-extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-#else
-static inline void
-__attribute__ ((format (printf, 2, 3)))
-xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-{
-}
-#endif
-
-extern void assfail(char *expr, char *f, int l);
-
-extern void xfs_hex_dump(void *p, int length);
-
-#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
deleted file mode 100644 (file)
index 29b9d64..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "quota/xfs_qm.h"
-#include <linux/quota.h>
-
-
-STATIC int
-xfs_quota_type(int type)
-{
-       switch (type) {
-       case USRQUOTA:
-               return XFS_DQ_USER;
-       case GRPQUOTA:
-               return XFS_DQ_GROUP;
-       default:
-               return XFS_DQ_PROJ;
-       }
-}
-
-STATIC int
-xfs_fs_get_xstate(
-       struct super_block      *sb,
-       struct fs_quota_stat    *fqs)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-STATIC int
-xfs_fs_set_xstate(
-       struct super_block      *sb,
-       unsigned int            uflags,
-       int                     op)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       unsigned int            flags = 0;
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-
-       if (uflags & FS_QUOTA_UDQ_ACCT)
-               flags |= XFS_UQUOTA_ACCT;
-       if (uflags & FS_QUOTA_PDQ_ACCT)
-               flags |= XFS_PQUOTA_ACCT;
-       if (uflags & FS_QUOTA_GDQ_ACCT)
-               flags |= XFS_GQUOTA_ACCT;
-       if (uflags & FS_QUOTA_UDQ_ENFD)
-               flags |= XFS_UQUOTA_ENFD;
-       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
-               flags |= XFS_OQUOTA_ENFD;
-
-       switch (op) {
-       case Q_XQUOTAON:
-               return -xfs_qm_scall_quotaon(mp, flags);
-       case Q_XQUOTAOFF:
-               if (!XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_quotaoff(mp, flags);
-       case Q_XQUOTARM:
-               if (XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_trunc_qfiles(mp, flags);
-       }
-
-       return -EINVAL;
-}
-
-STATIC int
-xfs_fs_get_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
-}
-
-STATIC int
-xfs_fs_set_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
-}
-
-const struct quotactl_ops xfs_quotactl_operations = {
-       .get_xstate             = xfs_fs_get_xstate,
-       .set_xstate             = xfs_fs_set_xstate,
-       .get_dqblk              = xfs_fs_get_dqblk,
-       .set_dqblk              = xfs_fs_set_dqblk,
-};
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
deleted file mode 100644 (file)
index 76fdc58..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
-{
-       int             c, i, j, val;
-       __uint64_t      xs_xstrat_bytes = 0;
-       __uint64_t      xs_write_bytes = 0;
-       __uint64_t      xs_read_bytes = 0;
-
-       static const struct xstats_entry {
-               char    *desc;
-               int     endpoint;
-       } xstats[] = {
-               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
-               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
-               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
-               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
-               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
-               { "trans",              XFSSTAT_END_TRANSACTIONS        },
-               { "ig",                 XFSSTAT_END_INODE_OPS           },
-               { "log",                XFSSTAT_END_LOG_OPS             },
-               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
-               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
-               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
-               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
-               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
-               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
-               { "buf",                XFSSTAT_END_BUF                 },
-               { "abtb2",              XFSSTAT_END_ABTB_V2             },
-               { "abtc2",              XFSSTAT_END_ABTC_V2             },
-               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
-               { "ibt2",               XFSSTAT_END_IBT_V2              },
-       };
-
-       /* Loop over all stats groups */
-       for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
-               seq_printf(m, "%s", xstats[i].desc);
-               /* inner loop does each group */
-               while (j < xstats[i].endpoint) {
-                       val = 0;
-                       /* sum over all cpus */
-                       for_each_possible_cpu(c)
-                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                       seq_printf(m, " %u", val);
-                       j++;
-               }
-               seq_putc(m, '\n');
-       }
-       /* extra precision counters */
-       for_each_possible_cpu(i) {
-               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
-               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
-               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
-       }
-
-       seq_printf(m, "xpc %Lu %Lu %Lu\n",
-                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
-       seq_printf(m, "debug %u\n",
-#if defined(DEBUG)
-               1);
-#else
-               0);
-#endif
-       return 0;
-}
-
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xfs_stat_proc_show, NULL);
-}
-
-static const struct file_operations xfs_stat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xfs_stat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-int
-xfs_init_procfs(void)
-{
-       if (!proc_mkdir("fs/xfs", NULL))
-               goto out;
-
-       if (!proc_create("fs/xfs/stat", 0, NULL,
-                        &xfs_stat_proc_fops))
-               goto out_remove_entry;
-       return 0;
-
- out_remove_entry:
-       remove_proc_entry("fs/xfs", NULL);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/stat", NULL);
-       remove_proc_entry("fs/xfs", NULL);
-}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
deleted file mode 100644 (file)
index 736854b..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_STATS_H__
-#define __XFS_STATS_H__
-
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-#include <linux/percpu.h>
-
-/*
- * XFS global statistics
- */
-struct xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC      4
-       __uint32_t              xs_allocx;
-       __uint32_t              xs_allocb;
-       __uint32_t              xs_freex;
-       __uint32_t              xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
-       __uint32_t              xs_abt_lookup;
-       __uint32_t              xs_abt_compare;
-       __uint32_t              xs_abt_insrec;
-       __uint32_t              xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
-       __uint32_t              xs_blk_mapr;
-       __uint32_t              xs_blk_mapw;
-       __uint32_t              xs_blk_unmap;
-       __uint32_t              xs_add_exlist;
-       __uint32_t              xs_del_exlist;
-       __uint32_t              xs_look_exlist;
-       __uint32_t              xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
-       __uint32_t              xs_bmbt_lookup;
-       __uint32_t              xs_bmbt_compare;
-       __uint32_t              xs_bmbt_insrec;
-       __uint32_t              xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
-       __uint32_t              xs_dir_lookup;
-       __uint32_t              xs_dir_create;
-       __uint32_t              xs_dir_remove;
-       __uint32_t              xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
-       __uint32_t              xs_trans_sync;
-       __uint32_t              xs_trans_async;
-       __uint32_t              xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
-       __uint32_t              xs_ig_attempts;
-       __uint32_t              xs_ig_found;
-       __uint32_t              xs_ig_frecycle;
-       __uint32_t              xs_ig_missed;
-       __uint32_t              xs_ig_dup;
-       __uint32_t              xs_ig_reclaims;
-       __uint32_t              xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
-       __uint32_t              xs_log_writes;
-       __uint32_t              xs_log_blocks;
-       __uint32_t              xs_log_noiclogs;
-       __uint32_t              xs_log_force;
-       __uint32_t              xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
-       __uint32_t              xs_try_logspace;
-       __uint32_t              xs_sleep_logspace;
-       __uint32_t              xs_push_ail;
-       __uint32_t              xs_push_ail_success;
-       __uint32_t              xs_push_ail_pushbuf;
-       __uint32_t              xs_push_ail_pinned;
-       __uint32_t              xs_push_ail_locked;
-       __uint32_t              xs_push_ail_flushing;
-       __uint32_t              xs_push_ail_restarts;
-       __uint32_t              xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
-       __uint32_t              xs_xstrat_quick;
-       __uint32_t              xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
-       __uint32_t              xs_write_calls;
-       __uint32_t              xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
-       __uint32_t              xs_attr_get;
-       __uint32_t              xs_attr_set;
-       __uint32_t              xs_attr_remove;
-       __uint32_t              xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
-       __uint32_t              xs_iflush_count;
-       __uint32_t              xs_icluster_flushcnt;
-       __uint32_t              xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
-       __uint32_t              vn_active;      /* # vnodes not on free lists */
-       __uint32_t              vn_alloc;       /* # times vn_alloc called */
-       __uint32_t              vn_get;         /* # times vn_get called */
-       __uint32_t              vn_hold;        /* # times vn_hold called */
-       __uint32_t              vn_rele;        /* # times vn_rele called */
-       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
-       __uint32_t              vn_remove;      /* # times vn_remove called */
-       __uint32_t              vn_free;        /* # times vn_free called */
-#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
-       __uint32_t              xb_get;
-       __uint32_t              xb_create;
-       __uint32_t              xb_get_locked;
-       __uint32_t              xb_get_locked_waited;
-       __uint32_t              xb_busy_locked;
-       __uint32_t              xb_miss_locked;
-       __uint32_t              xb_page_retries;
-       __uint32_t              xb_page_found;
-       __uint32_t              xb_get_read;
-/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
-       __uint32_t              xs_abtb_2_lookup;
-       __uint32_t              xs_abtb_2_compare;
-       __uint32_t              xs_abtb_2_insrec;
-       __uint32_t              xs_abtb_2_delrec;
-       __uint32_t              xs_abtb_2_newroot;
-       __uint32_t              xs_abtb_2_killroot;
-       __uint32_t              xs_abtb_2_increment;
-       __uint32_t              xs_abtb_2_decrement;
-       __uint32_t              xs_abtb_2_lshift;
-       __uint32_t              xs_abtb_2_rshift;
-       __uint32_t              xs_abtb_2_split;
-       __uint32_t              xs_abtb_2_join;
-       __uint32_t              xs_abtb_2_alloc;
-       __uint32_t              xs_abtb_2_free;
-       __uint32_t              xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
-       __uint32_t              xs_abtc_2_lookup;
-       __uint32_t              xs_abtc_2_compare;
-       __uint32_t              xs_abtc_2_insrec;
-       __uint32_t              xs_abtc_2_delrec;
-       __uint32_t              xs_abtc_2_newroot;
-       __uint32_t              xs_abtc_2_killroot;
-       __uint32_t              xs_abtc_2_increment;
-       __uint32_t              xs_abtc_2_decrement;
-       __uint32_t              xs_abtc_2_lshift;
-       __uint32_t              xs_abtc_2_rshift;
-       __uint32_t              xs_abtc_2_split;
-       __uint32_t              xs_abtc_2_join;
-       __uint32_t              xs_abtc_2_alloc;
-       __uint32_t              xs_abtc_2_free;
-       __uint32_t              xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
-       __uint32_t              xs_bmbt_2_lookup;
-       __uint32_t              xs_bmbt_2_compare;
-       __uint32_t              xs_bmbt_2_insrec;
-       __uint32_t              xs_bmbt_2_delrec;
-       __uint32_t              xs_bmbt_2_newroot;
-       __uint32_t              xs_bmbt_2_killroot;
-       __uint32_t              xs_bmbt_2_increment;
-       __uint32_t              xs_bmbt_2_decrement;
-       __uint32_t              xs_bmbt_2_lshift;
-       __uint32_t              xs_bmbt_2_rshift;
-       __uint32_t              xs_bmbt_2_split;
-       __uint32_t              xs_bmbt_2_join;
-       __uint32_t              xs_bmbt_2_alloc;
-       __uint32_t              xs_bmbt_2_free;
-       __uint32_t              xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
-       __uint32_t              xs_ibt_2_lookup;
-       __uint32_t              xs_ibt_2_compare;
-       __uint32_t              xs_ibt_2_insrec;
-       __uint32_t              xs_ibt_2_delrec;
-       __uint32_t              xs_ibt_2_newroot;
-       __uint32_t              xs_ibt_2_killroot;
-       __uint32_t              xs_ibt_2_increment;
-       __uint32_t              xs_ibt_2_decrement;
-       __uint32_t              xs_ibt_2_lshift;
-       __uint32_t              xs_ibt_2_rshift;
-       __uint32_t              xs_ibt_2_split;
-       __uint32_t              xs_ibt_2_join;
-       __uint32_t              xs_ibt_2_alloc;
-       __uint32_t              xs_ibt_2_free;
-       __uint32_t              xs_ibt_2_moves;
-/* Extra precision counters */
-       __uint64_t              xs_xstrat_bytes;
-       __uint64_t              xs_write_bytes;
-       __uint64_t              xs_read_bytes;
-};
-
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
-
-/*
- * We don't disable preempt, not too worried about poking the
- * wrong CPU's stat for now (also aggregated before reporting).
- */
-#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
-
-extern int xfs_init_procfs(void);
-extern void xfs_cleanup_procfs(void);
-
-
-#else  /* !CONFIG_PROC_FS */
-
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
-static inline int xfs_init_procfs(void)
-{
-       return 0;
-}
-
-static inline void xfs_cleanup_procfs(void)
-{
-}
-
-#endif /* !CONFIG_PROC_FS */
-
-#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
deleted file mode 100644 (file)
index 9a72dda..0000000
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_fsops.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_da_btree.h"
-#include "xfs_extfree_item.h"
-#include "xfs_mru_cache.h"
-#include "xfs_inode_item.h"
-#include "xfs_sync.h"
-#include "xfs_trace.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
-
-static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
-#define MNTOPT_LOGDEV  "logdev"        /* log device */
-#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
-#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
-#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
-#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
-#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
-#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
-                                        * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
-#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
-#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
-                                        * in stat(). */
-#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
-#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
-#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
-#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
-#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
-#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
-
-/*
- * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
- */
-enum {
-       Opt_barrier, Opt_nobarrier, Opt_err
-};
-
-static const match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_err, NULL}
-};
-
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
-       int     last, shift_left_factor = 0;
-       char    *value = s;
-
-       last = strlen(value) - 1;
-       if (value[last] == 'K' || value[last] == 'k') {
-               shift_left_factor = 10;
-               value[last] = '\0';
-       }
-       if (value[last] == 'M' || value[last] == 'm') {
-               shift_left_factor = 20;
-               value[last] = '\0';
-       }
-       if (value[last] == 'G' || value[last] == 'g') {
-               shift_left_factor = 30;
-               value[last] = '\0';
-       }
-
-       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure.  The caller takes care of them.
- */
-STATIC int
-xfs_parseargs(
-       struct xfs_mount        *mp,
-       char                    *options)
-{
-       struct super_block      *sb = mp->m_super;
-       char                    *this_char, *value, *eov;
-       int                     dsunit = 0;
-       int                     dswidth = 0;
-       int                     iosize = 0;
-       __uint8_t               iosizelog = 0;
-
-       /*
-        * set up the mount name first so all the errors will refer to the
-        * correct device.
-        */
-       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
-       if (!mp->m_fsname)
-               return ENOMEM;
-       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
-       /*
-        * Copy binary VFS mount flags we are interested in.
-        */
-       if (sb->s_flags & MS_RDONLY)
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       if (sb->s_flags & MS_DIRSYNC)
-               mp->m_flags |= XFS_MOUNT_DIRSYNC;
-       if (sb->s_flags & MS_SYNCHRONOUS)
-               mp->m_flags |= XFS_MOUNT_WSYNC;
-
-       /*
-        * Set some default flags that could be cleared by the mount option
-        * parsing.
-        */
-       mp->m_flags |= XFS_MOUNT_BARRIER;
-       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-
-       /*
-        * These can be overridden by the mount option parsing.
-        */
-       mp->m_logbufs = -1;
-       mp->m_logbsize = -1;
-
-       if (!options)
-               goto done;
-
-       while ((this_char = strsep(&options, ",")) != NULL) {
-               if (!*this_char)
-                       continue;
-               if ((value = strchr(this_char, '=')) != NULL)
-                       *value++ = 0;
-
-               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_logname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_rtname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = simple_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = suffix_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
-                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
-                       mp->m_flags |= XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
-                       mp->m_flags &= ~XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-                       mp->m_flags |= XFS_MOUNT_WSYNC;
-               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-                       mp->m_flags |= XFS_MOUNT_NOALIGN;
-               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-                       mp->m_flags |= XFS_MOUNT_SWALLOC;
-               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dsunit = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dswidth = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-#endif
-               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-                       mp->m_flags |= XFS_MOUNT_NOUUID;
-               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-                       mp->m_flags |= XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                       mp->m_flags |= XFS_MOUNT_ATTR2;
-               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
-                       mp->m_flags |= XFS_MOUNT_NOATTR2;
-               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-                          !strcmp(this_char, MNTOPT_UQUOTA) ||
-                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                        XFS_UQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
-                       mp->m_flags |= XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
-                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, "ihashsize")) {
-                       xfs_warn(mp,
-       "ihashsize no longer used, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisdsync")) {
-                       xfs_warn(mp,
-       "osyncisdsync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisosync")) {
-                       xfs_warn(mp,
-       "osyncisosync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "irixsgid")) {
-                       xfs_warn(mp,
-       "irixsgid is now a sysctl(2) variable, option is deprecated.");
-               } else {
-                       xfs_warn(mp, "unknown mount option [%s].", this_char);
-                       return EINVAL;
-               }
-       }
-
-       /*
-        * no recovery flag requires a read-only mount
-        */
-       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               xfs_warn(mp, "no-recovery mounts must be read-only.");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
-               xfs_warn(mp,
-       "sunit and swidth options incompatible with the noalign option");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-               xfs_warn(mp,
-       "the discard option is incompatible with the nodelaylog option");
-               return EINVAL;
-       }
-
-#ifndef CONFIG_XFS_QUOTA
-       if (XFS_IS_QUOTA_RUNNING(mp)) {
-               xfs_warn(mp, "quota support not available in this kernel.");
-               return EINVAL;
-       }
-#endif
-
-       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
-               xfs_warn(mp, "cannot mount with both project and group quota");
-               return EINVAL;
-       }
-
-       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-               xfs_warn(mp, "sunit and swidth must be specified together");
-               return EINVAL;
-       }
-
-       if (dsunit && (dswidth % dsunit != 0)) {
-               xfs_warn(mp,
-       "stripe width (%d) must be a multiple of the stripe unit (%d)",
-                       dswidth, dsunit);
-               return EINVAL;
-       }
-
-done:
-       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
-               /*
-                * At this point the superblock has not been read
-                * in, therefore we do not know the block size.
-                * Before the mount call ends we will convert
-                * these to FSBs.
-                */
-               if (dsunit) {
-                       mp->m_dalign = dsunit;
-                       mp->m_flags |= XFS_MOUNT_RETERR;
-               }
-
-               if (dswidth)
-                       mp->m_swidth = dswidth;
-       }
-
-       if (mp->m_logbufs != -1 &&
-           mp->m_logbufs != 0 &&
-           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
-            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
-               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
-                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-               return XFS_ERROR(EINVAL);
-       }
-       if (mp->m_logbsize != -1 &&
-           mp->m_logbsize !=  0 &&
-           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
-            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
-            !is_power_of_2(mp->m_logbsize))) {
-               xfs_warn(mp,
-                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                       mp->m_logbsize);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (iosizelog) {
-               if (iosizelog > XFS_MAX_IO_LOG ||
-                   iosizelog < XFS_MIN_IO_LOG) {
-                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
-                               iosizelog, XFS_MIN_IO_LOG,
-                               XFS_MAX_IO_LOG);
-                       return XFS_ERROR(EINVAL);
-               }
-
-               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-               mp->m_readio_log = iosizelog;
-               mp->m_writeio_log = iosizelog;
-       }
-
-       return 0;
-}
-
-struct proc_xfs_info {
-       int     flag;
-       char    *str;
-};
-
-STATIC int
-xfs_showargs(
-       struct xfs_mount        *mp,
-       struct seq_file         *m)
-{
-       static struct proc_xfs_info xfs_info_set[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
-               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
-               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
-               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
-               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
-               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
-               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
-               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
-               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
-               { 0, NULL }
-       };
-       static struct proc_xfs_info xfs_info_unset[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
-               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
-               { 0, NULL }
-       };
-       struct proc_xfs_info    *xfs_infop;
-
-       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-               if (mp->m_flags & xfs_infop->flag)
-                       seq_puts(m, xfs_infop->str);
-       }
-       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
-               if (!(mp->m_flags & xfs_infop->flag))
-                       seq_puts(m, xfs_infop->str);
-       }
-
-       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
-                               (int)(1 << mp->m_writeio_log) >> 10);
-
-       if (mp->m_logbufs > 0)
-               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-       if (mp->m_logbsize > 0)
-               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
-       if (mp->m_logname)
-               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-       if (mp->m_rtname)
-               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
-       if (mp->m_dalign > 0)
-               seq_printf(m, "," MNTOPT_SUNIT "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-       if (mp->m_swidth > 0)
-               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_USRQUOTA);
-       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_UQUOTANOENF);
-
-       /* Either project or group quotas can be active, not both */
-
-       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_PRJQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
-       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_GRPQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
-       }
-
-       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-               seq_puts(m, "," MNTOPT_NOQUOTA);
-
-       return 0;
-}
-__uint64_t
-xfs_max_file_offset(
-       unsigned int            blockshift)
-{
-       unsigned int            pagefactor = 1;
-       unsigned int            bitshift = BITS_PER_LONG - 1;
-
-       /* Figure out maximum filesize, on Linux this can depend on
-        * the filesystem blocksize (on 32 bit platforms).
-        * __block_write_begin does this in an [unsigned] long...
-        *      page->index << (PAGE_CACHE_SHIFT - bbits)
-        * So, for page sized blocks (4K on 32 bit platforms),
-        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-        * but for smaller blocksizes it is less (bbits = log2 bsize).
-        * Note1: get_block_t takes a long (implicit cast from above)
-        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-        * can optionally convert the [unsigned] long from above into
-        * an [unsigned] long long.
-        */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
-       ASSERT(sizeof(sector_t) == 8);
-       pagefactor = PAGE_CACHE_SIZE;
-       bitshift = BITS_PER_LONG;
-# else
-       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
-       return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
-
-STATIC int
-xfs_blkdev_get(
-       xfs_mount_t             *mp,
-       const char              *name,
-       struct block_device     **bdevp)
-{
-       int                     error = 0;
-
-       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
-                                   mp);
-       if (IS_ERR(*bdevp)) {
-               error = PTR_ERR(*bdevp);
-               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
-       }
-
-       return -error;
-}
-
-STATIC void
-xfs_blkdev_put(
-       struct block_device     *bdev)
-{
-       if (bdev)
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
-       xfs_buftarg_t           *buftarg)
-{
-       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
-}
-
-STATIC void
-xfs_close_devices(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_logdev_targp);
-               xfs_blkdev_put(logdev);
-       }
-       if (mp->m_rtdev_targp) {
-               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
-               xfs_blkdev_put(rtdev);
-       }
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
-}
-
-/*
- * The file system configurations are:
- *     (1) device (partition) with data and internal log
- *     (2) logical volume with data and log subvolumes.
- *     (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in sb->s_bdev.
- */
-STATIC int
-xfs_open_devices(
-       struct xfs_mount        *mp)
-{
-       struct block_device     *ddev = mp->m_super->s_bdev;
-       struct block_device     *logdev = NULL, *rtdev = NULL;
-       int                     error;
-
-       /*
-        * Open real time and log devices - order is important.
-        */
-       if (mp->m_logname) {
-               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
-               if (error)
-                       goto out;
-       }
-
-       if (mp->m_rtname) {
-               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
-               if (error)
-                       goto out_close_logdev;
-
-               if (rtdev == ddev || rtdev == logdev) {
-                       xfs_warn(mp,
-       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
-                       error = EINVAL;
-                       goto out_close_rtdev;
-               }
-       }
-
-       /*
-        * Setup xfs_mount buffer target pointers
-        */
-       error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
-       if (!mp->m_ddev_targp)
-               goto out_close_rtdev;
-
-       if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_rtdev_targp)
-                       goto out_free_ddev_targ;
-       }
-
-       if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_logdev_targp)
-                       goto out_free_rtdev_targ;
-       } else {
-               mp->m_logdev_targp = mp->m_ddev_targp;
-       }
-
-       return 0;
-
- out_free_rtdev_targ:
-       if (mp->m_rtdev_targp)
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
- out_free_ddev_targ:
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
- out_close_rtdev:
-       if (rtdev)
-               xfs_blkdev_put(rtdev);
- out_close_logdev:
-       if (logdev && logdev != ddev)
-               xfs_blkdev_put(logdev);
- out:
-       return error;
-}
-
-/*
- * Setup xfs_mount buffer target pointers based on superblock
- */
-STATIC int
-xfs_setup_devices(
-       struct xfs_mount        *mp)
-{
-       int                     error;
-
-       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-                                   mp->m_sb.sb_sectsize);
-       if (error)
-               return error;
-
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               unsigned int    log_sector_size = BBSIZE;
-
-               if (xfs_sb_version_hassector(&mp->m_sb))
-                       log_sector_size = mp->m_sb.sb_logsectsize;
-               error = xfs_setsize_buftarg(mp->m_logdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           log_sector_size);
-               if (error)
-                       return error;
-       }
-       if (mp->m_rtdev_targp) {
-               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           mp->m_sb.sb_sectsize);
-               if (error)
-                       return error;
-       }
-
-       return 0;
-}
-
-/* Catch misguided souls that try to use this interface on XFS */
-STATIC struct inode *
-xfs_fs_alloc_inode(
-       struct super_block      *sb)
-{
-       BUG();
-       return NULL;
-}
-
-/*
- * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
- */
-STATIC void
-xfs_fs_destroy_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_destroy_inode(ip);
-
-       XFS_STATS_INC(vn_reclaim);
-
-       /* bad inode, get out here ASAP */
-       if (is_bad_inode(inode))
-               goto out_reclaim;
-
-       xfs_ioend_wait(ip);
-
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-       /*
-        * We should never get here with one of the reclaim flags already set.
-        */
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
-       /*
-        * We always use background reclaim here because even if the
-        * inode is clean, it still may be under IO and hence we have
-        * to take the flush lock. The background reclaim path handles
-        * this more efficiently than we can here, so simply let background
-        * reclaim tear down all inodes.
-        */
-out_reclaim:
-       xfs_inode_set_reclaim_tag(ip);
-}
-
-/*
- * Slab object creation initialisation for the XFS inode.
- * This covers only the idempotent fields in the XFS inode;
- * all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly initialise
- * fields in the xfs inode that left in the initialise state
- * when freeing the inode.
- */
-STATIC void
-xfs_fs_inode_init_once(
-       void                    *inode)
-{
-       struct xfs_inode        *ip = inode;
-
-       memset(ip, 0, sizeof(struct xfs_inode));
-
-       /* vfs inode */
-       inode_init_once(VFS_I(ip));
-
-       /* xfs inode */
-       atomic_set(&ip->i_iocount, 0);
-       atomic_set(&ip->i_pincount, 0);
-       spin_lock_init(&ip->i_flags_lock);
-       init_waitqueue_head(&ip->i_ipin_wait);
-       /*
-        * Because we want to use a counting completion, complete
-        * the flush completion once to allow a single access to
-        * the flush completion without blocking.
-        */
-       init_completion(&ip->i_flush);
-       complete(&ip->i_flush);
-
-       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-                    "xfsino", ip->i_ino);
-}
-
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
-       struct inode    *inode,
-       int             flags)
-{
-       barrier();
-       XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_log_inode(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /* we need to return with the lock hold shared */
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Note - it's possible that we might have pushed ourselves out of the
-        * way during trans_reserve which would flush the inode.  But there's
-        * no guarantee that the inode buffer has actually gone out yet (it's
-        * delwri).  Plus the buffer could be pinned anyway if it's part of
-        * an inode in another recent transaction.  So we play it safe and
-        * fire off the transaction anyway.
-        */
-       xfs_trans_ijoin(tp, ip);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       error = xfs_trans_commit(tp, 0);
-       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
-
-STATIC int
-xfs_fs_write_inode(
-       struct inode            *inode,
-       struct writeback_control *wbc)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = EAGAIN;
-
-       trace_xfs_write_inode(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       if (wbc->sync_mode == WB_SYNC_ALL) {
-               /*
-                * Make sure the inode has made it it into the log.  Instead
-                * of forcing it all the way to stable storage using a
-                * synchronous transaction we let the log force inside the
-                * ->sync_fs call do that for thus, which reduces the number
-                * of synchronous log foces dramatically.
-                */
-               xfs_ioend_wait(ip);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               if (ip->i_update_core) {
-                       error = xfs_log_inode(ip);
-                       if (error)
-                               goto out_unlock;
-               }
-       } else {
-               /*
-                * We make this non-blocking if the inode is contended, return
-                * EAGAIN to indicate to the caller that they did not succeed.
-                * This prevents the flush path from blocking on inodes inside
-                * another operation right now, they get caught later by
-                * xfs_sync.
-                */
-               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
-                       goto out;
-
-               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-                       goto out_unlock;
-
-               /*
-                * Now we have the flush lock and the inode is not pinned, we
-                * can check if the inode is really clean as we know that
-                * there are no pending transaction completions, it is not
-                * waiting on the delayed write queue and there is no IO in
-                * progress.
-                */
-               if (xfs_inode_clean(ip)) {
-                       xfs_ifunlock(ip);
-                       error = 0;
-                       goto out_unlock;
-               }
-               error = xfs_iflush(ip, SYNC_TRYLOCK);
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
-       /*
-        * if we failed to write out the inode then mark
-        * it dirty again so we'll try again later.
-        */
-       if (error)
-               xfs_mark_inode_dirty_sync(ip);
-       return -error;
-}
-
-STATIC void
-xfs_fs_evict_inode(
-       struct inode            *inode)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-
-       trace_xfs_evict_inode(ip);
-
-       truncate_inode_pages(&inode->i_data, 0);
-       end_writeback(inode);
-       XFS_STATS_INC(vn_rele);
-       XFS_STATS_INC(vn_remove);
-       XFS_STATS_DEC(vn_active);
-
-       /*
-        * The iolock is used by the file system to coordinate reads,
-        * writes, and block truncates.  Up to this point the lock
-        * protected concurrent accesses by users of the inode.  But
-        * from here forward we're doing some final processing of the
-        * inode because we're done with it, and although we reuse the
-        * iolock for protection it is really a distinct lock class
-        * (in the lockdep sense) from before.  To keep lockdep happy
-        * (and basically indicate what we are doing), we explicitly
-        * re-init the iolock here.
-        */
-       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
-       xfs_inactive(ip);
-}
-
-STATIC void
-xfs_free_fsname(
-       struct xfs_mount        *mp)
-{
-       kfree(mp->m_fsname);
-       kfree(mp->m_rtname);
-       kfree(mp->m_logname);
-}
-
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_syncd_stop(mp);
-
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
-STATIC int
-xfs_fs_sync_fs(
-       struct super_block      *sb,
-       int                     wait)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       int                     error;
-
-       /*
-        * Not much we can do for the first async pass.  Writing out the
-        * superblock would be counter-productive as we are going to redirty
-        * when writing out other data and metadata (and writing out a single
-        * block is quite fast anyway).
-        *
-        * Try to asynchronously kick off quota syncing at least.
-        */
-       if (!wait) {
-               xfs_qm_sync(mp, SYNC_TRYLOCK);
-               return 0;
-       }
-
-       error = xfs_quiesce_data(mp);
-       if (error)
-               return -error;
-
-       if (laptop_mode) {
-               /*
-                * The disk must be active because we're syncing.
-                * We schedule xfssyncd now (now that the disk is
-                * active) instead of later (when it might not be).
-                */
-               flush_delayed_work_sync(&mp->m_sync_work);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
-       struct dentry           *dentry,
-       struct kstatfs          *statp)
-{
-       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
-       xfs_sb_t                *sbp = &mp->m_sb;
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
-       __uint64_t              fakeinos, id;
-       xfs_extlen_t            lsize;
-       __int64_t               ffree;
-
-       statp->f_type = XFS_SB_MAGIC;
-       statp->f_namelen = MAXNAMELEN - 1;
-
-       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
-       statp->f_fsid.val[0] = (u32)id;
-       statp->f_fsid.val[1] = (u32)(id >> 32);
-
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-
-       spin_lock(&mp->m_sb_lock);
-       statp->f_bsize = sbp->sb_blocksize;
-       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-       statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-       fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
-       if (mp->m_maxicount)
-               statp->f_files = min_t(typeof(statp->f_files),
-                                       statp->f_files,
-                                       mp->m_maxicount);
-
-       /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
-       statp->f_ffree = max_t(__int64_t, ffree, 0);
-
-       spin_unlock(&mp->m_sb_lock);
-
-       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               xfs_qm_statvfs(ip, statp);
-       return 0;
-}
-
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks = 0;
-
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
-
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC int
-xfs_fs_remount(
-       struct super_block      *sb,
-       int                     *flags,
-       char                    *options)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       substring_t             args[MAX_OPT_ARGS];
-       char                    *p;
-       int                     error;
-
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token;
-
-               if (!*p)
-                       continue;
-
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case Opt_barrier:
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-                       break;
-               case Opt_nobarrier:
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                       break;
-               default:
-                       /*
-                        * Logically we would return an error here to prevent
-                        * users from believing they might have changed
-                        * mount options using remount which can't be changed.
-                        *
-                        * But unfortunately mount(8) adds all options from
-                        * mtab and fstab to the mount arguments in some cases
-                        * so we can't blindly reject options, but have to
-                        * check for each specified option if it actually
-                        * differs from the currently set option and only
-                        * reject it if that's the case.
-                        *
-                        * Until that is implemented we return success for
-                        * every remount request, and silently ignore all
-                        * options that we can't actually change.
-                        */
-#if 0
-                       xfs_info(mp,
-               "mount option \"%s\" not supported for remount\n", p);
-                       return -EINVAL;
-#else
-                       break;
-#endif
-               }
-       }
-
-       /* ro -> rw */
-       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
-               mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
-               /*
-                * If this is the first remount to writeable state we
-                * might have some superblock changes to update.
-                */
-               if (mp->m_update_flags) {
-                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
-                       if (error) {
-                               xfs_warn(mp, "failed to write sb changes");
-                               return error;
-                       }
-                       mp->m_update_flags = 0;
-               }
-
-               /*
-                * Fill out the reserve pool if it is empty. Use the stashed
-                * value if it is non-zero, otherwise go with the default.
-                */
-               xfs_restore_resvblks(mp);
-       }
-
-       /* rw -> ro */
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-               /*
-                * After we have synced the data but before we sync the
-                * metadata, we need to free up the reserve block pool so that
-                * the used block count in the superblock on disk is correct at
-                * the end of the remount. Stash the current reserve pool size
-                * so that if we get remounted rw, we can return it to the same
-                * size.
-                */
-
-               xfs_quiesce_data(mp);
-               xfs_save_resvblks(mp);
-               xfs_quiesce_attr(mp);
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       }
-
-       return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of the metadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-STATIC int
-xfs_fs_freeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_save_resvblks(mp);
-       xfs_quiesce_attr(mp);
-       return -xfs_fs_log_dummy(mp);
-}
-
-STATIC int
-xfs_fs_unfreeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_restore_resvblks(mp);
-       return 0;
-}
-
-STATIC int
-xfs_fs_show_options(
-       struct seq_file         *m,
-       struct vfsmount         *mnt)
-{
-       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-       struct xfs_mount        *mp)
-{
-       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-       /* Fail a mount where the logbuf is smaller than the log stripe */
-       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-               if (mp->m_logbsize <= 0 &&
-                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
-                       mp->m_logbsize = mp->m_sb.sb_logsunit;
-               } else if (mp->m_logbsize > 0 &&
-                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
-                       xfs_warn(mp,
-               "logbuf size must be greater than or equal to log stripe size");
-                       return XFS_ERROR(EINVAL);
-               }
-       } else {
-               /* Fail a mount if the logbuf is larger than 32K */
-               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
-                       xfs_warn(mp,
-               "logbuf size for version 1 logs must be 16K or 32K");
-                       return XFS_ERROR(EINVAL);
-               }
-       }
-
-       /*
-        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-        * told by noattr2 to turn it off
-        */
-       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-           !(mp->m_flags & XFS_MOUNT_NOATTR2))
-               mp->m_flags |= XFS_MOUNT_ATTR2;
-
-       /*
-        * prohibit r/w mounts of read-only filesystems
-        */
-       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-               xfs_warn(mp,
-                       "cannot mount a read-only filesystem as read-write");
-               return XFS_ERROR(EROFS);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_fill_super(
-       struct super_block      *sb,
-       void                    *data,
-       int                     silent)
-{
-       struct inode            *root;
-       struct xfs_mount        *mp = NULL;
-       int                     flags = 0, error = ENOMEM;
-
-       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
-       if (!mp)
-               goto out;
-
-       spin_lock_init(&mp->m_sb_lock);
-       mutex_init(&mp->m_growlock);
-       atomic_set(&mp->m_active_trans, 0);
-
-       mp->m_super = sb;
-       sb->s_fs_info = mp;
-
-       error = xfs_parseargs(mp, (char *)data);
-       if (error)
-               goto out_free_fsname;
-
-       sb_min_blocksize(sb, BBSIZE);
-       sb->s_xattr = xfs_xattr_handlers;
-       sb->s_export_op = &xfs_export_operations;
-#ifdef CONFIG_XFS_QUOTA
-       sb->s_qcop = &xfs_quotactl_operations;
-#endif
-       sb->s_op = &xfs_super_operations;
-
-       if (silent)
-               flags |= XFS_MFSI_QUIET;
-
-       error = xfs_open_devices(mp);
-       if (error)
-               goto out_free_fsname;
-
-       error = xfs_icsb_init_counters(mp);
-       if (error)
-               goto out_close_devices;
-
-       error = xfs_readsb(mp, flags);
-       if (error)
-               goto out_destroy_counters;
-
-       error = xfs_finish_flags(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_setup_devices(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_filestream_mount(mp);
-       if (error)
-               goto out_free_sb;
-
-       /*
-        * we must configure the block size in the superblock before we run the
-        * full mount process as the mount process can lookup and cache inodes.
-        * For the same reason we must also initialise the syncd and register
-        * the inode cache shrinker so that inodes can be reclaimed during
-        * operations like a quotacheck that iterate all inodes in the
-        * filesystem.
-        */
-       sb->s_magic = XFS_SB_MAGIC;
-       sb->s_blocksize = mp->m_sb.sb_blocksize;
-       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
-       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
-       sb->s_time_gran = 1;
-       set_posix_acl_flag(sb);
-
-       error = xfs_mountfs(mp);
-       if (error)
-               goto out_filestream_unmount;
-
-       error = xfs_syncd_init(mp);
-       if (error)
-               goto out_unmount;
-
-       root = igrab(VFS_I(mp->m_rootip));
-       if (!root) {
-               error = ENOENT;
-               goto out_syncd_stop;
-       }
-       if (is_bad_inode(root)) {
-               error = EINVAL;
-               goto out_syncd_stop;
-       }
-       sb->s_root = d_alloc_root(root);
-       if (!sb->s_root) {
-               error = ENOMEM;
-               goto out_iput;
-       }
-
-       return 0;
-
- out_filestream_unmount:
-       xfs_filestream_unmount(mp);
- out_free_sb:
-       xfs_freesb(mp);
- out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
- out_close_devices:
-       xfs_close_devices(mp);
- out_free_fsname:
-       xfs_free_fsname(mp);
-       kfree(mp);
- out:
-       return -error;
-
- out_iput:
-       iput(root);
- out_syncd_stop:
-       xfs_syncd_stop(mp);
- out_unmount:
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       goto out_free_sb;
-}
-
-STATIC struct dentry *
-xfs_fs_mount(
-       struct file_system_type *fs_type,
-       int                     flags,
-       const char              *dev_name,
-       void                    *data)
-{
-       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-}
-
-static int
-xfs_fs_nr_cached_objects(
-       struct super_block      *sb)
-{
-       return xfs_reclaim_inodes_count(XFS_M(sb));
-}
-
-static void
-xfs_fs_free_cached_objects(
-       struct super_block      *sb,
-       int                     nr_to_scan)
-{
-       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
-}
-
-static const struct super_operations xfs_super_operations = {
-       .alloc_inode            = xfs_fs_alloc_inode,
-       .destroy_inode          = xfs_fs_destroy_inode,
-       .dirty_inode            = xfs_fs_dirty_inode,
-       .write_inode            = xfs_fs_write_inode,
-       .evict_inode            = xfs_fs_evict_inode,
-       .put_super              = xfs_fs_put_super,
-       .sync_fs                = xfs_fs_sync_fs,
-       .freeze_fs              = xfs_fs_freeze,
-       .unfreeze_fs            = xfs_fs_unfreeze,
-       .statfs                 = xfs_fs_statfs,
-       .remount_fs             = xfs_fs_remount,
-       .show_options           = xfs_fs_show_options,
-       .nr_cached_objects      = xfs_fs_nr_cached_objects,
-       .free_cached_objects    = xfs_fs_free_cached_objects,
-};
-
-static struct file_system_type xfs_fs_type = {
-       .owner                  = THIS_MODULE,
-       .name                   = "xfs",
-       .mount                  = xfs_fs_mount,
-       .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV,
-};
-
-STATIC int __init
-xfs_init_zones(void)
-{
-
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
-               goto out;
-
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_destroy_ioend_zone;
-
-       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-                                               "xfs_log_ticket");
-       if (!xfs_log_ticket_zone)
-               goto out_destroy_ioend_pool;
-
-       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-                                               "xfs_bmap_free_item");
-       if (!xfs_bmap_free_item_zone)
-               goto out_destroy_log_ticket_zone;
-
-       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-                                               "xfs_btree_cur");
-       if (!xfs_btree_cur_zone)
-               goto out_destroy_bmap_free_item_zone;
-
-       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-                                               "xfs_da_state");
-       if (!xfs_da_state_zone)
-               goto out_destroy_btree_cur_zone;
-
-       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-       if (!xfs_dabuf_zone)
-               goto out_destroy_da_state_zone;
-
-       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-       if (!xfs_ifork_zone)
-               goto out_destroy_dabuf_zone;
-
-       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-       if (!xfs_trans_zone)
-               goto out_destroy_ifork_zone;
-
-       xfs_log_item_desc_zone =
-               kmem_zone_init(sizeof(struct xfs_log_item_desc),
-                              "xfs_log_item_desc");
-       if (!xfs_log_item_desc_zone)
-               goto out_destroy_trans_zone;
-
-       /*
-        * The size of the zone allocated buf log item is the maximum
-        * size possible under XFS.  This wastes a little bit of memory,
-        * but it is much faster.
-        */
-       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
-                                 NBWORD) * sizeof(int))), "xfs_buf_item");
-       if (!xfs_buf_item_zone)
-               goto out_destroy_log_item_desc_zone;
-
-       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-                                sizeof(xfs_extent_t))), "xfs_efd_item");
-       if (!xfs_efd_zone)
-               goto out_destroy_buf_item_zone;
-
-       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-                               sizeof(xfs_extent_t))), "xfs_efi_item");
-       if (!xfs_efi_zone)
-               goto out_destroy_efd_zone;
-
-       xfs_inode_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
-                       xfs_fs_inode_init_once);
-       if (!xfs_inode_zone)
-               goto out_destroy_efi_zone;
-
-       xfs_ili_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-                                       KM_ZONE_SPREAD, NULL);
-       if (!xfs_ili_zone)
-               goto out_destroy_inode_zone;
-
-       return 0;
-
- out_destroy_inode_zone:
-       kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
-       kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
-       kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
-       kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
-       kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
-       kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
-       kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
-       kmem_zone_destroy(xfs_dabuf_zone);
- out_destroy_da_state_zone:
-       kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
-       kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
-       kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-       mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
- out:
-       return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
-       kmem_zone_destroy(xfs_ili_zone);
-       kmem_zone_destroy(xfs_inode_zone);
-       kmem_zone_destroy(xfs_efi_zone);
-       kmem_zone_destroy(xfs_efd_zone);
-       kmem_zone_destroy(xfs_buf_item_zone);
-       kmem_zone_destroy(xfs_log_item_desc_zone);
-       kmem_zone_destroy(xfs_trans_zone);
-       kmem_zone_destroy(xfs_ifork_zone);
-       kmem_zone_destroy(xfs_dabuf_zone);
-       kmem_zone_destroy(xfs_da_state_zone);
-       kmem_zone_destroy(xfs_btree_cur_zone);
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
-       kmem_zone_destroy(xfs_log_ticket_zone);
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_ioend_zone);
-
-}
-
-STATIC int __init
-xfs_init_workqueues(void)
-{
-       /*
-        * max_active is set to 8 to give enough concurency to allow
-        * multiple work operations on each CPU to run. This allows multiple
-        * filesystems to be running sync work concurrently, and scales with
-        * the number of CPUs in the system.
-        */
-       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_syncd_wq)
-               goto out;
-
-       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_ail_wq)
-               goto out_destroy_syncd;
-
-       return 0;
-
-out_destroy_syncd:
-       destroy_workqueue(xfs_syncd_wq);
-out:
-       return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_workqueues(void)
-{
-       destroy_workqueue(xfs_ail_wq);
-       destroy_workqueue(xfs_syncd_wq);
-}
-
-STATIC int __init
-init_xfs_fs(void)
-{
-       int                     error;
-
-       printk(KERN_INFO XFS_VERSION_STRING " with "
-                        XFS_BUILD_OPTIONS " enabled\n");
-
-       xfs_ioend_init();
-       xfs_dir_startup();
-
-       error = xfs_init_zones();
-       if (error)
-               goto out;
-
-       error = xfs_init_workqueues();
-       if (error)
-               goto out_destroy_zones;
-
-       error = xfs_mru_cache_init();
-       if (error)
-               goto out_destroy_wq;
-
-       error = xfs_filestream_init();
-       if (error)
-               goto out_mru_cache_uninit;
-
-       error = xfs_buf_init();
-       if (error)
-               goto out_filestream_uninit;
-
-       error = xfs_init_procfs();
-       if (error)
-               goto out_buf_terminate;
-
-       error = xfs_sysctl_register();
-       if (error)
-               goto out_cleanup_procfs;
-
-       vfs_initquota();
-
-       error = register_filesystem(&xfs_fs_type);
-       if (error)
-               goto out_sysctl_unregister;
-       return 0;
-
- out_sysctl_unregister:
-       xfs_sysctl_unregister();
- out_cleanup_procfs:
-       xfs_cleanup_procfs();
- out_buf_terminate:
-       xfs_buf_terminate();
- out_filestream_uninit:
-       xfs_filestream_uninit();
- out_mru_cache_uninit:
-       xfs_mru_cache_uninit();
- out_destroy_wq:
-       xfs_destroy_workqueues();
- out_destroy_zones:
-       xfs_destroy_zones();
- out:
-       return error;
-}
-
-STATIC void __exit
-exit_xfs_fs(void)
-{
-       vfs_exitquota();
-       unregister_filesystem(&xfs_fs_type);
-       xfs_sysctl_unregister();
-       xfs_cleanup_procfs();
-       xfs_buf_terminate();
-       xfs_filestream_uninit();
-       xfs_mru_cache_uninit();
-       xfs_destroy_workqueues();
-       xfs_destroy_zones();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
deleted file mode 100644 (file)
index 50a3266..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPER_H__
-#define __XFS_SUPER_H__
-
-#include <linux/exportfs.h>
-
-#ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
-extern void xfs_qm_exit(void);
-# define vfs_initquota()       xfs_qm_init()
-# define vfs_exitquota()       xfs_qm_exit()
-#else
-# define vfs_initquota()       do { } while (0)
-# define vfs_exitquota()       do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING                "ACLs, "
-# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
-#else
-# define XFS_ACL_STRING
-# define set_posix_acl_flag(sb)        do { } while (0)
-#endif
-
-#define XFS_SECURITY_STRING    "security attributes, "
-
-#ifdef CONFIG_XFS_RT
-# define XFS_REALTIME_STRING   "realtime, "
-#else
-# define XFS_REALTIME_STRING
-#endif
-
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-#  define XFS_BIGFS_STRING     "large block/inode numbers, "
-# else
-#  define XFS_BIGFS_STRING     "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
-#ifdef DEBUG
-# define XFS_DBG_STRING                "debug"
-#else
-# define XFS_DBG_STRING                "no debug"
-#endif
-
-#define XFS_VERSION_STRING     "SGI XFS"
-#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
-                               XFS_SECURITY_STRING \
-                               XFS_REALTIME_STRING \
-                               XFS_BIGFS_STRING \
-                               XFS_DBG_STRING /* DBG must be last */
-
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_buftarg;
-struct block_device;
-
-extern __uint64_t xfs_max_file_offset(unsigned int);
-
-extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-
-extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
-extern const struct quotactl_ops xfs_quotactl_operations;
-
-#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
-
-#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
deleted file mode 100644 (file)
index 4604f90..0000000
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_fsops.h"
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
-
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- */
-#define XFS_LOOKUP_BATCH       32
-
-STATIC int
-xfs_inode_ag_walk_grab(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-
-       ASSERT(rcu_read_lock_held());
-
-       /*
-        * check for stale RCU freed inode
-        *
-        * If the inode has been reallocated, it doesn't matter if it's not in
-        * the AG we are walking - we are walking for writeback, so if it
-        * passes all the "valid inode" checks and is dirty, then we'll write
-        * it back anyway.  If it has been reallocated and still being
-        * initialised, the XFS_INEW check below will catch it.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!ip->i_ino)
-               goto out_unlock_noent;
-
-       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-               goto out_unlock_noent;
-       spin_unlock(&ip->i_flags_lock);
-
-       /* nothing to sync during shutdown */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return EFSCORRUPTED;
-
-       /* If we can't grab the inode, it must on it's way to reclaim. */
-       if (!igrab(inode))
-               return ENOENT;
-
-       if (is_bad_inode(inode)) {
-               IRELE(ip);
-               return ENOENT;
-       }
-
-       /* inode is valid */
-       return 0;
-
-out_unlock_noent:
-       spin_unlock(&ip->i_flags_lock);
-       return ENOENT;
-}
-
-STATIC int
-xfs_inode_ag_walk(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       uint32_t                first_index;
-       int                     last_error = 0;
-       int                     skipped;
-       int                     done;
-       int                     nr_found;
-
-restart:
-       done = 0;
-       skipped = 0;
-       first_index = 0;
-       nr_found = 0;
-       do {
-               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-               int             error = 0;
-               int             i;
-
-               rcu_read_lock();
-               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH);
-               if (!nr_found) {
-                       rcu_read_unlock();
-                       break;
-               }
-
-               /*
-                * Grab the inodes before we drop the lock. if we found
-                * nothing, nr == 0 and the loop will be skipped.
-                */
-               for (i = 0; i < nr_found; i++) {
-                       struct xfs_inode *ip = batch[i];
-
-                       if (done || xfs_inode_ag_walk_grab(ip))
-                               batch[i] = NULL;
-
-                       /*
-                        * Update the index for the next lookup. Catch
-                        * overflows into the next AG range which can occur if
-                        * we have inodes in the last block of the AG and we
-                        * are currently pointing to the last inode.
-                        *
-                        * Because we may see inodes that are from the wrong AG
-                        * due to RCU freeing and reallocation, only update the
-                        * index if it lies in this AG. It was a race that lead
-                        * us to see this inode, so another lookup from the
-                        * same index will not find it again.
-                        */
-                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
-                               continue;
-                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                               done = 1;
-               }
-
-               /* unlock now we've grabbed the inodes. */
-               rcu_read_unlock();
-
-               for (i = 0; i < nr_found; i++) {
-                       if (!batch[i])
-                               continue;
-                       error = execute(batch[i], pag, flags);
-                       IRELE(batch[i]);
-                       if (error == EAGAIN) {
-                               skipped++;
-                               continue;
-                       }
-                       if (error && last_error != EFSCORRUPTED)
-                               last_error = error;
-               }
-
-               /* bail out if the filesystem is corrupted.  */
-               if (error == EFSCORRUPTED)
-                       break;
-
-               cond_resched();
-
-       } while (nr_found && !done);
-
-       if (skipped) {
-               delay(1);
-               goto restart;
-       }
-       return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
-       struct xfs_mount        *mp,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-
-       ag = 0;
-       while ((pag = xfs_perag_get(mp, ag))) {
-               ag = pag->pag_agno + 1;
-               error = xfs_inode_ag_walk(mp, pag, execute, flags);
-               xfs_perag_put(pag);
-               if (error) {
-                       last_error = error;
-                       if (error == EFSCORRUPTED)
-                               break;
-               }
-       }
-       return XFS_ERROR(last_error);
-}
-
-STATIC int
-xfs_sync_inode_data(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       struct inode            *inode = VFS_I(ip);
-       struct address_space *mapping = inode->i_mapping;
-       int                     error = 0;
-
-       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-               goto out_wait;
-
-       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-               if (flags & SYNC_TRYLOCK)
-                       goto out_wait;
-               xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       }
-
-       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-                               0 : XBF_ASYNC, FI_NONE);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- out_wait:
-       if (flags & SYNC_WAIT)
-               xfs_ioend_wait(ip);
-       return error;
-}
-
-STATIC int
-xfs_sync_inode_attr(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_inode_clean(ip))
-               goto out_unlock;
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(flags & SYNC_WAIT))
-                       goto out_unlock;
-               xfs_iflock(ip);
-       }
-
-       if (xfs_inode_clean(ip)) {
-               xfs_ifunlock(ip);
-               goto out_unlock;
-       }
-
-       error = xfs_iflush(ip, flags);
-
-       /*
-        * We don't want to try again on non-blocking flushes that can't run
-        * again immediately. If an inode really must be written, then that's
-        * what the SYNC_WAIT flag is for.
-        */
-       if (error == EAGAIN) {
-               ASSERT(!(flags & SYNC_WAIT));
-               error = 0;
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       int                     error;
-
-       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
-       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-       if (error)
-               return XFS_ERROR(error);
-
-       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-       return 0;
-}
-
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       ASSERT((flags & ~SYNC_WAIT) == 0);
-
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
-STATIC int
-xfs_sync_fsdata(
-       struct xfs_mount        *mp)
-{
-       struct xfs_buf          *bp;
-
-       /*
-        * If the buffer is pinned then push on the log so we won't get stuck
-        * waiting in the write for someone, maybe ourselves, to flush the log.
-        *
-        * Even though we just pushed the log above, we did not have the
-        * superblock buffer locked at that point so it can become pinned in
-        * between there and here.
-        */
-       bp = xfs_getsb(mp, 0);
-       if (xfs_buf_ispinned(bp))
-               xfs_log_force(mp, 0);
-
-       return xfs_bwrite(mp, bp);
-}
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem, we have
- * two phases to execute. This first phase is syncing the data before we
- * quiesce the filesystem, and the second is flushing all the inodes out after
- * we've waited for all the transactions created by the first phase to
- * complete. The second phase ensures that the inodes are written to their
- * location on disk rather than just existing in transactions in the log. This
- * means after a quiesce there is no log replay required to write the inodes to
- * disk (this is the main difference between a sync and a quiesce).
- */
-/*
- * First stage of freeze - no writers will make progress now we are here,
- * so we flush delwri and delalloc buffers here, then wait for all I/O to
- * complete.  Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
- * because it'll just get dirty again.
- */
-int
-xfs_quiesce_data(
-       struct xfs_mount        *mp)
-{
-       int                     error, error2 = 0;
-
-       xfs_qm_sync(mp, SYNC_TRYLOCK);
-       xfs_qm_sync(mp, SYNC_WAIT);
-
-       /* force out the newly dirtied log buffers */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /* write superblock and hoover up shutdown errors */
-       error = xfs_sync_fsdata(mp);
-
-       /* make sure all delwri buffers are written out */
-       xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-       /* mark the log as covered if needed */
-       if (xfs_log_need_covered(mp))
-               error2 = xfs_fs_log_dummy(mp);
-
-       /* flush data-only devices */
-       if (mp->m_rtdev_targp)
-               XFS_bflush(mp->m_rtdev_targp);
-
-       return error ? error : error2;
-}
-
-STATIC void
-xfs_quiesce_fs(
-       struct xfs_mount        *mp)
-{
-       int     count = 0, pincount;
-
-       xfs_reclaim_inodes(mp, 0);
-       xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
-       /*
-        * This loop must run at least twice.  The first instance of the loop
-        * will flush most meta data but that will generate more meta data
-        * (typically directory updates).  Which then must be flushed and
-        * logged before we can write the unmount record. We also so sync
-        * reclaim of inodes to catch any that the above delwri flush skipped.
-        */
-       do {
-               xfs_reclaim_inodes(mp, SYNC_WAIT);
-               xfs_sync_attr(mp, SYNC_WAIT);
-               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-               if (!pincount) {
-                       delay(50);
-                       count++;
-               }
-       } while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceeding.
- */
-void
-xfs_quiesce_attr(
-       struct xfs_mount        *mp)
-{
-       int     error = 0;
-
-       /* wait for all modifications to complete */
-       while (atomic_read(&mp->m_active_trans) > 0)
-               delay(100);
-
-       /* flush inodes and push all remaining buffers out to disk */
-       xfs_quiesce_fs(mp);
-
-       /*
-        * Just warn here till VFS can correctly support
-        * read-only remount without racing.
-        */
-       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
-       /* Push the superblock and write an unmount record */
-       error = xfs_log_sbcount(mp);
-       if (error)
-               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
-                               "Frozen image may not be consistent.");
-       xfs_log_unmount_write(mp);
-       xfs_unmountfs_writesb(mp);
-}
-
-static void
-xfs_syncd_queue_sync(
-       struct xfs_mount        *mp)
-{
-       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
-                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
-}
-
-/*
- * Every sync period we need to unpin all items, reclaim inodes and sync
- * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle and not frozen.
- */
-STATIC void
-xfs_sync_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_sync_work);
-       int             error;
-
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               /* dgc: errors ignored here */
-               if (mp->m_super->s_frozen == SB_UNFROZEN &&
-                   xfs_log_need_covered(mp))
-                       error = xfs_fs_log_dummy(mp);
-               else
-                       xfs_log_force(mp, 0);
-               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-
-               /* start pushing all the metadata that is currently dirty */
-               xfs_ail_push_all(mp->m_ail);
-       }
-
-       /* queue us up again */
-       xfs_syncd_queue_sync(mp);
-}
-
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs syncd work default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_syncd_queue_reclaim(
-       struct xfs_mount        *mp)
-{
-
-       /*
-        * We can have inodes enter reclaim after we've shut down the syncd
-        * workqueue during unmount, so don't allow reclaim work to be queued
-        * during unmount.
-        */
-       if (!(mp->m_super->s_flags & MS_ACTIVE))
-               return;
-
-       rcu_read_lock();
-       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
-                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-       }
-       rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-STATIC void
-xfs_reclaim_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_reclaim_work);
-
-       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-       xfs_syncd_queue_reclaim(mp);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
- * inode flush in progress no matter how many ENOSPC events are occurring and
- * so will prevent the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       queue_work(xfs_syncd_wq, &mp->m_flush_work);
-       flush_work_sync(&mp->m_flush_work);
-}
-
-STATIC void
-xfs_flush_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(work,
-                                       struct xfs_mount, m_flush_work);
-
-       xfs_sync_data(mp, SYNC_TRYLOCK);
-       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
-
-int
-xfs_syncd_init(
-       struct xfs_mount        *mp)
-{
-       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
-       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-
-       xfs_syncd_queue_sync(mp);
-       xfs_syncd_queue_reclaim(mp);
-
-       return 0;
-}
-
-void
-xfs_syncd_stop(
-       struct xfs_mount        *mp)
-{
-       cancel_delayed_work_sync(&mp->m_sync_work);
-       cancel_delayed_work_sync(&mp->m_reclaim_work);
-       cancel_work_sync(&mp->m_flush_work);
-}
-
-void
-__xfs_inode_set_reclaim_tag(
-       struct xfs_perag        *pag,
-       struct xfs_inode        *ip)
-{
-       radix_tree_tag_set(&pag->pag_ici_root,
-                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                          XFS_ICI_RECLAIM_TAG);
-
-       if (!pag->pag_ici_reclaimable) {
-               /* propagate the reclaim tag up into the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-
-               /* schedule periodic background inode reclaim */
-               xfs_syncd_queue_reclaim(ip->i_mount);
-
-               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-       pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-       xfs_inode_t     *ip)
-{
-       struct xfs_mount *mp = ip->i_mount;
-       struct xfs_perag *pag;
-
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-       spin_lock(&pag->pag_ici_lock);
-       spin_lock(&ip->i_flags_lock);
-       __xfs_inode_set_reclaim_tag(pag, ip);
-       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-       spin_unlock(&ip->i_flags_lock);
-       spin_unlock(&pag->pag_ici_lock);
-       xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       pag->pag_ici_reclaimable--;
-       if (!pag->pag_ici_reclaimable) {
-               /* clear the reclaim tag from the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-}
-
-void
-__xfs_inode_clear_reclaim_tag(
-       xfs_mount_t     *mp,
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       radix_tree_tag_clear(&pag->pag_ici_root,
-                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-       __xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
- * Grab the inode for reclaim exclusively.
- * Return 0 if we grabbed it, non-zero otherwise.
- */
-STATIC int
-xfs_reclaim_inode_grab(
-       struct xfs_inode        *ip,
-       int                     flags)
-{
-       ASSERT(rcu_read_lock_held());
-
-       /* quick check for stale RCU freed inode */
-       if (!ip->i_ino)
-               return 1;
-
-       /*
-        * do some unlocked checks first to avoid unnecessary lock traffic.
-        * The first is a flush lock check, the second is a already in reclaim
-        * check. Only do these checks if we are not going to block on locks.
-        */
-       if ((flags & SYNC_TRYLOCK) &&
-           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
-               return 1;
-       }
-
-       /*
-        * The radix tree lock here protects a thread in xfs_iget from racing
-        * with us starting reclaim on the inode.  Once we have the
-        * XFS_IRECLAIM flag set it will not touch us.
-        *
-        * Due to RCU lookup, we may find inodes that have been freed and only
-        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
-        * aren't candidates for reclaim at all, so we must check the
-        * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
-           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
-               /* not a reclaim candidate. */
-               spin_unlock(&ip->i_flags_lock);
-               return 1;
-       }
-       __xfs_iflags_set(ip, XFS_IRECLAIM);
-       spin_unlock(&ip->i_flags_lock);
-       return 0;
-}
-
-/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
- *
- *     inode state          iflush ret         required action
- *      ---------------      ----------         ---------------
- *     bad                     -               reclaim
- *     shutdown                EIO             unpin and reclaim
- *     clean, unpinned         0               reclaim
- *     stale, unpinned         0               reclaim
- *     clean, pinned(*)        0               requeue
- *     stale, pinned           EAGAIN          requeue
- *     dirty, delwri ok        0               requeue
- *     dirty, delwri blocked   EAGAIN          requeue
- *     dirty, sync flush       0               reclaim
- *
- * (*) dgc: I don't think the clean, pinned state is possible but it gets
- * handled anyway given the order of checks implemented.
- *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
- * Also, because we get the flush lock first, we know that any inode that has
- * been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri otherwise we would loop forever requeuing clean inodes as
- * we cannot tell apart a successful delwri flush and a clean inode from the
- * return value of xfs_iflush().
- *
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes first before
- * trying to reclaim them to minimise the amount of time spent waiting. For
- * background relaim, we just requeue the inode for the next pass.
- *
- * Hence the order of actions after gaining the locks should be:
- *     bad             => reclaim
- *     shutdown        => unpin and reclaim
- *     pinned, delwri  => requeue
- *     pinned, sync    => unpin
- *     stale           => reclaim
- *     clean           => reclaim
- *     dirty, delwri   => flush and requeue
- *     dirty, sync     => flush, wait and reclaim
- */
-STATIC int
-xfs_reclaim_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     sync_mode)
-{
-       int     error;
-
-restart:
-       error = 0;
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(sync_mode & SYNC_WAIT))
-                       goto out;
-               xfs_iflock(ip);
-       }
-
-       if (is_bad_inode(VFS_I(ip)))
-               goto reclaim;
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_iunpin_wait(ip);
-               goto reclaim;
-       }
-       if (xfs_ipincount(ip)) {
-               if (!(sync_mode & SYNC_WAIT)) {
-                       xfs_ifunlock(ip);
-                       goto out;
-               }
-               xfs_iunpin_wait(ip);
-       }
-       if (xfs_iflags_test(ip, XFS_ISTALE))
-               goto reclaim;
-       if (xfs_inode_clean(ip))
-               goto reclaim;
-
-       /*
-        * Now we have an inode that needs flushing.
-        *
-        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
-        * reclaim as we can deadlock with inode cluster removal.
-        * xfs_ifree_cluster() can lock the inode buffer before it locks the
-        * ip->i_lock, and we are doing the exact opposite here. As a result,
-        * doing a blocking xfs_itobp() to get the cluster buffer will result
-        * in an ABBA deadlock with xfs_ifree_cluster().
-        *
-        * As xfs_ifree_cluser() must gather all inodes that are active in the
-        * cache to mark them stale, if we hit this case we don't actually want
-        * to do IO here - we want the inode marked stale so we can simply
-        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
-        * just unlock the inode, back off and try again. Hopefully the next
-        * pass through will see the stale flag set on the inode.
-        */
-       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
-       if (sync_mode & SYNC_WAIT) {
-               if (error == EAGAIN) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       /* backoff longer than in xfs_ifree_cluster */
-                       delay(2);
-                       goto restart;
-               }
-               xfs_iflock(ip);
-               goto reclaim;
-       }
-
-       /*
-        * When we have to flush an inode but don't have SYNC_WAIT set, we
-        * flush the inode out using a delwri buffer and wait for the next
-        * call into reclaim to find it in a clean state instead of waiting for
-        * it now. We also don't return errors here - if the error is transient
-        * then the next reclaim pass will flush the inode, and if the error
-        * is permanent then the next sync reclaim will reclaim the inode and
-        * pass on the error.
-        */
-       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_warn(ip->i_mount,
-                       "inode 0x%llx background reclaim flush failed with %d",
-                       (long long)ip->i_ino, error);
-       }
-out:
-       xfs_iflags_clear(ip, XFS_IRECLAIM);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * We could return EAGAIN here to make reclaim rescan the inode tree in
-        * a short while. However, this just burns CPU time scanning the tree
-        * waiting for IO to complete and xfssyncd never goes back to the idle
-        * state. Instead, return 0 to let the next scheduled background reclaim
-        * attempt to reclaim the inode again.
-        */
-       return 0;
-
-reclaim:
-       xfs_ifunlock(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       XFS_STATS_INC(xs_ig_reclaims);
-       /*
-        * Remove the inode from the per-AG radix tree.
-        *
-        * Because radix_tree_delete won't complain even if the item was never
-        * added to the tree assert that it's been there before to catch
-        * problems with the inode life time early on.
-        */
-       spin_lock(&pag->pag_ici_lock);
-       if (!radix_tree_delete(&pag->pag_ici_root,
-                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
-               ASSERT(0);
-       __xfs_inode_clear_reclaim(pag, ip);
-       spin_unlock(&pag->pag_ici_lock);
-
-       /*
-        * Here we do an (almost) spurious inode lock in order to coordinate
-        * with inode cache radix tree lookups.  This is because the lookup
-        * can reference the inodes in the cache without taking references.
-        *
-        * We make that OK here by ensuring that we wait until the inode is
-        * unlocked after the lookup before we go ahead and free it.  We get
-        * both the ilock and the iolock because the code may need to drop the
-        * ilock one but will still hold the iolock.
-        */
-       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_qm_dqdetach(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-       xfs_inode_free(ip);
-       return error;
-
-}
-
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- */
-int
-xfs_reclaim_inodes_ag(
-       struct xfs_mount        *mp,
-       int                     flags,
-       int                     *nr_to_scan)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-       int                     trylock = flags & SYNC_TRYLOCK;
-       int                     skipped;
-
-restart:
-       ag = 0;
-       skipped = 0;
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               unsigned long   first_index = 0;
-               int             done = 0;
-               int             nr_found = 0;
-
-               ag = pag->pag_agno + 1;
-
-               if (trylock) {
-                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-                               skipped++;
-                               xfs_perag_put(pag);
-                               continue;
-                       }
-                       first_index = pag->pag_ici_reclaim_cursor;
-               } else
-                       mutex_lock(&pag->pag_ici_reclaim_lock);
-
-               do {
-                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-                       int     i;
-
-                       rcu_read_lock();
-                       nr_found = radix_tree_gang_lookup_tag(
-                                       &pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH,
-                                       XFS_ICI_RECLAIM_TAG);
-                       if (!nr_found) {
-                               done = 1;
-                               rcu_read_unlock();
-                               break;
-                       }
-
-                       /*
-                        * Grab the inodes before we drop the lock. if we found
-                        * nothing, nr == 0 and the loop will be skipped.
-                        */
-                       for (i = 0; i < nr_found; i++) {
-                               struct xfs_inode *ip = batch[i];
-
-                               if (done || xfs_reclaim_inode_grab(ip, flags))
-                                       batch[i] = NULL;
-
-                               /*
-                                * Update the index for the next lookup. Catch
-                                * overflows into the next AG range which can
-                                * occur if we have inodes in the last block of
-                                * the AG and we are currently pointing to the
-                                * last inode.
-                                *
-                                * Because we may see inodes that are from the
-                                * wrong AG due to RCU freeing and
-                                * reallocation, only update the index if it
-                                * lies in this AG. It was a race that lead us
-                                * to see this inode, so another lookup from
-                                * the same index will not find it again.
-                                */
-                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-                                                               pag->pag_agno)
-                                       continue;
-                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                                       done = 1;
-                       }
-
-                       /* unlock now we've grabbed the inodes. */
-                       rcu_read_unlock();
-
-                       for (i = 0; i < nr_found; i++) {
-                               if (!batch[i])
-                                       continue;
-                               error = xfs_reclaim_inode(batch[i], pag, flags);
-                               if (error && last_error != EFSCORRUPTED)
-                                       last_error = error;
-                       }
-
-                       *nr_to_scan -= XFS_LOOKUP_BATCH;
-
-                       cond_resched();
-
-               } while (nr_found && !done && *nr_to_scan > 0);
-
-               if (trylock && !done)
-                       pag->pag_ici_reclaim_cursor = first_index;
-               else
-                       pag->pag_ici_reclaim_cursor = 0;
-               mutex_unlock(&pag->pag_ici_reclaim_lock);
-               xfs_perag_put(pag);
-       }
-
-       /*
-        * if we skipped any AG, and we still have scan count remaining, do
-        * another pass this time using blocking reclaim semantics (i.e
-        * waiting on the reclaim locks and ignoring the reclaim cursors). This
-        * ensure that when we get more reclaimers than AGs we block rather
-        * than spin trying to execute reclaim.
-        */
-       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
-               trylock = 0;
-               goto restart;
-       }
-       return XFS_ERROR(last_error);
-}
-
-int
-xfs_reclaim_inodes(
-       xfs_mount_t     *mp,
-       int             mode)
-{
-       int             nr_to_scan = INT_MAX;
-
-       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
-}
-
-/*
- * Scan a certain number of inodes for reclaim.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-void
-xfs_reclaim_inodes_nr(
-       struct xfs_mount        *mp,
-       int                     nr_to_scan)
-{
-       /* kick background reclaimer and push the AIL */
-       xfs_syncd_queue_reclaim(mp);
-       xfs_ail_push_all(mp->m_ail);
-
-       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
-}
-
-/*
- * Return the number of reclaimable inodes in the filesystem for
- * the shrinker to determine how much to reclaim.
- */
-int
-xfs_reclaim_inodes_count(
-       struct xfs_mount        *mp)
-{
-       struct xfs_perag        *pag;
-       xfs_agnumber_t          ag = 0;
-       int                     reclaimable = 0;
-
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               ag = pag->pag_agno + 1;
-               reclaimable += pag->pag_ici_reclaimable;
-               xfs_perag_put(pag);
-       }
-       return reclaimable;
-}
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
deleted file mode 100644 (file)
index 941202e..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef XFS_SYNC_H
-#define XFS_SYNC_H 1
-
-struct xfs_mount;
-struct xfs_perag;
-
-#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
-#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
-
-extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
-
-int xfs_syncd_init(struct xfs_mount *mp);
-void xfs_syncd_stop(struct xfs_mount *mp);
-
-int xfs_quiesce_data(struct xfs_mount *mp);
-void xfs_quiesce_attr(struct xfs_mount *mp);
-
-void xfs_flush_inodes(struct xfs_inode *ip);
-
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
-int xfs_reclaim_inodes_count(struct xfs_mount *mp);
-void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
-
-void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
-                               struct xfs_inode *ip);
-
-int xfs_sync_inode_grab(struct xfs_inode *ip);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
-       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
deleted file mode 100644 (file)
index ee2d2ad..0000000
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include "xfs_error.h"
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             c, ret, *valp = ctl->data;
-       __uint32_t      vn_active;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-
-       if (!ret && write && *valp) {
-               xfs_notice(NULL, "Clearing xfsstats");
-               for_each_possible_cpu(c) {
-                       preempt_disable();
-                       /* save vn_active, it's a universal truth! */
-                       vn_active = per_cpu(xfsstats, c).vn_active;
-                       memset(&per_cpu(xfsstats, c), 0,
-                              sizeof(struct xfsstats));
-                       per_cpu(xfsstats, c).vn_active = vn_active;
-                       preempt_enable();
-               }
-               xfs_stats_clear = 0;
-       }
-
-       return ret;
-}
-
-STATIC int
-xfs_panic_mask_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             ret, *valp = ctl->data;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-       if (!ret && write) {
-               xfs_panic_mask = *valp;
-#ifdef DEBUG
-               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
-       }
-       return ret;
-}
-#endif /* CONFIG_PROC_FS */
-
-static ctl_table xfs_table[] = {
-       {
-               .procname       = "irix_sgid_inherit",
-               .data           = &xfs_params.sgid_inherit.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.sgid_inherit.min,
-               .extra2         = &xfs_params.sgid_inherit.max
-       },
-       {
-               .procname       = "irix_symlink_mode",
-               .data           = &xfs_params.symlink_mode.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.symlink_mode.min,
-               .extra2         = &xfs_params.symlink_mode.max
-       },
-       {
-               .procname       = "panic_mask",
-               .data           = &xfs_params.panic_mask.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_panic_mask_proc_handler,
-               .extra1         = &xfs_params.panic_mask.min,
-               .extra2         = &xfs_params.panic_mask.max
-       },
-
-       {
-               .procname       = "error_level",
-               .data           = &xfs_params.error_level.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.error_level.min,
-               .extra2         = &xfs_params.error_level.max
-       },
-       {
-               .procname       = "xfssyncd_centisecs",
-               .data           = &xfs_params.syncd_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.syncd_timer.min,
-               .extra2         = &xfs_params.syncd_timer.max
-       },
-       {
-               .procname       = "inherit_sync",
-               .data           = &xfs_params.inherit_sync.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_sync.min,
-               .extra2         = &xfs_params.inherit_sync.max
-       },
-       {
-               .procname       = "inherit_nodump",
-               .data           = &xfs_params.inherit_nodump.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodump.min,
-               .extra2         = &xfs_params.inherit_nodump.max
-       },
-       {
-               .procname       = "inherit_noatime",
-               .data           = &xfs_params.inherit_noatim.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_noatim.min,
-               .extra2         = &xfs_params.inherit_noatim.max
-       },
-       {
-               .procname       = "xfsbufd_centisecs",
-               .data           = &xfs_params.xfs_buf_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_timer.min,
-               .extra2         = &xfs_params.xfs_buf_timer.max
-       },
-       {
-               .procname       = "age_buffer_centisecs",
-               .data           = &xfs_params.xfs_buf_age.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_age.min,
-               .extra2         = &xfs_params.xfs_buf_age.max
-       },
-       {
-               .procname       = "inherit_nosymlinks",
-               .data           = &xfs_params.inherit_nosym.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nosym.min,
-               .extra2         = &xfs_params.inherit_nosym.max
-       },
-       {
-               .procname       = "rotorstep",
-               .data           = &xfs_params.rotorstep.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.rotorstep.min,
-               .extra2         = &xfs_params.rotorstep.max
-       },
-       {
-               .procname       = "inherit_nodefrag",
-               .data           = &xfs_params.inherit_nodfrg.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodfrg.min,
-               .extra2         = &xfs_params.inherit_nodfrg.max
-       },
-       {
-               .procname       = "filestream_centisecs",
-               .data           = &xfs_params.fstrm_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.fstrm_timer.min,
-               .extra2         = &xfs_params.fstrm_timer.max,
-       },
-       /* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
-       {
-               .procname       = "stats_clear",
-               .data           = &xfs_params.stats_clear.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_stats_clear_proc_handler,
-               .extra1         = &xfs_params.stats_clear.min,
-               .extra2         = &xfs_params.stats_clear.max
-       },
-#endif /* CONFIG_PROC_FS */
-
-       {}
-};
-
-static ctl_table xfs_dir_table[] = {
-       {
-               .procname       = "xfs",
-               .mode           = 0555,
-               .child          = xfs_table
-       },
-       {}
-};
-
-static ctl_table xfs_root_table[] = {
-       {
-               .procname       = "fs",
-               .mode           = 0555,
-               .child          = xfs_dir_table
-       },
-       {}
-};
-
-int
-xfs_sysctl_register(void)
-{
-       xfs_table_header = register_sysctl_table(xfs_root_table);
-       if (!xfs_table_header)
-               return -ENOMEM;
-       return 0;
-}
-
-void
-xfs_sysctl_unregister(void)
-{
-       unregister_sysctl_table(xfs_table_header);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
deleted file mode 100644 (file)
index b9937d4..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SYSCTL_H__
-#define __XFS_SYSCTL_H__
-
-#include <linux/sysctl.h>
-
-/*
- * Tunable xfs parameters
- */
-
-typedef struct xfs_sysctl_val {
-       int min;
-       int val;
-       int max;
-} xfs_sysctl_val_t;
-
-typedef struct xfs_param {
-       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
-                                        * not a member of parent dir GID. */
-       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
-       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
-       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
-       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
-       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
-       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
-       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
-       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
-       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
-       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
-       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
-       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
-       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
-       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
-} xfs_param_t;
-
-/*
- * xfs_error_level:
- *
- * How much error reporting will be done when internal problems are
- * encountered.  These problems normally return an EFSCORRUPTED to their
- * caller, with no other information reported.
- *
- * 0   No error reports
- * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
- * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
- *     additional errors that are known to not cause shutdowns)
- *
- * xfs_panic_mask bit 0x8 turns the error reports into panics
- */
-
-enum {
-       /* XFS_REFCACHE_SIZE = 1 */
-       /* XFS_REFCACHE_PURGE = 2 */
-       /* XFS_RESTRICT_CHOWN = 3 */
-       XFS_SGID_INHERIT = 4,
-       XFS_SYMLINK_MODE = 5,
-       XFS_PANIC_MASK = 6,
-       XFS_ERRLEVEL = 7,
-       XFS_SYNCD_TIMER = 8,
-       /* XFS_PROBE_DMAPI = 9 */
-       /* XFS_PROBE_IOOPS = 10 */
-       /* XFS_PROBE_QUOTA = 11 */
-       XFS_STATS_CLEAR = 12,
-       XFS_INHERIT_SYNC = 13,
-       XFS_INHERIT_NODUMP = 14,
-       XFS_INHERIT_NOATIME = 15,
-       XFS_BUF_TIMER = 16,
-       XFS_BUF_AGE = 17,
-       /* XFS_IO_BYPASS = 18 */
-       XFS_INHERIT_NOSYM = 19,
-       XFS_ROTORSTEP = 20,
-       XFS_INHERIT_NODFRG = 21,
-       XFS_FILESTREAM_TIMER = 22,
-};
-
-extern xfs_param_t     xfs_params;
-
-#ifdef CONFIG_SYSCTL
-extern int xfs_sysctl_register(void);
-extern void xfs_sysctl_unregister(void);
-#else
-# define xfs_sysctl_register()         (0)
-# define xfs_sysctl_unregister()       do { } while (0)
-#endif /* CONFIG_SYSCTL */
-
-#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
deleted file mode 100644 (file)
index 88d25d4..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_mount.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-
-/*
- * We include this last to have the helpers above available for the trace
- * event implementations.
- */
-#define CREATE_TRACE_POINTS
-#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
deleted file mode 100644 (file)
index 690fc7a..0000000
+++ /dev/null
@@ -1,1746 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM xfs
-
-#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_XFS_H
-
-#include <linux/tracepoint.h>
-
-struct xfs_agf;
-struct xfs_alloc_arg;
-struct xfs_attr_list_context;
-struct xfs_buf_log_item;
-struct xfs_da_args;
-struct xfs_da_node_entry;
-struct xfs_dquot;
-struct xlog_ticket;
-struct log;
-struct xlog_recover;
-struct xlog_recover_item;
-struct xfs_buf_log_format;
-struct xfs_inode_log_format;
-
-DECLARE_EVENT_CLASS(xfs_attr_list_class,
-       TP_PROTO(struct xfs_attr_list_context *ctx),
-       TP_ARGS(ctx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
-       )
-)
-
-#define DEFINE_ATTR_LIST_EVENT(name) \
-DEFINE_EVENT(xfs_attr_list_class, name, \
-       TP_PROTO(struct xfs_attr_list_context *ctx), \
-       TP_ARGS(ctx))
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
-
-DECLARE_EVENT_CLASS(xfs_perag_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agno, refcount, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, refcount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->refcount = refcount;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->refcount,
-                 (char *)__entry->caller_ip)
-);
-
-#define DEFINE_PERAG_REF_EVENT(name)   \
-DEFINE_EVENT(xfs_perag_class, name,    \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
-                unsigned long caller_ip),                                      \
-       TP_ARGS(mp, agno, refcount, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
-DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
-
-TRACE_EVENT(xfs_attr_list_node_descend,
-       TP_PROTO(struct xfs_attr_list_context *ctx,
-                struct xfs_da_node_entry *btree),
-       TP_ARGS(ctx, btree),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-               __field(u32, bt_hashval)
-               __field(u32, bt_before)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-               __entry->bt_hashval = be32_to_cpu(btree->hashval);
-               __entry->bt_before = be32_to_cpu(btree->before);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s "
-                 "node hashval %u, node before %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
-                  __entry->bt_hashval,
-                  __entry->bt_before)
-);
-
-TRACE_EVENT(xfs_iext_insert,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-       TP_ARGS(ip, idx, r, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r->br_startoff;
-               __entry->startblock = r->br_startblock;
-               __entry->blockcount = r->br_blockcount;
-               __entry->state = r->br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (char *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_bmap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
-                unsigned long caller_ip),
-       TP_ARGS(ip, idx, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
-                                               ip->i_afp : &ip->i_df;
-               struct xfs_bmbt_irec    r;
-
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r.br_startoff;
-               __entry->startblock = r.br_startblock;
-               __entry->blockcount = r.br_blockcount;
-               __entry->state = r.br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (char *)__entry->caller_ip)
-)
-
-#define DEFINE_BMAP_EVENT(name) \
-DEFINE_EVENT(xfs_bmap_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip, idx, state, caller_ip))
-DEFINE_BMAP_EVENT(xfs_iext_remove);
-DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
-DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
-
-DECLARE_EVENT_CLASS(xfs_buf_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
-       TP_ARGS(bp, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_EVENT(name) \
-DEFINE_EVENT(xfs_buf_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
-       TP_ARGS(bp, caller_ip))
-DEFINE_BUF_EVENT(xfs_buf_init);
-DEFINE_BUF_EVENT(xfs_buf_free);
-DEFINE_BUF_EVENT(xfs_buf_hold);
-DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
-DEFINE_BUF_EVENT(xfs_buf_bdwrite);
-DEFINE_BUF_EVENT(xfs_buf_lock);
-DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_trylock);
-DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_iowait);
-DEFINE_BUF_EVENT(xfs_buf_iowait_done);
-DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
-DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-DEFINE_BUF_EVENT(xfs_buf_error_relse);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-
-/* not really buffer traces, but the buf provides useful information */
-DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
-
-/* pass flags explicitly */
-DECLARE_EVENT_CLASS(xfs_buf_flags_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
-       TP_ARGS(bp, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->flags = flags;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_FLAGS_EVENT(name) \
-DEFINE_EVENT(xfs_buf_flags_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
-       TP_ARGS(bp, flags, caller_ip))
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
-
-TRACE_EVENT(xfs_buf_ioerror,
-       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
-       TP_ARGS(bp, error, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(unsigned, flags)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(int, error)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->error = error;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __entry->error,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_buf_item_class,
-       TP_PROTO(struct xfs_buf_log_item *bip),
-       TP_ARGS(bip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, buf_bno)
-               __field(size_t, buf_len)
-               __field(int, buf_hold)
-               __field(int, buf_pincount)
-               __field(int, buf_lockval)
-               __field(unsigned, buf_flags)
-               __field(unsigned, bli_recur)
-               __field(int, bli_refcount)
-               __field(unsigned, bli_flags)
-               __field(void *, li_desc)
-               __field(unsigned, li_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = bip->bli_buf->b_target->bt_dev;
-               __entry->bli_flags = bip->bli_flags;
-               __entry->bli_recur = bip->bli_recur;
-               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
-               __entry->buf_bno = bip->bli_buf->b_bn;
-               __entry->buf_len = bip->bli_buf->b_buffer_length;
-               __entry->buf_flags = bip->bli_buf->b_flags;
-               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
-               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-               __entry->buf_lockval = bip->bli_buf->b_sema.count;
-               __entry->li_desc = bip->bli_item.li_desc;
-               __entry->li_flags = bip->bli_item.li_flags;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s recur %d refcount %d bliflags %s "
-                 "lidesc 0x%p liflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->buf_bno,
-                 __entry->buf_len,
-                 __entry->buf_hold,
-                 __entry->buf_pincount,
-                 __entry->buf_lockval,
-                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
-                 __entry->bli_recur,
-                 __entry->bli_refcount,
-                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
-                 __entry->li_desc,
-                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_BUF_ITEM_EVENT(name) \
-DEFINE_EVENT(xfs_buf_item_class, name, \
-       TP_PROTO(struct xfs_buf_log_item *bip), \
-       TP_ARGS(bip))
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-
-DECLARE_EVENT_CLASS(xfs_lock_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
-                unsigned long caller_ip),
-       TP_ARGS(ip,  lock_flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, lock_flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->lock_flags = lock_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_LOCK_EVENT(name) \
-DEFINE_EVENT(xfs_lock_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip,  lock_flags, caller_ip))
-DEFINE_LOCK_EVENT(xfs_ilock);
-DEFINE_LOCK_EVENT(xfs_ilock_nowait);
-DEFINE_LOCK_EVENT(xfs_ilock_demote);
-DEFINE_LOCK_EVENT(xfs_iunlock);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
-       TP_PROTO(struct xfs_inode *ip),
-       TP_ARGS(ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino)
-)
-
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
-       TP_PROTO(struct xfs_inode *ip), \
-       TP_ARGS(ip))
-DEFINE_INODE_EVENT(xfs_iget_skip);
-DEFINE_INODE_EVENT(xfs_iget_reclaim);
-DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
-DEFINE_INODE_EVENT(xfs_iget_hit);
-DEFINE_INODE_EVENT(xfs_iget_miss);
-
-DEFINE_INODE_EVENT(xfs_getattr);
-DEFINE_INODE_EVENT(xfs_setattr);
-DEFINE_INODE_EVENT(xfs_readlink);
-DEFINE_INODE_EVENT(xfs_alloc_file_space);
-DEFINE_INODE_EVENT(xfs_free_file_space);
-DEFINE_INODE_EVENT(xfs_readdir);
-#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_get_acl);
-#endif
-DEFINE_INODE_EVENT(xfs_vm_bmap);
-DEFINE_INODE_EVENT(xfs_file_ioctl);
-DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
-DEFINE_INODE_EVENT(xfs_ioctl_setattr);
-DEFINE_INODE_EVENT(xfs_file_fsync);
-DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
-
-DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
-DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-
-DECLARE_EVENT_CLASS(xfs_iref_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
-       TP_ARGS(ip, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, count)
-               __field(int, pincount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->count = atomic_read(&VFS_I(ip)->i_count);
-               __entry->pincount = atomic_read(&ip->i_pincount);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->count,
-                 __entry->pincount,
-                 (char *)__entry->caller_ip)
-)
-
-#define DEFINE_IREF_EVENT(name) \
-DEFINE_EVENT(xfs_iref_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
-       TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
-DEFINE_IREF_EVENT(xfs_irele);
-DEFINE_IREF_EVENT(xfs_inode_pin);
-DEFINE_IREF_EVENT(xfs_inode_unpin);
-DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
-
-DECLARE_EVENT_CLASS(xfs_namespace_class,
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
-       TP_ARGS(dp, name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, dp_ino)
-               __dynamic_array(char, name, name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(dp)->i_sb->s_dev;
-               __entry->dp_ino = dp->i_ino;
-               memcpy(__get_str(name), name->name, name->len);
-       ),
-       TP_printk("dev %d:%d dp ino 0x%llx name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->dp_ino,
-                 __get_str(name))
-)
-
-#define DEFINE_NAMESPACE_EVENT(name) \
-DEFINE_EVENT(xfs_namespace_class, name, \
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
-       TP_ARGS(dp, name))
-DEFINE_NAMESPACE_EVENT(xfs_remove);
-DEFINE_NAMESPACE_EVENT(xfs_link);
-DEFINE_NAMESPACE_EVENT(xfs_lookup);
-DEFINE_NAMESPACE_EVENT(xfs_create);
-DEFINE_NAMESPACE_EVENT(xfs_symlink);
-
-TRACE_EVENT(xfs_rename,
-       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
-                struct xfs_name *src_name, struct xfs_name *target_name),
-       TP_ARGS(src_dp, target_dp, src_name, target_name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, src_dp_ino)
-               __field(xfs_ino_t, target_dp_ino)
-               __dynamic_array(char, src_name, src_name->len)
-               __dynamic_array(char, target_name, target_name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
-               __entry->src_dp_ino = src_dp->i_ino;
-               __entry->target_dp_ino = target_dp->i_ino;
-               memcpy(__get_str(src_name), src_name->name, src_name->len);
-               memcpy(__get_str(target_name), target_name->name, target_name->len);
-       ),
-       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
-                 " src name %s target name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->src_dp_ino,
-                 __entry->target_dp_ino,
-                 __get_str(src_name),
-                 __get_str(target_name))
-)
-
-DECLARE_EVENT_CLASS(xfs_dquot_class,
-       TP_PROTO(struct xfs_dquot *dqp),
-       TP_ARGS(dqp),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(u32, id)
-               __field(unsigned, flags)
-               __field(unsigned, nrefs)
-               __field(unsigned long long, res_bcount)
-               __field(unsigned long long, bcount)
-               __field(unsigned long long, icount)
-               __field(unsigned long long, blk_hardlimit)
-               __field(unsigned long long, blk_softlimit)
-               __field(unsigned long long, ino_hardlimit)
-               __field(unsigned long long, ino_softlimit)
-       ), \
-       TP_fast_assign(
-               __entry->dev = dqp->q_mount->m_super->s_dev;
-               __entry->id = be32_to_cpu(dqp->q_core.d_id);
-               __entry->flags = dqp->dq_flags;
-               __entry->nrefs = dqp->q_nrefs;
-               __entry->res_bcount = dqp->q_res_bcount;
-               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
-               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
-               __entry->blk_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               __entry->blk_softlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               __entry->ino_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-               __entry->ino_softlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
-       ),
-       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
-                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
-                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->id,
-                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
-                 __entry->nrefs,
-                 __entry->res_bcount,
-                 __entry->bcount,
-                 __entry->blk_hardlimit,
-                 __entry->blk_softlimit,
-                 __entry->icount,
-                 __entry->ino_hardlimit,
-                 __entry->ino_softlimit)
-)
-
-#define DEFINE_DQUOT_EVENT(name) \
-DEFINE_EVENT(xfs_dquot_class, name, \
-       TP_PROTO(struct xfs_dquot *dqp), \
-       TP_ARGS(dqp))
-DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
-DEFINE_DQUOT_EVENT(xfs_dqattach_found);
-DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
-DEFINE_DQUOT_EVENT(xfs_dqalloc);
-DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
-DEFINE_DQUOT_EVENT(xfs_dqread);
-DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
-DEFINE_DQUOT_EVENT(xfs_dqget_hit);
-DEFINE_DQUOT_EVENT(xfs_dqget_miss);
-DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
-DEFINE_DQUOT_EVENT(xfs_dqput_free);
-DEFINE_DQUOT_EVENT(xfs_dqrele);
-DEFINE_DQUOT_EVENT(xfs_dqflush);
-DEFINE_DQUOT_EVENT(xfs_dqflush_force);
-DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-
-DECLARE_EVENT_CLASS(xfs_loggrant_class,
-       TP_PROTO(struct log *log, struct xlog_ticket *tic),
-       TP_ARGS(log, tic),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned, trans_type)
-               __field(char, ocnt)
-               __field(char, cnt)
-               __field(int, curr_res)
-               __field(int, unit_res)
-               __field(unsigned int, flags)
-               __field(int, reserveq)
-               __field(int, writeq)
-               __field(int, grant_reserve_cycle)
-               __field(int, grant_reserve_bytes)
-               __field(int, grant_write_cycle)
-               __field(int, grant_write_bytes)
-               __field(int, curr_cycle)
-               __field(int, curr_block)
-               __field(xfs_lsn_t, tail_lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->trans_type = tic->t_trans_type;
-               __entry->ocnt = tic->t_ocnt;
-               __entry->cnt = tic->t_cnt;
-               __entry->curr_res = tic->t_curr_res;
-               __entry->unit_res = tic->t_unit_res;
-               __entry->flags = tic->t_flags;
-               __entry->reserveq = list_empty(&log->l_reserveq);
-               __entry->writeq = list_empty(&log->l_writeq);
-               xlog_crack_grant_head(&log->l_grant_reserve_head,
-                               &__entry->grant_reserve_cycle,
-                               &__entry->grant_reserve_bytes);
-               xlog_crack_grant_head(&log->l_grant_write_head,
-                               &__entry->grant_write_cycle,
-                               &__entry->grant_write_bytes);
-               __entry->curr_cycle = log->l_curr_cycle;
-               __entry->curr_block = log->l_curr_block;
-               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
-       ),
-       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
-                 "t_unit_res %u t_flags %s reserveq %s "
-                 "writeq %s grant_reserve_cycle %d "
-                 "grant_reserve_bytes %d grant_write_cycle %d "
-                 "grant_write_bytes %d curr_cycle %d curr_block %d "
-                 "tail_cycle %d tail_block %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
-                 __entry->ocnt,
-                 __entry->cnt,
-                 __entry->curr_res,
-                 __entry->unit_res,
-                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
-                 __entry->reserveq ? "empty" : "active",
-                 __entry->writeq ? "empty" : "active",
-                 __entry->grant_reserve_cycle,
-                 __entry->grant_reserve_bytes,
-                 __entry->grant_write_cycle,
-                 __entry->grant_write_bytes,
-                 __entry->curr_cycle,
-                 __entry->curr_block,
-                 CYCLE_LSN(__entry->tail_lsn),
-                 BLOCK_LSN(__entry->tail_lsn)
-       )
-)
-
-#define DEFINE_LOGGRANT_EVENT(name) \
-DEFINE_EVENT(xfs_loggrant_class, name, \
-       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
-       TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
-DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-
-DECLARE_EVENT_CLASS(xfs_file_class,
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
-       TP_ARGS(ip, count, offset, flags),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count 0x%zx ioflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
-)
-
-#define DEFINE_RW_EVENT(name)          \
-DEFINE_EVENT(xfs_file_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
-       TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
-DEFINE_RW_EVENT(xfs_file_buffered_write);
-DEFINE_RW_EVENT(xfs_file_direct_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
-
-DECLARE_EVENT_CLASS(xfs_page_class,
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
-       TP_ARGS(inode, page, off),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(pgoff_t, pgoff)
-               __field(loff_t, size)
-               __field(unsigned long, offset)
-               __field(int, delalloc)
-               __field(int, unwritten)
-       ),
-       TP_fast_assign(
-               int delalloc = -1, unwritten = -1;
-
-               if (page_has_buffers(page))
-                       xfs_count_page_state(page, &delalloc, &unwritten);
-               __entry->dev = inode->i_sb->s_dev;
-               __entry->ino = XFS_I(inode)->i_ino;
-               __entry->pgoff = page_offset(page);
-               __entry->size = i_size_read(inode);
-               __entry->offset = off;
-               __entry->delalloc = delalloc;
-               __entry->unwritten = unwritten;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
-                 "delalloc %d unwritten %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->pgoff,
-                 __entry->size,
-                 __entry->offset,
-                 __entry->delalloc,
-                 __entry->unwritten)
-)
-
-#define DEFINE_PAGE_EVENT(name)                \
-DEFINE_EVENT(xfs_page_class, name,     \
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
-       TP_ARGS(inode, page, off))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_imap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-                int type, struct xfs_bmbt_irec *irec),
-       TP_ARGS(ip, offset, count, type, irec),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, size)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, type)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->type = type;
-               __entry->startoff = irec ? irec->br_startoff : 0;
-               __entry->startblock = irec ? irec->br_startblock : 0;
-               __entry->blockcount = irec ? irec->br_blockcount : 0;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd type %s "
-                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_symbolic(__entry->type, XFS_IO_TYPES),
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount)
-)
-
-#define DEFINE_IOMAP_EVENT(name)       \
-DEFINE_EVENT(xfs_imap_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
-                int type, struct xfs_bmbt_irec *irec),         \
-       TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-
-DECLARE_EVENT_CLASS(xfs_simple_io_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
-       TP_ARGS(ip, offset, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, isize)
-               __field(loff_t, disize)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->isize = ip->i_size;
-               __entry->disize = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->isize,
-                 __entry->disize,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count)
-);
-
-#define DEFINE_SIMPLE_IO_EVENT(name)   \
-DEFINE_EVENT(xfs_simple_io_class, name,        \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
-       TP_ARGS(ip, offset, count))
-DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
-DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
-
-DECLARE_EVENT_CLASS(xfs_itrunc_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
-       TP_ARGS(ip, new_size),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = new_size;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size)
-)
-
-#define DEFINE_ITRUNC_EVENT(name) \
-DEFINE_EVENT(xfs_itrunc_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
-       TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
-
-TRACE_EVENT(xfs_pagecache_inval,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
-       TP_ARGS(ip, start, finish),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_off_t, start)
-               __field(xfs_off_t, finish)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->start = start;
-               __entry->finish = finish;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->start,
-                 __entry->finish)
-);
-
-TRACE_EVENT(xfs_bunmap,
-       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
-                int flags, unsigned long caller_ip),
-       TP_ARGS(ip, bno, len, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fileoff_t, bno)
-               __field(xfs_filblks_t, len)
-               __field(unsigned long, caller_ip)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->bno = bno;
-               __entry->len = len;
-               __entry->caller_ip = caller_ip;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->bno,
-                 __entry->len,
-                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
-                 (void *)__entry->caller_ip)
-
-);
-
-DECLARE_EVENT_CLASS(xfs_busy_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-);
-#define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-
-TRACE_EVENT(xfs_alloc_busy_trim,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len,
-                xfs_agblock_t tbno, xfs_extlen_t tlen),
-       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(xfs_agblock_t, tbno)
-               __field(xfs_extlen_t, tlen)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->tbno = tbno;
-               __entry->tlen = tlen;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->tbno,
-                 __entry->tlen)
-);
-
-TRACE_EVENT(xfs_trans_commit_lsn,
-       TP_PROTO(struct xfs_trans *trans),
-       TP_ARGS(trans),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(struct xfs_trans *, tp)
-               __field(xfs_lsn_t, lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = trans->t_mountp->m_super->s_dev;
-               __entry->tp = trans;
-               __entry->lsn = trans->t_commit_lsn;
-       ),
-       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tp,
-                 __entry->lsn)
-);
-
-TRACE_EVENT(xfs_agf,
-       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agf, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, flags)
-               __field(__u32, length)
-               __field(__u32, bno_root)
-               __field(__u32, cnt_root)
-               __field(__u32, bno_level)
-               __field(__u32, cnt_level)
-               __field(__u32, flfirst)
-               __field(__u32, fllast)
-               __field(__u32, flcount)
-               __field(__u32, freeblks)
-               __field(__u32, longest)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = be32_to_cpu(agf->agf_seqno),
-               __entry->flags = flags;
-               __entry->length = be32_to_cpu(agf->agf_length),
-               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
-               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
-               __entry->bno_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
-               __entry->cnt_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
-               __entry->flfirst = be32_to_cpu(agf->agf_flfirst),
-               __entry->fllast = be32_to_cpu(agf->agf_fllast),
-               __entry->flcount = be32_to_cpu(agf->agf_flcount),
-               __entry->freeblks = be32_to_cpu(agf->agf_freeblks),
-               __entry->longest = be32_to_cpu(agf->agf_longest);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
-                 "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
-                 __entry->length,
-                 __entry->bno_root,
-                 __entry->cnt_root,
-                 __entry->bno_level,
-                 __entry->cnt_level,
-                 __entry->flfirst,
-                 __entry->fllast,
-                 __entry->flcount,
-                 __entry->freeblks,
-                 __entry->longest,
-                 (void *)__entry->caller_ip)
-);
-
-TRACE_EVENT(xfs_free_extent,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
-                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
-       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(int, isfl)
-               __field(int, haveleft)
-               __field(int, haveright)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->isfl = isfl;
-               __entry->haveleft = haveleft;
-               __entry->haveright = haveright;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->isfl,
-                 __entry->haveleft ?
-                       (__entry->haveright ? "both" : "left") :
-                       (__entry->haveright ? "right" : "none"))
-
-);
-
-DECLARE_EVENT_CLASS(xfs_alloc_class,
-       TP_PROTO(struct xfs_alloc_arg *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, minlen)
-               __field(xfs_extlen_t, maxlen)
-               __field(xfs_extlen_t, mod)
-               __field(xfs_extlen_t, prod)
-               __field(xfs_extlen_t, minleft)
-               __field(xfs_extlen_t, total)
-               __field(xfs_extlen_t, alignment)
-               __field(xfs_extlen_t, minalignslop)
-               __field(xfs_extlen_t, len)
-               __field(short, type)
-               __field(short, otype)
-               __field(char, wasdel)
-               __field(char, wasfromfl)
-               __field(char, isfl)
-               __field(char, userdata)
-               __field(xfs_fsblock_t, firstblock)
-       ),
-       TP_fast_assign(
-               __entry->dev = args->mp->m_super->s_dev;
-               __entry->agno = args->agno;
-               __entry->agbno = args->agbno;
-               __entry->minlen = args->minlen;
-               __entry->maxlen = args->maxlen;
-               __entry->mod = args->mod;
-               __entry->prod = args->prod;
-               __entry->minleft = args->minleft;
-               __entry->total = args->total;
-               __entry->alignment = args->alignment;
-               __entry->minalignslop = args->minalignslop;
-               __entry->len = args->len;
-               __entry->type = args->type;
-               __entry->otype = args->otype;
-               __entry->wasdel = args->wasdel;
-               __entry->wasfromfl = args->wasfromfl;
-               __entry->isfl = args->isfl;
-               __entry->userdata = args->userdata;
-               __entry->firstblock = args->firstblock;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
-                 "prod %u minleft %u total %u alignment %u minalignslop %u "
-                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
-                 "userdata %d firstblock 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->minlen,
-                 __entry->maxlen,
-                 __entry->mod,
-                 __entry->prod,
-                 __entry->minleft,
-                 __entry->total,
-                 __entry->alignment,
-                 __entry->minalignslop,
-                 __entry->len,
-                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
-                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
-                 __entry->wasdel,
-                 __entry->wasfromfl,
-                 __entry->isfl,
-                 __entry->userdata,
-                 (unsigned long long)__entry->firstblock)
-)
-
-#define DEFINE_ALLOC_EVENT(name) \
-DEFINE_EVENT(xfs_alloc_class, name, \
-       TP_PROTO(struct xfs_alloc_arg *args), \
-       TP_ARGS(args))
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-
-DECLARE_EVENT_CLASS(xfs_dir2_class,
-       TP_PROTO(struct xfs_da_args *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __dynamic_array(char, name, args->namelen)
-               __field(int, namelen)
-               __field(xfs_dahash_t, hashval)
-               __field(xfs_ino_t, inumber)
-               __field(int, op_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               if (args->namelen)
-                       memcpy(__get_str(name), args->name, args->namelen);
-               __entry->namelen = args->namelen;
-               __entry->hashval = args->hashval;
-               __entry->inumber = args->inumber;
-               __entry->op_flags = args->op_flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
-                 "inumber 0x%llx op_flags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->namelen,
-                 __entry->namelen ? __get_str(name) : NULL,
-                 __entry->namelen,
-                 __entry->hashval,
-                 __entry->inumber,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
-)
-
-#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
-       TP_PROTO(struct xfs_da_args *args), \
-       TP_ARGS(args))
-DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
-
-DECLARE_EVENT_CLASS(xfs_dir2_space_class,
-       TP_PROTO(struct xfs_da_args *args, int idx),
-       TP_ARGS(args, idx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, idx)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->idx = idx;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->idx)
-)
-
-#define DEFINE_DIR2_SPACE_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_space_class, name, \
-       TP_PROTO(struct xfs_da_args *args, int idx), \
-       TP_ARGS(args, idx))
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
-
-TRACE_EVENT(xfs_dir2_leafn_moveents,
-       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
-       TP_ARGS(args, src_idx, dst_idx, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, src_idx)
-               __field(int, dst_idx)
-               __field(int, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->src_idx = src_idx;
-               __entry->dst_idx = dst_idx;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
-                 "src_idx %d dst_idx %d count %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->src_idx,
-                 __entry->dst_idx,
-                 __entry->count)
-);
-
-#define XFS_SWAPEXT_INODES \
-       { 0,    "target" }, \
-       { 1,    "temp" }
-
-#define XFS_INODE_FORMAT_STR \
-       { 0,    "invalid" }, \
-       { 1,    "local" }, \
-       { 2,    "extent" }, \
-       { 3,    "btree" }
-
-DECLARE_EVENT_CLASS(xfs_swap_extent_class,
-       TP_PROTO(struct xfs_inode *ip, int which),
-       TP_ARGS(ip, which),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(int, which)
-               __field(xfs_ino_t, ino)
-               __field(int, format)
-               __field(int, nex)
-               __field(int, max_nex)
-               __field(int, broot_size)
-               __field(int, fork_off)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->which = which;
-               __entry->ino = ip->i_ino;
-               __entry->format = ip->i_d.di_format;
-               __entry->nex = ip->i_d.di_nextents;
-               __entry->max_nex = ip->i_df.if_ext_max;
-               __entry->broot_size = ip->i_df.if_broot_bytes;
-               __entry->fork_off = XFS_IFORK_BOFF(ip);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
-                 "Max in-fork extents %d, broot size %d, fork offset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
-                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
-                 __entry->nex,
-                 __entry->max_nex,
-                 __entry->broot_size,
-                 __entry->fork_off)
-)
-
-#define DEFINE_SWAPEXT_EVENT(name) \
-DEFINE_EVENT(xfs_swap_extent_class, name, \
-       TP_PROTO(struct xfs_inode *ip, int which), \
-       TP_ARGS(ip, which))
-
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-       TP_PROTO(struct log *log, struct xlog_recover *trans,
-               struct xlog_recover_item *item, int pass),
-       TP_ARGS(log, trans, item, pass),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned long, item)
-               __field(xlog_tid_t, tid)
-               __field(int, type)
-               __field(int, pass)
-               __field(int, count)
-               __field(int, total)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->item = (unsigned long)item;
-               __entry->tid = trans->r_log_tid;
-               __entry->type = ITEM_TYPE(item);
-               __entry->pass = pass;
-               __entry->count = item->ri_cnt;
-               __entry->total = item->ri_total;
-       ),
-       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
-                 "item region count/total %d/%d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tid,
-                 __entry->pass,
-                 (void *)__entry->item,
-                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
-                 __entry->count,
-                 __entry->total)
-)
-
-#define DEFINE_LOG_RECOVER_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_item_class, name, \
-       TP_PROTO(struct log *log, struct xlog_recover *trans, \
-               struct xlog_recover_item *item, int pass), \
-       TP_ARGS(log, trans, item, pass))
-
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
-       TP_ARGS(log, buf_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(__int64_t, blkno)
-               __field(unsigned short, len)
-               __field(unsigned short, flags)
-               __field(unsigned short, size)
-               __field(unsigned int, map_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->blkno = buf_f->blf_blkno;
-               __entry->len = buf_f->blf_len;
-               __entry->flags = buf_f->blf_flags;
-               __entry->size = buf_f->blf_size;
-               __entry->map_size = buf_f->blf_map_size;
-       ),
-       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
-                       "map_size %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->flags,
-                 __entry->size,
-                 __entry->map_size)
-)
-
-#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
-       TP_ARGS(log, buf_f))
-
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
-       TP_ARGS(log, in_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(unsigned short, size)
-               __field(int, fields)
-               __field(unsigned short, asize)
-               __field(unsigned short, dsize)
-               __field(__int64_t, blkno)
-               __field(int, len)
-               __field(int, boffset)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->ino = in_f->ilf_ino;
-               __entry->size = in_f->ilf_size;
-               __entry->fields = in_f->ilf_fields;
-               __entry->asize = in_f->ilf_asize;
-               __entry->dsize = in_f->ilf_dsize;
-               __entry->blkno = in_f->ilf_blkno;
-               __entry->len = in_f->ilf_len;
-               __entry->boffset = in_f->ilf_boffset;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
-                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->fields,
-                 __entry->asize,
-                 __entry->dsize,
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->boffset)
-)
-#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
-       TP_ARGS(log, in_f))
-
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
-
-DECLARE_EVENT_CLASS(xfs_discard_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-)
-
-#define DEFINE_DISCARD_EVENT(name) \
-DEFINE_EVENT(xfs_discard_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_DISCARD_EVENT(xfs_discard_extent);
-DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
-DEFINE_DISCARD_EVENT(xfs_discard_exclude);
-DEFINE_DISCARD_EVENT(xfs_discard_busy);
-
-#endif /* _TRACE_XFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE xfs_trace
-#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
deleted file mode 100644 (file)
index 7c220b4..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-/*
- * Return values for xfs_inactive.  A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define        VN_INACTIVE_CACHE       0
-#define        VN_INACTIVE_NOCACHE     1
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT    0x00004         /* bypass page cache */
-#define IO_INVIS       0x00020         /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
-       { IO_ISDIRECT,  "DIRECT" }, \
-       { IO_INVIS,     "INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE                        0       /* none */
-#define FI_REMAPF              1       /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
-                                          Prevent VM access to the pages until
-                                          the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
-                                       PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
deleted file mode 100644 (file)
index 87d3e03..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2008 Christoph Hellwig.
- * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-
-static int
-xfs_xattr_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-       int error, asize = size;
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (!size) {
-               xflags |= ATTR_KERNOVAL;
-               value = NULL;
-       }
-
-       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
-       if (error)
-               return error;
-       return asize;
-}
-
-static int
-xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
-               size_t size, int flags, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (flags & XATTR_CREATE)
-               xflags |= ATTR_CREATE;
-       if (flags & XATTR_REPLACE)
-               xflags |= ATTR_REPLACE;
-
-       if (!value)
-               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
-       return -xfs_attr_set(ip, (unsigned char *)name,
-                               (void *)value, size, xflags);
-}
-
-static const struct xattr_handler xfs_xattr_user_handler = {
-       .prefix = XATTR_USER_PREFIX,
-       .flags  = 0, /* no flags implies user namespace */
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_trusted_handler = {
-       .prefix = XATTR_TRUSTED_PREFIX,
-       .flags  = ATTR_ROOT,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_security_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .flags  = ATTR_SECURE,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-const struct xattr_handler *xfs_xattr_handlers[] = {
-       &xfs_xattr_user_handler,
-       &xfs_xattr_trusted_handler,
-       &xfs_xattr_security_handler,
-#ifdef CONFIG_XFS_POSIX_ACL
-       &xfs_xattr_acl_access_handler,
-       &xfs_xattr_acl_default_handler,
-#endif
-       NULL
-};
-
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return sizeof("security");
-       else if (flags & XFS_ATTR_ROOT)
-               return sizeof("trusted");
-       else
-               return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return xfs_xattr_security_handler.prefix;
-       else if (flags & XFS_ATTR_ROOT)
-               return xfs_xattr_trusted_handler.prefix;
-       else
-               return xfs_xattr_user_handler.prefix;
-}
-
-static int
-xfs_xattr_put_listent(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
-       char *offset;
-       int arraytop;
-
-       ASSERT(context->count >= 0);
-
-       /*
-        * Only show root namespace entries if we are actually allowed to
-        * see them.
-        */
-       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
-               return 0;
-
-       arraytop = context->count + prefix_len + namelen + 1;
-       if (arraytop > context->firstu) {
-               context->count = -1;    /* insufficient space */
-               return 1;
-       }
-       offset = (char *)context->alist + context->count;
-       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
-       offset += prefix_len;
-       strncpy(offset, (char *)name, namelen);                 /* real name */
-       offset += namelen;
-       *offset = '\0';
-       context->count += prefix_len + namelen + 1;
-       return 0;
-}
-
-static int
-xfs_xattr_put_listent_sizes(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
-       return 0;
-}
-
-static int
-list_one_attr(const char *name, const size_t len, void *data,
-               size_t size, ssize_t *result)
-{
-       char *p = data + *result;
-
-       *result += len;
-       if (!size)
-               return 0;
-       if (*result > size)
-               return -ERANGE;
-
-       strcpy(p, name);
-       return 0;
-}
-
-ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
-{
-       struct xfs_attr_list_context context;
-       struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = dentry->d_inode;
-       int                     error;
-
-       /*
-        * First read the regular on-disk attributes.
-        */
-       memset(&context, 0, sizeof(context));
-       context.dp = XFS_I(inode);
-       context.cursor = &cursor;
-       context.resynch = 1;
-       context.alist = data;
-       context.bufsize = size;
-       context.firstu = context.bufsize;
-
-       if (size)
-               context.put_listent = xfs_xattr_put_listent;
-       else
-               context.put_listent = xfs_xattr_put_listent_sizes;
-
-       xfs_attr_list_int(&context);
-       if (context.count < 0)
-               return -ERANGE;
-
-       /*
-        * Then add the two synthetic ACL attributes.
-        */
-       if (posix_acl_access_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
-                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       if (posix_acl_default_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
-                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       return context.count;
-}
diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h
new file mode 100644 (file)
index 0000000..ff6a198
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+typedef struct {
+       struct rw_semaphore     mr_lock;
+#ifdef DEBUG
+       int                     mr_writer;
+#endif
+} mrlock_t;
+
+#ifdef DEBUG
+#define mrinit(mrp, name)      \
+       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name)      \
+       do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
+#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
+#define mrfree(mrp)            do { } while (0)
+
+static inline void mraccess_nested(mrlock_t *mrp, int subclass)
+{
+       down_read_nested(&mrp->mr_lock, subclass);
+}
+
+static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
+{
+       down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+       return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+       if (!down_write_trylock(&mrp->mr_lock))
+               return 0;
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+       return 1;
+}
+
+static inline void mrunlock_excl(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       up_write(&mrp->mr_lock);
+}
+
+static inline void mrunlock_shared(mrlock_t *mrp)
+{
+       up_read(&mrp->mr_lock);
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       downgrade_write(&mrp->mr_lock);
+}
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
deleted file mode 100644 (file)
index db62959..0000000
+++ /dev/null
@@ -1,1454 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-
-/*
-   LOCK ORDER
-
-   inode lock              (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-                     has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-static struct lock_class_key xfs_dquot_other_class;
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-       xfs_mount_t  *mp,
-       xfs_dqid_t   id,
-       uint         type)
-{
-       xfs_dquot_t     *dqp;
-       boolean_t       brandnewdquot;
-
-       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-       dqp->dq_flags = type;
-       dqp->q_core.d_id = cpu_to_be32(id);
-       dqp->q_mount = mp;
-
-       /*
-        * No need to re-initialize these if this is a reclaimed dquot.
-        */
-       if (brandnewdquot) {
-               INIT_LIST_HEAD(&dqp->q_freelist);
-               mutex_init(&dqp->q_qlock);
-               init_waitqueue_head(&dqp->q_pinwait);
-
-               /*
-                * Because we want to use a counting completion, complete
-                * the flush completion once to allow a single access to
-                * the flush completion without blocking.
-                */
-               init_completion(&dqp->q_flush);
-               complete(&dqp->q_flush);
-
-               trace_xfs_dqinit(dqp);
-       } else {
-               /*
-                * Only the q_core portion was zeroed in dqreclaim_one().
-                * So, we need to reset others.
-                */
-               dqp->q_nrefs = 0;
-               dqp->q_blkno = 0;
-               INIT_LIST_HEAD(&dqp->q_mplist);
-               INIT_LIST_HEAD(&dqp->q_hashlist);
-               dqp->q_bufoffset = 0;
-               dqp->q_fileoffset = 0;
-               dqp->q_transp = NULL;
-               dqp->q_gdquot = NULL;
-               dqp->q_res_bcount = 0;
-               dqp->q_res_icount = 0;
-               dqp->q_res_rtbcount = 0;
-               atomic_set(&dqp->q_pincount, 0);
-               dqp->q_hash = NULL;
-               ASSERT(list_empty(&dqp->q_freelist));
-
-               trace_xfs_dqreuse(dqp);
-       }
-
-       /*
-        * In either case we need to make sure group quotas have a different
-        * lock class than user quotas, to make sure lockdep knows we can
-        * locks of one of each at the same time.
-        */
-       if (!(type & XFS_DQ_USER))
-               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-       /*
-        * log item gets initialized later
-        */
-       return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(list_empty(&dqp->q_freelist));
-
-       mutex_destroy(&dqp->q_qlock);
-       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
-       atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_dqblk_t     *d)
-{
-       /*
-        * Caller has zero'd the entire dquot 'chunk' already.
-        */
-       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-       d->dd_diskdq.d_id = cpu_to_be32(id);
-       d->dd_diskdq.d_flags = type;
-}
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       xfs_quotainfo_t         *q = mp->m_quotainfo;
-
-       ASSERT(d->d_id);
-
-       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
-               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
-       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
-               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
-       if (q->qi_isoftlimit && !d->d_ino_softlimit)
-               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.  They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       ASSERT(d->d_id);
-
-#ifdef DEBUG
-       if (d->d_blk_hardlimit)
-               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
-                      be64_to_cpu(d->d_blk_hardlimit));
-       if (d->d_ino_hardlimit)
-               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
-                      be64_to_cpu(d->d_ino_hardlimit));
-       if (d->d_rtb_hardlimit)
-               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
-                      be64_to_cpu(d->d_rtb_hardlimit));
-#endif
-
-       if (!d->d_btimer) {
-               if ((d->d_blk_softlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_softlimit))) ||
-                   (d->d_blk_hardlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_btimelimit);
-               } else {
-                       d->d_bwarns = 0;
-               }
-       } else {
-               if ((!d->d_blk_softlimit ||
-                    (be64_to_cpu(d->d_bcount) <
-                     be64_to_cpu(d->d_blk_softlimit))) &&
-                   (!d->d_blk_hardlimit ||
-                   (be64_to_cpu(d->d_bcount) <
-                    be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = 0;
-               }
-       }
-
-       if (!d->d_itimer) {
-               if ((d->d_ino_softlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_softlimit))) ||
-                   (d->d_ino_hardlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_itimelimit);
-               } else {
-                       d->d_iwarns = 0;
-               }
-       } else {
-               if ((!d->d_ino_softlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_softlimit)))  &&
-                   (!d->d_ino_hardlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = 0;
-               }
-       }
-
-       if (!d->d_rtbtimer) {
-               if ((d->d_rtb_softlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_softlimit))) ||
-                   (d->d_rtb_hardlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_rtbtimelimit);
-               } else {
-                       d->d_rtbwarns = 0;
-               }
-       } else {
-               if ((!d->d_rtb_softlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_softlimit))) &&
-                   (!d->d_rtb_hardlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = 0;
-               }
-       }
-}
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_buf_t       *bp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_dqblk_t     *d;
-       int             curid, i;
-
-       ASSERT(tp);
-       ASSERT(xfs_buf_islocked(bp));
-
-       d = bp->b_addr;
-
-       /*
-        * ID of the first dquot in the block - id's are zero based.
-        */
-       curid = id - (id % q->qi_dqperchunk);
-       ASSERT(curid >= 0);
-       memset(d, 0, BBTOB(q->qi_dqchunklen));
-       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-               xfs_qm_dqinit_core(curid, type, d);
-       xfs_trans_dquot_buf(tp, bp,
-                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
-                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
-                            XFS_BLF_GDQUOT_BUF)));
-       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
-}
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
-       xfs_trans_t     **tpp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       xfs_inode_t     *quotip,
-       xfs_fileoff_t   offset_fsb,
-       xfs_buf_t       **O_bpp)
-{
-       xfs_fsblock_t   firstblock;
-       xfs_bmap_free_t flist;
-       xfs_bmbt_irec_t map;
-       int             nmaps, error, committed;
-       xfs_buf_t       *bp;
-       xfs_trans_t     *tp = *tpp;
-
-       ASSERT(tp != NULL);
-
-       trace_xfs_dqalloc(dqp);
-
-       /*
-        * Initialize the bmap freelist prior to calling bmapi code.
-        */
-       xfs_bmap_init(&flist, &firstblock);
-       xfs_ilock(quotip, XFS_ILOCK_EXCL);
-       /*
-        * Return if this type of quotas is turned off while we didn't
-        * have an inode lock
-        */
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-               return (ESRCH);
-       }
-
-       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
-       nmaps = 1;
-       if ((error = xfs_bmapi(tp, quotip,
-                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-                             &firstblock,
-                             XFS_QM_DQALLOC_SPACE_RES(mp),
-                             &map, &nmaps, &flist))) {
-               goto error0;
-       }
-       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(nmaps == 1);
-       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-              (map.br_startblock != HOLESTARTBLOCK));
-
-       /*
-        * Keep track of the blkno to save a lookup later
-        */
-       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-       /* now we can just get the buffer (there's nothing to read yet) */
-       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-                              dqp->q_blkno,
-                              mp->m_quotainfo->qi_dqchunklen,
-                              0);
-       if (!bp || (error = xfs_buf_geterror(bp)))
-               goto error1;
-       /*
-        * Make a chunk of dquots out of this buffer and log
-        * the entire thing.
-        */
-       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
-                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
-       /*
-        * xfs_bmap_finish() may commit the current transaction and
-        * start a second transaction if the freelist is not empty.
-        *
-        * Since we still want to modify this buffer, we need to
-        * ensure that the buffer is not released on commit of
-        * the first transaction and ensure the buffer is added to the
-        * second transaction.
-        *
-        * If there is only one transaction then don't stop the buffer
-        * from being released when it commits later on.
-        */
-
-       xfs_trans_bhold(tp, bp);
-
-       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
-               goto error1;
-       }
-
-       if (committed) {
-               tp = *tpp;
-               xfs_trans_bjoin(tp, bp);
-       } else {
-               xfs_trans_bhold_release(tp, bp);
-       }
-
-       *O_bpp = bp;
-       return 0;
-
-      error1:
-       xfs_bmap_cancel(&flist);
-      error0:
-       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
-       return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
-       xfs_trans_t             **tpp,
-       xfs_dquot_t             *dqp,
-       xfs_disk_dquot_t        **O_ddpp,
-       xfs_buf_t               **O_bpp,
-       uint                    flags)
-{
-       xfs_bmbt_irec_t map;
-       int             nmaps = 1, error;
-       xfs_buf_t       *bp;
-       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_disk_dquot_t *ddq;
-       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
-       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
-
-       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-
-       xfs_ilock(quotip, XFS_ILOCK_SHARED);
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               /*
-                * Return if this type of quotas is turned off while we
-                * didn't have the quota inode lock.
-                */
-               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-               return ESRCH;
-       }
-
-       /*
-        * Find the block map; no allocations yet
-        */
-       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-                         NULL, 0, &map, &nmaps, NULL);
-
-       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-       if (error)
-               return error;
-
-       ASSERT(nmaps == 1);
-       ASSERT(map.br_blockcount == 1);
-
-       /*
-        * Offset of dquot in the (fixed sized) dquot chunk.
-        */
-       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-               sizeof(xfs_dqblk_t);
-
-       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-       if (map.br_startblock == HOLESTARTBLOCK) {
-               /*
-                * We don't allocate unless we're asked to
-                */
-               if (!(flags & XFS_QMOPT_DQALLOC))
-                       return ENOENT;
-
-               ASSERT(tp);
-               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-                                       dqp->q_fileoffset, &bp);
-               if (error)
-                       return error;
-               tp = *tpp;
-       } else {
-               trace_xfs_dqtobp_read(dqp);
-
-               /*
-                * store the blkno etc so that we don't have to do the
-                * mapping all the time
-                */
-               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                          dqp->q_blkno,
-                                          mp->m_quotainfo->qi_dqchunklen,
-                                          0, &bp);
-               if (error || !bp)
-                       return XFS_ERROR(error);
-       }
-
-       ASSERT(xfs_buf_islocked(bp));
-
-       /*
-        * calculate the location of the dquot inside the buffer.
-        */
-       ddq = bp->b_addr + dqp->q_bufoffset;
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot...
-        */
-       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
-                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
-                          "dqtobp");
-       if (error) {
-               if (!(flags & XFS_QMOPT_DQREPAIR)) {
-                       xfs_trans_brelse(tp, bp);
-                       return XFS_ERROR(EIO);
-               }
-       }
-
-       *O_bpp = bp;
-       *O_ddpp = ddq;
-
-       return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
-       xfs_trans_t     **tpp,
-       xfs_dqid_t      id,
-       xfs_dquot_t     *dqp,   /* dquot to get filled in */
-       uint            flags)
-{
-       xfs_disk_dquot_t *ddqp;
-       xfs_buf_t        *bp;
-       int              error;
-       xfs_trans_t      *tp;
-
-       ASSERT(tpp);
-
-       trace_xfs_dqread(dqp);
-
-       /*
-        * get a pointer to the on-disk dquot and the buffer containing it
-        * dqp already knows its own type (GROUP/USER).
-        */
-       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-               return (error);
-       }
-       tp = *tpp;
-
-       /* copy everything from disk dquot to the incore dquot */
-       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-       xfs_qm_dquot_logitem_init(dqp);
-
-       /*
-        * Reservation counters are defined as reservation plus current usage
-        * to avoid having to add every time.
-        */
-       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
-       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
-       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
-       /* Mark the buf so that this will stay incore a little longer */
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
-       /*
-        * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
-        * So we need to release with xfs_trans_brelse().
-        * The strategy here is identical to that of inodes; we lock
-        * the dquot in xfs_qm_dqget() before making it accessible to
-        * others. This is because dquots, like inodes, need a good level of
-        * concurrency, and we don't want to take locks on the entire buffers
-        * for dquot accesses.
-        * Note also that the dquot buffer may even be dirty at this point, if
-        * this particular dquot was repaired. We still aren't afraid to
-        * brelse it because we have the changes incore.
-        */
-       ASSERT(xfs_buf_islocked(bp));
-       xfs_trans_brelse(tp, bp);
-
-       return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,      /* gid or uid, depending on type */
-       uint            type,    /* UDQUOT or GDQUOT */
-       uint            flags,   /* DQALLOC, DQREPAIR */
-       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-       xfs_trans_t     *tp;
-       int             cancelflags=0;
-
-       dqp = xfs_qm_dqinit(mp, id, type);
-       tp = NULL;
-       if (flags & XFS_QMOPT_DQALLOC) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-                               XFS_WRITE_LOG_RES(mp) +
-                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-                               128,
-                               0,
-                               XFS_TRANS_PERM_LOG_RES,
-                               XFS_WRITE_LOG_COUNT);
-               if (error) {
-                       cancelflags = 0;
-                       goto error0;
-               }
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-       }
-
-       /*
-        * Read it from disk; xfs_dqread() takes care of
-        * all the necessary initialization of dquot's fields (locks, etc)
-        */
-       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-               /*
-                * This can happen if quotas got turned off (ESRCH),
-                * or if the dquot didn't exist on disk and we ask to
-                * allocate (ENOENT).
-                */
-               trace_xfs_dqread_fail(dqp);
-               cancelflags |= XFS_TRANS_ABORT;
-               goto error0;
-       }
-       if (tp) {
-               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-                       goto error1;
-       }
-
-       *O_dqpp = dqp;
-       return (0);
-
- error0:
-       ASSERT(error);
-       if (tp)
-               xfs_trans_cancel(tp, cancelflags);
- error1:
-       xfs_qm_dqdestroy(dqp);
-       *O_dqpp = NULL;
-       return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       xfs_dqhash_t            *qh,
-       xfs_dquot_t             **O_dqpp)
-{
-       xfs_dquot_t             *dqp;
-       uint                    flist_locked;
-
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-
-       flist_locked = B_FALSE;
-
-       /*
-        * Traverse the hashchain looking for a match
-        */
-       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-               /*
-                * We already have the hashlock. We don't need the
-                * dqlock to look at the id field of the dquot, since the
-                * id can't be modified without the hashlock anyway.
-                */
-               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-                       trace_xfs_dqlookup_found(dqp);
-
-                       /*
-                        * All in core dquots must be on the dqlist of mp
-                        */
-                       ASSERT(!list_empty(&dqp->q_mplist));
-
-                       xfs_dqlock(dqp);
-                       if (dqp->q_nrefs == 0) {
-                               ASSERT(!list_empty(&dqp->q_freelist));
-                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-                                       trace_xfs_dqlookup_want(dqp);
-
-                                       /*
-                                        * We may have raced with dqreclaim_one()
-                                        * (and lost). So, flag that we don't
-                                        * want the dquot to be reclaimed.
-                                        */
-                                       dqp->dq_flags |= XFS_DQ_WANT;
-                                       xfs_dqunlock(dqp);
-                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       xfs_dqlock(dqp);
-                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
-                               }
-                               flist_locked = B_TRUE;
-                       }
-
-                       /*
-                        * id couldn't have changed; we had the hashlock all
-                        * along
-                        */
-                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-                       if (flist_locked) {
-                               if (dqp->q_nrefs != 0) {
-                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       flist_locked = B_FALSE;
-                               } else {
-                                       /* take it off the freelist */
-                                       trace_xfs_dqlookup_freelist(dqp);
-                                       list_del_init(&dqp->q_freelist);
-                                       xfs_Gqm->qm_dqfrlist_cnt--;
-                               }
-                       }
-
-                       XFS_DQHOLD(dqp);
-
-                       if (flist_locked)
-                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       /*
-                        * move the dquot to the front of the hashchain
-                        */
-                       ASSERT(mutex_is_locked(&qh->qh_lock));
-                       list_move(&dqp->q_hashlist, &qh->qh_list);
-                       trace_xfs_dqlookup_done(dqp);
-                       *O_dqpp = dqp;
-                       return 0;
-               }
-       }
-
-       *O_dqpp = NULL;
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-       return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,      /* locked inode (optional) */
-       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
-       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
-       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
-{
-       xfs_dquot_t     *dqp;
-       xfs_dqhash_t    *h;
-       uint            version;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
-           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-               return (ESRCH);
-       }
-       h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
-       if (xfs_do_dqerror) {
-               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
-                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-                       xfs_debug(mp, "Returning error in dqget");
-                       return (EIO);
-               }
-       }
-#endif
-
- again:
-
-#ifdef DEBUG
-       ASSERT(type == XFS_DQ_USER ||
-              type == XFS_DQ_PROJ ||
-              type == XFS_DQ_GROUP);
-       if (ip) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               if (type == XFS_DQ_USER)
-                       ASSERT(ip->i_udquot == NULL);
-               else
-                       ASSERT(ip->i_gdquot == NULL);
-       }
-#endif
-       mutex_lock(&h->qh_lock);
-
-       /*
-        * Look in the cache (hashtable).
-        * The chain is kept locked during lookup.
-        */
-       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
-               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-               /*
-                * The dquot was found, moved to the front of the chain,
-                * taken off the freelist if it was on it, and locked
-                * at this point. Just unlock the hashchain and return.
-                */
-               ASSERT(*O_dqpp);
-               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-               mutex_unlock(&h->qh_lock);
-               trace_xfs_dqget_hit(*O_dqpp);
-               return (0);     /* success */
-       }
-       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
-       /*
-        * Dquot cache miss. We don't want to keep the inode lock across
-        * a (potential) disk read. Also we don't want to deal with the lock
-        * ordering between quotainode and this inode. OTOH, dropping the inode
-        * lock here means dealing with a chown that can happen before
-        * we re-acquire the lock.
-        */
-       if (ip)
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * Save the hashchain version stamp, and unlock the chain, so that
-        * we don't keep the lock across a disk read
-        */
-       version = h->qh_version;
-       mutex_unlock(&h->qh_lock);
-
-       /*
-        * Allocate the dquot on the kernel heap, and read the ondisk
-        * portion off the disk. Also, do all the necessary initialization
-        * This can return ENOENT if dquot didn't exist on disk and we didn't
-        * ask it to allocate; ESRCH if quotas got turned off suddenly.
-        */
-       if ((error = xfs_qm_idtodq(mp, id, type,
-                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-                                          XFS_QMOPT_DOWARN),
-                                 &dqp))) {
-               if (ip)
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-               return (error);
-       }
-
-       /*
-        * See if this is mount code calling to look at the overall quota limits
-        * which are stored in the id == 0 user or group's dquot.
-        * Since we may not have done a quotacheck by this point, just return
-        * the dquot without attaching it to any hashtables, lists, etc, or even
-        * taking a reference.
-        * The caller must dqdestroy this once done.
-        */
-       if (flags & XFS_QMOPT_DQSUSER) {
-               ASSERT(id == 0);
-               ASSERT(! ip);
-               goto dqret;
-       }
-
-       /*
-        * Dquot lock comes after hashlock in the lock ordering
-        */
-       if (ip) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * A dquot could be attached to this inode by now, since
-                * we had dropped the ilock.
-                */
-               if (type == XFS_DQ_USER) {
-                       if (!XFS_IS_UQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_udquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_udquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               } else {
-                       if (!XFS_IS_OQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_gdquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_gdquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               }
-       }
-
-       /*
-        * Hashlock comes after ilock in lock order
-        */
-       mutex_lock(&h->qh_lock);
-       if (version != h->qh_version) {
-               xfs_dquot_t *tmpdqp;
-               /*
-                * Now, see if somebody else put the dquot in the
-                * hashtable before us. This can happen because we didn't
-                * keep the hashchain lock. We don't have to worry about
-                * lock order between the two dquots here since dqp isn't
-                * on any findable lists yet.
-                */
-               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
-                       /*
-                        * Duplicate found. Just throw away the new dquot
-                        * and start over.
-                        */
-                       xfs_qm_dqput(tmpdqp);
-                       mutex_unlock(&h->qh_lock);
-                       xfs_qm_dqdestroy(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto again;
-               }
-       }
-
-       /*
-        * Put the dquot at the beginning of the hash-chain and mp's list
-        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-        */
-       ASSERT(mutex_is_locked(&h->qh_lock));
-       dqp->q_hash = h;
-       list_add(&dqp->q_hashlist, &h->qh_list);
-       h->qh_version++;
-
-       /*
-        * Attach this dquot to this filesystem's list of all dquots,
-        * kept inside the mount structure in m_quotainfo field
-        */
-       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
-       /*
-        * We return a locked dquot to the caller, with a reference taken
-        */
-       xfs_dqlock(dqp);
-       dqp->q_nrefs = 1;
-
-       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
-       mp->m_quotainfo->qi_dquots++;
-       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-       mutex_unlock(&h->qh_lock);
- dqret:
-       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       trace_xfs_dqget_miss(dqp);
-       *O_dqpp = dqp;
-       return (0);
-}
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dquot_t     *gdqp;
-
-       ASSERT(dqp->q_nrefs > 0);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       trace_xfs_dqput(dqp);
-
-       if (dqp->q_nrefs != 1) {
-               dqp->q_nrefs--;
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       /*
-        * drop the dqlock and acquire the freelist and dqlock
-        * in the right order; but try to get it out-of-order first
-        */
-       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-               trace_xfs_dqput_wait(dqp);
-               xfs_dqunlock(dqp);
-               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-               xfs_dqlock(dqp);
-       }
-
-       while (1) {
-               gdqp = NULL;
-
-               /* We can't depend on nrefs being == 1 here */
-               if (--dqp->q_nrefs == 0) {
-                       trace_xfs_dqput_free(dqp);
-
-                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-                       xfs_Gqm->qm_dqfrlist_cnt++;
-
-                       /*
-                        * If we just added a udquot to the freelist, then
-                        * we want to release the gdquot reference that
-                        * it (probably) has. Otherwise it'll keep the
-                        * gdquot from getting reclaimed.
-                        */
-                       if ((gdqp = dqp->q_gdquot)) {
-                               /*
-                                * Avoid a recursive dqput call
-                                */
-                               xfs_dqlock(gdqp);
-                               dqp->q_gdquot = NULL;
-                       }
-               }
-               xfs_dqunlock(dqp);
-
-               /*
-                * If we had a group quota inside the user quota as a hint,
-                * release it now.
-                */
-               if (! gdqp)
-                       break;
-               dqp = gdqp;
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
-       xfs_dquot_t     *dqp)
-{
-       if (!dqp)
-               return;
-
-       trace_xfs_dqrele(dqp);
-
-       xfs_dqlock(dqp);
-       /*
-        * We don't care to flush it if the dquot is dirty here.
-        * That will create stutters that we want to avoid.
-        * Instead we do a delayed write when we try to reclaim
-        * a dirty dquot. Also xfs_sync will take part of the burden...
-        */
-       xfs_qm_dqput(dqp);
-}
-
-/*
- * This is the dquot flushing I/O completion routine.  It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk.  It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-STATIC void
-xfs_qm_dqflush_done(
-       struct xfs_buf          *bp,
-       struct xfs_log_item     *lip)
-{
-       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
-       xfs_dquot_t             *dqp = qip->qli_dquot;
-       struct xfs_ail          *ailp = lip->li_ailp;
-
-       /*
-        * We only want to pull the item from the AIL if its
-        * location in the log has not changed since we started the flush.
-        * Thus, we only bother if the dquot's lsn has
-        * not changed. First we check the lsn outside the lock
-        * since it's cheaper, and then we recheck while
-        * holding the lock before removing the dquot from the AIL.
-        */
-       if ((lip->li_flags & XFS_LI_IN_AIL) &&
-           lip->li_lsn == qip->qli_flush_lsn) {
-
-               /* xfs_trans_ail_delete() drops the AIL lock. */
-               spin_lock(&ailp->xa_lock);
-               if (lip->li_lsn == qip->qli_flush_lsn)
-                       xfs_trans_ail_delete(ailp, lip);
-               else
-                       spin_unlock(&ailp->xa_lock);
-       }
-
-       /*
-        * Release the dq's flush lock since we're done with it.
-        */
-       xfs_dqfunlock(dqp);
-}
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
-       xfs_dquot_t             *dqp,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = dqp->q_mount;
-       struct xfs_buf          *bp;
-       struct xfs_disk_dquot   *ddqp;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       trace_xfs_dqflush(dqp);
-
-       /*
-        * If not dirty, or it's pinned and we are not supposed to block, nada.
-        */
-       if (!XFS_DQ_IS_DIRTY(dqp) ||
-           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
-               xfs_dqfunlock(dqp);
-               return 0;
-       }
-       xfs_qm_dqunpin_wait(dqp);
-
-       /*
-        * This may have been unpinned because the filesystem is shutting
-        * down forcibly. If that's the case we must not write this dquot
-        * to disk, because the log record didn't make it to disk!
-        */
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               dqp->dq_flags &= ~XFS_DQ_DIRTY;
-               xfs_dqfunlock(dqp);
-               return XFS_ERROR(EIO);
-       }
-
-       /*
-        * Get the buffer containing the on-disk dquot
-        */
-       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-       if (error) {
-               ASSERT(error != ENOENT);
-               xfs_dqfunlock(dqp);
-               return error;
-       }
-
-       /*
-        * Calculate the location of the dquot inside the buffer.
-        */
-       ddqp = bp->b_addr + dqp->q_bufoffset;
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot..
-        */
-       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
-                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
-       if (error) {
-               xfs_buf_relse(bp);
-               xfs_dqfunlock(dqp);
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               return XFS_ERROR(EIO);
-       }
-
-       /* This is the only portion of data that needs to persist */
-       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
-
-       /*
-        * Clear the dirty field and remember the flush lsn for later use.
-        */
-       dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
-       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
-                                       &dqp->q_logitem.qli_item.li_lsn);
-
-       /*
-        * Attach an iodone routine so that we can remove this dquot from the
-        * AIL and release the flush lock once the dquot is synced to disk.
-        */
-       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
-                                 &dqp->q_logitem.qli_item);
-
-       /*
-        * If the buffer is pinned then push on the log so we won't
-        * get stuck waiting in the write for too long.
-        */
-       if (xfs_buf_ispinned(bp)) {
-               trace_xfs_dqflush_force(dqp);
-               xfs_log_force(mp, 0);
-       }
-
-       if (flags & SYNC_WAIT)
-               error = xfs_bwrite(mp, bp);
-       else
-               xfs_bdwrite(mp, bp);
-
-       trace_xfs_dqflush_done(dqp);
-
-       /*
-        * dqp is still locked, but caller is free to unlock it now.
-        */
-       return error;
-
-}
-
-int
-xfs_qm_dqlock_nowait(
-       xfs_dquot_t *dqp)
-{
-       return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-       if (dqp->q_logitem.qli_dquot == dqp) {
-               /* Once was dqp->q_mount, but might just have been cleared */
-               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                       (xfs_log_item_t*)&(dqp->q_logitem));
-       }
-}
-
-
-void
-xfs_dqunlock_nonotify(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-}
-
-/*
- * Lock two xfs_dquot structures.
- *
- * To avoid deadlocks we always lock the quota structure with
- * the lowerd id first.
- */
-void
-xfs_dqlock2(
-       xfs_dquot_t     *d1,
-       xfs_dquot_t     *d2)
-{
-       if (d1 && d2) {
-               ASSERT(d1 != d2);
-               if (be32_to_cpu(d1->q_core.d_id) >
-                   be32_to_cpu(d2->q_core.d_id)) {
-                       mutex_lock(&d2->q_qlock);
-                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
-               } else {
-                       mutex_lock(&d1->q_qlock);
-                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
-               }
-       } else if (d1) {
-               mutex_lock(&d1->q_qlock);
-       } else if (d2) {
-               mutex_lock(&d2->q_qlock);
-       }
-}
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dqhash_t    *qh = dqp->q_hash;
-       xfs_mount_t     *mp = dqp->q_mount;
-
-       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
-       xfs_dqlock(dqp);
-       /*
-        * We really can't afford to purge a dquot that is
-        * referenced, because these are hard refs.
-        * It shouldn't happen in general because we went thru _all_ inodes in
-        * dqrele_all_inodes before calling this and didn't let the mountlock go.
-        * However it is possible that we have dquots with temporary
-        * references that are not attached to an inode. e.g. see xfs_setattr().
-        */
-       if (dqp->q_nrefs != 0) {
-               xfs_dqunlock(dqp);
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return (1);
-       }
-
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       /*
-        * If we're turning off quotas, we have to make sure that, for
-        * example, we don't delete quota disk blocks while dquots are
-        * in the process of getting written to those disk blocks.
-        * This dquot might well be on AIL, and we can't leave it there
-        * if we're turning off quotas. Basically, we need this flush
-        * lock, and are willing to block on it.
-        */
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * Block on the flush lock after nudging dquot buffer,
-                * if it is incore.
-                */
-               xfs_qm_dqflock_pushbuf_wait(dqp);
-       }
-
-       /*
-        * XXXIf we're turning this type of quotas off, we don't care
-        * about the dirty metadata sitting in this dquot. OTOH, if
-        * we're unmounting, we do care, so we flush it and wait.
-        */
-       if (XFS_DQ_IS_DIRTY(dqp)) {
-               int     error;
-
-               /* dqflush unlocks dqflock */
-               /*
-                * Given that dqpurge is a very rare occurrence, it is OK
-                * that we're holding the hashlist and mplist locks
-                * across the disk write. But, ... XXXsup
-                *
-                * We don't care about getting disk errors here. We need
-                * to purge this dquot anyway, so we go ahead regardless.
-                */
-               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
-               if (error)
-                       xfs_warn(mp, "%s: dquot %p flush failed",
-                               __func__, dqp);
-               xfs_dqflock(dqp);
-       }
-       ASSERT(atomic_read(&dqp->q_pincount) == 0);
-       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-       list_del_init(&dqp->q_hashlist);
-       qh->qh_version++;
-       list_del_init(&dqp->q_mplist);
-       mp->m_quotainfo->qi_dqreclaims++;
-       mp->m_quotainfo->qi_dquots--;
-       /*
-        * XXX Move this to the front of the freelist, if we can get the
-        * freelist lock.
-        */
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       dqp->q_mount = NULL;
-       dqp->q_hash = NULL;
-       dqp->dq_flags = XFS_DQ_INACTIVE;
-       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-       mutex_unlock(&qh->qh_lock);
-       return (0);
-}
-
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
-       xfs_dquot_t     *dqp)
-{
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_buf_t       *bp;
-
-       /*
-        * Check to see if the dquot has been flushed delayed
-        * write.  If so, grab its buffer and send it
-        * out immediately.  We'll be able to acquire
-        * the flush lock when the I/O completes.
-        */
-       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
-                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       if (!bp)
-               goto out_lock;
-
-       if (XFS_BUF_ISDELAYWRITE(bp)) {
-               if (xfs_buf_ispinned(bp))
-                       xfs_log_force(mp, 0);
-               xfs_buf_delwri_promote(bp);
-               wake_up_process(bp->b_target->bt_task);
-       }
-       xfs_buf_relse(bp);
-out_lock:
-       xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
deleted file mode 100644 (file)
index 34b7e94..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both those in its algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-       struct list_head  qh_list;
-       struct mutex      qh_lock;
-       uint              qh_version;   /* ever increasing version */
-       uint              qh_nelems;    /* number of dquots on the list */
-} xfs_dqhash_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
-       uint             dq_flags;      /* various flags (XFS_DQ_*) */
-       struct list_head q_freelist;    /* global free list of dquots */
-       struct list_head q_mplist;      /* mount's list of dquots */
-       struct list_head q_hashlist;    /* gloabl hash list of dquots */
-       xfs_dqhash_t    *q_hash;        /* the hashchain header */
-       struct xfs_mount*q_mount;       /* filesystem this relates to */
-       struct xfs_trans*q_transp;      /* trans this belongs to currently */
-       uint             q_nrefs;       /* # active refs from inodes */
-       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
-       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
-       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
-
-       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
-       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
-       xfs_dq_logitem_t q_logitem;     /* dquot log item */
-       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
-       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
-       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
-       struct mutex     q_qlock;       /* quota lock */
-       struct completion q_flush;      /* flush completion queue */
-       atomic_t          q_pincount;   /* dquot pin count */
-       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
-} xfs_dquot_t;
-
-/*
- * Lock hierarchy for q_qlock:
- *     XFS_QLOCK_NORMAL is the implicit default,
- *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
- */
-enum {
-       XFS_QLOCK_NORMAL = 0,
-       XFS_QLOCK_NESTED,
-};
-
-#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
-
-/*
- * Manage the q_flush completion queue embedded in the dquot.  This completion
- * queue synchronizes processes attempting to flush the in-core dquot back to
- * disk.
- */
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
-{
-       wait_for_completion(&dqp->q_flush);
-}
-
-static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
-{
-       return try_wait_for_completion(&dqp->q_flush);
-}
-
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
-{
-       complete(&dqp->q_flush);
-}
-
-#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
-                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
-                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int             xfs_qm_dqpurge(xfs_dquot_t *);
-extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
-                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void            xfs_qm_dqput(xfs_dquot_t *);
-extern void            xfs_dqlock(xfs_dquot_t *);
-extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void            xfs_dqunlock(xfs_dquot_t *);
-extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
deleted file mode 100644 (file)
index 9e0e2fa..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_dq_logitem, qli_item);
-}
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-STATIC uint
-xfs_qm_dquot_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       /*
-        * we need only two iovecs, one for the format, one for the real thing
-        */
-       return 2;
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *logvec)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-
-       logvec->i_addr = &qlip->qli_format;
-       logvec->i_len  = sizeof(xfs_dq_logformat_t);
-       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
-       logvec++;
-       logvec->i_addr = &qlip->qli_dquot->q_core;
-       logvec->i_len  = sizeof(xfs_disk_dquot_t);
-       logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
-       ASSERT(2 == lip->li_desc->lid_size);
-       qlip->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       atomic_inc(&dqp->q_pincount);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0.         The
- * dquot must have been previously pinned with a call to
- * xfs_qm_dquot_logitem_pin().
- */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(atomic_read(&dqp->q_pincount) > 0);
-       if (atomic_dec_and_test(&dqp->q_pincount))
-               wake_up(&dqp->q_pinwait);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       /*
-        * Since we were able to lock the dquot's flush lock and
-        * we found it on the AIL, the dquot must be dirty.  This
-        * is because the dquot is removed from the AIL while still
-        * holding the flush lock in xfs_dqflush_done().  Thus, if
-        * we found it in the AIL and were able to obtain the flush
-        * lock without sleeping, then there must not have been
-        * anyone in the process of flushing the dquot.
-        */
-       error = xfs_qm_dqflush(dqp, 0);
-       if (error)
-               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
-                       __func__, error, dqp);
-       xfs_dqunlock(dqp);
-}
-
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       /*
-        * We always re-log the entire dquot when it becomes dirty,
-        * so, the latest copy _is_ the only one that matters.
-        */
-       return lsn;
-}
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
-       struct xfs_dquot        *dqp)
-{
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       if (atomic_read(&dqp->q_pincount) == 0)
-               return;
-
-       /*
-        * Give the log a push so we don't wait here too long.
-        */
-       xfs_log_force(dqp->q_mount, 0);
-       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
-}
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL lock is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-       struct xfs_dquot        *dqp = qlip->qli_dquot;
-       struct xfs_buf          *bp;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * If flushlock isn't locked anymore, chances are that the
-        * inode flush completed and the inode was taken off the AIL.
-        * So, just get out.
-        */
-       if (completion_done(&dqp->q_flush) ||
-           !(lip->li_flags & XFS_LI_IN_AIL)) {
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
-                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       xfs_dqunlock(dqp);
-       if (!bp)
-               return;
-       if (XFS_BUF_ISDELAYWRITE(bp))
-               xfs_buf_delwri_promote(bp);
-       xfs_buf_relse(bp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item.  Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping.  If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       if (atomic_read(&dqp->q_pincount) > 0)
-               return XFS_ITEM_PINNED;
-
-       if (!xfs_qm_dqlock_nowait(dqp))
-               return XFS_ITEM_LOCKED;
-
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * dquot has already been flushed to the backing buffer,
-                * leave it locked, pushbuf routine will unlock it.
-                */
-               return XFS_ITEM_PUSHBUF;
-       }
-
-       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-       return XFS_ITEM_SUCCESS;
-}
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * Clear the transaction pointer in the dquot
-        */
-       dqp->q_transp = NULL;
-
-       /*
-        * dquots are never 'held' from getting unlocked at the end of
-        * a transaction.  Their locking and unlocking is hidden inside the
-        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
-        * for the logitem.
-        */
-       xfs_dqunlock(dqp);
-}
-
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
-STATIC void
-xfs_qm_dquot_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector for dquots
- */
-static struct xfs_item_ops xfs_dquot_item_ops = {
-       .iop_size       = xfs_qm_dquot_logitem_size,
-       .iop_format     = xfs_qm_dquot_logitem_format,
-       .iop_pin        = xfs_qm_dquot_logitem_pin,
-       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
-       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
-       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
-       .iop_committed  = xfs_qm_dquot_logitem_committed,
-       .iop_push       = xfs_qm_dquot_logitem_push,
-       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
-       .iop_committing = xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
-       struct xfs_dquot        *dqp)
-{
-       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
-
-       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
-                                       &xfs_dquot_item_ops);
-       lp->qli_dquot = dqp;
-       lp->qli_format.qlf_type = XFS_LI_DQUOT;
-       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
-       lp->qli_format.qlf_blkno = dqp->q_blkno;
-       lp->qli_format.qlf_len = 1;
-       /*
-        * This is just the offset of this dquot within its buffer
-        * (which is currently 1 FSB and probably won't change).
-        * Hence 32 bits for this offset should be just fine.
-        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
-        * here, and recompute it at recovery time.
-        */
-       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-STATIC uint
-xfs_qm_qoff_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       return 1;
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *log_vector)
-{
-       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
-
-       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
-       log_vector->i_addr = &qflip->qql_format;
-       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
-       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
-       qflip->qql_format.qf_size = 1;
-}
-
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
-/*
- * Quotaoff items have no locking, so just return success.
- */
-STATIC uint
-xfs_qm_qoff_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * There isn't much you can do to push on an quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
-       struct xfs_log_item     *lip)
-{
-}
-
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
-       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
-       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
-
-       /*
-        * Delete the qoff-start logitem from the AIL.
-        * xfs_trans_ail_delete() drops the AIL lock.
-        */
-       spin_lock(&ailp->xa_lock);
-       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
-
-       kmem_free(qfs);
-       kmem_free(qfe);
-       return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoffend_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoff_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
-       struct xfs_mount        *mp,
-       struct xfs_qoff_logitem *start,
-       uint                    flags)
-{
-       struct xfs_qoff_logitem *qf;
-
-       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
-
-       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
-                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
-       qf->qql_item.li_mountp = mp;
-       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
-       qf->qql_format.qf_flags = flags;
-       qf->qql_start_lip = start;
-       return qf;
-}
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
deleted file mode 100644 (file)
index 5acae2a..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-struct xfs_qoff_logitem;
-
-typedef struct xfs_dq_logitem {
-       xfs_log_item_t           qli_item;         /* common portion */
-       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
-       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
-       xfs_dq_logformat_t       qli_format;       /* logged structure */
-} xfs_dq_logitem_t;
-
-typedef struct xfs_qoff_logitem {
-       xfs_log_item_t           qql_item;      /* common portion */
-       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
-       xfs_qoff_logformat_t     qql_format;    /* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
-                                       struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *, uint);
-extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *);
-
-#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
deleted file mode 100644 (file)
index 9a0aa76..0000000
+++ /dev/null
@@ -1,2416 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex   xfs_Gqm_lock;
-struct xfs_qm  *xfs_Gqm;
-uint           ndquot;
-
-kmem_zone_t    *qm_dqzone;
-kmem_zone_t    *qm_dqtrxzone;
-
-STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
-
-static struct shrinker xfs_qm_shaker = {
-       .shrink = xfs_qm_shake,
-       .seeks = DEFAULT_SEEKS,
-};
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
-       xfs_dqhash_t    *udqhash, *gdqhash;
-       xfs_qm_t        *xqm;
-       size_t          hsize;
-       uint            i;
-
-       /*
-        * Initialize the dquot hash tables.
-        */
-       udqhash = kmem_zalloc_greedy(&hsize,
-                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-       if (!udqhash)
-               goto out;
-
-       gdqhash = kmem_zalloc_large(hsize);
-       if (!gdqhash)
-               goto out_free_udqhash;
-
-       hsize /= sizeof(xfs_dqhash_t);
-       ndquot = hsize << 8;
-
-       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-       xqm->qm_dqhashmask = hsize - 1;
-       xqm->qm_usr_dqhtable = udqhash;
-       xqm->qm_grp_dqhtable = gdqhash;
-       ASSERT(xqm->qm_usr_dqhtable != NULL);
-       ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-       }
-
-       /*
-        * Freelist of all dquots of all file systems
-        */
-       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-       xqm->qm_dqfrlist_cnt = 0;
-       mutex_init(&xqm->qm_dqfrlist_lock);
-
-       /*
-        * dquot zone. we register our own low-memory callback.
-        */
-       if (!qm_dqzone) {
-               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-                                               "xfs_dquots");
-               qm_dqzone = xqm->qm_dqzone;
-       } else
-               xqm->qm_dqzone = qm_dqzone;
-
-       register_shrinker(&xfs_qm_shaker);
-
-       /*
-        * The t_dqinfo portion of transactions.
-        */
-       if (!qm_dqtrxzone) {
-               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-                                                  "xfs_dqtrx");
-               qm_dqtrxzone = xqm->qm_dqtrxzone;
-       } else
-               xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-       atomic_set(&xqm->qm_totaldquots, 0);
-       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
-       xqm->qm_nrefs = 0;
-       return xqm;
-
- out_free_udqhash:
-       kmem_free_large(udqhash);
- out:
-       return NULL;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
-       struct xfs_qm   *xqm)
-{
-       struct xfs_dquot *dqp, *n;
-       int             hsize, i;
-
-       ASSERT(xqm != NULL);
-       ASSERT(xqm->qm_nrefs == 0);
-       unregister_shrinker(&xfs_qm_shaker);
-       hsize = xqm->qm_dqhashmask + 1;
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-       }
-       kmem_free_large(xqm->qm_usr_dqhtable);
-       kmem_free_large(xqm->qm_grp_dqhtable);
-       xqm->qm_usr_dqhtable = NULL;
-       xqm->qm_grp_dqhtable = NULL;
-       xqm->qm_dqhashmask = 0;
-
-       /* frlist cleanup */
-       mutex_lock(&xqm->qm_dqfrlist_lock);
-       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               xfs_dqunlock(dqp);
-               xfs_qm_dqdestroy(dqp);
-       }
-       mutex_unlock(&xqm->qm_dqfrlist_lock);
-       mutex_destroy(&xqm->qm_dqfrlist_lock);
-       kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       /*
-        * Need to lock the xfs_Gqm structure for things like this. For example,
-        * the structure could disappear between the entry to this routine and
-        * a HOLD operation if not locked.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-
-       if (!xfs_Gqm) {
-               xfs_Gqm = xfs_Gqm_init();
-               if (!xfs_Gqm) {
-                       mutex_unlock(&xfs_Gqm_lock);
-                       return ENOMEM;
-               }
-       }
-
-       /*
-        * We can keep a list of all filesystems with quotas mounted for
-        * debugging and statistical purposes, but ...
-        * Just take a reference and get out.
-        */
-       xfs_Gqm->qm_nrefs++;
-       mutex_unlock(&xfs_Gqm_lock);
-
-       return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       xfs_dquot_t     *dqp, *n;
-
-       ASSERT(xfs_Gqm);
-       ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-       /*
-        * Go thru the freelist and destroy all inactive dquots.
-        */
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(dqp->q_mount == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       xfs_dqunlock(dqp);
-                       xfs_qm_dqdestroy(dqp);
-               } else {
-                       xfs_dqunlock(dqp);
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       /*
-        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-        * be restarted.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-       if (--xfs_Gqm->qm_nrefs == 0) {
-               xfs_qm_destroy(xfs_Gqm);
-               xfs_Gqm = NULL;
-       }
-       mutex_unlock(&xfs_Gqm_lock);
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_quotainfo) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               xfs_qm_destroy_quotainfo(mp);
-       }
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo.  This is also responsible for
- * running a quotacheck as necessary.  We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to inidicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
-       xfs_mount_t     *mp)
-{
-       int             error = 0;
-       uint            sbf;
-
-       /*
-        * If quotas on realtime volumes is not supported, we disable
-        * quotas immediately.
-        */
-       if (mp->m_sb.sb_rextents) {
-               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Allocate the quotainfo structure inside the mount struct, and
-        * create quotainode(s), and change/rev superblock if necessary.
-        */
-       error = xfs_qm_init_quotainfo(mp);
-       if (error) {
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo == NULL);
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-       /*
-        * If any of the quotas are not consistent, do a quotacheck.
-        */
-       if (XFS_QM_NEED_QUOTACHECK(mp)) {
-               error = xfs_qm_quotacheck(mp);
-               if (error) {
-                       /* Quotacheck failed and disabled quotas. */
-                       return;
-               }
-       }
-       /* 
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       if (!XFS_IS_UQUOTA_ON(mp))
-               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
-       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
-               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-
- write_changes:
-       /*
-        * We actually don't have to acquire the m_sb_lock at all.
-        * This can only be called from mount, and that's single threaded. XXX
-        */
-       spin_lock(&mp->m_sb_lock);
-       sbf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
-               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
-                       /*
-                        * We could only have been turning quotas off.
-                        * We aren't in very good shape actually because
-                        * the incore structures are convinced that quotas are
-                        * off, but the on disk superblock doesn't know that !
-                        */
-                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
-                       xfs_alert(mp, "%s: Superblock update failed!",
-                               __func__);
-               }
-       }
-
-       if (error) {
-               xfs_warn(mp, "Failed to initialize disk quotas.");
-               return;
-       }
-}
-
-/*
- * Called from the vfsops layer.
- */
-void
-xfs_qm_unmount_quotas(
-       xfs_mount_t     *mp)
-{
-       /*
-        * Release the dquots that root inode, et al might be holding,
-        * before we flush quotas and blow away the quotainfo structure.
-        */
-       ASSERT(mp->m_rootip);
-       xfs_qm_dqdetach(mp->m_rootip);
-       if (mp->m_rbmip)
-               xfs_qm_dqdetach(mp->m_rbmip);
-       if (mp->m_rsumip)
-               xfs_qm_dqdetach(mp->m_rsumip);
-
-       /*
-        * Release the quota inodes.
-        */
-       if (mp->m_quotainfo) {
-               if (mp->m_quotainfo->qi_uquotaip) {
-                       IRELE(mp->m_quotainfo->qi_uquotaip);
-                       mp->m_quotainfo->qi_uquotaip = NULL;
-               }
-               if (mp->m_quotainfo->qi_gquotaip) {
-                       IRELE(mp->m_quotainfo->qi_gquotaip);
-                       mp->m_quotainfo->qi_gquotaip = NULL;
-               }
-       }
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
-       struct xfs_mount        *mp,
-       int                     sync_mode)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!q)
-               return 0;
-again:
-       mutex_lock(&q->qi_dqlist_lock);
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       /*
-                        * If we can't grab the flush lock then check
-                        * to see if the dquot has been flushed delayed
-                        * write.  If so, grab its buffer and send it
-                        * out immediately.  We'll be able to acquire
-                        * the flush lock when the I/O completes.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write.
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, sync_mode);
-               xfs_dqunlock(dqp);
-               if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       /* XXX restart limit */
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       /* return ! busy */
-       return 0;
-}
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-       struct xfs_mount        *mp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *gdqp;
-       int                     nrecl;
-
- again:
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if ((gdqp = dqp->q_gdquot)) {
-                       xfs_dqlock(gdqp);
-                       dqp->q_gdquot = NULL;
-               }
-               xfs_dqunlock(dqp);
-
-               if (gdqp) {
-                       /*
-                        * Can't hold the mplist lock across a dqput.
-                        * XXXmust convert to marker based iterations here.
-                        */
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       xfs_qm_dqput(gdqp);
-
-                       mutex_lock(&q->qi_dqlist_lock);
-                       if (nrecl != q->qi_dqreclaims)
-                               goto again;
-               }
-       }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
-       struct xfs_mount        *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *n;
-       uint                    dqtype;
-       int                     nrecl;
-       int                     nmisses;
-
-       if (!q)
-               return 0;
-
-       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-       mutex_lock(&q->qi_dqlist_lock);
-
-       /*
-        * In the first pass through all incore dquots of this filesystem,
-        * we release the group dquot pointers the user dquots may be
-        * carrying around as a hint. We need to do this irrespective of
-        * what's being turned off.
-        */
-       xfs_qm_detach_gdquots(mp);
-
-      again:
-       nmisses = 0;
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       /*
-        * Try to get rid of all of the unwanted dquots. The idea is to
-        * get them off mplist and hashlist, but leave them on freelist.
-        */
-       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-               /*
-                * It's OK to look at the type without taking dqlock here.
-                * We're holding the mplist lock here, and that's needed for
-                * a dqreclaim.
-                */
-               if ((dqp->dq_flags & dqtype) == 0)
-                       continue;
-
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       mutex_lock(&dqp->q_hash->qh_lock);
-                       mutex_lock(&q->qi_dqlist_lock);
-
-                       /*
-                        * XXXTheoretically, we can get into a very long
-                        * ping pong game here.
-                        * No one can be adding dquots to the mplist at
-                        * this point, but somebody might be taking things off.
-                        */
-                       if (nrecl != q->qi_dqreclaims) {
-                               mutex_unlock(&dqp->q_hash->qh_lock);
-                               goto again;
-                       }
-               }
-
-               /*
-                * Take the dquot off the mplist and hashlist. It may remain on
-                * freelist in INACTIVE state.
-                */
-               nmisses += xfs_qm_dqpurge(dqp);
-       }
-       mutex_unlock(&q->qi_dqlist_lock);
-       return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             ndquots;
-
-       /*
-        * Purge the dquot cache.
-        * None of the dquots should really be busy at this point.
-        */
-       if (mp->m_quotainfo) {
-               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-                       delay(ndquots * 10);
-               }
-       }
-       return 0;
-}
-
-STATIC int
-xfs_qm_dqattach_one(
-       xfs_inode_t     *ip,
-       xfs_dqid_t      id,
-       uint            type,
-       uint            doalloc,
-       xfs_dquot_t     *udqhint, /* hint */
-       xfs_dquot_t     **IO_idqpp)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       error = 0;
-
-       /*
-        * See if we already have it in the inode itself. IO_idqpp is
-        * &i_udquot or &i_gdquot. This made the code look weird, but
-        * made the logic a lot simpler.
-        */
-       dqp = *IO_idqpp;
-       if (dqp) {
-               trace_xfs_dqattach_found(dqp);
-               return 0;
-       }
-
-       /*
-        * udqhint is the i_udquot field in inode, and is non-NULL only
-        * when the type arg is group/project. Its purpose is to save a
-        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-        * the user dquot.
-        */
-       if (udqhint) {
-               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-               xfs_dqlock(udqhint);
-
-               /*
-                * No need to take dqlock to look at the id.
-                *
-                * The ID can't change until it gets reclaimed, and it won't
-                * be reclaimed as long as we have a ref from inode and we
-                * hold the ilock.
-                */
-               dqp = udqhint->q_gdquot;
-               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-                       xfs_dqlock(dqp);
-                       XFS_DQHOLD(dqp);
-                       ASSERT(*IO_idqpp == NULL);
-                       *IO_idqpp = dqp;
-
-                       xfs_dqunlock(dqp);
-                       xfs_dqunlock(udqhint);
-                       return 0;
-               }
-
-               /*
-                * We can't hold a dquot lock when we call the dqget code.
-                * We'll deadlock in no time, because of (not conforming to)
-                * lock ordering - the inodelock comes before any dquot lock,
-                * and we may drop and reacquire the ilock in xfs_qm_dqget().
-                */
-               xfs_dqunlock(udqhint);
-       }
-
-       /*
-        * Find the dquot from somewhere. This bumps the
-        * reference count of dquot and returns it locked.
-        * This can return ENOENT if dquot didn't exist on
-        * disk and we didn't ask it to allocate;
-        * ESRCH if quotas got turned off suddenly.
-        */
-       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
-       if (error)
-               return error;
-
-       trace_xfs_dqattach_get(dqp);
-
-       /*
-        * dqget may have dropped and re-acquired the ilock, but it guarantees
-        * that the dquot returned is the one that should go in the inode.
-        */
-       *IO_idqpp = dqp;
-       xfs_dqunlock(dqp);
-       return 0;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
-       xfs_dquot_t     *udq,
-       xfs_dquot_t     *gdq)
-{
-       xfs_dquot_t     *tmp;
-
-       xfs_dqlock(udq);
-
-       if ((tmp = udq->q_gdquot)) {
-               if (tmp == gdq) {
-                       xfs_dqunlock(udq);
-                       return;
-               }
-
-               udq->q_gdquot = NULL;
-               /*
-                * We can't keep any dqlocks when calling dqrele,
-                * because the freelist lock comes before dqlocks.
-                */
-               xfs_dqunlock(udq);
-               /*
-                * we took a hard reference once upon a time in dqget,
-                * so give it back when the udquot no longer points at it
-                * dqput() does the unlocking of the dquot.
-                */
-               xfs_qm_dqrele(tmp);
-
-               xfs_dqlock(udq);
-               xfs_dqlock(gdq);
-
-       } else {
-               ASSERT(XFS_DQ_IS_LOCKED(udq));
-               xfs_dqlock(gdq);
-       }
-
-       ASSERT(XFS_DQ_IS_LOCKED(udq));
-       ASSERT(XFS_DQ_IS_LOCKED(gdq));
-       /*
-        * Somebody could have attached a gdquot here,
-        * when we dropped the uqlock. If so, just do nothing.
-        */
-       if (udq->q_gdquot == NULL) {
-               XFS_DQHOLD(gdq);
-               udq->q_gdquot = gdq;
-       }
-
-       xfs_dqunlock(gdq);
-       xfs_dqunlock(udq);
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach_locked(
-       xfs_inode_t     *ip,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            nquotas = 0;
-       int             error = 0;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           !XFS_NOT_DQATTACHED(mp, ip) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return 0;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               NULL, &ip->i_udquot);
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       if (XFS_IS_OQUOTA_ON(mp)) {
-               error = XFS_IS_GQUOTA_ON(mp) ?
-                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot) :
-                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot);
-               /*
-                * Don't worry about the udquot that we may have
-                * attached above. It'll get detached, if not already.
-                */
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       /*
-        * Attach this group quota to the user quota as a hint.
-        * This WON'T, in general, result in a thrash.
-        */
-       if (nquotas == 2) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               ASSERT(ip->i_udquot);
-               ASSERT(ip->i_gdquot);
-
-               /*
-                * We may or may not have the i_udquot locked at this point,
-                * but this check is OK since we don't depend on the i_gdquot to
-                * be accurate 100% all the time. It is just a hint, and this
-                * will succeed in general.
-                */
-               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-                       goto done;
-               /*
-                * Attach i_gdquot to the gdquot hint inside the i_udquot.
-                */
-               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
-       }
-
- done:
-#ifdef DEBUG
-       if (!error) {
-               if (XFS_IS_UQUOTA_ON(mp))
-                       ASSERT(ip->i_udquot);
-               if (XFS_IS_OQUOTA_ON(mp))
-                       ASSERT(ip->i_gdquot);
-       }
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
-       return error;
-}
-
-int
-xfs_qm_dqattach(
-       struct xfs_inode        *ip,
-       uint                    flags)
-{
-       int                     error;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       error = xfs_qm_dqattach_locked(ip, flags);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this's called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
-       xfs_inode_t     *ip)
-{
-       if (!(ip->i_udquot || ip->i_gdquot))
-               return;
-
-       trace_xfs_dquot_dqdetach(ip);
-
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-       if (ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-}
-
-int
-xfs_qm_sync(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl, restarts;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       restarts = 0;
-
-  again:
-       mutex_lock(&q->qi_dqlist_lock);
-       /*
-        * dqpurge_all() also takes the mplist lock and iterate thru all dquots
-        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-        * when we have the mplist lock, we know that dquots will be consistent
-        * as long as we have it locked.
-        */
-       if (!XFS_IS_QUOTA_ON(mp)) {
-               mutex_unlock(&q->qi_dqlist_lock);
-               return 0;
-       }
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               /*
-                * If this is vfs_sync calling, then skip the dquots that
-                * don't 'seem' to be dirty. ie. don't acquire dqlock.
-                * This is very similar to what xfs_sync does with inodes.
-                */
-               if (flags & SYNC_TRYLOCK) {
-                       if (!XFS_DQ_IS_DIRTY(dqp))
-                               continue;
-                       if (!xfs_qm_dqlock_nowait(dqp))
-                               continue;
-               } else {
-                       xfs_dqlock(dqp);
-               }
-
-               /*
-                * Now, find out for sure if this dquot is dirty or not.
-                */
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       if (flags & SYNC_TRYLOCK) {
-                               xfs_dqunlock(dqp);
-                               continue;
-                       }
-                       /*
-                        * If we can't grab the flush lock then if the caller
-                        * really wanted us to give this our best shot, so
-                        * see if we can give a push to the buffer before we wait
-                        * on the flush lock. At this point, we know that
-                        * even though the dquot is being flushed,
-                        * it has (new) dirty data.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, flags);
-               xfs_dqunlock(dqp);
-               if (error && XFS_FORCED_SHUTDOWN(mp))
-                       return 0;       /* Need to prevent umount failure */
-               else if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-                               break;
-
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       return 0;
-}
-
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qinf;
-       int             error;
-       xfs_dquot_t     *dqp;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Tell XQM that we exist as soon as possible.
-        */
-       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-               return error;
-       }
-
-       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
-       /*
-        * See if quotainodes are setup, and if not, allocate them,
-        * and change the superblock accordingly.
-        */
-       if ((error = xfs_qm_init_quotainos(mp))) {
-               kmem_free(qinf);
-               mp->m_quotainfo = NULL;
-               return error;
-       }
-
-       INIT_LIST_HEAD(&qinf->qi_dqlist);
-       mutex_init(&qinf->qi_dqlist_lock);
-       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
-
-       qinf->qi_dqreclaims = 0;
-
-       /* mutex used to serialize quotaoffs */
-       mutex_init(&qinf->qi_quotaofflock);
-
-       /* Precalc some constants */
-       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(qinf->qi_dqchunklen);
-       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
-       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
-       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
-       /*
-        * We try to get the limits from the superuser's limits fields.
-        * This is quite hacky, but it is standard quota practice.
-        * We look at the USR dquot with id == 0 first, but if user quotas
-        * are not enabled we goto the GRP dquot with id == 0.
-        * We don't really care to keep separate default limits for user
-        * and group quotas, at least not at this point.
-        */
-       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
-                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-                               XFS_DQ_PROJ),
-                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
-                            &dqp);
-       if (! error) {
-               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
-
-               /*
-                * The warnings and timers set the grace period given to
-                * a user or group before he or she can not perform any
-                * more writing. If it is zero, a default is used.
-                */
-               qinf->qi_btimelimit = ddqp->d_btimer ?
-                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = ddqp->d_itimer ?
-                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
-                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
-                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
-                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
-                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-               /*
-                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-                * we don't want this dquot cached. We haven't done a
-                * quotacheck yet, and quotacheck doesn't like incore dquots.
-                */
-               xfs_qm_dqdestroy(dqp);
-       } else {
-               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
-       }
-
-       return 0;
-}
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qi;
-
-       qi = mp->m_quotainfo;
-       ASSERT(qi != NULL);
-       ASSERT(xfs_Gqm != NULL);
-
-       /*
-        * Release the reference that XQM kept, so that we know
-        * when the XQM structure should be freed. We cannot assume
-        * that xfs_Gqm is non-null after this point.
-        */
-       xfs_qm_rele_quotafs_ref(mp);
-
-       ASSERT(list_empty(&qi->qi_dqlist));
-       mutex_destroy(&qi->qi_dqlist_lock);
-
-       if (qi->qi_uquotaip) {
-               IRELE(qi->qi_uquotaip);
-               qi->qi_uquotaip = NULL; /* paranoia */
-       }
-       if (qi->qi_gquotaip) {
-               IRELE(qi->qi_gquotaip);
-               qi->qi_gquotaip = NULL;
-       }
-       mutex_destroy(&qi->qi_quotaofflock);
-       kmem_free(qi);
-       mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-       xfs_dqlist_t    *list,
-       char            *str,
-       int             n)
-{
-       mutex_init(&list->qh_lock);
-       INIT_LIST_HEAD(&list->qh_list);
-       list->qh_version = 0;
-       list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-       xfs_dqlist_t    *list)
-{
-       mutex_destroy(&(list->qh_lock));
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
-       xfs_mount_t     *mp,
-       xfs_inode_t     **ip,
-       __int64_t       sbfields,
-       uint            flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-       int             committed;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-       if ((error = xfs_trans_reserve(tp,
-                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
-                                     XFS_CREATE_LOG_RES(mp), 0,
-                                     XFS_TRANS_PERM_LOG_RES,
-                                     XFS_CREATE_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT);
-               return error;
-       }
-
-       /*
-        * Make the changes in the superblock, and log those too.
-        * sbfields arg may contain fields other than *QUOTINO;
-        * VERSIONNUM for example.
-        */
-       spin_lock(&mp->m_sb_lock);
-       if (flags & XFS_QMOPT_SBVERSION) {
-               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
-               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
-               xfs_sb_version_addquota(&mp->m_sb);
-               mp->m_sb.sb_uquotino = NULLFSINO;
-               mp->m_sb.sb_gquotino = NULLFSINO;
-
-               /* qflags will get updated _after_ quotacheck */
-               mp->m_sb.sb_qflags = 0;
-       }
-       if (flags & XFS_QMOPT_UQUOTA)
-               mp->m_sb.sb_uquotino = (*ip)->i_ino;
-       else
-               mp->m_sb.sb_gquotino = (*ip)->i_ino;
-       spin_unlock(&mp->m_sb_lock);
-       xfs_mod_sb(tp, sbfields);
-
-       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
-               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-               return error;
-       }
-       return 0;
-}
-
-
-STATIC void
-xfs_qm_reset_dqcounts(
-       xfs_mount_t     *mp,
-       xfs_buf_t       *bp,
-       xfs_dqid_t      id,
-       uint            type)
-{
-       xfs_disk_dquot_t        *ddq;
-       int                     j;
-
-       trace_xfs_reset_dqcounts(bp, _RET_IP_);
-
-       /*
-        * Reset all counters and timers. They'll be
-        * started afresh by xfs_qm_quotacheck.
-        */
-#ifdef DEBUG
-       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       do_div(j, sizeof(xfs_dqblk_t));
-       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
-#endif
-       ddq = bp->b_addr;
-       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
-               /*
-                * Do a sanity check, and if needed, repair the dqblk. Don't
-                * output any warnings because it's perfectly possible to
-                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
-                */
-               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-                                     "xfs_quotacheck");
-               ddq->d_bcount = 0;
-               ddq->d_icount = 0;
-               ddq->d_rtbcount = 0;
-               ddq->d_btimer = 0;
-               ddq->d_itimer = 0;
-               ddq->d_rtbtimer = 0;
-               ddq->d_bwarns = 0;
-               ddq->d_iwarns = 0;
-               ddq->d_rtbwarns = 0;
-               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
-       }
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      firstid,
-       xfs_fsblock_t   bno,
-       xfs_filblks_t   blkcnt,
-       uint            flags)
-{
-       xfs_buf_t       *bp;
-       int             error;
-       int             type;
-
-       ASSERT(blkcnt > 0);
-       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
-               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
-       error = 0;
-
-       /*
-        * Blkcnt arg can be a very big number, and might even be
-        * larger than the log itself. So, we have to break it up into
-        * manageable-sized transactions.
-        * Note that we don't start a permanent transaction here; we might
-        * not be able to get a log reservation for the whole thing up front,
-        * and we don't really care to either, because we just discard
-        * everything if we were to crash in the middle of this loop.
-        */
-       while (blkcnt--) {
-               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                             XFS_FSB_TO_DADDR(mp, bno),
-                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-               if (error)
-                       break;
-
-               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
-               xfs_bdwrite(mp, bp);
-               /*
-                * goto the next block.
-                */
-               bno++;
-               firstid += mp->m_quotainfo->qi_dqperchunk;
-       }
-       return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *qip,
-       uint            flags)
-{
-       xfs_bmbt_irec_t         *map;
-       int                     i, nmaps;       /* number of map entries */
-       int                     error;          /* return value */
-       xfs_fileoff_t           lblkno;
-       xfs_filblks_t           maxlblkcnt;
-       xfs_dqid_t              firstid;
-       xfs_fsblock_t           rablkno;
-       xfs_filblks_t           rablkcnt;
-
-       error = 0;
-       /*
-        * This looks racy, but we can't keep an inode lock across a
-        * trans_reserve. But, this gets called during quotacheck, and that
-        * happens only at mount time which is single threaded.
-        */
-       if (qip->i_d.di_nblocks == 0)
-               return 0;
-
-       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
-       lblkno = 0;
-       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-       do {
-               nmaps = XFS_DQITER_MAP_SIZE;
-               /*
-                * We aren't changing the inode itself. Just changing
-                * some of its data. No new blocks are added here, and
-                * the inode is never added to the transaction.
-                */
-               xfs_ilock(qip, XFS_ILOCK_SHARED);
-               error = xfs_bmapi(NULL, qip, lblkno,
-                                 maxlblkcnt - lblkno,
-                                 XFS_BMAPI_METADATA,
-                                 NULL,
-                                 0, map, &nmaps, NULL);
-               xfs_iunlock(qip, XFS_ILOCK_SHARED);
-               if (error)
-                       break;
-
-               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
-               for (i = 0; i < nmaps; i++) {
-                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-                       ASSERT(map[i].br_blockcount);
-
-
-                       lblkno += map[i].br_blockcount;
-
-                       if (map[i].br_startblock == HOLESTARTBLOCK)
-                               continue;
-
-                       firstid = (xfs_dqid_t) map[i].br_startoff *
-                               mp->m_quotainfo->qi_dqperchunk;
-                       /*
-                        * Do a read-ahead on the next extent.
-                        */
-                       if ((i+1 < nmaps) &&
-                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
-                               rablkcnt =  map[i+1].br_blockcount;
-                               rablkno = map[i+1].br_startblock;
-                               while (rablkcnt--) {
-                                       xfs_buf_readahead(mp->m_ddev_targp,
-                                              XFS_FSB_TO_DADDR(mp, rablkno),
-                                              mp->m_quotainfo->qi_dqchunklen);
-                                       rablkno++;
-                               }
-                       }
-                       /*
-                        * Iterate thru all the blks in the extent and
-                        * reset the counters of all the dquots inside them.
-                        */
-                       if ((error = xfs_qm_dqiter_bufs(mp,
-                                                      firstid,
-                                                      map[i].br_startblock,
-                                                      map[i].br_blockcount,
-                                                      flags))) {
-                               break;
-                       }
-               }
-
-               if (error)
-                       break;
-       } while (nmaps > 0);
-
-       kmem_free(map);
-
-       return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- *
- * Given the inode, and a dquot id this updates both the incore dqout as well
- * as the buffer copy. This is so that once the quotacheck is done, we can
- * just log all the buffers, as opposed to logging numerous updates to
- * individual dquots.
- */
-STATIC int
-xfs_qm_quotacheck_dqadjust(
-       struct xfs_inode        *ip,
-       xfs_dqid_t              id,
-       uint                    type,
-       xfs_qcnt_t              nblks,
-       xfs_qcnt_t              rtblks)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       error = xfs_qm_dqget(mp, ip, id, type,
-                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
-       if (error) {
-               /*
-                * Shouldn't be able to turn off quotas here.
-                */
-               ASSERT(error != ESRCH);
-               ASSERT(error != ENOENT);
-               return error;
-       }
-
-       trace_xfs_dqadjust(dqp);
-
-       /*
-        * Adjust the inode count and the block count to reflect this inode's
-        * resource usage.
-        */
-       be64_add_cpu(&dqp->q_core.d_icount, 1);
-       dqp->q_res_icount++;
-       if (nblks) {
-               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
-               dqp->q_res_bcount += nblks;
-       }
-       if (rtblks) {
-               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
-               dqp->q_res_rtbcount += rtblks;
-       }
-
-       /*
-        * Set default limits, adjust timers (since we changed usages)
-        *
-        * There are no timers for the default values set in the root dquot.
-        */
-       if (dqp->q_core.d_id) {
-               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
-               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
-       }
-
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_qm_dqput(dqp);
-       return 0;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
-       xfs_inode_t     *ip,
-       xfs_qcnt_t      *O_rtblks)
-{
-       xfs_filblks_t   rtblks;                 /* total rt blks */
-       xfs_extnum_t    idx;                    /* extent record index */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    nextents;               /* number of extent entries */
-       int             error;
-
-       ASSERT(XFS_IS_REALTIME_INODE(ip));
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-                       return error;
-       }
-       rtblks = 0;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       for (idx = 0; idx < nextents; idx++)
-               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
-       *O_rtblks = (xfs_qcnt_t)rtblks;
-       return 0;
-}
-
-/*
- * callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
-       int             *res)           /* result code value */
-{
-       xfs_inode_t     *ip;
-       xfs_qcnt_t      nblks, rtblks = 0;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * rootino must have its resources accounted for, not so with the quota
-        * inodes.
-        */
-       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-               *res = BULKSTAT_RV_NOTHING;
-               return XFS_ERROR(EINVAL);
-       }
-
-       /*
-        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-        * interface expects the inode to be exclusively locked because that's
-        * the case in all other instances. It's OK that we do this because
-        * quotacheck is done only at mount time.
-        */
-       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
-       if (error) {
-               *res = BULKSTAT_RV_NOTHING;
-               return error;
-       }
-
-       ASSERT(ip->i_delayed_blks == 0);
-
-       if (XFS_IS_REALTIME_INODE(ip)) {
-               /*
-                * Walk thru the extent list and count the realtime blocks.
-                */
-               error = xfs_qm_get_rtblks(ip, &rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-
-       /*
-        * Add the (disk blocks and inode) resources occupied by this
-        * inode to its dquots. We do this adjustment in the incore dquot,
-        * and also copy the changes to its buffer.
-        * We don't care about putting these changes in a transaction
-        * envelope because if we crash in the middle of a 'quotacheck'
-        * we have to start from the beginning anyway.
-        * Once we're done, we'll log all the dquot bufs.
-        *
-        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
-        * and quotaoffs don't race. (Quotachecks happen at mount time only).
-        */
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-                                                  XFS_DQ_USER, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_GQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-                                                  XFS_DQ_GROUP, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_PQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-                                                  XFS_DQ_PROJ, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_DIDONE;
-       return 0;
-
-error0:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_GIVEUP;
-       return error;
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
-       xfs_mount_t     *mp)
-{
-       int             done, count, error;
-       xfs_ino_t       lastino;
-       size_t          structsz;
-       xfs_inode_t     *uip, *gip;
-       uint            flags;
-
-       count = INT_MAX;
-       structsz = 1;
-       lastino = 0;
-       flags = 0;
-
-       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * There should be no cached dquots. The (simplistic) quotacheck
-        * algorithm doesn't like that.
-        */
-       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
-       xfs_notice(mp, "Quotacheck needed: Please wait.");
-
-       /*
-        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
-        * their counters to zero. We need a clean slate.
-        * We don't log our changes till later.
-        */
-       uip = mp->m_quotainfo->qi_uquotaip;
-       if (uip) {
-               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_UQUOTA_CHKD;
-       }
-
-       gip = mp->m_quotainfo->qi_gquotaip;
-       if (gip) {
-               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
-                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_OQUOTA_CHKD;
-       }
-
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
-
-       /*
-        * We've made all the changes that we need to make incore.
-        * Flush them down to disk buffers if everything was updated
-        * successfully.
-        */
-       if (!error)
-               error = xfs_qm_dqflush_all(mp, 0);
-
-       /*
-        * We can get this error if we couldn't do a dquot allocation inside
-        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-        * dirty dquots that might be cached, we just want to get rid of them
-        * and turn quotaoff. The dquots won't be attached to any of the inodes
-        * at this point (because we intentionally didn't in dqget_noattach).
-        */
-       if (error) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               goto error_return;
-       }
-
-       /*
-        * We didn't log anything, because if we crashed, we'll have to
-        * start the quotacheck from scratch anyway. However, we must make
-        * sure that our dquot changes are secure before we put the
-        * quotacheck'd stamp on the superblock. So, here we do a synchronous
-        * flush.
-        */
-       XFS_bflush(mp->m_ddev_targp);
-
-       /*
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
-       mp->m_qflags |= flags;
-
- error_return:
-       if (error) {
-               xfs_warn(mp,
-       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
-                       error);
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo != NULL);
-               ASSERT(xfs_Gqm != NULL);
-               xfs_qm_destroy_quotainfo(mp);
-               if (xfs_mount_reset_sbqflags(mp)) {
-                       xfs_warn(mp,
-                               "Quotacheck: Failed to reset quota flags.");
-               }
-       } else
-               xfs_notice(mp, "Quotacheck: Done.");
-       return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
-       xfs_mount_t     *mp)
-{
-       xfs_inode_t     *uip, *gip;
-       int             error;
-       __int64_t       sbflags;
-       uint            flags;
-
-       ASSERT(mp->m_quotainfo);
-       uip = gip = NULL;
-       sbflags = 0;
-       flags = 0;
-
-       /*
-        * Get the uquota and gquota inodes
-        */
-       if (xfs_sb_version_hasquota(&mp->m_sb)) {
-               if (XFS_IS_UQUOTA_ON(mp) &&
-                   mp->m_sb.sb_uquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_uquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                            0, 0, &uip)))
-                               return XFS_ERROR(error);
-               }
-               if (XFS_IS_OQUOTA_ON(mp) &&
-                   mp->m_sb.sb_gquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_gquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                            0, 0, &gip))) {
-                               if (uip)
-                                       IRELE(uip);
-                               return XFS_ERROR(error);
-                       }
-               }
-       } else {
-               flags |= XFS_QMOPT_SBVERSION;
-               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
-       }
-
-       /*
-        * Create the two inodes, if they don't exist already. The changes
-        * made above will get added to a transaction and logged in one of
-        * the qino_alloc calls below.  If the device is readonly,
-        * temporarily switch to read-write to do this.
-        */
-       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-               if ((error = xfs_qm_qino_alloc(mp, &uip,
-                                             sbflags | XFS_SB_UQUOTINO,
-                                             flags | XFS_QMOPT_UQUOTA)))
-                       return XFS_ERROR(error);
-
-               flags &= ~XFS_QMOPT_SBVERSION;
-       }
-       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
-               flags |= (XFS_IS_GQUOTA_ON(mp) ?
-                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               error = xfs_qm_qino_alloc(mp, &gip,
-                                         sbflags | XFS_SB_GQUOTINO, flags);
-               if (error) {
-                       if (uip)
-                               IRELE(uip);
-
-                       return XFS_ERROR(error);
-               }
-       }
-
-       mp->m_quotainfo->qi_uquotaip = uip;
-       mp->m_quotainfo->qi_gquotaip = gip;
-
-       return 0;
-}
-
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-       xfs_dquot_t     *dqpout;
-       xfs_dquot_t     *dqp;
-       int             restarts;
-       int             startagain;
-
-       restarts = 0;
-       dqpout = NULL;
-
-       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
-       startagain = 0;
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               struct xfs_mount *mp = dqp->q_mount;
-               xfs_dqlock(dqp);
-
-               /*
-                * We are racing with dqlookup here. Naturally we don't
-                * want to reclaim a dquot that lookup wants. We release the
-                * freelist lock and start over, so that lookup will grab
-                * both the dquot and the freelistlock.
-                */
-               if (dqp->dq_flags & XFS_DQ_WANT) {
-                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-                       trace_xfs_dqreclaim_want(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                       restarts++;
-                       startagain = 1;
-                       goto dqunlock;
-               }
-
-               /*
-                * If the dquot is inactive, we are assured that it is
-                * not on the mplist or the hashlist, and that makes our
-                * life easier.
-                */
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(mp == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       dqpout = dqp;
-                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                       goto dqunlock;
-               }
-
-               ASSERT(dqp->q_hash);
-               ASSERT(!list_empty(&dqp->q_mplist));
-
-               /*
-                * Try to grab the flush lock. If this dquot is in the process
-                * of getting flushed to disk, we don't want to reclaim it.
-                */
-               if (!xfs_dqflock_nowait(dqp))
-                       goto dqunlock;
-
-               /*
-                * We have the flush lock so we know that this is not in the
-                * process of being flushed. So, if this is dirty, flush it
-                * DELWRI so that we don't get a freelist infested with
-                * dirty dquots.
-                */
-               if (XFS_DQ_IS_DIRTY(dqp)) {
-                       int     error;
-
-                       trace_xfs_dqreclaim_dirty(dqp);
-
-                       /*
-                        * We flush it delayed write, so don't bother
-                        * releasing the freelist lock.
-                        */
-                       error = xfs_qm_dqflush(dqp, 0);
-                       if (error) {
-                               xfs_warn(mp, "%s: dquot %p flush failed",
-                                       __func__, dqp);
-                       }
-                       goto dqunlock;
-               }
-
-               /*
-                * We're trying to get the hashlock out of order. This races
-                * with dqlookup; so, we giveup and goto the next dquot if
-                * we couldn't get the hashlock. This way, we won't starve
-                * a dqlookup process that holds the hashlock that is
-                * waiting for the freelist lock.
-                */
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       restarts++;
-                       goto dqfunlock;
-               }
-
-               /*
-                * This races with dquot allocation code as well as dqflush_all
-                * and reclaim code. So, if we failed to grab the mplist lock,
-                * giveup everything and start over.
-                */
-               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-                       restarts++;
-                       startagain = 1;
-                       goto qhunlock;
-               }
-
-               ASSERT(dqp->q_nrefs == 0);
-               list_del_init(&dqp->q_mplist);
-               mp->m_quotainfo->qi_dquots--;
-               mp->m_quotainfo->qi_dqreclaims++;
-               list_del_init(&dqp->q_hashlist);
-               dqp->q_hash->qh_version++;
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               dqpout = dqp;
-               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
-               mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-               xfs_dqfunlock(dqp);
-dqunlock:
-               xfs_dqunlock(dqp);
-               if (dqpout)
-                       break;
-               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                       break;
-               if (startagain) {
-                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       goto again;
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-       return dqpout;
-}
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-       int     howmany)
-{
-       int             nreclaimed = 0;
-       xfs_dquot_t     *dqp;
-
-       if (howmany <= 0)
-               return 0;
-
-       while (nreclaimed < howmany) {
-               dqp = xfs_qm_dqreclaim_one();
-               if (!dqp)
-                       return nreclaimed;
-               xfs_qm_dqdestroy(dqp);
-               nreclaimed++;
-       }
-       return nreclaimed;
-}
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(
-       struct shrinker *shrink,
-       struct shrink_control *sc)
-{
-       int     ndqused, nfree, n;
-       gfp_t gfp_mask = sc->gfp_mask;
-
-       if (!kmem_shake_allow(gfp_mask))
-               return 0;
-       if (!xfs_Gqm)
-               return 0;
-
-       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-       /* incore dquots in all f/s's */
-       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-       ASSERT(ndqused >= 0);
-
-       if (nfree <= ndqused && nfree < ndquot)
-               return 0;
-
-       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
-       n = nfree - ndqused - ndquot;           /* # over target */
-
-       return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-       xfs_dquot_t **O_dqpp)
-{
-       xfs_dquot_t     *dqp;
-
-       /*
-        * Check against high water mark to see if we want to pop
-        * a nincompoop dquot off the freelist.
-        */
-       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-               /*
-                * Try to recycle a dquot from the freelist.
-                */
-               if ((dqp = xfs_qm_dqreclaim_one())) {
-                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-                       /*
-                        * Just zero the core here. The rest will get
-                        * reinitialized by caller. XXX we shouldn't even
-                        * do this zero ...
-                        */
-                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-                       *O_dqpp = dqp;
-                       return B_FALSE;
-               }
-               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
-       }
-
-       /*
-        * Allocate a brand new dquot on the kernel heap and return it
-        * to the caller to initialize.
-        */
-       ASSERT(xfs_Gqm->qm_dqzone != NULL);
-       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-       atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-       return B_TRUE;
-}
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
-       xfs_mount_t     *mp,
-       __int64_t       flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     mp->m_sb.sb_sectsize + 128, 0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_mod_sb(tp, flags);
-       error = xfs_trans_commit(tp, 0);
-
-       return error;
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid, gid and prid make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in  : inode (unlocked)
- * out : udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
-       struct xfs_inode        *ip,
-       uid_t                   uid,
-       gid_t                   gid,
-       prid_t                  prid,
-       uint                    flags,
-       struct xfs_dquot        **O_udqpp,
-       struct xfs_dquot        **O_gdqpp)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *uq, *gq;
-       int                     error;
-       uint                    lockflags;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       lockflags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lockflags);
-
-       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
-               gid = ip->i_d.di_gid;
-
-       /*
-        * Attach the dquot(s) to this inode, doing a dquot allocation
-        * if necessary. The dquot(s) will not be locked.
-        */
-       if (XFS_NOT_DQATTACHED(mp, ip)) {
-               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
-               if (error) {
-                       xfs_iunlock(ip, lockflags);
-                       return error;
-               }
-       }
-
-       uq = gq = NULL;
-       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
-               if (ip->i_d.di_uid != uid) {
-                       /*
-                        * What we need is the dquot that has this uid, and
-                        * if we send the inode to dqget, the uid of the inode
-                        * takes priority over what's sent in the uid argument.
-                        * We must unlock inode here before calling dqget if
-                        * we're not sending the inode, because otherwise
-                        * we'll deadlock by doing trans_reserve while
-                        * holding ilock.
-                        */
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
-                                                XFS_DQ_USER,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &uq))) {
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       /*
-                        * Get the ilock in the right order.
-                        */
-                       xfs_dqunlock(uq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       /*
-                        * Take an extra reference, because we'll return
-                        * this to caller
-                        */
-                       ASSERT(ip->i_udquot);
-                       uq = ip->i_udquot;
-                       xfs_dqlock(uq);
-                       XFS_DQHOLD(uq);
-                       xfs_dqunlock(uq);
-               }
-       }
-       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
-               if (ip->i_d.di_gid != gid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
-                                                XFS_DQ_GROUP,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-               if (xfs_get_projid(ip) != prid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
-                                                XFS_DQ_PROJ,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return (error);
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       }
-       if (uq)
-               trace_xfs_dquot_dqalloc(ip);
-
-       xfs_iunlock(ip, lockflags);
-       if (O_udqpp)
-               *O_udqpp = uq;
-       else if (uq)
-               xfs_qm_dqrele(uq);
-       if (O_gdqpp)
-               *O_gdqpp = gq;
-       else if (gq)
-               xfs_qm_dqrele(gq);
-       return 0;
-}
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     **IO_olddq,
-       xfs_dquot_t     *newdq)
-{
-       xfs_dquot_t     *prevdq;
-       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
-                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
-       /* old dquot */
-       prevdq = *IO_olddq;
-       ASSERT(prevdq);
-       ASSERT(prevdq != newdq);
-
-       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
-       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
-       /* the sparkling new dquot */
-       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
-       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
-       /*
-        * Take an extra reference, because the inode
-        * is going to keep this dquot pointer even
-        * after the trans_commit.
-        */
-       xfs_dqlock(newdq);
-       XFS_DQHOLD(newdq);
-       xfs_dqunlock(newdq);
-       *IO_olddq = newdq;
-
-       return prevdq;
-}
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            delblks, blkflags, prjflags = 0;
-       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
-       int             error;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       delblks = ip->i_delayed_blks;
-       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
-       blkflags = XFS_IS_REALTIME_INODE(ip) ?
-                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
-       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
-               delblksudq = udqp;
-               /*
-                * If there are delayed allocation blocks, then we have to
-                * unreserve those from the old dquot, and add them to the
-                * new dquot.
-                */
-               if (delblks) {
-                       ASSERT(ip->i_udquot);
-                       unresudq = ip->i_udquot;
-               }
-       }
-       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
-               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
-                       prjflags = XFS_QMOPT_ENOSPC;
-
-               if (prjflags ||
-                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
-                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
-                       delblksgdq = gdqp;
-                       if (delblks) {
-                               ASSERT(ip->i_gdquot);
-                               unresgdq = ip->i_gdquot;
-                       }
-               }
-       }
-
-       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
-                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-                               flags | blkflags | prjflags)))
-               return (error);
-
-       /*
-        * Do the delayed blks reservations/unreservations now. Since, these
-        * are done without the help of a transaction, if a reservation fails
-        * its previous reservations won't be automatically undone by trans
-        * code. So, we have to do it manually here.
-        */
-       if (delblks) {
-               /*
-                * Do the reservations first. Unreservation can't fail.
-                */
-               ASSERT(delblksudq || delblksgdq);
-               ASSERT(unresudq || unresgdq);
-               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-                               flags | blkflags | prjflags)))
-                       return (error);
-               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-                               blkflags);
-       }
-
-       return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
-       struct xfs_inode        **i_tab)
-{
-       struct xfs_mount        *mp = i_tab[0]->i_mount;
-       int                     i;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       for (i = 0; (i < 4 && i_tab[i]); i++) {
-               struct xfs_inode        *ip = i_tab[i];
-               int                     error;
-
-               /*
-                * Watch out for duplicate entries in the table.
-                */
-               if (i == 0 || ip != i_tab[i-1]) {
-                       if (XFS_NOT_DQATTACHED(mp, ip)) {
-                               error = xfs_qm_dqattach(ip, 0);
-                               if (error)
-                                       return error;
-                       }
-               }
-       }
-       return 0;
-}
-
-void
-xfs_qm_vop_create_dqattach(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       struct xfs_dquot        *udqp,
-       struct xfs_dquot        *gdqp)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       if (udqp) {
-               xfs_dqlock(udqp);
-               XFS_DQHOLD(udqp);
-               xfs_dqunlock(udqp);
-               ASSERT(ip->i_udquot == NULL);
-               ip->i_udquot = udqp;
-               ASSERT(XFS_IS_UQUOTA_ON(mp));
-               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-       if (gdqp) {
-               xfs_dqlock(gdqp);
-               XFS_DQHOLD(gdqp);
-               xfs_dqunlock(gdqp);
-               ASSERT(ip->i_gdquot == NULL);
-               ip->i_gdquot = gdqp;
-               ASSERT(XFS_IS_OQUOTA_ON(mp));
-               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
-                               be32_to_cpu(gdqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-}
-
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
deleted file mode 100644 (file)
index 43b9abe..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint            ndquot;
-extern struct mutex    xfs_Gqm_lock;
-extern struct xfs_qm   *xfs_Gqm;
-extern kmem_zone_t     *qm_dqzone;
-extern kmem_zone_t     *qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS       7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS    4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO            2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
-
-typedef xfs_dqhash_t   xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
-       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
-       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
-       struct list_head qm_dqfrlist;    /* freelist of dquots */
-       struct mutex     qm_dqfrlist_lock;
-       int              qm_dqfrlist_cnt;
-       atomic_t         qm_totaldquots; /* total incore dquots */
-       uint             qm_nrefs;       /* file systems with quota on */
-       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
-       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
-       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
-       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
-       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
-       struct list_head qi_dqlist;      /* all dquots in filesys */
-       struct mutex     qi_dqlist_lock;
-       int              qi_dquots;
-       int              qi_dqreclaims;  /* a change here indicates
-                                           a removal in the dqlist */
-       time_t           qi_btimelimit;  /* limit for blks timer */
-       time_t           qi_itimelimit;  /* limit for inodes timer */
-       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
-       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
-       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
-       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
-       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
-       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
-       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
-       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
-       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
-       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
-       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
-       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
-       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
-                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS            2
-typedef struct xfs_dquot_acct {
-       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
-       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
-
-#define XFS_QM_BWARNLIMIT      5
-#define XFS_QM_IWARNLIMIT      5
-#define XFS_QM_RTBWARNLIMIT    5
-
-extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int             xfs_qm_quotacheck(xfs_mount_t *);
-extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* dquot stuff */
-extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* quota ops */
-extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
deleted file mode 100644 (file)
index a0a829a..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-
-STATIC void
-xfs_fill_statvfs_from_dquot(
-       struct kstatfs          *statp,
-       xfs_disk_dquot_t        *dp)
-{
-       __uint64_t              limit;
-
-       limit = dp->d_blk_softlimit ?
-               be64_to_cpu(dp->d_blk_softlimit) :
-               be64_to_cpu(dp->d_blk_hardlimit);
-       if (limit && statp->f_blocks > limit) {
-               statp->f_blocks = limit;
-               statp->f_bfree = statp->f_bavail =
-                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
-                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
-       }
-
-       limit = dp->d_ino_softlimit ?
-               be64_to_cpu(dp->d_ino_softlimit) :
-               be64_to_cpu(dp->d_ino_hardlimit);
-       if (limit && statp->f_files > limit) {
-               statp->f_files = limit;
-               statp->f_ffree =
-                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
-                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
-       }
-}
-
-
-/*
- * Directory tree accounting is implemented using project quotas, where
- * the project identifier is inherited from parent directories.
- * A statvfs (df, etc.) of a directory that is using project quota should
- * return a statvfs of the project, not the entire filesystem.
- * This makes such trees appear as if they are filesystems in themselves.
- */
-void
-xfs_qm_statvfs(
-       xfs_inode_t             *ip,
-       struct kstatfs          *statp)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       xfs_dquot_t             *dqp;
-
-       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
-               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
-               xfs_qm_dqput(dqp);
-       }
-}
-
-int
-xfs_qm_newmount(
-       xfs_mount_t     *mp,
-       uint            *needquotamount,
-       uint            *quotaflags)
-{
-       uint            quotaondisk;
-       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
-       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
-                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
-       if (quotaondisk) {
-               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
-               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
-               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
-       }
-
-       /*
-        * If the device itself is read-only, we can't allow
-        * the user to change the state of quota on the mount -
-        * this would generate a transaction on the ro device,
-        * which would lead to an I/O error and shutdown
-        */
-
-       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
-           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
-            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
-           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
-            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
-           xfs_dev_is_read_only(mp, "changing quota state")) {
-               xfs_warn(mp, "please mount with%s%s%s%s.",
-                       (!quotaondisk ? "out quota" : ""),
-                       (uquotaondisk ? " usrquota" : ""),
-                       (pquotaondisk ? " prjquota" : ""),
-                       (gquotaondisk ? " grpquota" : ""));
-               return XFS_ERROR(EPERM);
-       }
-
-       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
-               /*
-                * Call mount_quotas at this point only if we won't have to do
-                * a quotacheck.
-                */
-               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
-                       /*
-                        * If an error occurred, qm_mount_quotas code
-                        * has already disabled quotas. So, just finish
-                        * mounting, and get on with the boring life
-                        * without disk quotas.
-                        */
-                       xfs_qm_mount_quotas(mp);
-               } else {
-                       /*
-                        * Clear the quota flags, but remember them. This
-                        * is so that the quota code doesn't get invoked
-                        * before we're ready. This can happen when an
-                        * inode goes inactive and wants to free blocks,
-                        * or via xfs_log_mount_finish.
-                        */
-                       *needquotamount = B_TRUE;
-                       *quotaflags = mp->m_qflags;
-                       mp->m_qflags = 0;
-               }
-       }
-
-       return 0;
-}
-
-void __init
-xfs_qm_init(void)
-{
-       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-       mutex_init(&xfs_Gqm_lock);
-       xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-       xfs_qm_cleanup_procfs();
-       if (qm_dqzone)
-               kmem_zone_destroy(qm_dqzone);
-       if (qm_dqtrxzone)
-               kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
deleted file mode 100644 (file)
index 8671a0b..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
-       /* maximum; incore; ratio free to inuse; freelist */
-       seq_printf(m, "%d\t%d\t%d\t%u\n",
-                       ndquot,
-                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
-       return 0;
-}
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqm_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
-       /* quota performance statistics */
-       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-                       xqmstats.xs_qm_dqreclaims,
-                       xqmstats.xs_qm_dqreclaim_misses,
-                       xqmstats.xs_qm_dquot_dups,
-                       xqmstats.xs_qm_dqcachemisses,
-                       xqmstats.xs_qm_dqcachehits,
-                       xqmstats.xs_qm_dqwants,
-                       xqmstats.xs_qm_dqshake_reclaims,
-                       xqmstats.xs_qm_dqinact_reclaims);
-       return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqmstat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
-       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/xqm", NULL);
-       remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
deleted file mode 100644 (file)
index 5b964fc..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
-       __uint32_t              xs_qm_dqreclaims;
-       __uint32_t              xs_qm_dqreclaim_misses;
-       __uint32_t              xs_qm_dquot_dups;
-       __uint32_t              xs_qm_dqcachemisses;
-       __uint32_t              xs_qm_dqcachehits;
-       __uint32_t              xs_qm_dqwants;
-       __uint32_t              xs_qm_dqshake_reclaims;
-       __uint32_t              xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count)  ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count)  do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
deleted file mode 100644 (file)
index 609246f..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
-                                       uint);
-STATIC uint    xfs_qm_export_flags(uint);
-STATIC uint    xfs_qm_export_qtype_flags(uint);
-STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-                                       fs_disk_quota_t *);
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-int
-xfs_qm_scall_quotaoff(
-       xfs_mount_t             *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       uint                    dqtype;
-       int                     error;
-       uint                    inactivate_flags;
-       xfs_qoff_logitem_t      *qoffstart;
-       int                     nculprits;
-
-       /*
-        * No file system can have quotas enabled on disk but not in core.
-        * Note that quota utilities (like quotaoff) _expect_
-        * errno == EEXIST here.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               return XFS_ERROR(EEXIST);
-       error = 0;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-       /*
-        * We don't want to deal with two quotaoffs messing up each other,
-        * so we're going to serialize it. quotaoff isn't exactly a performance
-        * critical thing.
-        * If quotaoff, then we must be dealing with the root filesystem.
-        */
-       ASSERT(q);
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * If we're just turning off quota enforcement, change mp and go.
-        */
-       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-               mp->m_qflags &= ~(flags);
-
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = mp->m_qflags;
-               spin_unlock(&mp->m_sb_lock);
-               mutex_unlock(&q->qi_quotaofflock);
-
-               /* XXX what to do if error ? Revert back to old vals incore ? */
-               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
-               return (error);
-       }
-
-       dqtype = 0;
-       inactivate_flags = 0;
-       /*
-        * If accounting is off, we must turn enforcement off, clear the
-        * quota 'CHKD' certificate to make it known that we have to
-        * do a quotacheck the next time this quota is turned on.
-        */
-       if (flags & XFS_UQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_UQUOTA;
-               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-               inactivate_flags |= XFS_UQUOTA_ACTIVE;
-       }
-       if (flags & XFS_GQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_GQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_GQUOTA_ACTIVE;
-       } else if (flags & XFS_PQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_PQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_PQUOTA_ACTIVE;
-       }
-
-       /*
-        * Nothing to do?  Don't complain. This happens when we're just
-        * turning off quota enforcement.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               goto out_unlock;
-
-       /*
-        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-        * and synchronously. If we fail to write, we should abort the
-        * operation as it cannot be recovered safely if we crash.
-        */
-       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-        * to take care of the race between dqget and quotaoff. We don't take
-        * any special locks to reset these bits. All processes need to check
-        * these bits *after* taking inode lock(s) to see if the particular
-        * quota type is in the process of being turned off. If *ACTIVE, it is
-        * guaranteed that all dquot structures and all quotainode ptrs will all
-        * stay valid as long as that inode is kept locked.
-        *
-        * There is no turning back after this.
-        */
-       mp->m_qflags &= ~inactivate_flags;
-
-       /*
-        * Give back all the dquot reference(s) held by inodes.
-        * Here we go thru every single incore inode in this file system, and
-        * do a dqrele on the i_udquot/i_gdquot that it may have.
-        * Essentially, as long as somebody has an inode locked, this guarantees
-        * that quotas will not be turned off. This is handy because in a
-        * transaction once we lock the inode(s) and check for quotaon, we can
-        * depend on the quota inodes (and other things) being valid as long as
-        * we keep the lock(s).
-        */
-       xfs_qm_dqrele_all_inodes(mp, flags);
-
-       /*
-        * Next we make the changes in the quota flag in the mount struct.
-        * This isn't protected by a particular lock directly, because we
-        * don't want to take a mrlock every time we depend on quotas being on.
-        */
-       mp->m_qflags &= ~(flags);
-
-       /*
-        * Go through all the dquots of this file system and purge them,
-        * according to what was turned off. We may not be able to get rid
-        * of all dquots, because dquots can have temporary references that
-        * are not attached to inodes. eg. xfs_setattr, xfs_create.
-        * So, if we couldn't purge all the dquots from the filesystem,
-        * we can't get rid of the incore data structures.
-        */
-       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
-               delay(10 * nculprits);
-
-       /*
-        * Transactions that had started before ACTIVE state bit was cleared
-        * could have logged many dquots, so they'd have higher LSNs than
-        * the first QUOTAOFF log record does. If we happen to crash when
-        * the tail of the log has gone past the QUOTAOFF record, but
-        * before the last dquot modification, those dquots __will__
-        * recover, and that's not good.
-        *
-        * So, we have QUOTAOFF start and end logitems; the start
-        * logitem won't get overwritten until the end logitem appears...
-        */
-       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-       if (error) {
-               /* We're screwed now. Shutdown is the only option. */
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               goto out_unlock;
-       }
-
-       /*
-        * If quotas is completely disabled, close shop.
-        */
-       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
-           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
-               mutex_unlock(&q->qi_quotaofflock);
-               xfs_qm_destroy_quotainfo(mp);
-               return (0);
-       }
-
-       /*
-        * Release our quotainode references if we don't need them anymore.
-        */
-       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-               IRELE(q->qi_uquotaip);
-               q->qi_uquotaip = NULL;
-       }
-       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
-               IRELE(q->qi_gquotaip);
-               q->qi_gquotaip = NULL;
-       }
-
-out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfile(
-       struct xfs_mount        *mp,
-       xfs_ino_t               ino)
-{
-       struct xfs_inode        *ip;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       if (ino == NULLFSINO)
-               return 0;
-
-       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
-       if (error)
-               return error;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                 XFS_TRANS_PERM_LOG_RES,
-                                 XFS_ITRUNCATE_LOG_COUNT);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-               goto out_put;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip);
-
-       error = xfs_itruncate_data(&tp, ip, 0);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                    XFS_TRANS_ABORT);
-               goto out_unlock;
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
-out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-out_put:
-       IRELE(ip);
-       return error;
-}
-
-int
-xfs_qm_scall_trunc_qfiles(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error = 0, error2 = 0;
-
-       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
-               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
-                       __func__, flags, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (flags & XFS_DQ_USER)
-               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
-       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
-               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
-
-       return error ? error : error2;
-}
-
-/*
- * Switch on (a given) quota enforcement for a filesystem.  This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-int
-xfs_qm_scall_quotaon(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error;
-       uint            qf;
-       __int64_t       sbflags;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-       /*
-        * Switching on quota accounting must be done at mount time.
-        */
-       flags &= ~(XFS_ALL_QUOTA_ACCT);
-
-       sbflags = 0;
-
-       if (flags == 0) {
-               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
-                       __func__, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       /* No fs can turn on quotas with a delayed effect */
-       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
-       /*
-        * Can't enforce without accounting. We check the superblock
-        * qflags here instead of m_qflags because rootfs can have
-        * quota acct on ondisk without m_qflags' knowing.
-        */
-       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
-           (flags & XFS_UQUOTA_ENFD))
-           ||
-           ((flags & XFS_PQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
-           (flags & XFS_GQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-           (flags & XFS_OQUOTA_ENFD))) {
-               xfs_debug(mp,
-                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
-                       __func__, flags, mp->m_sb.sb_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-       /*
-        * If everything's up to-date incore, then don't waste time.
-        */
-       if ((mp->m_qflags & flags) == flags)
-               return XFS_ERROR(EEXIST);
-
-       /*
-        * Change sb_qflags on disk but not incore mp->qflags
-        * if this is the root filesystem.
-        */
-       spin_lock(&mp->m_sb_lock);
-       qf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = qf | flags;
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * There's nothing to change if it's the same.
-        */
-       if ((qf & flags) == flags && sbflags == 0)
-               return XFS_ERROR(EEXIST);
-       sbflags |= XFS_SB_QFLAGS;
-
-       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
-               return (error);
-       /*
-        * If we aren't trying to switch on quota enforcement, we are done.
-        */
-       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
-           (flags & XFS_ALL_QUOTA_ENFD) == 0)
-               return (0);
-
-       if (! XFS_IS_QUOTA_RUNNING(mp))
-               return XFS_ERROR(ESRCH);
-
-       /*
-        * Switch on quota enforcement in core.
-        */
-       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
-       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
-
-       return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-int
-xfs_qm_scall_getqstat(
-       struct xfs_mount        *mp,
-       struct fs_quota_stat    *out)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_inode        *uip, *gip;
-       boolean_t               tempuqip, tempgqip;
-
-       uip = gip = NULL;
-       tempuqip = tempgqip = B_FALSE;
-       memset(out, 0, sizeof(fs_quota_stat_t));
-
-       out->qs_version = FS_QSTAT_VERSION;
-       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
-               out->qs_uquota.qfs_ino = NULLFSINO;
-               out->qs_gquota.qfs_ino = NULLFSINO;
-               return (0);
-       }
-       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
-                                                       (XFS_ALL_QUOTA_ACCT|
-                                                        XFS_ALL_QUOTA_ENFD));
-       out->qs_pad = 0;
-       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
-       if (q) {
-               uip = q->qi_uquotaip;
-               gip = q->qi_gquotaip;
-       }
-       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                       0, 0, &uip) == 0)
-                       tempuqip = B_TRUE;
-       }
-       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                       0, 0, &gip) == 0)
-                       tempgqip = B_TRUE;
-       }
-       if (uip) {
-               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
-               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
-               if (tempuqip)
-                       IRELE(uip);
-       }
-       if (gip) {
-               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
-               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
-               if (tempgqip)
-                       IRELE(gip);
-       }
-       if (q) {
-               out->qs_incoredqs = q->qi_dquots;
-               out->qs_btimelimit = q->qi_btimelimit;
-               out->qs_itimelimit = q->qi_itimelimit;
-               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
-               out->qs_bwarnlimit = q->qi_bwarnlimit;
-               out->qs_iwarnlimit = q->qi_iwarnlimit;
-       }
-       return 0;
-}
-
-#define XFS_DQ_MASK \
-       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-int
-xfs_qm_scall_setqlim(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       uint                    type,
-       fs_disk_quota_t         *newlim)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_disk_dquot_t        *ddq;
-       xfs_dquot_t             *dqp;
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qcnt_t              hard, soft;
-
-       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
-               return EINVAL;
-       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
-               return 0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       /*
-        * We don't want to race with a quotaoff so take the quotaoff lock.
-        * (We don't hold an inode lock, so there's nothing else to stop
-        * a quotaoff from happening). (XXXThis doesn't currently happen
-        * because we take the vfslock before calling xfs_qm_sysent).
-        */
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * Get the dquot (locked), and join it to the transaction.
-        * Allocate the dquot if this doesn't exist.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
-               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               ASSERT(error != ENOENT);
-               goto out_unlock;
-       }
-       xfs_trans_dqjoin(tp, dqp);
-       ddq = &dqp->q_core;
-
-       /*
-        * Make sure that hardlimits are >= soft limits before changing.
-        */
-       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-                       be64_to_cpu(ddq->d_blk_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-                       be64_to_cpu(ddq->d_blk_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_blk_hardlimit = cpu_to_be64(hard);
-               ddq->d_blk_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_bhardlimit = hard;
-                       q->qi_bsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
-       }
-       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-                       be64_to_cpu(ddq->d_rtb_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-                       be64_to_cpu(ddq->d_rtb_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
-               ddq->d_rtb_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_rtbhardlimit = hard;
-                       q->qi_rtbsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
-       }
-
-       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
-               (xfs_qcnt_t) newlim->d_ino_hardlimit :
-                       be64_to_cpu(ddq->d_ino_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
-               (xfs_qcnt_t) newlim->d_ino_softlimit :
-                       be64_to_cpu(ddq->d_ino_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_ino_hardlimit = cpu_to_be64(hard);
-               ddq->d_ino_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_ihardlimit = hard;
-                       q->qi_isoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
-       }
-
-       /*
-        * Update warnings counter(s) if requested
-        */
-       if (newlim->d_fieldmask & FS_DQ_BWARNS)
-               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
-       if (newlim->d_fieldmask & FS_DQ_IWARNS)
-               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
-       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
-       if (id == 0) {
-               /*
-                * Timelimits for the super user set the relative time
-                * the other users can be over quota for this file system.
-                * If it is zero a default is used.  Ditto for the default
-                * soft and hard limit values (already done, above), and
-                * for warnings.
-                */
-               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-                       q->qi_btimelimit = newlim->d_btimer;
-                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-                       q->qi_itimelimit = newlim->d_itimer;
-                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
-                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_BWARNS)
-                       q->qi_bwarnlimit = newlim->d_bwarns;
-               if (newlim->d_fieldmask & FS_DQ_IWARNS)
-                       q->qi_iwarnlimit = newlim->d_iwarns;
-               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
-       } else {
-               /*
-                * If the user is now over quota, start the timelimit.
-                * The user will not be 'warned'.
-                * Note that we keep the timers ticking, whether enforcement
-                * is on or off. We don't really want to bother with iterating
-                * over all ondisk dquots and turning the timers on/off.
-                */
-               xfs_qm_adjust_dqtimers(mp, ddq);
-       }
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_trans_log_dquot(tp, dqp);
-
-       error = xfs_trans_commit(tp, 0);
-       xfs_qm_dqrele(dqp);
-
- out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
-
-int
-xfs_qm_scall_getquota(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       fs_disk_quota_t *out)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       /*
-        * Try to get the dquot. We don't want it allocated on disk, so
-        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-        * exist, we'll get ENOENT back.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-               return (error);
-       }
-
-       /*
-        * If everything's NULL, this dquot doesn't quite exist as far as
-        * our utility programs are concerned.
-        */
-       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-               xfs_qm_dqput(dqp);
-               return XFS_ERROR(ENOENT);
-       }
-       /*
-        * Convert the disk dquot to the exportable format
-        */
-       xfs_qm_export_dquot(mp, &dqp->q_core, out);
-       xfs_qm_dqput(dqp);
-       return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-       xfs_mount_t             *mp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qoff_logitem_t      *qoffi;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
-                                       flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-       return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
-       xfs_mount_t            *mp,
-       xfs_qoff_logitem_t     **qoffstartp,
-       uint                   flags)
-{
-       xfs_trans_t            *tp;
-       int                     error;
-       xfs_qoff_logitem_t     *qoffi=NULL;
-       uint                    oldsbqflag=0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     sizeof(xfs_qoff_logitem_t) * 2 +
-                                     mp->m_sb.sb_sectsize + 128,
-                                     0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               goto error0;
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       spin_lock(&mp->m_sb_lock);
-       oldsbqflag = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-error0:
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /*
-                * No one else is modifying sb_qflags, so this is OK.
-                * We still hold the quotaofflock.
-                */
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = oldsbqflag;
-               spin_unlock(&mp->m_sb_lock);
-       }
-       *qoffstartp = qoffi;
-       return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *src,
-       struct fs_disk_quota    *dst)
-{
-       memset(dst, 0, sizeof(*dst));
-       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
-       dst->d_id = be32_to_cpu(src->d_id);
-       dst->d_blk_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
-       dst->d_blk_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
-       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
-       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
-       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
-       dst->d_icount = be64_to_cpu(src->d_icount);
-       dst->d_btimer = be32_to_cpu(src->d_btimer);
-       dst->d_itimer = be32_to_cpu(src->d_itimer);
-       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
-       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
-       dst->d_rtb_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
-       dst->d_rtb_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
-       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
-       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
-       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
-       /*
-        * Internally, we don't reset all the timers when quota enforcement
-        * gets turned off. No need to confuse the user level code,
-        * so return zeroes in that case.
-        */
-       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
-           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
-               dst->d_btimer = 0;
-               dst->d_itimer = 0;
-               dst->d_rtbtimer = 0;
-       }
-
-#ifdef DEBUG
-       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
-            (XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
-           dst->d_id != 0) {
-               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
-                   (dst->d_blk_softlimit > 0)) {
-                       ASSERT(dst->d_btimer != 0);
-               }
-               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
-                   (dst->d_ino_softlimit > 0)) {
-                       ASSERT(dst->d_itimer != 0);
-               }
-       }
-#endif
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
-       uint flags)
-{
-       /*
-        * Can't be more than one, or none.
-        */
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_USER_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_USER_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
-
-       return (flags & XFS_DQ_USER) ?
-               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
-                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_export_flags(
-       uint flags)
-{
-       uint uflags;
-
-       uflags = 0;
-       if (flags & XFS_UQUOTA_ACCT)
-               uflags |= FS_QUOTA_UDQ_ACCT;
-       if (flags & XFS_PQUOTA_ACCT)
-               uflags |= FS_QUOTA_PDQ_ACCT;
-       if (flags & XFS_GQUOTA_ACCT)
-               uflags |= FS_QUOTA_GDQ_ACCT;
-       if (flags & XFS_UQUOTA_ENFD)
-               uflags |= FS_QUOTA_UDQ_ENFD;
-       if (flags & (XFS_OQUOTA_ENFD)) {
-               uflags |= (flags & XFS_GQUOTA_ACCT) ?
-                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
-       }
-       return (uflags);
-}
-
-
-STATIC int
-xfs_dqrele_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       /* skip quota inodes */
-       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
-           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
-               ASSERT(ip->i_udquot == NULL);
-               ASSERT(ip->i_gdquot == NULL);
-               return 0;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- *
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff.
- */
-void
-xfs_qm_dqrele_all_inodes(
-       struct xfs_mount *mp,
-       uint             flags)
-{
-       ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
deleted file mode 100644 (file)
index 94a3d92..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE    10
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-                                (__psunsigned_t)(id)) & \
-                               (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-                                    (xfs_Gqm->qm_usr_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)) : \
-                                    (xfs_Gqm->qm_grp_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
-       !dqp->q_core.d_blk_hardlimit && \
-       !dqp->q_core.d_blk_softlimit && \
-       !dqp->q_core.d_rtb_hardlimit && \
-       !dqp->q_core.d_rtb_softlimit && \
-       !dqp->q_core.d_ino_hardlimit && \
-       !dqp->q_core.d_ino_softlimit && \
-       !dqp->q_core.d_bcount && \
-       !dqp->q_core.d_rtbcount && \
-       !dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
-                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
deleted file mode 100644 (file)
index 4d00ee6..0000000
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp != tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(dqp->q_logitem.qli_dquot == dqp);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-       /*
-        * Initialize d_transp so we can later determine if this dquot is
-        * associated with this transaction.
-        */
-       dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed.  The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp == tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanky new transaction
- */
-void
-xfs_trans_dup_dqinfo(
-       xfs_trans_t     *otp,
-       xfs_trans_t     *ntp)
-{
-       xfs_dqtrx_t     *oq, *nq;
-       int             i,j;
-       xfs_dqtrx_t     *oqa, *nqa;
-
-       if (!otp->t_dqinfo)
-               return;
-
-       xfs_trans_alloc_dqinfo(ntp);
-       oqa = otp->t_dqinfo->dqa_usrdquots;
-       nqa = ntp->t_dqinfo->dqa_usrdquots;
-
-       /*
-        * Because the quota blk reservation is carried forward,
-        * it is also necessary to carry forward the DQ_DIRTY flag.
-        */
-       if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
-               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       if (oqa[i].qt_dquot == NULL)
-                               break;
-                       oq = &oqa[i];
-                       nq = &nqa[i];
-
-                       nq->qt_dquot = oq->qt_dquot;
-                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
-                       nq->qt_rtbcount_delta = 0;
-
-                       /*
-                        * Transfer whatever is left of the reservations.
-                        */
-                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-                       oq->qt_blk_res = oq->qt_blk_res_used;
-
-                       nq->qt_rtblk_res = oq->qt_rtblk_res -
-                               oq->qt_rtblk_res_used;
-                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
-                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
-                       oq->qt_ino_res = oq->qt_ino_res_used;
-
-               }
-               oqa = otp->t_dqinfo->dqa_grpdquots;
-               nqa = ntp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       uint            field,
-       long            delta)
-{
-       xfs_mount_t     *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       int             i;
-       xfs_dqtrx_t     *qa;
-
-       qa = XFS_QM_ISUDQ(dqp) ?
-               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
-
-       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-               if (qa[i].qt_dquot == NULL ||
-                   qa[i].qt_dquot == dqp)
-                       return &qa[i];
-       }
-
-       return NULL;
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp,
-       uint            field,
-       long            delta)
-{
-       xfs_dqtrx_t     *qtrx;
-
-       ASSERT(tp);
-       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-       qtrx = NULL;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-       /*
-        * Find either the first free slot or the slot that belongs
-        * to this dquot.
-        */
-       qtrx = xfs_trans_get_dqtrx(tp, dqp);
-       ASSERT(qtrx);
-       if (qtrx->qt_dquot == NULL)
-               qtrx->qt_dquot = dqp;
-
-       switch (field) {
-
-               /*
-                * regular disk blk reservation
-                */
-             case XFS_TRANS_DQ_RES_BLKS:
-               qtrx->qt_blk_res += (ulong)delta;
-               break;
-
-               /*
-                * inode reservation
-                */
-             case XFS_TRANS_DQ_RES_INOS:
-               qtrx->qt_ino_res += (ulong)delta;
-               break;
-
-               /*
-                * disk blocks used.
-                */
-             case XFS_TRANS_DQ_BCOUNT:
-               if (qtrx->qt_blk_res && delta > 0) {
-                       qtrx->qt_blk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-               }
-               qtrx->qt_bcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELBCOUNT:
-               qtrx->qt_delbcnt_delta += delta;
-               break;
-
-               /*
-                * Inode Count
-                */
-             case XFS_TRANS_DQ_ICOUNT:
-               if (qtrx->qt_ino_res && delta > 0) {
-                       qtrx->qt_ino_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
-               }
-               qtrx->qt_icount_delta += delta;
-               break;
-
-               /*
-                * rtblk reservation
-                */
-             case XFS_TRANS_DQ_RES_RTBLKS:
-               qtrx->qt_rtblk_res += (ulong)delta;
-               break;
-
-               /*
-                * rtblk count
-                */
-             case XFS_TRANS_DQ_RTBCOUNT:
-               if (qtrx->qt_rtblk_res && delta > 0) {
-                       qtrx->qt_rtblk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
-               }
-               qtrx->qt_rtbcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELRTBCOUNT:
-               qtrx->qt_delrtb_delta += delta;
-               break;
-
-             default:
-               ASSERT(0);
-       }
-       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots (of one type - usr OR grp),
- * involved in a transaction is 2 and that both usr and grp combined - 3.
- * So, we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
-       xfs_trans_t     *tp,
-       xfs_dqtrx_t     *q)
-{
-       ASSERT(q[0].qt_dquot != NULL);
-       if (q[1].qt_dquot == NULL) {
-               xfs_dqlock(q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-       } else {
-               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
-               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[1].qt_dquot);
-       }
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go thru all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction.
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       xfs_disk_dquot_t        *d;
-       long                    totalbdelta;
-       long                    totalrtbdelta;
-
-       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       ASSERT(tp->t_dqinfo);
-       qa = tp->t_dqinfo->dqa_usrdquots;
-       for (j = 0; j < 2; j++) {
-               if (qa[0].qt_dquot == NULL) {
-                       qa = tp->t_dqinfo->dqa_grpdquots;
-                       continue;
-               }
-
-               /*
-                * Lock all of the dquots and join them to the transaction.
-                */
-               xfs_trans_dqlockedjoin(tp, qa);
-
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * The array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-
-                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(dqp->q_transp == tp);
-
-                       /*
-                        * adjust the actual number of blocks used
-                        */
-                       d = &dqp->q_core;
-
-                       /*
-                        * The issue here is - sometimes we don't make a blkquota
-                        * reservation intentionally to be fair to users
-                        * (when the amount is small). On the other hand,
-                        * delayed allocs do make reservations, but that's
-                        * outside of a transaction, so we have no
-                        * idea how much was really reserved.
-                        * So, here we've accumulated delayed allocation blks and
-                        * non-delay blks. The assumption is that the
-                        * delayed ones are always reserved (outside of a
-                        * transaction), and the others may or may not have
-                        * quota reservations.
-                        */
-                       totalbdelta = qtrx->qt_bcount_delta +
-                               qtrx->qt_delbcnt_delta;
-                       totalrtbdelta = qtrx->qt_rtbcount_delta +
-                               qtrx->qt_delrtb_delta;
-#ifdef DEBUG
-                       if (totalbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_bcount) >=
-                                      -totalbdelta);
-
-                       if (totalrtbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
-                                      -totalrtbdelta);
-
-                       if (qtrx->qt_icount_delta < 0)
-                               ASSERT(be64_to_cpu(d->d_icount) >=
-                                      -qtrx->qt_icount_delta);
-#endif
-                       if (totalbdelta)
-                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
-
-                       if (qtrx->qt_icount_delta)
-                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
-                       if (totalrtbdelta)
-                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
-
-                       /*
-                        * Get any default limits in use.
-                        * Start/reset the timer(s) if needed.
-                        */
-                       if (d->d_id) {
-                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
-                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
-                       }
-
-                       dqp->dq_flags |= XFS_DQ_DIRTY;
-                       /*
-                        * add this to the list of items to get logged
-                        */
-                       xfs_trans_log_dquot(tp, dqp);
-                       /*
-                        * Take off what's left of the original reservation.
-                        * In case of delayed allocations, there's no
-                        * reservation that a transaction structure knows of.
-                        */
-                       if (qtrx->qt_blk_res != 0) {
-                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-                                       if (qtrx->qt_blk_res >
-                                           qtrx->qt_blk_res_used)
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res -
-                                                        qtrx->qt_blk_res_used);
-                                       else
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res_used -
-                                                        qtrx->qt_blk_res);
-                               }
-                       } else {
-                               /*
-                                * These blks were never reserved, either inside
-                                * a transaction or outside one (in a delayed
-                                * allocation). Also, this isn't always a
-                                * negative number since we sometimes
-                                * deliberately skip quota reservations.
-                                */
-                               if (qtrx->qt_bcount_delta) {
-                                       dqp->q_res_bcount +=
-                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
-                               }
-                       }
-                       /*
-                        * Adjust the RT reservation.
-                        */
-                       if (qtrx->qt_rtblk_res != 0) {
-                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
-                                       if (qtrx->qt_rtblk_res >
-                                           qtrx->qt_rtblk_res_used)
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res -
-                                                       qtrx->qt_rtblk_res_used);
-                                       else
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res_used -
-                                                       qtrx->qt_rtblk_res);
-                               }
-                       } else {
-                               if (qtrx->qt_rtbcount_delta)
-                                       dqp->q_res_rtbcount +=
-                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
-                       }
-
-                       /*
-                        * Adjust the inode reservation.
-                        */
-                       if (qtrx->qt_ino_res != 0) {
-                               ASSERT(qtrx->qt_ino_res >=
-                                      qtrx->qt_ino_res_used);
-                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
-                                       dqp->q_res_icount -= (xfs_qcnt_t)
-                                               (qtrx->qt_ino_res -
-                                                qtrx->qt_ino_res_used);
-                       } else {
-                               if (qtrx->qt_icount_delta)
-                                       dqp->q_res_icount +=
-                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
-                       }
-
-                       ASSERT(dqp->q_res_bcount >=
-                               be64_to_cpu(dqp->q_core.d_bcount));
-                       ASSERT(dqp->q_res_icount >=
-                               be64_to_cpu(dqp->q_core.d_icount));
-                       ASSERT(dqp->q_res_rtbcount >=
-                               be64_to_cpu(dqp->q_core.d_rtbcount));
-               }
-               /*
-                * Do the group quotas next
-                */
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (ie. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       boolean_t               locked;
-
-       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       qa = tp->t_dqinfo->dqa_usrdquots;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * We assume that the array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-                       /*
-                        * Unreserve the original reservation. We don't care
-                        * about the number of blocks used field, or deltas.
-                        * Also we don't bother to zero the fields.
-                        */
-                       locked = B_FALSE;
-                       if (qtrx->qt_blk_res) {
-                               xfs_dqlock(dqp);
-                               locked = B_TRUE;
-                               dqp->q_res_bcount -=
-                                       (xfs_qcnt_t)qtrx->qt_blk_res;
-                       }
-                       if (qtrx->qt_ino_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_icount -=
-                                       (xfs_qcnt_t)qtrx->qt_ino_res;
-                       }
-
-                       if (qtrx->qt_rtblk_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_rtbcount -=
-                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
-                       }
-                       if (locked)
-                               xfs_dqunlock(dqp);
-
-               }
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-STATIC void
-xfs_quota_warn(
-       struct xfs_mount        *mp,
-       struct xfs_dquot        *dqp,
-       int                     type)
-{
-       /* no warnings for project quotas - we just return ENOSPC later */
-       if (dqp->dq_flags & XFS_DQ_PROJ)
-               return;
-       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
-                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
-                          type);
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate if the dquot is to be locked here and also
- * if the blk reservation is for RT or regular blocks.
- * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
- */
-STATIC int
-xfs_trans_dqresv(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       xfs_qcnt_t      hardlimit;
-       xfs_qcnt_t      softlimit;
-       time_t          timer;
-       xfs_qwarncnt_t  warns;
-       xfs_qwarncnt_t  warnlimit;
-       xfs_qcnt_t      count;
-       xfs_qcnt_t      *resbcountp;
-       xfs_quotainfo_t *q = mp->m_quotainfo;
-
-
-       xfs_dqlock(dqp);
-
-       if (flags & XFS_TRANS_DQ_RES_BLKS) {
-               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_bhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_bsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_btimer);
-               warns = be16_to_cpu(dqp->q_core.d_bwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
-               resbcountp = &dqp->q_res_bcount;
-       } else {
-               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_rtbhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_rtbsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
-               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
-               resbcountp = &dqp->q_res_rtbcount;
-       }
-
-       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
-           dqp->q_core.d_id &&
-           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
-            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
-             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-               if (nblks > 0) {
-                       /*
-                        * dquot is locked already. See if we'd go over the
-                        * hardlimit or exceed the timelimit if we allocate
-                        * nblks.
-                        */
-                       if (hardlimit > 0ULL &&
-                           hardlimit <= nblks + *resbcountp) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL &&
-                           softlimit <= nblks + *resbcountp) {
-                               if ((timer != 0 && get_seconds() > timer) ||
-                                   (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_BSOFTLONGWARN);
-                                       goto error_return;
-                               }
-
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
-                       }
-               }
-               if (ninos > 0) {
-                       count = be64_to_cpu(dqp->q_core.d_icount);
-                       timer = be32_to_cpu(dqp->q_core.d_itimer);
-                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
-                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
-                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-                       if (!hardlimit)
-                               hardlimit = q->qi_ihardlimit;
-                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
-                       if (!softlimit)
-                               softlimit = q->qi_isoftlimit;
-
-                       if (hardlimit > 0ULL && count >= hardlimit) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL && count >= softlimit) {
-                               if  ((timer != 0 && get_seconds() > timer) ||
-                                    (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_ISOFTLONGWARN);
-                                       goto error_return;
-                               }
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
-                       }
-               }
-       }
-
-       /*
-        * Change the reservation, but not the actual usage.
-        * Note that q_res_bcount = q_core.d_bcount + resv
-        */
-       (*resbcountp) += (xfs_qcnt_t)nblks;
-       if (ninos != 0)
-               dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
-       /*
-        * note the reservation amt in the trans struct too,
-        * so that the transaction knows how much was reserved by
-        * it against this particular dquot.
-        * We don't do this when we are reserving for a delayed allocation,
-        * because we don't have the luxury of a transaction envelope then.
-        */
-       if (tp) {
-               ASSERT(tp->t_dqinfo);
-               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-               if (nblks != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           flags & XFS_QMOPT_RESBLK_MASK,
-                                           nblks);
-               if (ninos != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           XFS_TRANS_DQ_RES_INOS,
-                                           ninos);
-       }
-       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
-       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
-       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
-
-       xfs_dqunlock(dqp);
-       return 0;
-
-error_return:
-       xfs_dqunlock(dqp);
-       if (flags & XFS_QMOPT_ENOSPC)
-               return ENOSPC;
-       return EDQUOT;
-}
-
-
-/*
- * Given dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp/prj quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
- *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       int             resvd = 0, error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       if (tp && tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-
-       if (udqp) {
-               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
-                                       (flags & ~XFS_QMOPT_ENOSPC));
-               if (error)
-                       return error;
-               resvd = 1;
-       }
-
-       if (gdqp) {
-               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
-               if (error) {
-                       /*
-                        * can't do it, so backout previous reservation
-                        */
-                       if (resvd) {
-                               flags |= XFS_QMOPT_FORCE_RES;
-                               xfs_trans_dqresv(tp, mp, udqp,
-                                                -nblks, -ninos, flags);
-                       }
-                       return error;
-               }
-       }
-
-       /*
-        * Didn't change anything critical, so, no need to log
-        */
-       return 0;
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- */
-int
-xfs_trans_reserve_quota_nblks(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       long                    nblks,
-       long                    ninos,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-       if (XFS_IS_PQUOTA_ON(mp))
-               flags |= XFS_QMOPT_ENOSPC;
-
-       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_RTBLKS ||
-              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_BLKS);
-
-       /*
-        * Reserve nblks against these dquots, with trans as the mediator.
-        */
-       return xfs_trans_reserve_quota_bydquots(tp, mp,
-                                               ip->i_udquot, ip->i_gdquot,
-                                               nblks, ninos, flags);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_qoff_logitem_t      *q;
-
-       ASSERT(tp != NULL);
-
-       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-       ASSERT(q != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &q->qql_item);
-       return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *qlp)
-{
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-STATIC void
-xfs_trans_alloc_dqinfo(
-       xfs_trans_t     *tp)
-{
-       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
-       xfs_trans_t     *tp)
-{
-       if (!tp->t_dqinfo)
-               return;
-       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
-       tp->t_dqinfo = NULL;
-}
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
deleted file mode 100644 (file)
index b83f76b..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-       __be32  uu_timelow;
-       __be16  uu_timemid;
-       __be16  uu_timehi;
-       __be16  uu_clockseq;
-       __be16  uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-       xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                  be16_to_cpu(uup->uu_timemid);
-       fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-       int     i;
-       char    *cp = (char *)uuid;
-
-       if (uuid == NULL)
-               return 0;
-       /* implied check of version number here... */
-       for (i = 0; i < sizeof *uuid; i++)
-               if (*cp++) return 0;    /* not nil */
-       return 1;       /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
deleted file mode 100644 (file)
index 4732d71..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-       unsigned char   __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/time.h b/fs/xfs/time.h
new file mode 100644 (file)
index 0000000..387e695
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+       schedule_timeout_uninterruptible(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+       *tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
new file mode 100644 (file)
index 0000000..b83f76b
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <xfs.h>
+
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+       __be32  uu_timelow;
+       __be16  uu_timemid;
+       __be16  uu_timehi;
+       __be16  uu_clockseq;
+       __be16  uu_node[3];
+} xfs_uu_t;
+
+/*
+ * uuid_getnodeuniq - obtain the node unique fields of a UUID.
+ *
+ * This is not in any way a standard or condoned UUID function;
+ * it just something that's needed for user-level file handles.
+ */
+void
+uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
+{
+       xfs_uu_t *uup = (xfs_uu_t *)uuid;
+
+       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+                  be16_to_cpu(uup->uu_timemid);
+       fsid[1] = be32_to_cpu(uup->uu_timelow);
+}
+
+int
+uuid_is_nil(uuid_t *uuid)
+{
+       int     i;
+       char    *cp = (char *)uuid;
+
+       if (uuid == NULL)
+               return 0;
+       /* implied check of version number here... */
+       for (i = 0; i < sizeof *uuid; i++)
+               if (*cp++) return 0;    /* not nil */
+       return 1;       /* is nil */
+}
+
+int
+uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
+{
+       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
+}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
new file mode 100644 (file)
index 0000000..4732d71
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_UUID_H__
+#define __XFS_SUPPORT_UUID_H__
+
+typedef struct {
+       unsigned char   __u_bits[16];
+} uuid_t;
+
+extern int uuid_is_nil(uuid_t *uuid);
+extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
+extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
+
+#endif /* __XFS_SUPPORT_UUID_H__ */
index 53ec3ea..d8b11b7 100644 (file)
@@ -24,5 +24,6 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
-#include <linux-2.6/xfs_linux.h>
+#include "xfs_linux.h"
+
 #endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
new file mode 100644 (file)
index 0000000..b6c4b37
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+       struct posix_acl_entry *acl_e;
+       struct posix_acl *acl;
+       struct xfs_acl_entry *ace;
+       int count, i;
+
+       count = be32_to_cpu(aclp->acl_cnt);
+
+       acl = posix_acl_alloc(count, GFP_KERNEL);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < count; i++) {
+               acl_e = &acl->a_entries[i];
+               ace = &aclp->acl_entry[i];
+
+               /*
+                * The tag is 32 bits on disk and 16 bits in core.
+                *
+                * Because every access to it goes through the core
+                * format first this is not a problem.
+                */
+               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+               case ACL_GROUP:
+                       acl_e->e_id = be32_to_cpu(ace->ae_id);
+                       break;
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       acl_e->e_id = ACL_UNDEFINED_ID;
+                       break;
+               default:
+                       goto fail;
+               }
+       }
+       return acl;
+
+fail:
+       posix_acl_release(acl);
+       return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+       const struct posix_acl_entry *acl_e;
+       struct xfs_acl_entry *ace;
+       int i;
+
+       aclp->acl_cnt = cpu_to_be32(acl->a_count);
+       for (i = 0; i < acl->a_count; i++) {
+               ace = &aclp->acl_entry[i];
+               acl_e = &acl->a_entries[i];
+
+               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+               ace->ae_id = cpu_to_be32(acl_e->e_id);
+               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+       }
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       struct posix_acl *acl;
+       struct xfs_acl *xfs_acl;
+       int len = sizeof(struct xfs_acl);
+       unsigned char *ea_name;
+       int error;
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       trace_xfs_get_acl(ip);
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               BUG();
+       }
+
+       /*
+        * If we have a cached ACLs value just return it, not need to
+        * go out to the disk.
+        */
+
+       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+       if (!xfs_acl)
+               return ERR_PTR(-ENOMEM);
+
+       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+                                                       &len, ATTR_ROOT);
+       if (error) {
+               /*
+                * If the attribute doesn't exist make sure we have a negative
+                * cache entry, for any other error assume it is transient and
+                * leave the cache entry as ACL_NOT_CACHED.
+                */
+               if (error == -ENOATTR) {
+                       acl = NULL;
+                       goto out_update_cache;
+               }
+               goto out;
+       }
+
+       acl = xfs_acl_from_disk(xfs_acl);
+       if (IS_ERR(acl))
+               goto out;
+
+ out_update_cache:
+       set_cached_acl(inode, type, acl);
+ out:
+       kfree(xfs_acl);
+       return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       unsigned char *ea_name;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               if (!S_ISDIR(inode->i_mode))
+                       return acl ? -EACCES : 0;
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (acl) {
+               struct xfs_acl *xfs_acl;
+               int len;
+
+               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+               if (!xfs_acl)
+                       return -ENOMEM;
+
+               xfs_acl_to_disk(xfs_acl, acl);
+               len = sizeof(struct xfs_acl) -
+                       (sizeof(struct xfs_acl_entry) *
+                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+                               len, ATTR_ROOT);
+
+               kfree(xfs_acl);
+       } else {
+               /*
+                * A NULL ACL argument means we want to remove the ACL.
+                */
+               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+               /*
+                * If the attribute didn't exist to start with that's fine.
+                */
+               if (error == -ENOATTR)
+                       error = 0;
+       }
+
+       if (!error)
+               set_cached_acl(inode, type, acl);
+       return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, umode_t mode)
+{
+       int error = 0;
+
+       if (mode != inode->i_mode) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
+               iattr.ia_mode = mode;
+               iattr.ia_ctime = current_fs_time(inode->i_sb);
+
+               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+       }
+
+       return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, unsigned char *name)
+{
+       int len = sizeof(struct xfs_acl);
+
+       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+       return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+       if (!S_ISDIR(inode->i_mode))
+               return 0;
+       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
+{
+       umode_t mode = inode->i_mode;
+       int error = 0, inherit = 0;
+
+       if (S_ISDIR(inode->i_mode)) {
+               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+               if (error)
+                       goto out;
+       }
+
+       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+       if (error < 0)
+               return error;
+
+       /*
+        * If posix_acl_create returns a positive value we need to
+        * inherit a permission that can't be represented using the Unix
+        * mode bits and we actually need to set an ACL.
+        */
+       if (error > 0)
+               inherit = 1;
+
+       error = xfs_set_mode(inode, mode);
+       if (error)
+               goto out;
+
+       if (inherit)
+               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+
+out:
+       posix_acl_release(acl);
+       return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+       struct posix_acl *acl;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+       if (IS_ERR(acl) || !acl)
+               return PTR_ERR(acl);
+
+       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+       if (error)
+               return error;
+
+       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+       posix_acl_release(acl);
+       return error;
+}
+
+static int
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int type)
+{
+       struct posix_acl *acl;
+       int error;
+
+       acl = xfs_get_acl(dentry->d_inode, type);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+
+       error = posix_acl_to_xattr(acl, value, size);
+       posix_acl_release(acl);
+
+       return error;
+}
+
+static int
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags, int type)
+{
+       struct inode *inode = dentry->d_inode;
+       struct posix_acl *acl = NULL;
+       int error = 0;
+
+       if (flags & XATTR_CREATE)
+               return -EINVAL;
+       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+               return value ? -EACCES : 0;
+       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+               return -EPERM;
+
+       if (!value)
+               goto set_acl;
+
+       acl = posix_acl_from_xattr(value, size);
+       if (!acl) {
+               /*
+                * acl_set_file(3) may request that we set default ACLs with
+                * zero length -- defend (gracefully) against that here.
+                */
+               goto out;
+       }
+       if (IS_ERR(acl)) {
+               error = PTR_ERR(acl);
+               goto out;
+       }
+
+       error = posix_acl_valid(acl);
+       if (error)
+               goto out_release;
+
+       error = -EINVAL;
+       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+               goto out_release;
+
+       if (type == ACL_TYPE_ACCESS) {
+               umode_t mode = inode->i_mode;
+               error = posix_acl_equiv_mode(acl, &mode);
+
+               if (error <= 0) {
+                       posix_acl_release(acl);
+                       acl = NULL;
+
+                       if (error < 0)
+                               return error;
+               }
+
+               error = xfs_set_mode(inode, mode);
+               if (error)
+                       goto out_release;
+       }
+
+ set_acl:
+       error = xfs_set_acl(inode, type, acl);
+ out_release:
+       posix_acl_release(acl);
+ out:
+       return error;
+}
+
+const struct xattr_handler xfs_xattr_acl_access_handler = {
+       .prefix = POSIX_ACL_XATTR_ACCESS,
+       .flags  = ACL_TYPE_ACCESS,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
+
+const struct xattr_handler xfs_xattr_acl_default_handler = {
+       .prefix = POSIX_ACL_XATTR_DEFAULT,
+       .flags  = ACL_TYPE_DEFAULT,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
new file mode 100644 (file)
index 0000000..63e971e
--- /dev/null
@@ -0,0 +1,1499 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC         37
+#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+       int i;
+
+       for (i = 0; i < NVSYNC; i++)
+               init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+       xfs_inode_t     *ip)
+{
+       wait_queue_head_t *wq = to_ioend_wq(ip);
+
+       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+       xfs_inode_t     *ip)
+{
+       if (atomic_dec_and_test(&ip->i_iocount))
+               wake_up(to_ioend_wq(ip));
+}
+
+void
+xfs_count_page_state(
+       struct page             *page,
+       int                     *delalloc,
+       int                     *unwritten)
+{
+       struct buffer_head      *bh, *head;
+
+       *delalloc = *unwritten = 0;
+
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_unwritten(bh))
+                       (*unwritten) = 1;
+               else if (buffer_delay(bh))
+                       (*delalloc) = 1;
+       } while ((bh = bh->b_this_page) != head);
+}
+
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (XFS_IS_REALTIME_INODE(ip))
+               return mp->m_rtdev_targp->bt_bdev;
+       else
+               return mp->m_ddev_targp->bt_bdev;
+}
+
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
+STATIC void
+xfs_destroy_ioend(
+       xfs_ioend_t             *ioend)
+{
+       struct buffer_head      *bh, *next;
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+
+       for (bh = ioend->io_buffer_head; bh; bh = next) {
+               next = bh->b_private;
+               bh->b_end_io(bh, !ioend->io_error);
+       }
+
+       /*
+        * Volume managers supporting multiple paths can send back ENODEV
+        * when the final path disappears.  In this case continuing to fill
+        * the page cache with dirty data which cannot be written out is
+        * evil, so prevent that.
+        */
+       if (unlikely(ioend->io_error == -ENODEV)) {
+               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+                                     __FILE__, __LINE__);
+       }
+
+       xfs_ioend_wake(ip);
+       mempool_free(ioend, xfs_ioend_pool);
+}
+
+/*
+ * If the end of the current ioend is beyond the current EOF,
+ * return the new EOF value, otherwise zero.
+ */
+STATIC xfs_fsize_t
+xfs_ioend_new_eof(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+       xfs_fsize_t             bsize;
+
+       bsize = ioend->io_offset + ioend->io_size;
+       isize = MAX(ip->i_size, ip->i_new_size);
+       isize = MIN(isize, bsize);
+       return isize > ip->i_d.di_size ? isize : 0;
+}
+
+/*
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
+ */
+STATIC int
+xfs_setfilesize(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+
+       if (unlikely(ioend->io_error))
+               return 0;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+               return EAGAIN;
+
+       isize = xfs_ioend_new_eof(ioend);
+       if (isize) {
+               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+               ip->i_d.di_size = isize;
+               xfs_mark_inode_dirty(ip);
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+/*
+ * Schedule IO completion handling on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining)) {
+               if (ioend->io_type == IO_UNWRITTEN)
+                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
+               else
+                       queue_work(xfsdatad_workqueue, &ioend->io_work);
+       }
+}
+
+/*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+       struct work_struct *work)
+{
+       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
+       struct xfs_inode *ip = XFS_I(ioend->io_inode);
+       int             error = 0;
+
+       /*
+        * For unwritten extents we need to issue transactions to convert a
+        * range to normal written extens after the data I/O has finished.
+        */
+       if (ioend->io_type == IO_UNWRITTEN &&
+           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+                                                ioend->io_size);
+               if (error)
+                       ioend->io_error = error;
+       }
+
+       /*
+        * We might have to update the on-disk file size after extending
+        * writes.
+        */
+       error = xfs_setfilesize(ioend);
+       ASSERT(!error || error == EAGAIN);
+
+       /*
+        * If we didn't complete processing of the ioend, requeue it to the
+        * tail of the workqueue for another attempt later. Otherwise destroy
+        * it.
+        */
+       if (error == EAGAIN) {
+               atomic_inc(&ioend->io_remaining);
+               xfs_finish_ioend(ioend);
+               /* ensure we don't spin on blocked ioends */
+               delay(1);
+       } else {
+               if (ioend->io_iocb)
+                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
+               xfs_destroy_ioend(ioend);
+       }
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining))
+               xfs_end_io(&ioend->io_work);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+       struct inode            *inode,
+       unsigned int            type)
+{
+       xfs_ioend_t             *ioend;
+
+       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+       /*
+        * Set the count to 1 initially, which will prevent an I/O
+        * completion callback from happening before we have started
+        * all the I/O from calling the completion routine too early.
+        */
+       atomic_set(&ioend->io_remaining, 1);
+       ioend->io_error = 0;
+       ioend->io_list = NULL;
+       ioend->io_type = type;
+       ioend->io_inode = inode;
+       ioend->io_buffer_head = NULL;
+       ioend->io_buffer_tail = NULL;
+       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
+       ioend->io_offset = 0;
+       ioend->io_size = 0;
+       ioend->io_iocb = NULL;
+       ioend->io_result = 0;
+
+       INIT_WORK(&ioend->io_work, xfs_end_io);
+       return ioend;
+}
+
+STATIC int
+xfs_map_blocks(
+       struct inode            *inode,
+       loff_t                  offset,
+       struct xfs_bmbt_irec    *imap,
+       int                     type,
+       int                     nonblocking)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 count = 1 << inode->i_blkbits;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
+       int                     nimaps = 1;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (type == IO_UNWRITTEN)
+               bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+               if (nonblocking)
+                       return -XFS_ERROR(EAGAIN);
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+       }
+
+       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+              (ip->i_df.if_flags & XFS_IFEXTENTS));
+       ASSERT(offset <= mp->m_maxioffset);
+
+       if (offset + count > mp->m_maxioffset)
+               count = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (error)
+               return -XFS_ERROR(error);
+
+       if (type == IO_DELALLOC &&
+           (!nimaps || isnullstartblock(imap->br_startblock))) {
+               error = xfs_iomap_write_allocate(ip, offset, count, imap);
+               if (!error)
+                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+               return -XFS_ERROR(error);
+       }
+
+#ifdef DEBUG
+       if (type == IO_UNWRITTEN) {
+               ASSERT(nimaps);
+               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+       }
+#endif
+       if (nimaps)
+               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+       return 0;
+}
+
+STATIC int
+xfs_imap_valid(
+       struct inode            *inode,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       offset >>= inode->i_blkbits;
+
+       return offset >= imap->br_startoff &&
+               offset < imap->br_startoff + imap->br_blockcount;
+}
+
+/*
+ * BIO completion handler for buffered IO.
+ */
+STATIC void
+xfs_end_bio(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_ioend_t             *ioend = bio->bi_private;
+
+       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+
+       /* Toss bio and pass work off to an xfsdatad thread */
+       bio->bi_private = NULL;
+       bio->bi_end_io = NULL;
+       bio_put(bio);
+
+       xfs_finish_ioend(ioend);
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend,
+       struct bio              *bio)
+{
+       atomic_inc(&ioend->io_remaining);
+       bio->bi_private = ioend;
+       bio->bi_end_io = xfs_end_bio;
+
+       /*
+        * If the I/O is beyond EOF we mark the inode dirty immediately
+        * but don't update the inode size until I/O completion.
+        */
+       if (xfs_ioend_new_eof(ioend))
+               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+       struct buffer_head      *bh)
+{
+       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
+       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
+
+       ASSERT(bio->bi_private == NULL);
+       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+       bio->bi_bdev = bh->b_bdev;
+       return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+       struct buffer_head      *bh)
+{
+       ASSERT(buffer_mapped(bh));
+       ASSERT(buffer_locked(bh));
+       ASSERT(!buffer_delay(bh));
+       ASSERT(!buffer_unwritten(bh));
+
+       mark_buffer_async_write(bh);
+       set_buffer_uptodate(bh);
+       clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
+       struct page             *page,
+       int                     clear_dirty,
+       int                     buffers)
+{
+       ASSERT(PageLocked(page));
+       ASSERT(!PageWriteback(page));
+       if (clear_dirty)
+               clear_page_dirty_for_io(page);
+       set_page_writeback(page);
+       unlock_page(page);
+       /* If no buffers on the page are to be written, finish it here */
+       if (!buffers)
+               end_page_writeback(page);
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we got, then we can end up with a page that only has buffers
+ * marked async write and I/O complete on can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
+ */
+STATIC void
+xfs_submit_ioend(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *head = ioend;
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh;
+       struct bio              *bio;
+       sector_t                lastblock = 0;
+
+       /* Pass 1 - start writeback */
+       do {
+               next = ioend->io_list;
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
+                       xfs_start_buffer_writeback(bh);
+       } while ((ioend = next) != NULL);
+
+       /* Pass 2 - submit I/O */
+       ioend = head;
+       do {
+               next = ioend->io_list;
+               bio = NULL;
+
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+                       if (!bio) {
+ retry:
+                               bio = xfs_alloc_ioend_bio(bh);
+                       } else if (bh->b_blocknr != lastblock + 1) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       if (bio_add_buffer(bio, bh) != bh->b_size) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       lastblock = bh->b_blocknr;
+               }
+               if (bio)
+                       xfs_submit_ioend_bio(wbc, ioend, bio);
+               xfs_finish_ioend(ioend);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh, *next_bh;
+
+       do {
+               next = ioend->io_list;
+               bh = ioend->io_buffer_head;
+               do {
+                       next_bh = bh->b_private;
+                       clear_buffer_async_write(bh);
+                       unlock_buffer(bh);
+               } while ((bh = next_bh) != NULL);
+
+               xfs_ioend_wake(XFS_I(ioend->io_inode));
+               mempool_free(ioend, xfs_ioend_pool);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       xfs_off_t               offset,
+       unsigned int            type,
+       xfs_ioend_t             **result,
+       int                     need_ioend)
+{
+       xfs_ioend_t             *ioend = *result;
+
+       if (!ioend || need_ioend || type != ioend->io_type) {
+               xfs_ioend_t     *previous = *result;
+
+               ioend = xfs_alloc_ioend(inode, type);
+               ioend->io_offset = offset;
+               ioend->io_buffer_head = bh;
+               ioend->io_buffer_tail = bh;
+               if (previous)
+                       previous->io_list = ioend;
+               *result = ioend;
+       } else {
+               ioend->io_buffer_tail->b_private = bh;
+               ioend->io_buffer_tail = bh;
+       }
+
+       bh->b_private = NULL;
+       ioend->io_size += bh->b_size;
+}
+
+STATIC void
+xfs_map_buffer(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       sector_t                bn;
+       struct xfs_mount        *m = XFS_I(inode)->i_mount;
+       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
+
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+             ((offset - iomap_offset) >> inode->i_blkbits);
+
+       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
+
+       bh->b_blocknr = bn;
+       set_buffer_mapped(bh);
+}
+
+STATIC void
+xfs_map_at_offset(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       xfs_map_buffer(inode, bh, imap, offset);
+       set_buffer_mapped(bh);
+       clear_buffer_delay(bh);
+       clear_buffer_unwritten(bh);
+}
+
+/*
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
+ */
+STATIC int
+xfs_is_delayed_page(
+       struct page             *page,
+       unsigned int            type)
+{
+       if (PageWriteback(page))
+               return 0;
+
+       if (page->mapping && page_has_buffers(page)) {
+               struct buffer_head      *bh, *head;
+               int                     acceptable = 0;
+
+               bh = head = page_buffers(page);
+               do {
+                       if (buffer_unwritten(bh))
+                               acceptable = (type == IO_UNWRITTEN);
+                       else if (buffer_delay(bh))
+                               acceptable = (type == IO_DELALLOC);
+                       else if (buffer_dirty(bh) && buffer_mapped(bh))
+                               acceptable = (type == IO_OVERWRITE);
+                       else
+                               break;
+               } while ((bh = bh->b_this_page) != head);
+
+               if (acceptable)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only, for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC int
+xfs_convert_page(
+       struct inode            *inode,
+       struct page             *page,
+       loff_t                  tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc)
+{
+       struct buffer_head      *bh, *head;
+       xfs_off_t               end_offset;
+       unsigned long           p_offset;
+       unsigned int            type;
+       int                     len, page_dirty;
+       int                     count = 0, done = 0, uptodate = 1;
+       xfs_off_t               offset = page_offset(page);
+
+       if (page->index != tindex)
+               goto fail;
+       if (!trylock_page(page))
+               goto fail;
+       if (PageWriteback(page))
+               goto fail_unlock_page;
+       if (page->mapping != inode->i_mapping)
+               goto fail_unlock_page;
+       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+               goto fail_unlock_page;
+
+       /*
+        * page_dirty is initially a count of buffers on the page before
+        * EOF and is decremented as we move each into a cleanable state.
+        *
+        * Derivation:
+        *
+        * End offset is the highest offset that this page should represent.
+        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+        * hence give us the correct page_dirty count. On any other page,
+        * it will be zero and in that case we need page_dirty to be the
+        * count of buffers on the page.
+        */
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       i_size_read(inode));
+
+       len = 1 << inode->i_blkbits;
+       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+                                       PAGE_CACHE_SIZE);
+       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+       page_dirty = p_offset / len;
+
+       bh = head = page_buffers(page);
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+                       done = 1;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh) || buffer_delay(bh) ||
+                   buffer_mapped(bh)) {
+                       if (buffer_unwritten(bh))
+                               type = IO_UNWRITTEN;
+                       else if (buffer_delay(bh))
+                               type = IO_DELALLOC;
+                       else
+                               type = IO_OVERWRITE;
+
+                       if (!xfs_imap_valid(inode, imap, offset)) {
+                               done = 1;
+                               continue;
+                       }
+
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type,
+                                        ioendp, done);
+
+                       page_dirty--;
+                       count++;
+               } else {
+                       done = 1;
+               }
+       } while (offset += len, (bh = bh->b_this_page) != head);
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       if (count) {
+               if (--wbc->nr_to_write <= 0 &&
+                   wbc->sync_mode == WB_SYNC_NONE)
+                       done = 1;
+       }
+       xfs_start_page_writeback(page, !page_dirty, count);
+
+       return done;
+ fail_unlock_page:
+       unlock_page(page);
+ fail:
+       return 1;
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+       struct inode            *inode,
+       pgoff_t                 tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc,
+       pgoff_t                 tlast)
+{
+       struct pagevec          pvec;
+       int                     done = 0, i;
+
+       pagevec_init(&pvec, 0);
+       while (!done && tindex <= tlast) {
+               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                       break;
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                       imap, ioendp, wbc);
+                       if (done)
+                               break;
+               }
+
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+}
+
+STATIC void
+xfs_vm_invalidatepage(
+       struct page             *page,
+       unsigned long           offset)
+{
+       trace_xfs_invalidatepage(page->mapping->host, page, offset);
+       block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+       struct page             *page)
+{
+       struct inode            *inode = page->mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct buffer_head      *bh, *head;
+       loff_t                  offset = page_offset(page);
+
+       if (!xfs_is_delayed_page(page, IO_DELALLOC))
+               goto out_invalidate;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               goto out_invalidate;
+
+       xfs_alert(ip->i_mount,
+               "page discard on page %p, inode 0x%llx, offset %llu.",
+                       page, ip->i_ino, offset);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       bh = head = page_buffers(page);
+       do {
+               int             error;
+               xfs_fileoff_t   start_fsb;
+
+               if (!buffer_delay(bh))
+                       goto next_buffer;
+
+               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "page discard unable to remove delalloc mapping.");
+                       }
+                       break;
+               }
+next_buffer:
+               offset += 1 << inode->i_blkbits;
+
+       } while ((bh = bh->b_this_page) != head);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+       xfs_vm_invalidatepage(page, 0);
+       return;
+}
+
+/*
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
+ */
+STATIC int
+xfs_vm_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       struct inode            *inode = page->mapping->host;
+       struct buffer_head      *bh, *head;
+       struct xfs_bmbt_irec    imap;
+       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
+       loff_t                  offset;
+       unsigned int            type;
+       __uint64_t              end_offset;
+       pgoff_t                 end_index, last_index;
+       ssize_t                 len;
+       int                     err, imap_valid = 0, uptodate = 1;
+       int                     count = 0;
+       int                     nonblocking = 0;
+
+       trace_xfs_writepage(inode, page, 0);
+
+       ASSERT(page_has_buffers(page));
+
+       /*
+        * Refuse to write the page out if we are called from reclaim context.
+        *
+        * This avoids stack overflows when called from deeply used stacks in
+        * random callers for direct reclaim or memcg reclaim.  We explicitly
+        * allow reclaim from kswapd as the stack usage there is relatively low.
+        *
+        * This should really be done by the core VM, but until that happens
+        * filesystems like XFS, btrfs and ext4 have to take care of this
+        * by themselves.
+        */
+       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+               goto redirty;
+
+       /*
+        * Given that we do not allow direct reclaim to call us, we should
+        * never be called while in a filesystem transaction.
+        */
+       if (WARN_ON(current->flags & PF_FSTRANS))
+               goto redirty;
+
+       /* Is this page beyond the end of the file? */
+       offset = i_size_read(inode);
+       end_index = offset >> PAGE_CACHE_SHIFT;
+       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+       if (page->index >= end_index) {
+               if ((page->index >= end_index + 1) ||
+                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                       unlock_page(page);
+                       return 0;
+               }
+       }
+
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       offset);
+       len = 1 << inode->i_blkbits;
+
+       bh = head = page_buffers(page);
+       offset = page_offset(page);
+       type = IO_OVERWRITE;
+
+       if (wbc->sync_mode == WB_SYNC_NONE)
+               nonblocking = 1;
+
+       do {
+               int new_ioend = 0;
+
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+
+               /*
+                * set_page_dirty dirties all buffers in a page, independent
+                * of their state.  The dirty state however is entirely
+                * meaningless for holes (!mapped && uptodate), so skip
+                * buffers covering holes here.
+                */
+               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+                       imap_valid = 0;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh)) {
+                       if (type != IO_UNWRITTEN) {
+                               type = IO_UNWRITTEN;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_delay(bh)) {
+                       if (type != IO_DELALLOC) {
+                               type = IO_DELALLOC;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_uptodate(bh)) {
+                       if (type != IO_OVERWRITE) {
+                               type = IO_OVERWRITE;
+                               imap_valid = 0;
+                       }
+               } else {
+                       if (PageUptodate(page)) {
+                               ASSERT(buffer_mapped(bh));
+                               imap_valid = 0;
+                       }
+                       continue;
+               }
+
+               if (imap_valid)
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               if (!imap_valid) {
+                       /*
+                        * If we didn't have a valid mapping then we need to
+                        * put the new mapping into a separate ioend structure.
+                        * This ensures non-contiguous extents always have
+                        * separate ioends, which is particularly important
+                        * for unwritten extent conversion at I/O completion
+                        * time.
+                        */
+                       new_ioend = 1;
+                       err = xfs_map_blocks(inode, offset, &imap, type,
+                                            nonblocking);
+                       if (err)
+                               goto error;
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               }
+               if (imap_valid) {
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, &imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+                                        new_ioend);
+                       count++;
+               }
+
+               if (!iohead)
+                       iohead = ioend;
+
+       } while (offset += len, ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       xfs_start_page_writeback(page, 1, count);
+
+       if (ioend && imap_valid) {
+               xfs_off_t               end_index;
+
+               end_index = imap.br_startoff + imap.br_blockcount;
+
+               /* to bytes */
+               end_index <<= inode->i_blkbits;
+
+               /* to pages */
+               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+               /* check against file size */
+               if (end_index > last_index)
+                       end_index = last_index;
+
+               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+                                 wbc, end_index);
+       }
+
+       if (iohead)
+               xfs_submit_ioend(wbc, iohead);
+
+       return 0;
+
+error:
+       if (iohead)
+               xfs_cancel_ioend(iohead);
+
+       if (err == -EAGAIN)
+               goto redirty;
+
+       xfs_aops_discard_page(page);
+       ClearPageUptodate(page);
+       unlock_page(page);
+       return err;
+
+redirty:
+       redirty_page_for_writepage(wbc, page);
+       unlock_page(page);
+       return 0;
+}
+
+STATIC int
+xfs_vm_writepages(
+       struct address_space    *mapping,
+       struct writeback_control *wbc)
+{
+       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+       return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. The page should already be clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 1 if the page is ok to release, 0 otherwise.
+ */
+STATIC int
+xfs_vm_releasepage(
+       struct page             *page,
+       gfp_t                   gfp_mask)
+{
+       int                     delalloc, unwritten;
+
+       trace_xfs_releasepage(page->mapping->host, page, 0);
+
+       xfs_count_page_state(page, &delalloc, &unwritten);
+
+       if (WARN_ON(delalloc))
+               return 0;
+       if (WARN_ON(unwritten))
+               return 0;
+
+       return try_to_free_buffers(page);
+}
+
+STATIC int
+__xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create,
+       int                     direct)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     lockmode = 0;
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1;
+       xfs_off_t               offset;
+       ssize_t                 size;
+       int                     new = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       offset = (xfs_off_t)iblock << inode->i_blkbits;
+       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+       size = bh_result->b_size;
+
+       if (!create && direct && offset >= i_size_read(inode))
+               return 0;
+
+       if (create) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, lockmode);
+       } else {
+               lockmode = xfs_ilock_map_shared(ip);
+       }
+
+       ASSERT(offset <= mp->m_maxioffset);
+       if (offset + size > mp->m_maxioffset)
+               size = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
+       if (error)
+               goto out_unlock;
+
+       if (create &&
+           (!nimaps ||
+            (imap.br_startblock == HOLESTARTBLOCK ||
+             imap.br_startblock == DELAYSTARTBLOCK))) {
+               if (direct) {
+                       error = xfs_iomap_write_direct(ip, offset, size,
+                                                      &imap, nimaps);
+               } else {
+                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
+               }
+               if (error)
+                       goto out_unlock;
+
+               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+       } else if (nimaps) {
+               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+       } else {
+               trace_xfs_get_blocks_notfound(ip, offset, size);
+               goto out_unlock;
+       }
+       xfs_iunlock(ip, lockmode);
+
+       if (imap.br_startblock != HOLESTARTBLOCK &&
+           imap.br_startblock != DELAYSTARTBLOCK) {
+               /*
+                * For unwritten extents do not report a disk address on
+                * the read case (treat as if we're reading into a hole).
+                */
+               if (create || !ISUNWRITTEN(&imap))
+                       xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (create && ISUNWRITTEN(&imap)) {
+                       if (direct)
+                               bh_result->b_private = inode;
+                       set_buffer_unwritten(bh_result);
+               }
+       }
+
+       /*
+        * If this is a realtime file, data may be on a different device.
+        * to that pointed to from the buffer_head b_bdev currently.
+        */
+       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
+
+       /*
+        * If we previously allocated a block out beyond eof and we are now
+        * coming back to use it then we will need to flag it as new even if it
+        * has a disk address.
+        *
+        * With sub-block writes into unwritten extents we also need to mark
+        * the buffer as new so that the unwritten parts of the buffer gets
+        * correctly zeroed.
+        */
+       if (create &&
+           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+            (offset >= i_size_read(inode)) ||
+            (new || ISUNWRITTEN(&imap))))
+               set_buffer_new(bh_result);
+
+       if (imap.br_startblock == DELAYSTARTBLOCK) {
+               BUG_ON(direct);
+               if (create) {
+                       set_buffer_uptodate(bh_result);
+                       set_buffer_mapped(bh_result);
+                       set_buffer_delay(bh_result);
+               }
+       }
+
+       /*
+        * If this is O_DIRECT or the mpage code calling tell them how large
+        * the mapping is, so that we can avoid repeated get_blocks calls.
+        */
+       if (direct || size > (1 << inode->i_blkbits)) {
+               xfs_off_t               mapping_size;
+
+               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+               mapping_size <<= inode->i_blkbits;
+
+               ASSERT(mapping_size > 0);
+               if (mapping_size > size)
+                       mapping_size = size;
+               if (mapping_size > LONG_MAX)
+                       mapping_size = LONG_MAX;
+
+               bh_result->b_size = mapping_size;
+       }
+
+       return 0;
+
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return -error;
+}
+
+int
+xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+}
+
+STATIC int
+xfs_get_blocks_direct(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+}
+
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents.  In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done.  But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions.  In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
+STATIC void
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
+       loff_t                  offset,
+       ssize_t                 size,
+       void                    *private,
+       int                     ret,
+       bool                    is_async)
+{
+       struct xfs_ioend        *ioend = iocb->private;
+
+       /*
+        * blockdev_direct_IO can return an error even after the I/O
+        * completion handler was called.  Thus we need to protect
+        * against double-freeing.
+        */
+       iocb->private = NULL;
+
+       ioend->io_offset = offset;
+       ioend->io_size = size;
+       if (private && size > 0)
+               ioend->io_type = IO_UNWRITTEN;
+
+       if (is_async) {
+               /*
+                * If we are converting an unwritten extent we need to delay
+                * the AIO completion until after the unwrittent extent
+                * conversion has completed, otherwise do it ASAP.
+                */
+               if (ioend->io_type == IO_UNWRITTEN) {
+                       ioend->io_iocb = iocb;
+                       ioend->io_result = ret;
+               } else {
+                       aio_complete(iocb, ret, 0);
+               }
+               xfs_finish_ioend(ioend);
+       } else {
+               xfs_finish_ioend_sync(ioend);
+       }
+
+       /* XXX: probably should move into the real I/O completion handler */
+       inode_dio_done(ioend->io_inode);
+}
+
+STATIC ssize_t
+xfs_vm_direct_IO(
+       int                     rw,
+       struct kiocb            *iocb,
+       const struct iovec      *iov,
+       loff_t                  offset,
+       unsigned long           nr_segs)
+{
+       struct inode            *inode = iocb->ki_filp->f_mapping->host;
+       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
+       ssize_t                 ret;
+
+       if (rw & WRITE) {
+               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           xfs_end_io_direct_write, NULL, 0);
+               if (ret != -EIOCBQUEUED && iocb->private)
+                       xfs_destroy_ioend(iocb->private);
+       } else {
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           NULL, NULL, 0);
+       }
+
+       return ret;
+}
+
+STATIC void
+xfs_vm_write_failed(
+       struct address_space    *mapping,
+       loff_t                  to)
+{
+       struct inode            *inode = mapping->host;
+
+       if (to > inode->i_size) {
+               /*
+                * punch out the delalloc blocks we have already allocated. We
+                * don't call xfs_setattr() to do this as we may be in the
+                * middle of a multi-iovec write and so the vfs inode->i_size
+                * will not match the xfs ip->i_size and so it will zero too
+                * much. Hence we jus truncate the page cache to zero what is
+                * necessary and punch the delalloc blocks directly.
+                */
+               struct xfs_inode        *ip = XFS_I(inode);
+               xfs_fileoff_t           start_fsb;
+               xfs_fileoff_t           end_fsb;
+               int                     error;
+
+               truncate_pagecache(inode, to, inode->i_size);
+
+               /*
+                * Check if there are any blocks that are outside of i_size
+                * that need to be trimmed back.
+                */
+               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+               if (end_fsb <= start_fsb)
+                       return;
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+                                                       end_fsb - start_fsb);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "xfs_vm_write_failed: unable to clean up ino %lld",
+                                               ip->i_ino);
+                       }
+               }
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+STATIC int
+xfs_vm_write_begin(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                flags,
+       struct page             **pagep,
+       void                    **fsdata)
+{
+       int                     ret;
+
+       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+                               pagep, xfs_get_blocks);
+       if (unlikely(ret))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                copied,
+       struct page             *page,
+       void                    *fsdata)
+{
+       int                     ret;
+
+       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+       if (unlikely(ret < len))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC sector_t
+xfs_vm_bmap(
+       struct address_space    *mapping,
+       sector_t                block)
+{
+       struct inode            *inode = (struct inode *)mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_vm_bmap(XFS_I(inode));
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+       return generic_block_bmap(mapping, block, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpage(
+       struct file             *unused,
+       struct page             *page)
+{
+       return mpage_readpage(page, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpages(
+       struct file             *unused,
+       struct address_space    *mapping,
+       struct list_head        *pages,
+       unsigned                nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+}
+
+const struct address_space_operations xfs_address_space_operations = {
+       .readpage               = xfs_vm_readpage,
+       .readpages              = xfs_vm_readpages,
+       .writepage              = xfs_vm_writepage,
+       .writepages             = xfs_vm_writepages,
+       .releasepage            = xfs_vm_releasepage,
+       .invalidatepage         = xfs_vm_invalidatepage,
+       .write_begin            = xfs_vm_write_begin,
+       .write_end              = xfs_vm_write_end,
+       .bmap                   = xfs_vm_bmap,
+       .direct_IO              = xfs_vm_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
new file mode 100644 (file)
index 0000000..71f721e
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+       IO_DIRECT = 0,  /* special case for direct I/O ioends */
+       IO_DELALLOC,    /* mapping covers delalloc region */
+       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
+       IO_OVERWRITE,   /* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+       { 0,                    "" }, \
+       { IO_DELALLOC,          "delalloc" }, \
+       { IO_UNWRITTEN,         "unwritten" }, \
+       { IO_OVERWRITE,         "overwrite" }
+
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
+typedef struct xfs_ioend {
+       struct xfs_ioend        *io_list;       /* next ioend in chain */
+       unsigned int            io_type;        /* delalloc / unwritten */
+       int                     io_error;       /* I/O error code */
+       atomic_t                io_remaining;   /* hold count */
+       struct inode            *io_inode;      /* file being written to */
+       struct buffer_head      *io_buffer_head;/* buffer linked list head */
+       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
+       size_t                  io_size;        /* size of the extent */
+       xfs_off_t               io_offset;      /* offset in the file */
+       struct work_struct      io_work;        /* xfsdatad work queue */
+       struct kiocb            *io_iocb;
+       int                     io_result;
+} xfs_ioend_t;
+
+extern const struct address_space_operations xfs_address_space_operations;
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
+
+extern void xfs_count_page_state(struct page *, int *, int *);
+
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
new file mode 100644 (file)
index 0000000..c57836d
--- /dev/null
@@ -0,0 +1,1876 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
+#include <linux/kthread.h>
+#include <linux/migrate.h>
+#include <linux/backing-dev.h>
+#include <linux/freezer.h>
+
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+static kmem_zone_t *xfs_buf_zone;
+STATIC int xfsbufd(void *);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
+
+static struct workqueue_struct *xfslogd_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
+
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
+#else
+# define XB_SET_OWNER(bp)      do { } while (0)
+# define XB_CLEAR_OWNER(bp)    do { } while (0)
+# define XB_GET_OWNER(bp)      do { } while (0)
+#endif
+
+#define xb_to_gfp(flags) \
+       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+
+#define xb_to_km(flags) \
+        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+#define xfs_buf_allocate(flags) \
+       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+       kmem_zone_free(xfs_buf_zone, (bp));
+
+static inline int
+xfs_buf_is_vmapped(
+       struct xfs_buf  *bp)
+{
+       /*
+        * Return true if the buffer is vmapped.
+        *
+        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+        * code is clever enough to know it doesn't have to map a single page,
+        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+        */
+       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+       struct xfs_buf  *bp)
+{
+       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
+/*
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
+ */
+STATIC void
+xfs_buf_lru_add(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (list_empty(&bp->b_lru)) {
+               atomic_inc(&bp->b_hold);
+               list_add_tail(&bp->b_lru, &btp->bt_lru);
+               btp->bt_lru_nr++;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       if (list_empty(&bp->b_lru))
+               return;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (!list_empty(&bp->b_lru)) {
+               list_del_init(&bp->b_lru);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+       struct xfs_buf  *bp)
+{
+       bp->b_flags |= XBF_STALE;
+       atomic_set(&(bp)->b_lru_ref, 0);
+       if (!list_empty(&bp->b_lru)) {
+               struct xfs_buftarg *btp = bp->b_target;
+
+               spin_lock(&btp->bt_lru_lock);
+               if (!list_empty(&bp->b_lru)) {
+                       list_del_init(&bp->b_lru);
+                       btp->bt_lru_nr--;
+                       atomic_dec(&bp->b_hold);
+               }
+               spin_unlock(&btp->bt_lru_lock);
+       }
+       ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
+
+STATIC void
+_xfs_buf_initialize(
+       xfs_buf_t               *bp,
+       xfs_buftarg_t           *target,
+       xfs_off_t               range_base,
+       size_t                  range_length,
+       xfs_buf_flags_t         flags)
+{
+       /*
+        * We don't want certain flags to appear in b_flags.
+        */
+       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+
+       memset(bp, 0, sizeof(xfs_buf_t));
+       atomic_set(&bp->b_hold, 1);
+       atomic_set(&bp->b_lru_ref, 1);
+       init_completion(&bp->b_iowait);
+       INIT_LIST_HEAD(&bp->b_lru);
+       INIT_LIST_HEAD(&bp->b_list);
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
+       XB_SET_OWNER(bp);
+       bp->b_target = target;
+       bp->b_file_offset = range_base;
+       /*
+        * Set buffer_length and count_desired to the same value initially.
+        * I/O routines should use count_desired, which will be the same in
+        * most cases but may be reset (e.g. XFS recovery).
+        */
+       bp->b_buffer_length = bp->b_count_desired = range_length;
+       bp->b_flags = flags;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       atomic_set(&bp->b_pin_count, 0);
+       init_waitqueue_head(&bp->b_waiters);
+
+       XFS_STATS_INC(xb_create);
+
+       trace_xfs_buf_init(bp, _RET_IP_);
+}
+
+/*
+ *     Allocate a page array capable of holding a specified number
+ *     of pages, and point the page buf at it.
+ */
+STATIC int
+_xfs_buf_get_pages(
+       xfs_buf_t               *bp,
+       int                     page_count,
+       xfs_buf_flags_t         flags)
+{
+       /* Make sure that we have a page list */
+       if (bp->b_pages == NULL) {
+               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+               bp->b_page_count = page_count;
+               if (page_count <= XB_PAGES) {
+                       bp->b_pages = bp->b_page_array;
+               } else {
+                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
+                                       page_count, xb_to_km(flags));
+                       if (bp->b_pages == NULL)
+                               return -ENOMEM;
+               }
+               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+       }
+       return 0;
+}
+
+/*
+ *     Frees b_pages if it was allocated.
+ */
+STATIC void
+_xfs_buf_free_pages(
+       xfs_buf_t       *bp)
+{
+       if (bp->b_pages != bp->b_page_array) {
+               kmem_free(bp->b_pages);
+               bp->b_pages = NULL;
+       }
+}
+
+/*
+ *     Releases the specified buffer.
+ *
+ *     The modification state of any associated pages is left unchanged.
+ *     The buffer most not be on any hash - use xfs_buf_rele instead for
+ *     hashed and refcounted buffers
+ */
+void
+xfs_buf_free(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_free(bp, _RET_IP_);
+
+       ASSERT(list_empty(&bp->b_lru));
+
+       if (bp->b_flags & _XBF_PAGES) {
+               uint            i;
+
+               if (xfs_buf_is_vmapped(bp))
+                       vm_unmap_ram(bp->b_addr - bp->b_offset,
+                                       bp->b_page_count);
+
+               for (i = 0; i < bp->b_page_count; i++) {
+                       struct page     *page = bp->b_pages[i];
+
+                       __free_page(page);
+               }
+       } else if (bp->b_flags & _XBF_KMEM)
+               kmem_free(bp->b_addr);
+       _xfs_buf_free_pages(bp);
+       xfs_buf_deallocate(bp);
+}
+
+/*
+ * Allocates all the pages for buffer in question and builds it's page list.
+ */
+STATIC int
+xfs_buf_allocate_memory(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       size_t                  size = bp->b_count_desired;
+       size_t                  nbytes, offset;
+       gfp_t                   gfp_mask = xb_to_gfp(flags);
+       unsigned short          page_count, i;
+       xfs_off_t               end;
+       int                     error;
+
+       /*
+        * for buffers that are contained within a single page, just allocate
+        * the memory from the heap - there's no need for the complexity of
+        * page arrays to keep allocation down to order 0.
+        */
+       if (bp->b_buffer_length < PAGE_SIZE) {
+               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+               if (!bp->b_addr) {
+                       /* low memory - use alloc_page loop instead */
+                       goto use_alloc_page;
+               }
+
+               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+                                                               PAGE_MASK) !=
+                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
+                       /* b_addr spans two pages - use alloc_page instead */
+                       kmem_free(bp->b_addr);
+                       bp->b_addr = NULL;
+                       goto use_alloc_page;
+               }
+               bp->b_offset = offset_in_page(bp->b_addr);
+               bp->b_pages = bp->b_page_array;
+               bp->b_pages[0] = virt_to_page(bp->b_addr);
+               bp->b_page_count = 1;
+               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+               return 0;
+       }
+
+use_alloc_page:
+       end = bp->b_file_offset + bp->b_buffer_length;
+       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
+       error = _xfs_buf_get_pages(bp, page_count, flags);
+       if (unlikely(error))
+               return error;
+
+       offset = bp->b_offset;
+       bp->b_flags |= _XBF_PAGES;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               struct page     *page;
+               uint            retries = 0;
+retry:
+               page = alloc_page(gfp_mask);
+               if (unlikely(page == NULL)) {
+                       if (flags & XBF_READ_AHEAD) {
+                               bp->b_page_count = i;
+                               error = ENOMEM;
+                               goto out_free_pages;
+                       }
+
+                       /*
+                        * This could deadlock.
+                        *
+                        * But until all the XFS lowlevel code is revamped to
+                        * handle buffer allocation failures we can't do much.
+                        */
+                       if (!(++retries % 100))
+                               xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, gfp_mask);
+
+                       XFS_STATS_INC(xb_page_retries);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto retry;
+               }
+
+               XFS_STATS_INC(xb_page_found);
+
+               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
+               size -= nbytes;
+               bp->b_pages[i] = page;
+               offset = 0;
+       }
+       return 0;
+
+out_free_pages:
+       for (i = 0; i < bp->b_page_count; i++)
+               __free_page(bp->b_pages[i]);
+       return error;
+}
+
+/*
+ *     Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_xfs_buf_map_pages(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       ASSERT(bp->b_flags & _XBF_PAGES);
+       if (bp->b_page_count == 1) {
+               /* A single page buffer is always mappable */
+               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       } else if (flags & XBF_MAPPED) {
+               int retried = 0;
+
+               do {
+                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+                                               -1, PAGE_KERNEL);
+                       if (bp->b_addr)
+                               break;
+                       vm_unmap_aliases();
+               } while (retried++ <= 1);
+
+               if (!bp->b_addr)
+                       return -ENOMEM;
+               bp->b_addr += bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       }
+
+       return 0;
+}
+
+/*
+ *     Finding and Reading Buffers
+ */
+
+/*
+ *     Look up, and creates if absent, a lockable buffer for
+ *     a given range of an inode.  The buffer is returned
+ *     locked.  If other overlapping buffers exist, they are
+ *     released before the new buffer is created and locked,
+ *     which may imply that this call will block until those buffers
+ *     are unlocked.  No I/O is implied by this call.
+ */
+xfs_buf_t *
+_xfs_buf_find(
+       xfs_buftarg_t           *btp,   /* block device target          */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags,
+       xfs_buf_t               *new_bp)
+{
+       xfs_off_t               range_base;
+       size_t                  range_length;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
+
+       range_base = (ioff << BBSHIFT);
+       range_length = (isize << BBSHIFT);
+
+       /* Check for IOs smaller than the sector size / not sector aligned */
+       ASSERT(!(range_length < (1 << btp->bt_sshift)));
+       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
+
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
+                       atomic_inc(&bp->b_hold);
+                       goto found;
+               }
+       }
+
+       /* No match found */
+       if (new_bp) {
+               _xfs_buf_initialize(new_bp, btp, range_base,
+                               range_length, flags);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
+       } else {
+               XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
+       }
+       return new_bp;
+
+found:
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
+
+       if (!xfs_buf_trylock(bp)) {
+               if (flags & XBF_TRYLOCK) {
+                       xfs_buf_rele(bp);
+                       XFS_STATS_INC(xb_busy_locked);
+                       return NULL;
+               }
+               xfs_buf_lock(bp);
+               XFS_STATS_INC(xb_get_locked_waited);
+       }
+
+       /*
+        * if the buffer is stale, clear all the external state associated with
+        * it. We need to keep flags such as how we allocated the buffer memory
+        * intact here.
+        */
+       if (bp->b_flags & XBF_STALE) {
+               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
+       }
+
+       trace_xfs_buf_find(bp, flags, _RET_IP_);
+       XFS_STATS_INC(xb_get_locked);
+       return bp;
+}
+
+/*
+ *     Assembles a buffer covering the specified range.
+ *     Storage in memory for all portions of the buffer will be allocated,
+ *     although backing storage may not be.
+ */
+xfs_buf_t *
+xfs_buf_get(
+       xfs_buftarg_t           *target,/* target for buffer            */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp, *new_bp;
+       int                     error = 0;
+
+       new_bp = xfs_buf_allocate(flags);
+       if (unlikely(!new_bp))
+               return NULL;
+
+       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+       if (bp == new_bp) {
+               error = xfs_buf_allocate_memory(bp, flags);
+               if (error)
+                       goto no_buffer;
+       } else {
+               xfs_buf_deallocate(new_bp);
+               if (unlikely(bp == NULL))
+                       return NULL;
+       }
+
+       if (!(bp->b_flags & XBF_MAPPED)) {
+               error = _xfs_buf_map_pages(bp, flags);
+               if (unlikely(error)) {
+                       xfs_warn(target->bt_mount,
+                               "%s: failed to map pages\n", __func__);
+                       goto no_buffer;
+               }
+       }
+
+       XFS_STATS_INC(xb_get);
+
+       /*
+        * Always fill in the block number now, the mapped cases can do
+        * their own overlay of this later.
+        */
+       bp->b_bn = ioff;
+       bp->b_count_desired = bp->b_buffer_length;
+
+       trace_xfs_buf_get(bp, flags, _RET_IP_);
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
+
+STATIC int
+_xfs_buf_read(
+       xfs_buf_t               *bp,
+       xfs_buf_flags_t         flags)
+{
+       int                     status;
+
+       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+
+       status = xfs_buf_iorequest(bp);
+       if (status || bp->b_error || (flags & XBF_ASYNC))
+               return status;
+       return xfs_buf_iowait(bp);
+}
+
+xfs_buf_t *
+xfs_buf_read(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize,
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp;
+
+       flags |= XBF_READ;
+
+       bp = xfs_buf_get(target, ioff, isize, flags);
+       if (bp) {
+               trace_xfs_buf_read(bp, flags, _RET_IP_);
+
+               if (!XFS_BUF_ISDONE(bp)) {
+                       XFS_STATS_INC(xb_get_read);
+                       _xfs_buf_read(bp, flags);
+               } else if (flags & XBF_ASYNC) {
+                       /*
+                        * Read ahead call which is already satisfied,
+                        * drop the buffer
+                        */
+                       goto no_buffer;
+               } else {
+                       /* We do not want read in the flags */
+                       bp->b_flags &= ~XBF_READ;
+               }
+       }
+
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
+
+/*
+ *     If we are not low on memory then do the readahead in a deadlock
+ *     safe manner.
+ */
+void
+xfs_buf_readahead(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize)
+{
+       if (bdi_read_congested(target->bt_bdi))
+               return;
+
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             daddr,
+       size_t                  length,
+       int                     flags)
+{
+       xfs_buf_t               *bp;
+       int                     error;
+
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+
+       /* set up the buffer for a read IO */
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       return bp;
+}
+
+xfs_buf_t *
+xfs_buf_get_empty(
+       size_t                  len,
+       xfs_buftarg_t           *target)
+{
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (bp)
+               _xfs_buf_initialize(bp, target, 0, len, 0);
+       return bp;
+}
+
+/*
+ * Return a buffer allocated as an empty buffer and associated to external
+ * memory via xfs_buf_associate_memory() back to it's empty state.
+ */
+void
+xfs_buf_set_empty(
+       struct xfs_buf          *bp,
+       size_t                  len)
+{
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_page_count = 0;
+       bp->b_addr = NULL;
+       bp->b_file_offset = 0;
+       bp->b_buffer_length = bp->b_count_desired = len;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       bp->b_flags &= ~XBF_MAPPED;
+}
+
+static inline struct page *
+mem_to_page(
+       void                    *addr)
+{
+       if ((!is_vmalloc_addr(addr))) {
+               return virt_to_page(addr);
+       } else {
+               return vmalloc_to_page(addr);
+       }
+}
+
+int
+xfs_buf_associate_memory(
+       xfs_buf_t               *bp,
+       void                    *mem,
+       size_t                  len)
+{
+       int                     rval;
+       int                     i = 0;
+       unsigned long           pageaddr;
+       unsigned long           offset;
+       size_t                  buflen;
+       int                     page_count;
+
+       pageaddr = (unsigned long)mem & PAGE_MASK;
+       offset = (unsigned long)mem - pageaddr;
+       buflen = PAGE_ALIGN(len + offset);
+       page_count = buflen >> PAGE_SHIFT;
+
+       /* Free any previous set of page pointers */
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_addr = mem;
+
+       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
+       if (rval)
+               return rval;
+
+       bp->b_offset = offset;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               bp->b_pages[i] = mem_to_page((void *)pageaddr);
+               pageaddr += PAGE_SIZE;
+       }
+
+       bp->b_count_desired = len;
+       bp->b_buffer_length = buflen;
+       bp->b_flags |= XBF_MAPPED;
+
+       return 0;
+}
+
+xfs_buf_t *
+xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
+       size_t                  len,
+       int                     flags)
+{
+       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+       int                     error, i;
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (unlikely(bp == NULL))
+               goto fail;
+       _xfs_buf_initialize(bp, target, 0, len, 0);
+
+       error = _xfs_buf_get_pages(bp, page_count, 0);
+       if (error)
+               goto fail_free_buf;
+
+       for (i = 0; i < page_count; i++) {
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
+               if (!bp->b_pages[i])
+                       goto fail_free_mem;
+       }
+       bp->b_flags |= _XBF_PAGES;
+
+       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+       if (unlikely(error)) {
+               xfs_warn(target->bt_mount,
+                       "%s: failed to map pages\n", __func__);
+               goto fail_free_mem;
+       }
+
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
+       return bp;
+
+ fail_free_mem:
+       while (--i >= 0)
+               __free_page(bp->b_pages[i]);
+       _xfs_buf_free_pages(bp);
+ fail_free_buf:
+       xfs_buf_deallocate(bp);
+ fail:
+       return NULL;
+}
+
+/*
+ *     Increment reference count on buffer, to hold the buffer concurrently
+ *     with another thread which may release (free) the buffer asynchronously.
+ *     Must hold the buffer already to call this function.
+ */
+void
+xfs_buf_hold(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_hold(bp, _RET_IP_);
+       atomic_inc(&bp->b_hold);
+}
+
+/*
+ *     Releases a hold on the specified buffer.  If the
+ *     the hold count is 1, calls xfs_buf_free.
+ */
+void
+xfs_buf_rele(
+       xfs_buf_t               *bp)
+{
+       struct xfs_perag        *pag = bp->b_pag;
+
+       trace_xfs_buf_rele(bp, _RET_IP_);
+
+       if (!pag) {
+               ASSERT(list_empty(&bp->b_lru));
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
+               if (atomic_dec_and_test(&bp->b_hold))
+                       xfs_buf_free(bp);
+               return;
+       }
+
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
+       ASSERT(atomic_read(&bp->b_hold) > 0);
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
+               if (!(bp->b_flags & XBF_STALE) &&
+                          atomic_read(&bp->b_lru_ref)) {
+                       xfs_buf_lru_add(bp);
+                       spin_unlock(&pag->pag_buf_lock);
+               } else {
+                       xfs_buf_lru_del(bp);
+                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
+                       xfs_buf_free(bp);
+               }
+       }
+}
+
+
+/*
+ *     Lock a buffer object, if it is not already locked.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we are
+ *     being asked to lock a buffer that has been reallocated. Because it is
+ *     pinned, we know that the log has not been pushed to disk and hence it
+ *     will still be locked.  Rather than continuing to have trylock attempts
+ *     fail until someone else pushes the log, push it ourselves before
+ *     returning.  This means that the xfsaild will not get stuck trying
+ *     to push on stale inode buffers.
+ */
+int
+xfs_buf_trylock(
+       struct xfs_buf          *bp)
+{
+       int                     locked;
+
+       locked = down_trylock(&bp->b_sema) == 0;
+       if (locked)
+               XB_SET_OWNER(bp);
+       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+
+       trace_xfs_buf_trylock(bp, _RET_IP_);
+       return locked;
+}
+
+/*
+ *     Lock a buffer object.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we
+ *     are being asked to lock a buffer that has been reallocated. Because
+ *     it is pinned, we know that the log has not been pushed to disk and
+ *     hence it will still be locked. Rather than sleeping until someone
+ *     else pushes the log, push it ourselves before trying to get the lock.
+ */
+void
+xfs_buf_lock(
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_lock(bp, _RET_IP_);
+
+       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+       down(&bp->b_sema);
+       XB_SET_OWNER(bp);
+
+       trace_xfs_buf_lock_done(bp, _RET_IP_);
+}
+
+/*
+ *     Releases the lock on the buffer object.
+ *     If the buffer is marked delwri but is not queued, do so before we
+ *     unlock the buffer as we need to set flags correctly.  We also need to
+ *     take a reference for the delwri queue because the unlocker is going to
+ *     drop their's and they don't know we just queued it.
+ */
+void
+xfs_buf_unlock(
+       struct xfs_buf          *bp)
+{
+       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
+               atomic_inc(&bp->b_hold);
+               bp->b_flags |= XBF_ASYNC;
+               xfs_buf_delwri_queue(bp, 0);
+       }
+
+       XB_CLEAR_OWNER(bp);
+       up(&bp->b_sema);
+
+       trace_xfs_buf_unlock(bp, _RET_IP_);
+}
+
+STATIC void
+xfs_buf_wait_unpin(
+       xfs_buf_t               *bp)
+{
+       DECLARE_WAITQUEUE       (wait, current);
+
+       if (atomic_read(&bp->b_pin_count) == 0)
+               return;
+
+       add_wait_queue(&bp->b_waiters, &wait);
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (atomic_read(&bp->b_pin_count) == 0)
+                       break;
+               io_schedule();
+       }
+       remove_wait_queue(&bp->b_waiters, &wait);
+       set_current_state(TASK_RUNNING);
+}
+
+/*
+ *     Buffer Utility Routines
+ */
+
+STATIC void
+xfs_buf_iodone_work(
+       struct work_struct      *work)
+{
+       xfs_buf_t               *bp =
+               container_of(work, xfs_buf_t, b_iodone_work);
+
+       if (bp->b_iodone)
+               (*(bp->b_iodone))(bp);
+       else if (bp->b_flags & XBF_ASYNC)
+               xfs_buf_relse(bp);
+}
+
+void
+xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       trace_xfs_buf_iodone(bp, _RET_IP_);
+
+       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+       if (bp->b_error == 0)
+               bp->b_flags |= XBF_DONE;
+
+       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
+               if (schedule) {
+                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
+                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
+               } else {
+                       xfs_buf_iodone_work(&bp->b_iodone_work);
+               }
+       } else {
+               complete(&bp->b_iowait);
+       }
+}
+
+void
+xfs_buf_ioerror(
+       xfs_buf_t               *bp,
+       int                     error)
+{
+       ASSERT(error >= 0 && error <= 0xffff);
+       bp->b_error = (unsigned short)error;
+       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+}
+
+int
+xfs_bwrite(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       int                     error;
+
+       bp->b_flags |= XBF_WRITE;
+       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+
+       xfs_buf_delwri_dequeue(bp);
+       xfs_bdstrat_cb(bp);
+
+       error = xfs_buf_iowait(bp);
+       if (error)
+               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+       xfs_buf_relse(bp);
+       return error;
+}
+
+void
+xfs_bdwrite(
+       void                    *mp,
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_bdwrite(bp, _RET_IP_);
+
+       bp->b_flags &= ~XBF_READ;
+       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
+
+       xfs_buf_delwri_queue(bp, 1);
+}
+
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+       xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+       /*
+        * No need to wait until the buffer is unpinned, we aren't flushing it.
+        */
+       xfs_buf_ioerror(bp, EIO);
+
+       /*
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_UNDONE(bp);
+       XFS_BUF_STALE(bp);
+
+       xfs_buf_ioend(bp, 0);
+
+       return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the xfs_buf_ioend call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+       struct xfs_buf  *bp)
+{
+       int64_t         fl = bp->b_flags;
+       /*
+        * No need to wait until the buffer is unpinned.
+        * We aren't flushing it.
+        *
+        * chunkhold expects B_DONE to be set, whether
+        * we actually finish the I/O or not. We don't want to
+        * change that interface.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_DONE(bp);
+       XFS_BUF_STALE(bp);
+       bp->b_iodone = NULL;
+       if (!(fl & XBF_ASYNC)) {
+               /*
+                * Mark b_error and B_ERROR _both_.
+                * Lot's of chunkcache code assumes that.
+                * There's no reason to mark error for
+                * ASYNC buffers.
+                */
+               xfs_buf_ioerror(bp, EIO);
+               XFS_BUF_FINISH_IOWAIT(bp);
+       } else {
+               xfs_buf_relse(bp);
+       }
+
+       return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+       struct xfs_buf  *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               /*
+                * Metadata write that didn't get logged but
+                * written delayed anyway. These aren't associated
+                * with a transaction, and can be ignored.
+                */
+               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+                       return xfs_bioerror_relse(bp);
+               else
+                       return xfs_bioerror(bp);
+       }
+
+       xfs_buf_iorequest(bp);
+       return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               xfs_bioerror_relse(bp);
+               return;
+       }
+
+       xfs_buf_iorequest(bp);
+}
+
+STATIC void
+_xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+               xfs_buf_ioend(bp, schedule);
+}
+
+STATIC void
+xfs_buf_bio_end_io(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
+
+       xfs_buf_ioerror(bp, -error);
+
+       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
+       _xfs_buf_ioend(bp, 1);
+       bio_put(bio);
+}
+
+STATIC void
+_xfs_buf_ioapply(
+       xfs_buf_t               *bp)
+{
+       int                     rw, map_i, total_nr_pages, nr_pages;
+       struct bio              *bio;
+       int                     offset = bp->b_offset;
+       int                     size = bp->b_count_desired;
+       sector_t                sector = bp->b_bn;
+
+       total_nr_pages = bp->b_page_count;
+       map_i = 0;
+
+       if (bp->b_flags & XBF_WRITE) {
+               if (bp->b_flags & XBF_SYNCIO)
+                       rw = WRITE_SYNC;
+               else
+                       rw = WRITE;
+               if (bp->b_flags & XBF_FUA)
+                       rw |= REQ_FUA;
+               if (bp->b_flags & XBF_FLUSH)
+                       rw |= REQ_FLUSH;
+       } else if (bp->b_flags & XBF_READ_AHEAD) {
+               rw = READA;
+       } else {
+               rw = READ;
+       }
+
+       /* we only use the buffer cache for meta-data */
+       rw |= REQ_META;
+
+next_chunk:
+       atomic_inc(&bp->b_io_remaining);
+       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+       if (nr_pages > total_nr_pages)
+               nr_pages = total_nr_pages;
+
+       bio = bio_alloc(GFP_NOIO, nr_pages);
+       bio->bi_bdev = bp->b_target->bt_bdev;
+       bio->bi_sector = sector;
+       bio->bi_end_io = xfs_buf_bio_end_io;
+       bio->bi_private = bp;
+
+
+       for (; size && nr_pages; nr_pages--, map_i++) {
+               int     rbytes, nbytes = PAGE_SIZE - offset;
+
+               if (nbytes > size)
+                       nbytes = size;
+
+               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+               if (rbytes < nbytes)
+                       break;
+
+               offset = 0;
+               sector += nbytes >> BBSHIFT;
+               size -= nbytes;
+               total_nr_pages--;
+       }
+
+       if (likely(bio->bi_size)) {
+               if (xfs_buf_is_vmapped(bp)) {
+                       flush_kernel_vmap_range(bp->b_addr,
+                                               xfs_buf_vmap_len(bp));
+               }
+               submit_bio(rw, bio);
+               if (size)
+                       goto next_chunk;
+       } else {
+               xfs_buf_ioerror(bp, EIO);
+               bio_put(bio);
+       }
+}
+
+int
+xfs_buf_iorequest(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iorequest(bp, _RET_IP_);
+
+       if (bp->b_flags & XBF_DELWRI) {
+               xfs_buf_delwri_queue(bp, 1);
+               return 0;
+       }
+
+       if (bp->b_flags & XBF_WRITE) {
+               xfs_buf_wait_unpin(bp);
+       }
+
+       xfs_buf_hold(bp);
+
+       /* Set the count to 1 initially, this will stop an I/O
+        * completion callout which happens before we have started
+        * all the I/O from calling xfs_buf_ioend too early.
+        */
+       atomic_set(&bp->b_io_remaining, 1);
+       _xfs_buf_ioapply(bp);
+       _xfs_buf_ioend(bp, 0);
+
+       xfs_buf_rele(bp);
+       return 0;
+}
+
+/*
+ *     Waits for I/O to complete on the buffer supplied.
+ *     It returns immediately if no I/O is pending.
+ *     It returns the I/O error code, if any, or 0 if there was no error.
+ */
+int
+xfs_buf_iowait(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iowait(bp, _RET_IP_);
+
+       wait_for_completion(&bp->b_iowait);
+
+       trace_xfs_buf_iowait_done(bp, _RET_IP_);
+       return bp->b_error;
+}
+
+xfs_caddr_t
+xfs_buf_offset(
+       xfs_buf_t               *bp,
+       size_t                  offset)
+{
+       struct page             *page;
+
+       if (bp->b_flags & XBF_MAPPED)
+               return bp->b_addr + offset;
+
+       offset += bp->b_offset;
+       page = bp->b_pages[offset >> PAGE_SHIFT];
+       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+}
+
+/*
+ *     Move data into or out of a buffer.
+ */
+void
+xfs_buf_iomove(
+       xfs_buf_t               *bp,    /* buffer to process            */
+       size_t                  boff,   /* starting buffer offset       */
+       size_t                  bsize,  /* length to copy               */
+       void                    *data,  /* data address                 */
+       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
+{
+       size_t                  bend, cpoff, csize;
+       struct page             *page;
+
+       bend = boff + bsize;
+       while (boff < bend) {
+               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
+               cpoff = xfs_buf_poff(boff + bp->b_offset);
+               csize = min_t(size_t,
+                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
+
+               ASSERT(((csize + cpoff) <= PAGE_SIZE));
+
+               switch (mode) {
+               case XBRW_ZERO:
+                       memset(page_address(page) + cpoff, 0, csize);
+                       break;
+               case XBRW_READ:
+                       memcpy(data, page_address(page) + cpoff, csize);
+                       break;
+               case XBRW_WRITE:
+                       memcpy(page_address(page) + cpoff, data, csize);
+               }
+
+               boff += csize;
+               data += csize;
+       }
+}
+
+/*
+ *     Handling of buffer targets (buftargs).
+ */
+
+/*
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
+ */
+void
+xfs_wait_buftarg(
+       struct xfs_buftarg      *btp)
+{
+       struct xfs_buf          *bp;
+
+restart:
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+               if (atomic_read(&bp->b_hold) > 1) {
+                       spin_unlock(&btp->bt_lru_lock);
+                       delay(100);
+                       goto restart;
+               }
+               /*
+                * clear the LRU reference count so the bufer doesn't get
+                * ignored in xfs_buf_rele().
+                */
+               atomic_set(&bp->b_lru_ref, 0);
+               spin_unlock(&btp->bt_lru_lock);
+               xfs_buf_rele(bp);
+               spin_lock(&btp->bt_lru_lock);
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+int
+xfs_buftarg_shrink(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_buftarg      *btp = container_of(shrink,
+                                       struct xfs_buftarg, bt_shrinker);
+       struct xfs_buf          *bp;
+       int nr_to_scan = sc->nr_to_scan;
+       LIST_HEAD(dispose);
+
+       if (!nr_to_scan)
+               return btp->bt_lru_nr;
+
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               if (nr_to_scan-- <= 0)
+                       break;
+
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+               /*
+                * Decrement the b_lru_ref count unless the value is already
+                * zero. If the value is already zero, we need to reclaim the
+                * buffer, otherwise it gets another trip through the LRU.
+                */
+               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+                       list_move_tail(&bp->b_lru, &btp->bt_lru);
+                       continue;
+               }
+
+               /*
+                * remove the buffer from the LRU now to avoid needing another
+                * lock round trip inside xfs_buf_rele().
+                */
+               list_move(&bp->b_lru, &dispose);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+
+       while (!list_empty(&dispose)) {
+               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+               list_del_init(&bp->b_lru);
+               xfs_buf_rele(bp);
+       }
+
+       return btp->bt_lru_nr;
+}
+
+void
+xfs_free_buftarg(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *btp)
+{
+       unregister_shrinker(&btp->bt_shrinker);
+
+       xfs_flush_buftarg(btp, 1);
+       if (mp->m_flags & XFS_MOUNT_BARRIER)
+               xfs_blkdev_issue_flush(btp);
+
+       kthread_stop(btp->bt_task);
+       kmem_free(btp);
+}
+
+STATIC int
+xfs_setsize_buftarg_flags(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize,
+       int                     verbose)
+{
+       btp->bt_bsize = blocksize;
+       btp->bt_sshift = ffs(sectorsize) - 1;
+       btp->bt_smask = sectorsize - 1;
+
+       if (set_blocksize(btp->bt_bdev, sectorsize)) {
+               xfs_warn(btp->bt_mount,
+                       "Cannot set_blocksize to %u on device %s\n",
+                       sectorsize, xfs_buf_target_name(btp));
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ *     When allocating the initial buffer target we have not yet
+ *     read in the superblock, so don't know what sized sectors
+ *     are being used is at this early stage.  Play safe.
+ */
+STATIC int
+xfs_setsize_buftarg_early(
+       xfs_buftarg_t           *btp,
+       struct block_device     *bdev)
+{
+       return xfs_setsize_buftarg_flags(btp,
+                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize)
+{
+       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
+STATIC int
+xfs_alloc_delwrite_queue(
+       xfs_buftarg_t           *btp,
+       const char              *fsname)
+{
+       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+       spin_lock_init(&btp->bt_delwrite_lock);
+       btp->bt_flags = 0;
+       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
+       if (IS_ERR(btp->bt_task))
+               return PTR_ERR(btp->bt_task);
+       return 0;
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
+       struct block_device     *bdev,
+       int                     external,
+       const char              *fsname)
+{
+       xfs_buftarg_t           *btp;
+
+       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+       btp->bt_mount = mp;
+       btp->bt_dev =  bdev->bd_dev;
+       btp->bt_bdev = bdev;
+       btp->bt_bdi = blk_get_backing_dev_info(bdev);
+       if (!btp->bt_bdi)
+               goto error;
+
+       INIT_LIST_HEAD(&btp->bt_lru);
+       spin_lock_init(&btp->bt_lru_lock);
+       if (xfs_setsize_buftarg_early(btp, bdev))
+               goto error;
+       if (xfs_alloc_delwrite_queue(btp, fsname))
+               goto error;
+       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+       register_shrinker(&btp->bt_shrinker);
+       return btp;
+
+error:
+       kmem_free(btp);
+       return NULL;
+}
+
+
+/*
+ *     Delayed write buffer handling
+ */
+STATIC void
+xfs_buf_delwri_queue(
+       xfs_buf_t               *bp,
+       int                     unlock)
+{
+       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+
+       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
+
+       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
+
+       spin_lock(dwlk);
+       /* If already in the queue, dequeue and place at tail */
+       if (!list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               if (unlock)
+                       atomic_dec(&bp->b_hold);
+               list_del(&bp->b_list);
+       }
+
+       if (list_empty(dwq)) {
+               /* start xfsbufd as it is about to have something to do */
+               wake_up_process(bp->b_target->bt_task);
+       }
+
+       bp->b_flags |= _XBF_DELWRI_Q;
+       list_add_tail(&bp->b_list, dwq);
+       bp->b_queuetime = jiffies;
+       spin_unlock(dwlk);
+
+       if (unlock)
+               xfs_buf_unlock(bp);
+}
+
+void
+xfs_buf_delwri_dequeue(
+       xfs_buf_t               *bp)
+{
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+       int                     dequeued = 0;
+
+       spin_lock(dwlk);
+       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               list_del_init(&bp->b_list);
+               dequeued = 1;
+       }
+       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+       spin_unlock(dwlk);
+
+       if (dequeued)
+               xfs_buf_rele(bp);
+
+       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
+}
+
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+       ASSERT(bp->b_flags & XBF_DELWRI);
+       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+       /*
+        * Check the buffer age before locking the delayed write queue as we
+        * don't need to promote buffers that are already past the flush age.
+        */
+       if (bp->b_queuetime < jiffies - age)
+               return;
+       bp->b_queuetime = jiffies - age;
+       spin_lock(&btp->bt_delwrite_lock);
+       list_move(&bp->b_list, &btp->bt_delwrite_queue);
+       spin_unlock(&btp->bt_delwrite_lock);
+}
+
+STATIC void
+xfs_buf_runall_queues(
+       struct workqueue_struct *queue)
+{
+       flush_workqueue(queue);
+}
+
+/*
+ * Move as many buffers as specified to the supplied list
+ * idicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+       xfs_buftarg_t   *target,
+       struct list_head *list,
+       unsigned long   age)
+{
+       xfs_buf_t       *bp, *n;
+       struct list_head *dwq = &target->bt_delwrite_queue;
+       spinlock_t      *dwlk = &target->bt_delwrite_lock;
+       int             skipped = 0;
+       int             force;
+
+       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       INIT_LIST_HEAD(list);
+       spin_lock(dwlk);
+       list_for_each_entry_safe(bp, n, dwq, b_list) {
+               ASSERT(bp->b_flags & XBF_DELWRI);
+
+               if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
+                       if (!force &&
+                           time_before(jiffies, bp->b_queuetime + age)) {
+                               xfs_buf_unlock(bp);
+                               break;
+                       }
+
+                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
+                       bp->b_flags |= XBF_WRITE;
+                       list_move_tail(&bp->b_list, list);
+                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
+               } else
+                       skipped++;
+       }
+       spin_unlock(dwlk);
+
+       return skipped;
+
+}
+
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+       void            *priv,
+       struct list_head *a,
+       struct list_head *b)
+{
+       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
+       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
+       xfs_daddr_t             diff;
+
+       diff = ap->b_bn - bp->b_bn;
+       if (diff < 0)
+               return -1;
+       if (diff > 0)
+               return 1;
+       return 0;
+}
+
+STATIC int
+xfsbufd(
+       void            *data)
+{
+       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
+
+       current->flags |= PF_MEMALLOC;
+
+       set_freezable();
+
+       do {
+               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+               struct list_head tmp;
+               struct blk_plug plug;
+
+               if (unlikely(freezing(current))) {
+                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+                       refrigerator();
+               } else {
+                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+               }
+
+               /* sleep for a long time if there is nothing to do. */
+               if (list_empty(&target->bt_delwrite_queue))
+                       tout = MAX_SCHEDULE_TIMEOUT;
+               schedule_timeout_interruptible(tout);
+
+               xfs_buf_delwri_split(target, &tmp, age);
+               list_sort(NULL, &tmp, xfs_buf_cmp);
+
+               blk_start_plug(&plug);
+               while (!list_empty(&tmp)) {
+                       struct xfs_buf *bp;
+                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+                       list_del_init(&bp->b_list);
+                       xfs_bdstrat_cb(bp);
+               }
+               blk_finish_plug(&plug);
+       } while (!kthread_should_stop());
+
+       return 0;
+}
+
+/*
+ *     Go through all incore buffers, and release buffers if they belong to
+ *     the given device. This is used in filesystem error handling to
+ *     preserve the consistency of its metadata.
+ */
+int
+xfs_flush_buftarg(
+       xfs_buftarg_t   *target,
+       int             wait)
+{
+       xfs_buf_t       *bp;
+       int             pincount = 0;
+       LIST_HEAD(tmp_list);
+       LIST_HEAD(wait_list);
+       struct blk_plug plug;
+
+       xfs_buf_runall_queues(xfsconvertd_workqueue);
+       xfs_buf_runall_queues(xfsdatad_workqueue);
+       xfs_buf_runall_queues(xfslogd_workqueue);
+
+       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+
+       /*
+        * Dropped the delayed write list lock, now walk the temporary list.
+        * All I/O is issued async and then if we need to wait for completion
+        * we do that after issuing all the IO.
+        */
+       list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+       blk_start_plug(&plug);
+       while (!list_empty(&tmp_list)) {
+               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
+               ASSERT(target == bp->b_target);
+               list_del_init(&bp->b_list);
+               if (wait) {
+                       bp->b_flags &= ~XBF_ASYNC;
+                       list_add(&bp->b_list, &wait_list);
+               }
+               xfs_bdstrat_cb(bp);
+       }
+       blk_finish_plug(&plug);
+
+       if (wait) {
+               /* Wait for IO to complete. */
+               while (!list_empty(&wait_list)) {
+                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
+
+                       list_del_init(&bp->b_list);
+                       xfs_buf_iowait(bp);
+                       xfs_buf_relse(bp);
+               }
+       }
+
+       return pincount;
+}
+
+int __init
+xfs_buf_init(void)
+{
+       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+                                               KM_ZONE_HWALIGN, NULL);
+       if (!xfs_buf_zone)
+               goto out;
+
+       xfslogd_workqueue = alloc_workqueue("xfslogd",
+                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+       if (!xfslogd_workqueue)
+               goto out_free_buf_zone;
+
+       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
+       if (!xfsdatad_workqueue)
+               goto out_destroy_xfslogd_workqueue;
+
+       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+                                               WQ_MEM_RECLAIM, 1);
+       if (!xfsconvertd_workqueue)
+               goto out_destroy_xfsdatad_workqueue;
+
+       return 0;
+
+ out_destroy_xfsdatad_workqueue:
+       destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+       destroy_workqueue(xfslogd_workqueue);
+ out_free_buf_zone:
+       kmem_zone_destroy(xfs_buf_zone);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_buf_terminate(void)
+{
+       destroy_workqueue(xfsconvertd_workqueue);
+       destroy_workqueue(xfsdatad_workqueue);
+       destroy_workqueue(xfslogd_workqueue);
+       kmem_zone_destroy(xfs_buf_zone);
+}
+
+#ifdef CONFIG_KDB_MODULES
+struct list_head *
+xfs_get_buftarg_list(void)
+{
+       return &xfs_buftarg_list;
+}
+#endif
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
new file mode 100644 (file)
index 0000000..620972b
--- /dev/null
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ *     Base types
+ */
+
+#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+       XBRW_READ = 1,                  /* transfer into target memory */
+       XBRW_WRITE = 2,                 /* transfer from target memory */
+       XBRW_ZERO = 3,                  /* Zero target memory */
+} xfs_buf_rw_t;
+
+#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
+#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA                (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK       (1 << 15)/* lock requested */
+#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
+
+typedef unsigned int xfs_buf_flags_t;
+
+#define XFS_BUF_FLAGS \
+       { XBF_READ,             "READ" }, \
+       { XBF_WRITE,            "WRITE" }, \
+       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
+       { XBF_MAPPED,           "MAPPED" }, \
+       { XBF_ASYNC,            "ASYNC" }, \
+       { XBF_DONE,             "DONE" }, \
+       { XBF_DELWRI,           "DELWRI" }, \
+       { XBF_STALE,            "STALE" }, \
+       { XBF_SYNCIO,           "SYNCIO" }, \
+       { XBF_FUA,              "FUA" }, \
+       { XBF_FLUSH,            "FLUSH" }, \
+       { XBF_LOCK,             "LOCK" },       /* should never be set */\
+       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
+       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
+       { _XBF_PAGES,           "PAGES" }, \
+       { _XBF_KMEM,            "KMEM" }, \
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
+
+typedef enum {
+       XBT_FORCE_SLEEP = 0,
+       XBT_FORCE_FLUSH = 1,
+} xfs_buftarg_flags_t;
+
+typedef struct xfs_buftarg {
+       dev_t                   bt_dev;
+       struct block_device     *bt_bdev;
+       struct backing_dev_info *bt_bdi;
+       struct xfs_mount        *bt_mount;
+       unsigned int            bt_bsize;
+       unsigned int            bt_sshift;
+       size_t                  bt_smask;
+
+       /* per device delwri queue */
+       struct task_struct      *bt_task;
+       struct list_head        bt_delwrite_queue;
+       spinlock_t              bt_delwrite_lock;
+       unsigned long           bt_flags;
+
+       /* LRU control structures */
+       struct shrinker         bt_shrinker;
+       struct list_head        bt_lru;
+       spinlock_t              bt_lru_lock;
+       unsigned int            bt_lru_nr;
+} xfs_buftarg_t;
+
+struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+
+#define XB_PAGES       2
+
+typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sits on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       atomic_t                b_lru_ref;      /* lru reclaim ref count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
+       struct semaphore        b_sema;         /* semaphore for lockables */
+
+       struct list_head        b_lru;          /* lru list */
+       wait_queue_head_t       b_waiters;      /* unpin waiters */
+       struct list_head        b_list;
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
+       xfs_buftarg_t           *b_target;      /* buffer target (device) */
+       xfs_daddr_t             b_bn;           /* block number for I/O */
+       size_t                  b_count_desired;/* desired transfer size */
+       void                    *b_addr;        /* virtual address of buffer */
+       struct work_struct      b_iodone_work;
+       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
+       struct completion       b_iowait;       /* queue for I/O waiters */
+       void                    *b_fspriv;
+       struct xfs_trans        *b_transp;
+       struct page             **b_pages;      /* array of page pointers */
+       struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
+#ifdef XFS_BUF_LOCK_TRACKING
+       int                     b_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t, xfs_buf_t *);
+#define xfs_incore(buftarg,blkno,len,lockit) \
+       _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+
+extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
+
+/* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
+
+/* Locking and Unlocking Buffers */
+extern int xfs_buf_trylock(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+       ((bp)->b_sema.count <= 0)
+
+/* Buffer Read and Write Routines */
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
+
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern void xfs_buf_ioend(xfs_buf_t *, int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
+                               xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
+{
+       return bp ? bp->b_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+
+/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_promote(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
+
+static inline const char *
+xfs_buf_target_name(struct xfs_buftarg *target)
+{
+       static char __b[BDEVNAME_SIZE];
+
+       return bdevname(target->bt_bdev, __b);
+}
+
+
+#define XFS_BUF_ZEROFLAGS(bp) \
+       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
+#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
+#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
+#define XFS_BUF_SUPER_STALE(bp)        do {                            \
+                                       XFS_BUF_STALE(bp);      \
+                                       xfs_buf_delwri_dequeue(bp);     \
+                                       XFS_BUF_DONE(bp);       \
+                               } while (0)
+
+#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
+
+static inline void
+xfs_buf_set_ref(
+       struct xfs_buf  *bp,
+       int             lru_ref)
+{
+       atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
+
+static inline int xfs_buf_ispinned(struct xfs_buf *bp)
+{
+       return atomic_read(&bp->b_pin_count);
+}
+
+#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+       xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+}
+
+/*
+ *     Handling of buftargs.
+ */
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
+extern void xfs_wait_buftarg(xfs_buftarg_t *);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#ifdef CONFIG_KDB_MODULES
+extern struct list_head *xfs_get_buftarg_list(void);
+#endif
+
+#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
+
+#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
new file mode 100644 (file)
index 0000000..244e797
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_fsblock_t           start,
+       xfs_fsblock_t           len,
+       xfs_fsblock_t           minlen,
+       __uint64_t              *blocks_trimmed)
+{
+       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
+       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp;
+       struct xfs_perag        *pag;
+       int                     error;
+       int                     i;
+
+       pag = xfs_perag_get(mp, agno);
+
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error || !agbp)
+               goto out_put_perag;
+
+       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+       /*
+        * Force out the log.  This means any transactions that might have freed
+        * space before we took the AGF buffer lock are now on disk, and the
+        * volatile disk cache is flushed.
+        */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /*
+        * Look up the longest btree in the AGF and start with it.
+        */
+       error = xfs_alloc_lookup_le(cur, 0,
+                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+       if (error)
+               goto out_del_cursor;
+
+       /*
+        * Loop until we are done with all extents that are large
+        * enough to be worth discarding.
+        */
+       while (i) {
+               xfs_agblock_t fbno;
+               xfs_extlen_t flen;
+
+               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+               if (error)
+                       goto out_del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+               /*
+                * Too small?  Give up.
+                */
+               if (flen < minlen) {
+                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+                       goto out_del_cursor;
+               }
+
+               /*
+                * If the extent is entirely outside of the range we are
+                * supposed to discard skip it.  Do not bother to trim
+                * down partially overlapping ranges for now.
+                */
+               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
+                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               /*
+                * If any blocks in the range are still busy, skip the
+                * discard and try again the next time.
+                */
+               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+                       trace_xfs_discard_busy(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               trace_xfs_discard_extent(mp, agno, fbno, flen);
+               error = -blkdev_issue_discard(bdev,
+                               XFS_AGB_TO_DADDR(mp, agno, fbno),
+                               XFS_FSB_TO_BB(mp, flen),
+                               GFP_NOFS, 0);
+               if (error)
+                       goto out_del_cursor;
+               *blocks_trimmed += flen;
+
+next_extent:
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto out_del_cursor;
+       }
+
+out_del_cursor:
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       xfs_buf_relse(agbp);
+out_put_perag:
+       xfs_perag_put(pag);
+       return error;
+}
+
+int
+xfs_ioc_trim(
+       struct xfs_mount                *mp,
+       struct fstrim_range __user      *urange)
+{
+       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+       unsigned int            granularity = q->limits.discard_granularity;
+       struct fstrim_range     range;
+       xfs_fsblock_t           start, len, minlen;
+       xfs_agnumber_t          start_agno, end_agno, agno;
+       __uint64_t              blocks_trimmed = 0;
+       int                     error, last_error = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (!blk_queue_discard(q))
+               return -XFS_ERROR(EOPNOTSUPP);
+       if (copy_from_user(&range, urange, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+
+       /*
+        * Truncating down the len isn't actually quite correct, but using
+        * XFS_B_TO_FSB would mean we trivially get overflows for values
+        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
+        * used by the fstrim application.  In the end it really doesn't
+        * matter as trimming blocks is an advisory interface.
+        */
+       start = XFS_B_TO_FSBT(mp, range.start);
+       len = XFS_B_TO_FSBT(mp, range.len);
+       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+       start_agno = XFS_FSB_TO_AGNO(mp, start);
+       if (start_agno >= mp->m_sb.sb_agcount)
+               return -XFS_ERROR(EINVAL);
+
+       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+       if (end_agno >= mp->m_sb.sb_agcount)
+               end_agno = mp->m_sb.sb_agcount - 1;
+
+       for (agno = start_agno; agno <= end_agno; agno++) {
+               error = -xfs_trim_extents(mp, agno, start, len, minlen,
+                                         &blocks_trimmed);
+               if (error)
+                       last_error = error;
+       }
+
+       if (last_error)
+               return last_error;
+
+       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+       if (copy_to_user(urange, &range, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+int
+xfs_discard_extents(
+       struct xfs_mount        *mp,
+       struct list_head        *list)
+{
+       struct xfs_busy_extent  *busyp;
+       int                     error = 0;
+
+       list_for_each_entry(busyp, list, list) {
+               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+                                        busyp->length);
+
+               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+                               XFS_FSB_TO_BB(mp, busyp->length),
+                               GFP_NOFS, 0);
+               if (error && error != EOPNOTSUPP) {
+                       xfs_info(mp,
+        "discard failed for extent [0x%llu,%u], error %d",
+                                (unsigned long long)busyp->bno,
+                                busyp->length,
+                                error);
+                       return error;
+               }
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
new file mode 100644 (file)
index 0000000..344879a
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+struct list_head;
+
+extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
+
+#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
new file mode 100644 (file)
index 0000000..db62959
--- /dev/null
@@ -0,0 +1,1454 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+
+/*
+   LOCK ORDER
+
+   inode lock              (ilock)
+   dquot hash-chain lock    (hashlock)
+   xqm dquot freelist lock  (freelistlock
+   mount's dquot list lock  (mplistlock)
+   user dquot lock - lock ordering among dquots is based on the uid or gid
+   group dquot lock - similar to udquots. Between the two dquots, the udquot
+                     has to be locked first.
+   pin lock - the dquot lock must be held to take this lock.
+   flush lock - ditto.
+*/
+
+#ifdef DEBUG
+xfs_buftarg_t *xfs_dqerror_target;
+int xfs_do_dqerror;
+int xfs_dqreq_num;
+int xfs_dqerror_mod = 33;
+#endif
+
+static struct lock_class_key xfs_dquot_other_class;
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots are above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqinit(
+       xfs_mount_t  *mp,
+       xfs_dqid_t   id,
+       uint         type)
+{
+       xfs_dquot_t     *dqp;
+       boolean_t       brandnewdquot;
+
+       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+       dqp->dq_flags = type;
+       dqp->q_core.d_id = cpu_to_be32(id);
+       dqp->q_mount = mp;
+
+       /*
+        * No need to re-initialize these if this is a reclaimed dquot.
+        */
+       if (brandnewdquot) {
+               INIT_LIST_HEAD(&dqp->q_freelist);
+               mutex_init(&dqp->q_qlock);
+               init_waitqueue_head(&dqp->q_pinwait);
+
+               /*
+                * Because we want to use a counting completion, complete
+                * the flush completion once to allow a single access to
+                * the flush completion without blocking.
+                */
+               init_completion(&dqp->q_flush);
+               complete(&dqp->q_flush);
+
+               trace_xfs_dqinit(dqp);
+       } else {
+               /*
+                * Only the q_core portion was zeroed in dqreclaim_one().
+                * So, we need to reset others.
+                */
+               dqp->q_nrefs = 0;
+               dqp->q_blkno = 0;
+               INIT_LIST_HEAD(&dqp->q_mplist);
+               INIT_LIST_HEAD(&dqp->q_hashlist);
+               dqp->q_bufoffset = 0;
+               dqp->q_fileoffset = 0;
+               dqp->q_transp = NULL;
+               dqp->q_gdquot = NULL;
+               dqp->q_res_bcount = 0;
+               dqp->q_res_icount = 0;
+               dqp->q_res_rtbcount = 0;
+               atomic_set(&dqp->q_pincount, 0);
+               dqp->q_hash = NULL;
+               ASSERT(list_empty(&dqp->q_freelist));
+
+               trace_xfs_dqreuse(dqp);
+       }
+
+       /*
+        * In either case we need to make sure group quotas have a different
+        * lock class than user quotas, to make sure lockdep knows we can
+        * locks of one of each at the same time.
+        */
+       if (!(type & XFS_DQ_USER))
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+       /*
+        * log item gets initialized later
+        */
+       return (dqp);
+}
+
+/*
+ * This is called to free all the memory associated with a dquot
+ */
+void
+xfs_qm_dqdestroy(
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(list_empty(&dqp->q_freelist));
+
+       mutex_destroy(&dqp->q_qlock);
+       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+
+       atomic_dec(&xfs_Gqm->qm_totaldquots);
+}
+
+/*
+ * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_dqblk_t     *d)
+{
+       /*
+        * Caller has zero'd the entire dquot 'chunk' already.
+        */
+       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+       d->dd_diskdq.d_id = cpu_to_be32(id);
+       d->dd_diskdq.d_flags = type;
+}
+
+/*
+ * If default limits are in force, push them into the dquot now.
+ * We overwrite the dquot limits only if they are zero and this
+ * is not the root dquot.
+ */
+void
+xfs_qm_adjust_dqlimits(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       xfs_quotainfo_t         *q = mp->m_quotainfo;
+
+       ASSERT(d->d_id);
+
+       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
+               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
+               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+       if (q->qi_isoftlimit && !d->d_ino_softlimit)
+               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
+       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
+               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
+       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
+               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
+       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
+               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+}
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case, when enforcement is off).
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
+ */
+void
+xfs_qm_adjust_dqtimers(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       ASSERT(d->d_id);
+
+#ifdef DEBUG
+       if (d->d_blk_hardlimit)
+               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
+                      be64_to_cpu(d->d_blk_hardlimit));
+       if (d->d_ino_hardlimit)
+               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
+                      be64_to_cpu(d->d_ino_hardlimit));
+       if (d->d_rtb_hardlimit)
+               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
+                      be64_to_cpu(d->d_rtb_hardlimit));
+#endif
+
+       if (!d->d_btimer) {
+               if ((d->d_blk_softlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_softlimit))) ||
+                   (d->d_blk_hardlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_btimelimit);
+               } else {
+                       d->d_bwarns = 0;
+               }
+       } else {
+               if ((!d->d_blk_softlimit ||
+                    (be64_to_cpu(d->d_bcount) <
+                     be64_to_cpu(d->d_blk_softlimit))) &&
+                   (!d->d_blk_hardlimit ||
+                   (be64_to_cpu(d->d_bcount) <
+                    be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = 0;
+               }
+       }
+
+       if (!d->d_itimer) {
+               if ((d->d_ino_softlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_softlimit))) ||
+                   (d->d_ino_hardlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_itimelimit);
+               } else {
+                       d->d_iwarns = 0;
+               }
+       } else {
+               if ((!d->d_ino_softlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_softlimit)))  &&
+                   (!d->d_ino_hardlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = 0;
+               }
+       }
+
+       if (!d->d_rtbtimer) {
+               if ((d->d_rtb_softlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_softlimit))) ||
+                   (d->d_rtb_hardlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_rtbtimelimit);
+               } else {
+                       d->d_rtbwarns = 0;
+               }
+       } else {
+               if ((!d->d_rtb_softlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_softlimit))) &&
+                   (!d->d_rtb_hardlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = 0;
+               }
+       }
+}
+
+/*
+ * initialize a buffer full of dquots and log the whole thing
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_buf_t       *bp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_dqblk_t     *d;
+       int             curid, i;
+
+       ASSERT(tp);
+       ASSERT(xfs_buf_islocked(bp));
+
+       d = bp->b_addr;
+
+       /*
+        * ID of the first dquot in the block - id's are zero based.
+        */
+       curid = id - (id % q->qi_dqperchunk);
+       ASSERT(curid >= 0);
+       memset(d, 0, BBTOB(q->qi_dqchunklen));
+       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
+               xfs_qm_dqinit_core(curid, type, d);
+       xfs_trans_dquot_buf(tp, bp,
+                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
+                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
+                            XFS_BLF_GDQUOT_BUF)));
+       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+}
+
+
+
+/*
+ * Allocate a block and fill it with dquots.
+ * This is called when the bmapi finds a hole.
+ */
+STATIC int
+xfs_qm_dqalloc(
+       xfs_trans_t     **tpp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       xfs_inode_t     *quotip,
+       xfs_fileoff_t   offset_fsb,
+       xfs_buf_t       **O_bpp)
+{
+       xfs_fsblock_t   firstblock;
+       xfs_bmap_free_t flist;
+       xfs_bmbt_irec_t map;
+       int             nmaps, error, committed;
+       xfs_buf_t       *bp;
+       xfs_trans_t     *tp = *tpp;
+
+       ASSERT(tp != NULL);
+
+       trace_xfs_dqalloc(dqp);
+
+       /*
+        * Initialize the bmap freelist prior to calling bmapi code.
+        */
+       xfs_bmap_init(&flist, &firstblock);
+       xfs_ilock(quotip, XFS_ILOCK_EXCL);
+       /*
+        * Return if this type of quotas is turned off while we didn't
+        * have an inode lock
+        */
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+               return (ESRCH);
+       }
+
+       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
+       nmaps = 1;
+       if ((error = xfs_bmapi(tp, quotip,
+                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+                             &firstblock,
+                             XFS_QM_DQALLOC_SPACE_RES(mp),
+                             &map, &nmaps, &flist))) {
+               goto error0;
+       }
+       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(nmaps == 1);
+       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+              (map.br_startblock != HOLESTARTBLOCK));
+
+       /*
+        * Keep track of the blkno to save a lookup later
+        */
+       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+       /* now we can just get the buffer (there's nothing to read yet) */
+       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+                              dqp->q_blkno,
+                              mp->m_quotainfo->qi_dqchunklen,
+                              0);
+       if (!bp || (error = xfs_buf_geterror(bp)))
+               goto error1;
+       /*
+        * Make a chunk of dquots out of this buffer and log
+        * the entire thing.
+        */
+       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
+                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
+
+       /*
+        * xfs_bmap_finish() may commit the current transaction and
+        * start a second transaction if the freelist is not empty.
+        *
+        * Since we still want to modify this buffer, we need to
+        * ensure that the buffer is not released on commit of
+        * the first transaction and ensure the buffer is added to the
+        * second transaction.
+        *
+        * If there is only one transaction then don't stop the buffer
+        * from being released when it commits later on.
+        */
+
+       xfs_trans_bhold(tp, bp);
+
+       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+               goto error1;
+       }
+
+       if (committed) {
+               tp = *tpp;
+               xfs_trans_bjoin(tp, bp);
+       } else {
+               xfs_trans_bhold_release(tp, bp);
+       }
+
+       *O_bpp = bp;
+       return 0;
+
+      error1:
+       xfs_bmap_cancel(&flist);
+      error0:
+       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+       return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot
+ * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ */
+STATIC int
+xfs_qm_dqtobp(
+       xfs_trans_t             **tpp,
+       xfs_dquot_t             *dqp,
+       xfs_disk_dquot_t        **O_ddpp,
+       xfs_buf_t               **O_bpp,
+       uint                    flags)
+{
+       xfs_bmbt_irec_t map;
+       int             nmaps = 1, error;
+       xfs_buf_t       *bp;
+       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_disk_dquot_t *ddq;
+       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
+       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
+
+       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+
+       xfs_ilock(quotip, XFS_ILOCK_SHARED);
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               /*
+                * Return if this type of quotas is turned off while we
+                * didn't have the quota inode lock.
+                */
+               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+               return ESRCH;
+       }
+
+       /*
+        * Find the block map; no allocations yet
+        */
+       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                         NULL, 0, &map, &nmaps, NULL);
+
+       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+       if (error)
+               return error;
+
+       ASSERT(nmaps == 1);
+       ASSERT(map.br_blockcount == 1);
+
+       /*
+        * Offset of dquot in the (fixed sized) dquot chunk.
+        */
+       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+               sizeof(xfs_dqblk_t);
+
+       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+       if (map.br_startblock == HOLESTARTBLOCK) {
+               /*
+                * We don't allocate unless we're asked to
+                */
+               if (!(flags & XFS_QMOPT_DQALLOC))
+                       return ENOENT;
+
+               ASSERT(tp);
+               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+                                       dqp->q_fileoffset, &bp);
+               if (error)
+                       return error;
+               tp = *tpp;
+       } else {
+               trace_xfs_dqtobp_read(dqp);
+
+               /*
+                * store the blkno etc so that we don't have to do the
+                * mapping all the time
+                */
+               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                          dqp->q_blkno,
+                                          mp->m_quotainfo->qi_dqchunklen,
+                                          0, &bp);
+               if (error || !bp)
+                       return XFS_ERROR(error);
+       }
+
+       ASSERT(xfs_buf_islocked(bp));
+
+       /*
+        * calculate the location of the dquot inside the buffer.
+        */
+       ddq = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot...
+        */
+       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
+                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+                          "dqtobp");
+       if (error) {
+               if (!(flags & XFS_QMOPT_DQREPAIR)) {
+                       xfs_trans_brelse(tp, bp);
+                       return XFS_ERROR(EIO);
+               }
+       }
+
+       *O_bpp = bp;
+       *O_ddpp = ddq;
+
+       return (0);
+}
+
+
+/*
+ * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
+ * and release the buffer immediately.
+ *
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+       xfs_trans_t     **tpp,
+       xfs_dqid_t      id,
+       xfs_dquot_t     *dqp,   /* dquot to get filled in */
+       uint            flags)
+{
+       xfs_disk_dquot_t *ddqp;
+       xfs_buf_t        *bp;
+       int              error;
+       xfs_trans_t      *tp;
+
+       ASSERT(tpp);
+
+       trace_xfs_dqread(dqp);
+
+       /*
+        * get a pointer to the on-disk dquot and the buffer containing it
+        * dqp already knows its own type (GROUP/USER).
+        */
+       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
+               return (error);
+       }
+       tp = *tpp;
+
+       /* copy everything from disk dquot to the incore dquot */
+       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+       xfs_qm_dquot_logitem_init(dqp);
+
+       /*
+        * Reservation counters are defined as reservation plus current usage
+        * to avoid having to add every time.
+        */
+       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
+       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
+       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
+
+       /* Mark the buf so that this will stay incore a little longer */
+       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+       /*
+        * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+        * So we need to release with xfs_trans_brelse().
+        * The strategy here is identical to that of inodes; we lock
+        * the dquot in xfs_qm_dqget() before making it accessible to
+        * others. This is because dquots, like inodes, need a good level of
+        * concurrency, and we don't want to take locks on the entire buffers
+        * for dquot accesses.
+        * Note also that the dquot buffer may even be dirty at this point, if
+        * this particular dquot was repaired. We still aren't afraid to
+        * brelse it because we have the changes incore.
+        */
+       ASSERT(xfs_buf_islocked(bp));
+       xfs_trans_brelse(tp, bp);
+
+       return (error);
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,      /* gid or uid, depending on type */
+       uint            type,    /* UDQUOT or GDQUOT */
+       uint            flags,   /* DQALLOC, DQREPAIR */
+       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+       xfs_trans_t     *tp;
+       int             cancelflags=0;
+
+       dqp = xfs_qm_dqinit(mp, id, type);
+       tp = NULL;
+       if (flags & XFS_QMOPT_DQALLOC) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               XFS_WRITE_LOG_RES(mp) +
+                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
+                               128,
+                               0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_WRITE_LOG_COUNT);
+               if (error) {
+                       cancelflags = 0;
+                       goto error0;
+               }
+               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+       }
+
+       /*
+        * Read it from disk; xfs_dqread() takes care of
+        * all the necessary initialization of dquot's fields (locks, etc)
+        */
+       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
+               /*
+                * This can happen if quotas got turned off (ESRCH),
+                * or if the dquot didn't exist on disk and we ask to
+                * allocate (ENOENT).
+                */
+               trace_xfs_dqread_fail(dqp);
+               cancelflags |= XFS_TRANS_ABORT;
+               goto error0;
+       }
+       if (tp) {
+               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
+                       goto error1;
+       }
+
+       *O_dqpp = dqp;
+       return (0);
+
+ error0:
+       ASSERT(error);
+       if (tp)
+               xfs_trans_cancel(tp, cancelflags);
+ error1:
+       xfs_qm_dqdestroy(dqp);
+       *O_dqpp = NULL;
+       return (error);
+}
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; and, these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returning dquot is locked.
+ */
+STATIC int
+xfs_qm_dqlookup(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       xfs_dqhash_t            *qh,
+       xfs_dquot_t             **O_dqpp)
+{
+       xfs_dquot_t             *dqp;
+       uint                    flist_locked;
+
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+
+       flist_locked = B_FALSE;
+
+       /*
+        * Traverse the hashchain looking for a match
+        */
+       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
+               /*
+                * We already have the hashlock. We don't need the
+                * dqlock to look at the id field of the dquot, since the
+                * id can't be modified without the hashlock anyway.
+                */
+               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
+                       trace_xfs_dqlookup_found(dqp);
+
+                       /*
+                        * All in core dquots must be on the dqlist of mp
+                        */
+                       ASSERT(!list_empty(&dqp->q_mplist));
+
+                       xfs_dqlock(dqp);
+                       if (dqp->q_nrefs == 0) {
+                               ASSERT(!list_empty(&dqp->q_freelist));
+                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+                                       trace_xfs_dqlookup_want(dqp);
+
+                                       /*
+                                        * We may have raced with dqreclaim_one()
+                                        * (and lost). So, flag that we don't
+                                        * want the dquot to be reclaimed.
+                                        */
+                                       dqp->dq_flags |= XFS_DQ_WANT;
+                                       xfs_dqunlock(dqp);
+                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       xfs_dqlock(dqp);
+                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
+                               }
+                               flist_locked = B_TRUE;
+                       }
+
+                       /*
+                        * id couldn't have changed; we had the hashlock all
+                        * along
+                        */
+                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+
+                       if (flist_locked) {
+                               if (dqp->q_nrefs != 0) {
+                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       flist_locked = B_FALSE;
+                               } else {
+                                       /* take it off the freelist */
+                                       trace_xfs_dqlookup_freelist(dqp);
+                                       list_del_init(&dqp->q_freelist);
+                                       xfs_Gqm->qm_dqfrlist_cnt--;
+                               }
+                       }
+
+                       XFS_DQHOLD(dqp);
+
+                       if (flist_locked)
+                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       /*
+                        * move the dquot to the front of the hashchain
+                        */
+                       ASSERT(mutex_is_locked(&qh->qh_lock));
+                       list_move(&dqp->q_hashlist, &qh->qh_list);
+                       trace_xfs_dqlookup_done(dqp);
+                       *O_dqpp = dqp;
+                       return 0;
+               }
+       }
+
+       *O_dqpp = NULL;
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+       return (1);
+}
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
+ * a locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *ip,      /* locked inode (optional) */
+       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
+       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
+       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
+{
+       xfs_dquot_t     *dqp;
+       xfs_dqhash_t    *h;
+       uint            version;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
+           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+               return (ESRCH);
+       }
+       h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+       if (xfs_do_dqerror) {
+               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
+                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+                       xfs_debug(mp, "Returning error in dqget");
+                       return (EIO);
+               }
+       }
+#endif
+
+ again:
+
+#ifdef DEBUG
+       ASSERT(type == XFS_DQ_USER ||
+              type == XFS_DQ_PROJ ||
+              type == XFS_DQ_GROUP);
+       if (ip) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               if (type == XFS_DQ_USER)
+                       ASSERT(ip->i_udquot == NULL);
+               else
+                       ASSERT(ip->i_gdquot == NULL);
+       }
+#endif
+       mutex_lock(&h->qh_lock);
+
+       /*
+        * Look in the cache (hashtable).
+        * The chain is kept locked during lookup.
+        */
+       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+               /*
+                * The dquot was found, moved to the front of the chain,
+                * taken off the freelist if it was on it, and locked
+                * at this point. Just unlock the hashchain and return.
+                */
+               ASSERT(*O_dqpp);
+               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+               mutex_unlock(&h->qh_lock);
+               trace_xfs_dqget_hit(*O_dqpp);
+               return (0);     /* success */
+       }
+       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+       /*
+        * Dquot cache miss. We don't want to keep the inode lock across
+        * a (potential) disk read. Also we don't want to deal with the lock
+        * ordering between quotainode and this inode. OTOH, dropping the inode
+        * lock here means dealing with a chown that can happen before
+        * we re-acquire the lock.
+        */
+       if (ip)
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * Save the hashchain version stamp, and unlock the chain, so that
+        * we don't keep the lock across a disk read
+        */
+       version = h->qh_version;
+       mutex_unlock(&h->qh_lock);
+
+       /*
+        * Allocate the dquot on the kernel heap, and read the ondisk
+        * portion off the disk. Also, do all the necessary initialization
+        * This can return ENOENT if dquot didn't exist on disk and we didn't
+        * ask it to allocate; ESRCH if quotas got turned off suddenly.
+        */
+       if ((error = xfs_qm_idtodq(mp, id, type,
+                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+                                          XFS_QMOPT_DOWARN),
+                                 &dqp))) {
+               if (ip)
+                       xfs_ilock(ip, XFS_ILOCK_EXCL);
+               return (error);
+       }
+
+       /*
+        * See if this is mount code calling to look at the overall quota limits
+        * which are stored in the id == 0 user or group's dquot.
+        * Since we may not have done a quotacheck by this point, just return
+        * the dquot without attaching it to any hashtables, lists, etc, or even
+        * taking a reference.
+        * The caller must dqdestroy this once done.
+        */
+       if (flags & XFS_QMOPT_DQSUSER) {
+               ASSERT(id == 0);
+               ASSERT(! ip);
+               goto dqret;
+       }
+
+       /*
+        * Dquot lock comes after hashlock in the lock ordering
+        */
+       if (ip) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * A dquot could be attached to this inode by now, since
+                * we had dropped the ilock.
+                */
+               if (type == XFS_DQ_USER) {
+                       if (!XFS_IS_UQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_udquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_udquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               } else {
+                       if (!XFS_IS_OQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_gdquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_gdquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               }
+       }
+
+       /*
+        * Hashlock comes after ilock in lock order
+        */
+       mutex_lock(&h->qh_lock);
+       if (version != h->qh_version) {
+               xfs_dquot_t *tmpdqp;
+               /*
+                * Now, see if somebody else put the dquot in the
+                * hashtable before us. This can happen because we didn't
+                * keep the hashchain lock. We don't have to worry about
+                * lock order between the two dquots here since dqp isn't
+                * on any findable lists yet.
+                */
+               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+                       /*
+                        * Duplicate found. Just throw away the new dquot
+                        * and start over.
+                        */
+                       xfs_qm_dqput(tmpdqp);
+                       mutex_unlock(&h->qh_lock);
+                       xfs_qm_dqdestroy(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+                       goto again;
+               }
+       }
+
+       /*
+        * Put the dquot at the beginning of the hash-chain and mp's list
+        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+        */
+       ASSERT(mutex_is_locked(&h->qh_lock));
+       dqp->q_hash = h;
+       list_add(&dqp->q_hashlist, &h->qh_list);
+       h->qh_version++;
+
+       /*
+        * Attach this dquot to this filesystem's list of all dquots,
+        * kept inside the mount structure in m_quotainfo field
+        */
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+
+       /*
+        * We return a locked dquot to the caller, with a reference taken
+        */
+       xfs_dqlock(dqp);
+       dqp->q_nrefs = 1;
+
+       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
+       mp->m_quotainfo->qi_dquots++;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+       mutex_unlock(&h->qh_lock);
+ dqret:
+       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       trace_xfs_dqget_miss(dqp);
+       *O_dqpp = dqp;
+       return (0);
+}
+
+
+/*
+ * Release a reference to the dquot (decrement ref-count)
+ * and unlock it. If there is a group quota attached to this
+ * dquot, carefully release that too without tripping over
+ * deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dquot_t     *gdqp;
+
+       ASSERT(dqp->q_nrefs > 0);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       trace_xfs_dqput(dqp);
+
+       if (dqp->q_nrefs != 1) {
+               dqp->q_nrefs--;
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       /*
+        * drop the dqlock and acquire the freelist and dqlock
+        * in the right order; but try to get it out-of-order first
+        */
+       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+               trace_xfs_dqput_wait(dqp);
+               xfs_dqunlock(dqp);
+               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+               xfs_dqlock(dqp);
+       }
+
+       while (1) {
+               gdqp = NULL;
+
+               /* We can't depend on nrefs being == 1 here */
+               if (--dqp->q_nrefs == 0) {
+                       trace_xfs_dqput_free(dqp);
+
+                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+                       xfs_Gqm->qm_dqfrlist_cnt++;
+
+                       /*
+                        * If we just added a udquot to the freelist, then
+                        * we want to release the gdquot reference that
+                        * it (probably) has. Otherwise it'll keep the
+                        * gdquot from getting reclaimed.
+                        */
+                       if ((gdqp = dqp->q_gdquot)) {
+                               /*
+                                * Avoid a recursive dqput call
+                                */
+                               xfs_dqlock(gdqp);
+                               dqp->q_gdquot = NULL;
+                       }
+               }
+               xfs_dqunlock(dqp);
+
+               /*
+                * If we had a group quota inside the user quota as a hint,
+                * release it now.
+                */
+               if (! gdqp)
+                       break;
+               dqp = gdqp;
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+}
+
+/*
+ * Release a dquot. Flush it if dirty, then dqput() it.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+       xfs_dquot_t     *dqp)
+{
+       if (!dqp)
+               return;
+
+       trace_xfs_dqrele(dqp);
+
+       xfs_dqlock(dqp);
+       /*
+        * We don't care to flush it if the dquot is dirty here.
+        * That will create stutters that we want to avoid.
+        * Instead we do a delayed write when we try to reclaim
+        * a dirty dquot. Also xfs_sync will take part of the burden...
+        */
+       xfs_qm_dqput(dqp);
+}
+
+/*
+ * This is the dquot flushing I/O completion routine.  It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk.  It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes..
+ */
+STATIC void
+xfs_qm_dqflush_done(
+       struct xfs_buf          *bp,
+       struct xfs_log_item     *lip)
+{
+       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
+       xfs_dquot_t             *dqp = qip->qli_dquot;
+       struct xfs_ail          *ailp = lip->li_ailp;
+
+       /*
+        * We only want to pull the item from the AIL if its
+        * location in the log has not changed since we started the flush.
+        * Thus, we only bother if the dquot's lsn has
+        * not changed. First we check the lsn outside the lock
+        * since it's cheaper, and then we recheck while
+        * holding the lock before removing the dquot from the AIL.
+        */
+       if ((lip->li_flags & XFS_LI_IN_AIL) &&
+           lip->li_lsn == qip->qli_flush_lsn) {
+
+               /* xfs_trans_ail_delete() drops the AIL lock. */
+               spin_lock(&ailp->xa_lock);
+               if (lip->li_lsn == qip->qli_flush_lsn)
+                       xfs_trans_ail_delete(ailp, lip);
+               else
+                       spin_unlock(&ailp->xa_lock);
+       }
+
+       /*
+        * Release the dq's flush lock since we're done with it.
+        */
+       xfs_dqfunlock(dqp);
+}
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ */
+int
+xfs_qm_dqflush(
+       xfs_dquot_t             *dqp,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_buf          *bp;
+       struct xfs_disk_dquot   *ddqp;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       trace_xfs_dqflush(dqp);
+
+       /*
+        * If not dirty, or it's pinned and we are not supposed to block, nada.
+        */
+       if (!XFS_DQ_IS_DIRTY(dqp) ||
+           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+               xfs_dqfunlock(dqp);
+               return 0;
+       }
+       xfs_qm_dqunpin_wait(dqp);
+
+       /*
+        * This may have been unpinned because the filesystem is shutting
+        * down forcibly. If that's the case we must not write this dquot
+        * to disk, because the log record didn't make it to disk!
+        */
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               dqp->dq_flags &= ~XFS_DQ_DIRTY;
+               xfs_dqfunlock(dqp);
+               return XFS_ERROR(EIO);
+       }
+
+       /*
+        * Get the buffer containing the on-disk dquot
+        */
+       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+       if (error) {
+               ASSERT(error != ENOENT);
+               xfs_dqfunlock(dqp);
+               return error;
+       }
+
+       /*
+        * Calculate the location of the dquot inside the buffer.
+        */
+       ddqp = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot..
+        */
+       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
+       if (error) {
+               xfs_buf_relse(bp);
+               xfs_dqfunlock(dqp);
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               return XFS_ERROR(EIO);
+       }
+
+       /* This is the only portion of data that needs to persist */
+       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+
+       /*
+        * Clear the dirty field and remember the flush lsn for later use.
+        */
+       dqp->dq_flags &= ~XFS_DQ_DIRTY;
+
+       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+                                       &dqp->q_logitem.qli_item.li_lsn);
+
+       /*
+        * Attach an iodone routine so that we can remove this dquot from the
+        * AIL and release the flush lock once the dquot is synced to disk.
+        */
+       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
+                                 &dqp->q_logitem.qli_item);
+
+       /*
+        * If the buffer is pinned then push on the log so we won't
+        * get stuck waiting in the write for too long.
+        */
+       if (xfs_buf_ispinned(bp)) {
+               trace_xfs_dqflush_force(dqp);
+               xfs_log_force(mp, 0);
+       }
+
+       if (flags & SYNC_WAIT)
+               error = xfs_bwrite(mp, bp);
+       else
+               xfs_bdwrite(mp, bp);
+
+       trace_xfs_dqflush_done(dqp);
+
+       /*
+        * dqp is still locked, but caller is free to unlock it now.
+        */
+       return error;
+
+}
+
+int
+xfs_qm_dqlock_nowait(
+       xfs_dquot_t *dqp)
+{
+       return mutex_trylock(&dqp->q_qlock);
+}
+
+void
+xfs_dqlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_lock(&dqp->q_qlock);
+}
+
+void
+xfs_dqunlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+       if (dqp->q_logitem.qli_dquot == dqp) {
+               /* Once was dqp->q_mount, but might just have been cleared */
+               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
+                                       (xfs_log_item_t*)&(dqp->q_logitem));
+       }
+}
+
+
+void
+xfs_dqunlock_nonotify(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+}
+
+/*
+ * Lock two xfs_dquot structures.
+ *
+ * To avoid deadlocks we always lock the quota structure with
+ * the lowerd id first.
+ */
+void
+xfs_dqlock2(
+       xfs_dquot_t     *d1,
+       xfs_dquot_t     *d2)
+{
+       if (d1 && d2) {
+               ASSERT(d1 != d2);
+               if (be32_to_cpu(d1->q_core.d_id) >
+                   be32_to_cpu(d2->q_core.d_id)) {
+                       mutex_lock(&d2->q_qlock);
+                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
+               } else {
+                       mutex_lock(&d1->q_qlock);
+                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
+               }
+       } else if (d1) {
+               mutex_lock(&d1->q_qlock);
+       } else if (d2) {
+               mutex_lock(&d2->q_qlock);
+       }
+}
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dqhash_t    *qh = dqp->q_hash;
+       xfs_mount_t     *mp = dqp->q_mount;
+
+       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
+       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+
+       xfs_dqlock(dqp);
+       /*
+        * We really can't afford to purge a dquot that is
+        * referenced, because these are hard refs.
+        * It shouldn't happen in general because we went thru _all_ inodes in
+        * dqrele_all_inodes before calling this and didn't let the mountlock go.
+        * However it is possible that we have dquots with temporary
+        * references that are not attached to an inode. e.g. see xfs_setattr().
+        */
+       if (dqp->q_nrefs != 0) {
+               xfs_dqunlock(dqp);
+               mutex_unlock(&dqp->q_hash->qh_lock);
+               return (1);
+       }
+
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       /*
+        * If we're turning off quotas, we have to make sure that, for
+        * example, we don't delete quota disk blocks while dquots are
+        * in the process of getting written to those disk blocks.
+        * This dquot might well be on AIL, and we can't leave it there
+        * if we're turning off quotas. Basically, we need this flush
+        * lock, and are willing to block on it.
+        */
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * Block on the flush lock after nudging dquot buffer,
+                * if it is incore.
+                */
+               xfs_qm_dqflock_pushbuf_wait(dqp);
+       }
+
+       /*
+        * XXXIf we're turning this type of quotas off, we don't care
+        * about the dirty metadata sitting in this dquot. OTOH, if
+        * we're unmounting, we do care, so we flush it and wait.
+        */
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               int     error;
+
+               /* dqflush unlocks dqflock */
+               /*
+                * Given that dqpurge is a very rare occurrence, it is OK
+                * that we're holding the hashlist and mplist locks
+                * across the disk write. But, ... XXXsup
+                *
+                * We don't care about getting disk errors here. We need
+                * to purge this dquot anyway, so we go ahead regardless.
+                */
+               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
+               if (error)
+                       xfs_warn(mp, "%s: dquot %p flush failed",
+                               __func__, dqp);
+               xfs_dqflock(dqp);
+       }
+       ASSERT(atomic_read(&dqp->q_pincount) == 0);
+       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+       list_del_init(&dqp->q_hashlist);
+       qh->qh_version++;
+       list_del_init(&dqp->q_mplist);
+       mp->m_quotainfo->qi_dqreclaims++;
+       mp->m_quotainfo->qi_dquots--;
+       /*
+        * XXX Move this to the front of the freelist, if we can get the
+        * freelist lock.
+        */
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       dqp->q_mount = NULL;
+       dqp->q_hash = NULL;
+       dqp->dq_flags = XFS_DQ_INACTIVE;
+       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+       mutex_unlock(&qh->qh_lock);
+       return (0);
+}
+
+
+/*
+ * Give the buffer a little push if it is incore and
+ * wait on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+       xfs_dquot_t     *dqp)
+{
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_buf_t       *bp;
+
+       /*
+        * Check to see if the dquot has been flushed delayed
+        * write.  If so, grab its buffer and send it
+        * out immediately.  We'll be able to acquire
+        * the flush lock when the I/O completes.
+        */
+       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       if (!bp)
+               goto out_lock;
+
+       if (XFS_BUF_ISDELAYWRITE(bp)) {
+               if (xfs_buf_ispinned(bp))
+                       xfs_log_force(mp, 0);
+               xfs_buf_delwri_promote(bp);
+               wake_up_process(bp->b_target->bt_task);
+       }
+       xfs_buf_relse(bp);
+out_lock:
+       xfs_dqflock(dqp);
+}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
new file mode 100644 (file)
index 0000000..34b7e94
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both those in its algorithms. They make every attempt
+ * to not be a bottleneck when quotas are on and have minimal impact, if any,
+ * when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+       struct list_head  qh_list;
+       struct mutex      qh_lock;
+       uint              qh_version;   /* ever increasing version */
+       uint              qh_nelems;    /* number of dquots on the list */
+} xfs_dqhash_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The incore dquot structure
+ */
+typedef struct xfs_dquot {
+       uint             dq_flags;      /* various flags (XFS_DQ_*) */
+       struct list_head q_freelist;    /* global free list of dquots */
+       struct list_head q_mplist;      /* mount's list of dquots */
+       struct list_head q_hashlist;    /* gloabl hash list of dquots */
+       xfs_dqhash_t    *q_hash;        /* the hashchain header */
+       struct xfs_mount*q_mount;       /* filesystem this relates to */
+       struct xfs_trans*q_transp;      /* trans this belongs to currently */
+       uint             q_nrefs;       /* # active refs from inodes */
+       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
+       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
+       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
+
+       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
+       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
+       xfs_dq_logitem_t q_logitem;     /* dquot log item */
+       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
+       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
+       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
+       struct mutex     q_qlock;       /* quota lock */
+       struct completion q_flush;      /* flush completion queue */
+       atomic_t          q_pincount;   /* dquot pin count */
+       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
+} xfs_dquot_t;
+
+/*
+ * Lock hierarchy for q_qlock:
+ *     XFS_QLOCK_NORMAL is the implicit default,
+ *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ */
+enum {
+       XFS_QLOCK_NORMAL = 0,
+       XFS_QLOCK_NESTED,
+};
+
+#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
+
+/*
+ * Manage the q_flush completion queue embedded in the dquot.  This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
+ */
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+       wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+       return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+       complete(&dqp->q_flush);
+}
+
+#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
+#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
+#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
+                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
+
+extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int             xfs_qm_dqpurge(xfs_dquot_t *);
+extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void            xfs_qm_dqput(xfs_dquot_t *);
+extern void            xfs_dqlock(xfs_dquot_t *);
+extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void            xfs_dqunlock(xfs_dquot_t *);
+extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
new file mode 100644 (file)
index 0000000..9e0e2fa
--- /dev/null
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_dq_logitem, qli_item);
+}
+
+/*
+ * returns the number of iovecs needed to log the given dquot item.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       /*
+        * we need only two iovecs, one for the format, one for the real thing
+        */
+       return 2;
+}
+
+/*
+ * fills in the vector of log iovecs for the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *logvec)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+
+       logvec->i_addr = &qlip->qli_format;
+       logvec->i_len  = sizeof(xfs_dq_logformat_t);
+       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
+       logvec++;
+       logvec->i_addr = &qlip->qli_dquot->q_core;
+       logvec->i_len  = sizeof(xfs_disk_dquot_t);
+       logvec->i_type = XLOG_REG_TYPE_DQUOT;
+
+       ASSERT(2 == lip->li_desc->lid_size);
+       qlip->qli_format.qlf_size = 2;
+
+}
+
+/*
+ * Increment the pin count of the given dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       atomic_inc(&dqp->q_pincount);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_dqwait_unpin() if the count goes to 0.         The
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
+ */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(atomic_read(&dqp->q_pincount) > 0);
+       if (atomic_dec_and_test(&dqp->q_pincount))
+               wake_up(&dqp->q_pinwait);
+}
+
+/*
+ * Given the logitem, this writes the corresponding dquot entry to disk
+ * asynchronously. This is called with the dquot entry securely locked;
+ * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
+ * at the end.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       /*
+        * Since we were able to lock the dquot's flush lock and
+        * we found it on the AIL, the dquot must be dirty.  This
+        * is because the dquot is removed from the AIL while still
+        * holding the flush lock in xfs_dqflush_done().  Thus, if
+        * we found it in the AIL and were able to obtain the flush
+        * lock without sleeping, then there must not have been
+        * anyone in the process of flushing the dquot.
+        */
+       error = xfs_qm_dqflush(dqp, 0);
+       if (error)
+               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+                       __func__, error, dqp);
+       xfs_dqunlock(dqp);
+}
+
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       /*
+        * We always re-log the entire dquot when it becomes dirty,
+        * so, the latest copy _is_ the only one that matters.
+        */
+       return lsn;
+}
+
+/*
+ * This is called to wait for the given dquot to be unpinned.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+       struct xfs_dquot        *dqp)
+{
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       if (atomic_read(&dqp->q_pincount) == 0)
+               return;
+
+       /*
+        * Give the log a push so we don't wait here too long.
+        */
+       xfs_log_force(dqp->q_mount, 0);
+       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.
+ *
+ * We must not be holding the AIL lock at this point. Calling incore() to
+ * search the buffer cache can be a time consuming thing, and AIL lock is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+       struct xfs_dquot        *dqp = qlip->qli_dquot;
+       struct xfs_buf          *bp;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * If flushlock isn't locked anymore, chances are that the
+        * inode flush completed and the inode was taken off the AIL.
+        * So, just get out.
+        */
+       if (completion_done(&dqp->q_flush) ||
+           !(lip->li_flags & XFS_LI_IN_AIL)) {
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       xfs_dqunlock(dqp);
+       if (!bp)
+               return;
+       if (XFS_BUF_ISDELAYWRITE(bp))
+               xfs_buf_delwri_promote(bp);
+       xfs_buf_relse(bp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item.  Don't sleep on the dquot lock or the flush lock.
+ * If the flush lock is already held, indicating that the dquot has
+ * been or is in the process of being flushed, then see if we can
+ * find the dquot's buffer in the buffer cache without sleeping.  If
+ * we can and it is marked delayed write, then we want to send it out.
+ * We delay doing so until the push routine, though, to avoid sleeping
+ * in any device strategy routines.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       if (atomic_read(&dqp->q_pincount) > 0)
+               return XFS_ITEM_PINNED;
+
+       if (!xfs_qm_dqlock_nowait(dqp))
+               return XFS_ITEM_LOCKED;
+
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * dquot has already been flushed to the backing buffer,
+                * leave it locked, pushbuf routine will unlock it.
+                */
+               return XFS_ITEM_PUSHBUF;
+       }
+
+       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+       return XFS_ITEM_SUCCESS;
+}
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the fields of the dquot and dquot log item that
+ * are specific to the current transaction.  If the
+ * hold flags is set, do not unlock the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * Clear the transaction pointer in the dquot
+        */
+       dqp->q_transp = NULL;
+
+       /*
+        * dquots are never 'held' from getting unlocked at the end of
+        * a transaction.  Their locking and unlocking is hidden inside the
+        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+        * for the logitem.
+        */
+       xfs_dqunlock(dqp);
+}
+
+/*
+ * this needs to stamp an lsn into the dquot, I think.
+ * rpc's that look at user dquot's would then have to
+ * push on the dependency recorded in the dquot
+ */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+}
+
+/*
+ * This is the ops vector for dquots
+ */
+static struct xfs_item_ops xfs_dquot_item_ops = {
+       .iop_size       = xfs_qm_dquot_logitem_size,
+       .iop_format     = xfs_qm_dquot_logitem_format,
+       .iop_pin        = xfs_qm_dquot_logitem_pin,
+       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
+       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
+       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
+       .iop_committed  = xfs_qm_dquot_logitem_committed,
+       .iop_push       = xfs_qm_dquot_logitem_push,
+       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
+       .iop_committing = xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+       struct xfs_dquot        *dqp)
+{
+       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
+
+       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
+                                       &xfs_dquot_item_ops);
+       lp->qli_dquot = dqp;
+       lp->qli_format.qlf_type = XFS_LI_DQUOT;
+       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
+       lp->qli_format.qlf_blkno = dqp->q_blkno;
+       lp->qli_format.qlf_len = 1;
+       /*
+        * This is just the offset of this dquot within its buffer
+        * (which is currently 1 FSB and probably won't change).
+        * Hence 32 bits for this offset should be just fine.
+        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+        * here, and recompute it at recovery time.
+        */
+       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
+
+/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
+
+static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_qoff_logitem, qql_item);
+}
+
+
+/*
+ * This returns the number of iovecs needed to log the given quotaoff item.
+ * We only need 1 iovec for an quotaoff item.  It just logs the
+ * quotaoff_log_format structure.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       return 1;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given quotaoff log item. We use only 1 iovec, and we point that
+ * at the quotaoff_log_format structure embedded in the quotaoff item.
+ * It is at this point that we assert that all of the extent
+ * slots in the quotaoff item have been filled.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *log_vector)
+{
+       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
+
+       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+       log_vector->i_addr = &qflip->qql_format;
+       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
+       qflip->qql_format.qf_size = 1;
+}
+
+/*
+ * Pinning has no meaning for an quotaoff item, so just return.
+ */
+STATIC void
+xfs_qm_qoff_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for an quotaoff item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+}
+
+/*
+ * Quotaoff items have no locking, so just return success.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       return XFS_ITEM_LOCKED;
+}
+
+/*
+ * Quotaoff items have no locking or pushing, so return failure
+ * so that the caller doesn't bother with us.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * The quotaoff-start-item is logged only once and cannot be moved in the log,
+ * so simply return the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       return lsn;
+}
+
+/*
+ * There isn't much you can do to push on an quotaoff item.  It is simply
+ * stuck waiting for the log to be flushed to disk.
+ */
+STATIC void
+xfs_qm_qoff_logitem_push(
+       struct xfs_log_item     *lip)
+{
+}
+
+
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
+
+       /*
+        * Delete the qoff-start logitem from the AIL.
+        * xfs_trans_ail_delete() drops the AIL lock.
+        */
+       spin_lock(&ailp->xa_lock);
+       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
+
+       kmem_free(qfs);
+       kmem_free(qfe);
+       return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this.  I think we can
+ * just ignore it.  The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen?  Also, do we need different routines for
+ * quotaoff start and quotaoff end?  I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable).  If we do something else,
+ * then maybe we don't need two.
+ */
+STATIC void
+xfs_qm_qoff_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn)
+{
+}
+
+static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoffend_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * This is the ops vector shared by all quotaoff-start log items.
+ */
+static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoff_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate and initialize an quotaoff item of the correct quota type(s).
+ */
+struct xfs_qoff_logitem *
+xfs_qm_qoff_logitem_init(
+       struct xfs_mount        *mp,
+       struct xfs_qoff_logitem *start,
+       uint                    flags)
+{
+       struct xfs_qoff_logitem *qf;
+
+       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
+
+       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
+                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
+       qf->qql_item.li_mountp = mp;
+       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
+       qf->qql_format.qf_flags = flags;
+       qf->qql_start_lip = start;
+       return qf;
+}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
new file mode 100644 (file)
index 0000000..5acae2a
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {
+       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
+       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
+       xfs_dq_logformat_t       qli_format;       /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {
+       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+       xfs_qoff_logformat_t     qql_format;    /* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+                                       struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *, uint);
+extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *);
+
+#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
new file mode 100644 (file)
index 0000000..75e5d32
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_mount.h"
+#include "xfs_export.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+/*
+ * Note that we only accept fileids which are long enough rather than allow
+ * the parent generation number to default to zero.  XFS considers zero a
+ * valid generation number not an invalid/wildcard value.
+ */
+static int xfs_fileid_length(int fileid_type)
+{
+       switch (fileid_type) {
+       case FILEID_INO32_GEN:
+               return 2;
+       case FILEID_INO32_GEN_PARENT:
+               return 4;
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               return 3;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               return 6;
+       }
+       return 255; /* invalid */
+}
+
+STATIC int
+xfs_fs_encode_fh(
+       struct dentry           *dentry,
+       __u32                   *fh,
+       int                     *max_len,
+       int                     connectable)
+{
+       struct fid              *fid = (struct fid *)fh;
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
+       struct inode            *inode = dentry->d_inode;
+       int                     fileid_type;
+       int                     len;
+
+       /* Directories don't need their parent encoded, they have ".." */
+       if (S_ISDIR(inode->i_mode) || !connectable)
+               fileid_type = FILEID_INO32_GEN;
+       else
+               fileid_type = FILEID_INO32_GEN_PARENT;
+
+       /*
+        * If the the filesystem may contain 64bit inode numbers, we need
+        * to use larger file handles that can represent them.
+        *
+        * While we only allocate inodes that do not fit into 32 bits any
+        * large enough filesystem may contain them, thus the slightly
+        * confusing looking conditional below.
+        */
+       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
+               fileid_type |= XFS_FILEID_TYPE_64FLAG;
+
+       /*
+        * Only encode if there is enough space given.  In practice
+        * this means we can't export a filesystem with 64bit inodes
+        * over NFSv2 with the subtree_check export option; the other
+        * seven combinations work.  The real answer is "don't use v2".
+        */
+       len = xfs_fileid_length(fileid_type);
+       if (*max_len < len) {
+               *max_len = len;
+               return 255;
+       }
+       *max_len = len;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               spin_lock(&dentry->d_lock);
+               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN:
+               fid->i32.ino = inode->i_ino;
+               fid->i32.gen = inode->i_generation;
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               spin_lock(&dentry->d_lock);
+               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               fid64->ino = inode->i_ino;
+               fid64->gen = inode->i_generation;
+               break;
+       }
+
+       return fileid_type;
+}
+
+STATIC struct inode *
+xfs_nfs_get_inode(
+       struct super_block      *sb,
+       u64                     ino,
+       u32                     generation)
+ {
+       xfs_mount_t             *mp = XFS_M(sb);
+       xfs_inode_t             *ip;
+       int                     error;
+
+       /*
+        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
+        */
+       if (ino == 0)
+               return ERR_PTR(-ESTALE);
+
+       /*
+        * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+        * fine and not an indication of a corrupted filesystem as clients can
+        * send invalid file handles and we have to handle it gracefully..
+        */
+       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
+       if (error) {
+               /*
+                * EINVAL means the inode cluster doesn't exist anymore.
+                * This implies the filehandle is stale, so we should
+                * translate it here.
+                * We don't use ESTALE directly down the chain to not
+                * confuse applications using bulkstat that expect EINVAL.
+                */
+               if (error == EINVAL || error == ENOENT)
+                       error = ESTALE;
+               return ERR_PTR(-error);
+       }
+
+       if (ip->i_d.di_gen != generation) {
+               IRELE(ip);
+               return ERR_PTR(-ESTALE);
+       }
+
+       return VFS_I(ip);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       if (fh_len < xfs_fileid_length(fileid_type))
+               return NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+       case FILEID_INO32_GEN:
+               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
+                                             fid->i32.parent_gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
+                                             fid64->parent_gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_get_parent(
+       struct dentry           *child)
+{
+       int                     error;
+       struct xfs_inode        *cip;
+
+       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+       if (unlikely(error))
+               return ERR_PTR(-error);
+
+       return d_obtain_alias(VFS_I(cip));
+}
+
+STATIC int
+xfs_fs_nfs_commit_metadata(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_ipincount(ip)) {
+               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
+                               XFS_LOG_SYNC, NULL);
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       return error;
+}
+
+const struct export_operations xfs_export_operations = {
+       .encode_fh              = xfs_fs_encode_fh,
+       .fh_to_dentry           = xfs_fs_fh_to_dentry,
+       .fh_to_parent           = xfs_fs_fh_to_parent,
+       .get_parent             = xfs_fs_get_parent,
+       .commit_metadata        = xfs_fs_nfs_commit_metadata,
+};
diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h
new file mode 100644 (file)
index 0000000..3272b6a
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_EXPORT_H__
+#define __XFS_EXPORT_H__
+
+/*
+ * Common defines for code related to exporting XFS filesystems over NFS.
+ *
+ * The NFS fileid goes out on the wire as an array of
+ * 32bit unsigned ints in host order.  There are 5 possible
+ * formats.
+ *
+ * (1) fileid_type=0x00
+ *     (no fileid data; handled by the generic code)
+ *
+ * (2) fileid_type=0x01
+ *     inode-num
+ *     generation
+ *
+ * (3) fileid_type=0x02
+ *     inode-num
+ *     generation
+ *     parent-inode-num
+ *     parent-generation
+ *
+ * (4) fileid_type=0x81
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *
+ * (5) fileid_type=0x82
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *     parent-inode-num-lo32
+ *     parent-inode-num-hi32
+ *     parent-generation
+ *
+ * Note, the NFS filehandle also includes an fsid portion which
+ * may have an inode number in it.  That number is hardcoded to
+ * 32bits and there is no way for XFS to intercept it.  In
+ * practice this means when exporting an XFS filesystem with 64bit
+ * inodes you should either export the mountpoint (rather than
+ * a subdirectory) or use the "fsid" export option.
+ */
+
+struct xfs_fid64 {
+       u64 ino;
+       u32 gen;
+       u64 parent_ino;
+       u32 parent_gen;
+} __attribute__((packed));
+
+/* This flag goes on the wire.  Don't play with it. */
+#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
+
+#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
new file mode 100644 (file)
index 0000000..7f7b424
--- /dev/null
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
+#include "xfs_trace.h"
+
+#include <linux/dcache.h>
+#include <linux/falloc.h>
+
+static const struct vm_operations_struct xfs_file_vm_ops;
+
+/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_lock(&VFS_I(ip)->i_mutex);
+       xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_iunlock(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_ilock_demote(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
+ *     xfs_iozero
+ *
+ *     xfs_iozero clears the specified range of buffer supplied,
+ *     and marks all the affected blocks as valid and modified.  If
+ *     an affected block is not allocated, it will be allocated.  If
+ *     an affected block is not completely overwritten, and is not
+ *     valid before the operation, it will be read from disk before
+ *     being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+       struct xfs_inode        *ip,    /* inode                        */
+       loff_t                  pos,    /* offset in file               */
+       size_t                  count)  /* size of data to zero         */
+{
+       struct page             *page;
+       struct address_space    *mapping;
+       int                     status;
+
+       mapping = VFS_I(ip)->i_mapping;
+       do {
+               unsigned offset, bytes;
+               void *fsdata;
+
+               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+               bytes = PAGE_CACHE_SIZE - offset;
+               if (bytes > count)
+                       bytes = count;
+
+               status = pagecache_write_begin(NULL, mapping, pos, bytes,
+                                       AOP_FLAG_UNINTERRUPTIBLE,
+                                       &page, &fsdata);
+               if (status)
+                       break;
+
+               zero_user(page, offset, bytes);
+
+               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+                                       page, fsdata);
+               WARN_ON(status <= 0); /* can't return less than zero! */
+               pos += bytes;
+               count -= bytes;
+               status = 0;
+       } while (count);
+
+       return (-status);
+}
+
+STATIC int
+xfs_file_fsync(
+       struct file             *file,
+       loff_t                  start,
+       loff_t                  end,
+       int                     datasync)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error = 0;
+       int                     log_flushed = 0;
+
+       trace_xfs_file_fsync(ip);
+
+       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       if (error)
+               return error;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_ioend_wait(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+       if (mp->m_flags & XFS_MOUNT_BARRIER) {
+               /*
+                * If we have an RT and/or log subvolume we need to make sure
+                * to flush the write cache the device used for file data
+                * first.  This is to ensure newly written file data make
+                * it to disk before logging the new inode size in case of
+                * an extending write.
+                */
+               if (XFS_IS_REALTIME_INODE(ip))
+                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+               else if (mp->m_logdev_targp != mp->m_ddev_targp)
+                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
+       }
+
+       /*
+        * We always need to make sure that the required inode state is safe on
+        * disk.  The inode might be clean but we still might need to force the
+        * log because of committed transactions that haven't hit the disk yet.
+        * Likewise, there could be unflushed non-transactional changes to the
+        * inode core that have to go to disk and this requires us to issue
+        * a synchronous transaction to capture these changes correctly.
+        *
+        * This code relies on the assumption that if the i_update_core field
+        * of the inode is clear and the inode is unpinned then it is clean
+        * and no action is required.
+        */
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+       /*
+        * First check if the VFS inode is marked dirty.  All the dirtying
+        * of non-transactional updates no goes through mark_inode_dirty*,
+        * which allows us to distinguish beteeen pure timestamp updates
+        * and i_size updates which need to be caught for fdatasync.
+        * After that also theck for the dirty state in the XFS inode, which
+        * might gets cleared when the inode gets written out via the AIL
+        * or xfs_iflush_cluster.
+        */
+       if (((inode->i_state & I_DIRTY_DATASYNC) ||
+           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+           ip->i_update_core) {
+               /*
+                * Kick off a transaction to log the inode core to get the
+                * updates.  The sync transaction will also force the log.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+               error = xfs_trans_reserve(tp, 0,
+                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+               if (error) {
+                       xfs_trans_cancel(tp, 0);
+                       return -error;
+               }
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * Note - it's possible that we might have pushed ourselves out
+                * of the way during trans_reserve which would flush the inode.
+                * But there's no guarantee that the inode buffer has actually
+                * gone out yet (it's delwri).  Plus the buffer could be pinned
+                * anyway if it's part of an inode in another recent
+                * transaction.  So we play it safe and fire off the
+                * transaction anyway.
+                */
+               xfs_trans_ijoin(tp, ip);
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               xfs_trans_set_sync(tp);
+               error = _xfs_trans_commit(tp, 0, &log_flushed);
+
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       } else {
+               /*
+                * Timestamps/size haven't changed since last inode flush or
+                * inode transaction commit.  That means either nothing got
+                * written or a transaction committed which caught the updates.
+                * If the latter happened and the transaction hasn't hit the
+                * disk yet, the inode will be still be pinned.  If it is,
+                * force the log.
+                */
+               if (xfs_ipincount(ip)) {
+                       error = _xfs_log_force_lsn(mp,
+                                       ip->i_itemp->ili_last_lsn,
+                                       XFS_LOG_SYNC, &log_flushed);
+               }
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       }
+
+       /*
+        * If we only have a single device, and the log force about was
+        * a no-op we might have to flush the data device cache here.
+        * This can only happen for fdatasync/O_DSYNC if we were overwriting
+        * an already allocated file and thus do not have any metadata to
+        * commit.
+        */
+       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+           mp->m_logdev_targp == mp->m_ddev_targp &&
+           !XFS_IS_REALTIME_INODE(ip) &&
+           !log_flushed)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
+
+       return -error;
+}
+
+STATIC ssize_t
+xfs_file_aio_read(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       size_t                  size = 0;
+       ssize_t                 ret = 0;
+       int                     ioflags = 0;
+       xfs_fsize_t             n;
+       unsigned long           seg;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ioflags |= IO_ISDIRECT;
+       if (file->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       /* START copy & waste from filemap.c */
+       for (seg = 0; seg < nr_segs; seg++) {
+               const struct iovec *iv = &iovp[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               size += iv->iov_len;
+               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                       return XFS_ERROR(-EINVAL);
+       }
+       /* END copy & waste from filemap.c */
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+               if ((iocb->ki_pos & target->bt_smask) ||
+                   (size & target->bt_smask)) {
+                       if (iocb->ki_pos == ip->i_size)
+                               return 0;
+                       return -XFS_ERROR(EINVAL);
+               }
+       }
+
+       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+       if (n <= 0 || size == 0)
+               return 0;
+
+       if (n < size)
+               size = n;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
+               if (inode->i_mapping->nrpages) {
+                       ret = -xfs_flushinval_pages(ip,
+                                       (iocb->ki_pos & PAGE_CACHE_MASK),
+                                       -1, FI_REMAPF_LOCKED);
+                       if (ret) {
+                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+                               return ret;
+                       }
+               }
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+       } else
+               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
+
+       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_splice_read(
+       struct file             *infilp,
+       loff_t                  *ppos,
+       struct pipe_inode_info  *pipe,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       if (infilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC void
+xfs_aio_write_isize_update(
+       struct inode    *inode,
+       loff_t          *ppos,
+       ssize_t         bytes_written)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             isize = i_size_read(inode);
+
+       if (bytes_written > 0)
+               XFS_STATS_ADD(xs_write_bytes, bytes_written);
+
+       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
+                                       *ppos > isize))
+               *ppos = isize;
+
+       if (*ppos > ip->i_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
+ * part of the I/O may have been written to disk before the error occurred.  In
+ * this case the on-disk file size may have been adjusted beyond the in-memory
+ * file size and now needs to be truncated back.
+ */
+STATIC void
+xfs_aio_write_newsize_update(
+       struct xfs_inode        *ip)
+{
+       if (ip->i_new_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               ip->i_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * couuld cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
+STATIC ssize_t
+xfs_file_splice_write(
+       struct pipe_inode_info  *pipe,
+       struct file             *outfilp,
+       loff_t                  *ppos,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct inode            *inode = outfilp->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       if (outfilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+
+       xfs_aio_write_isize_update(inode, ppos, ret);
+       xfs_aio_write_newsize_update(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF.  We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int                             /* error (positive) */
+xfs_zero_last_block(
+       xfs_inode_t     *ip,
+       xfs_fsize_t     offset,
+       xfs_fsize_t     isize)
+{
+       xfs_fileoff_t   last_fsb;
+       xfs_mount_t     *mp = ip->i_mount;
+       int             nimaps;
+       int             zero_offset;
+       int             zero_len;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+       if (zero_offset == 0) {
+               /*
+                * There are no extra bytes in the last block on disk to
+                * zero, so return.
+                */
+               return 0;
+       }
+
+       last_fsb = XFS_B_TO_FSBT(mp, isize);
+       nimaps = 1;
+       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
+                         &nimaps, NULL);
+       if (error) {
+               return error;
+       }
+       ASSERT(nimaps > 0);
+       /*
+        * If the block underlying isize is just a hole, then there
+        * is nothing to zero.
+        */
+       if (imap.br_startblock == HOLESTARTBLOCK) {
+               return 0;
+       }
+       /*
+        * Zero the part of the last block beyond the EOF, and write it
+        * out sync.  We need to drop the ilock while we do this so we
+        * don't deadlock when the buffer cache calls back to us.
+        */
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       zero_len = mp->m_sb.sb_blocksize - zero_offset;
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
+       error = xfs_iozero(ip, isize, zero_len);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF.  This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file.  This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.  If fill is set,
+ * then any holes in the range are filled and zeroed.  If not, the holes
+ * are left alone as holes.
+ */
+
+int                                    /* error (positive) */
+xfs_zero_eof(
+       xfs_inode_t     *ip,
+       xfs_off_t       offset,         /* starting I/O offset */
+       xfs_fsize_t     isize)          /* current inode size */
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_fileoff_t   start_zero_fsb;
+       xfs_fileoff_t   end_zero_fsb;
+       xfs_fileoff_t   zero_count_fsb;
+       xfs_fileoff_t   last_fsb;
+       xfs_fileoff_t   zero_off;
+       xfs_fsize_t     zero_len;
+       int             nimaps;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+       ASSERT(offset > isize);
+
+       /*
+        * First handle zeroing the block on which isize resides.
+        * We only zero a part of that block so it is handled specially.
+        */
+       error = xfs_zero_last_block(ip, offset, isize);
+       if (error) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+               return error;
+       }
+
+       /*
+        * Calculate the range between the new size and the old
+        * where blocks needing to be zeroed may exist.  To get the
+        * block where the last byte in the file currently resides,
+        * we need to subtract one from the size and truncate back
+        * to a block boundary.  We subtract 1 in case the size is
+        * exactly on a block boundary.
+        */
+       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+       if (last_fsb == end_zero_fsb) {
+               /*
+                * The size was only incremented on its last block.
+                * We took care of that above, so just return.
+                */
+               return 0;
+       }
+
+       ASSERT(start_zero_fsb <= end_zero_fsb);
+       while (start_zero_fsb <= end_zero_fsb) {
+               nimaps = 1;
+               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+                                 0, NULL, 0, &imap, &nimaps, NULL);
+               if (error) {
+                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+                       return error;
+               }
+               ASSERT(nimaps > 0);
+
+               if (imap.br_state == XFS_EXT_UNWRITTEN ||
+                   imap.br_startblock == HOLESTARTBLOCK) {
+                       /*
+                        * This loop handles initializing pages that were
+                        * partially initialized by the code below this
+                        * loop. It basically zeroes the part of the page
+                        * that sits on a hole and sets the page as P_HOLE
+                        * and calls remapf if it is a mapped file.
+                        */
+                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                       continue;
+               }
+
+               /*
+                * There are blocks we need to zero.
+                * Drop the inode lock while we're doing the I/O.
+                * We'll still have the iolock to protect us.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+               if ((zero_off + zero_len) > offset)
+                       zero_len = offset - zero_off;
+
+               error = xfs_iozero(ip, zero_off, zero_len);
+               if (error) {
+                       goto out_lock;
+               }
+
+               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return 0;
+
+out_lock:
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Common pre-write limit and setup checks.
+ *
+ * Returns with iolock held according to @iolock.
+ */
+STATIC ssize_t
+xfs_file_aio_write_checks(
+       struct file             *file,
+       loff_t                  *pos,
+       size_t                  *count,
+       int                     *iolock)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     error = 0;
+
+       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
+       if (error) {
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
+               *iolock = 0;
+               return error;
+       }
+
+       new_size = *pos + *count;
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+
+       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+               file_update_time(file);
+
+       /*
+        * If the offset is beyond the size of the file, we need to zero any
+        * blocks that fall between the existing EOF and the start of this
+        * write.
+        */
+       if (*pos > ip->i_size)
+               error = -xfs_zero_eof(ip, *pos, ip->i_size);
+
+       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       if (error)
+               return error;
+
+       /*
+        * If we're writing the file then make sure to clear the setuid and
+        * setgid bits if the process is not being run by root.  This keeps
+        * people from modifying setuid and setgid binaries.
+        */
+       return file_remove_suid(file);
+
+}
+
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping.
+ *
+ * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
+ * until we're sure the bytes at the new EOF have been zeroed and/or the cached
+ * pages are flushed out.
+ *
+ * In most cases the direct IO writes will be done holding IOLOCK_SHARED
+ * allowing them to be done in parallel with reads and other direct IO writes.
+ * However, if the IO is not aligned to filesystem blocks, the direct IO layer
+ * needs to do sub-block zeroing and that requires serialisation against other
+ * direct IOs to the same block. In this case we need to serialise the
+ * submission of the unaligned IOs so that we don't get racing block zeroing in
+ * the dio layer.  To avoid the problem with aio, we also need to wait for
+ * outstanding IOs to complete so that unwritten extent conversion is completed
+ * before we try to map the overlapping block. This is currently implemented by
+ * hitting it with a big hammer (i.e. xfs_ioend_wait()).
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 ret = 0;
+       size_t                  count = ocount;
+       int                     unaligned_io = 0;
+       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
+                                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+       *iolock = 0;
+       if ((pos & target->bt_smask) || (count & target->bt_smask))
+               return -XFS_ERROR(EINVAL);
+
+       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+               unaligned_io = 1;
+
+       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
+               *iolock = XFS_IOLOCK_EXCL;
+       else
+               *iolock = XFS_IOLOCK_SHARED;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       if (mapping->nrpages) {
+               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+                                                       FI_REMAPF_LOCKED);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * If we are doing unaligned IO, wait for all other IO to drain,
+        * otherwise demote the lock if we had to flush cached pages
+        */
+       if (unaligned_io)
+               xfs_ioend_wait(ip);
+       else if (*iolock == XFS_IOLOCK_EXCL) {
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               *iolock = XFS_IOLOCK_SHARED;
+       }
+
+       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_direct_write(iocb, iovp,
+                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+       /* No fallback to buffered IO on errors for XFS. */
+       ASSERT(ret < 0 || ret == count);
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     enospc = 0;
+       size_t                  count = ocount;
+
+       *iolock = XFS_IOLOCK_EXCL;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       /* We can write back this queue in page reclaim */
+       current->backing_dev_info = mapping->backing_dev_info;
+
+write_retry:
+       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+                       pos, &iocb->ki_pos, count, ret);
+       /*
+        * if we just got an ENOSPC, flush the inode now we aren't holding any
+        * page locks and retry *once*
+        */
+       if (ret == -ENOSPC && !enospc) {
+               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+               if (ret)
+                       return ret;
+               enospc = 1;
+               goto write_retry;
+       }
+       current->backing_dev_info = NULL;
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     iolock;
+       size_t                  ocount = 0;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+       if (ret)
+               return ret;
+
+       if (ocount == 0)
+               return 0;
+
+       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+       else
+               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+
+       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
+
+       if (ret <= 0)
+               goto out_unlock;
+
+       /* Handle various SYNC-type writes */
+       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+               loff_t end = pos + ret - 1;
+               int error;
+
+               xfs_rw_iunlock(ip, iolock);
+               error = xfs_file_fsync(file, pos, end,
+                                     (file->f_flags & __O_SYNC) ? 0 : 1);
+               xfs_rw_ilock(ip, iolock);
+               if (error)
+                       ret = error;
+       }
+
+out_unlock:
+       xfs_aio_write_newsize_update(ip);
+       xfs_rw_iunlock(ip, iolock);
+       return ret;
+}
+
+STATIC long
+xfs_file_fallocate(
+       struct file     *file,
+       int             mode,
+       loff_t          offset,
+       loff_t          len)
+{
+       struct inode    *inode = file->f_path.dentry->d_inode;
+       long            error;
+       loff_t          new_size = 0;
+       xfs_flock64_t   bf;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             cmd = XFS_IOC_RESVSP;
+       int             attr_flags = XFS_ATTR_NOLOCK;
+
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return -EOPNOTSUPP;
+
+       bf.l_whence = 0;
+       bf.l_start = offset;
+       bf.l_len = len;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               cmd = XFS_IOC_UNRESVSP;
+
+       /* check the new inode size is valid before allocating */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+           offset + len > i_size_read(inode)) {
+               new_size = offset + len;
+               error = inode_newsize_ok(inode, new_size);
+               if (error)
+                       goto out_unlock;
+       }
+
+       if (file->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
+       if (error)
+               goto out_unlock;
+
+       /* Change file size if needed */
+       if (new_size) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_SIZE;
+               iattr.ia_size = new_size;
+               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+       }
+
+out_unlock:
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return error;
+}
+
+
+STATIC int
+xfs_file_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+               return -EFBIG;
+       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+               return -EIO;
+       return 0;
+}
+
+STATIC int
+xfs_dir_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       int             mode;
+       int             error;
+
+       error = xfs_file_open(inode, file);
+       if (error)
+               return error;
+
+       /*
+        * If there are any blocks, read-ahead block 0 as we're almost
+        * certain to have the next operation be a read there.
+        */
+       mode = xfs_ilock_map_shared(ip);
+       if (ip->i_d.di_nextents > 0)
+               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+       xfs_iunlock(ip, mode);
+       return 0;
+}
+
+STATIC int
+xfs_file_release(
+       struct inode    *inode,
+       struct file     *filp)
+{
+       return -xfs_release(XFS_I(inode));
+}
+
+STATIC int
+xfs_file_readdir(
+       struct file     *filp,
+       void            *dirent,
+       filldir_t       filldir)
+{
+       struct inode    *inode = filp->f_path.dentry->d_inode;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             error;
+       size_t          bufsize;
+
+       /*
+        * The Linux API doesn't pass down the total size of the buffer
+        * we read into down to the filesystem.  With the filldir concept
+        * it's not needed for correct information, but the XFS dir2 leaf
+        * code wants an estimate of the buffer size to calculate it's
+        * readahead window and size the buffers used for mapping to
+        * physical blocks.
+        *
+        * Try to give it an estimate that's good enough, maybe at some
+        * point we can change the ->readdir prototype to include the
+        * buffer size.  For now we use the current glibc buffer size.
+        */
+       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+
+       error = xfs_readdir(ip, dirent, bufsize,
+                               (xfs_off_t *)&filp->f_pos, filldir);
+       if (error)
+               return -error;
+       return 0;
+}
+
+STATIC int
+xfs_file_mmap(
+       struct file     *filp,
+       struct vm_area_struct *vma)
+{
+       vma->vm_ops = &xfs_file_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
+
+       file_accessed(filp);
+       return 0;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
+}
+
+const struct file_operations xfs_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .aio_read       = xfs_file_aio_read,
+       .aio_write      = xfs_file_aio_write,
+       .splice_read    = xfs_file_splice_read,
+       .splice_write   = xfs_file_splice_write,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .mmap           = xfs_file_mmap,
+       .open           = xfs_file_open,
+       .release        = xfs_file_release,
+       .fsync          = xfs_file_fsync,
+       .fallocate      = xfs_file_fallocate,
+};
+
+const struct file_operations xfs_dir_file_operations = {
+       .open           = xfs_dir_open,
+       .read           = generic_read_dir,
+       .readdir        = xfs_file_readdir,
+       .llseek         = generic_file_llseek,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .fsync          = xfs_file_fsync,
+};
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+       .fault          = filemap_fault,
+       .page_mkwrite   = xfs_vm_page_mkwrite,
+};
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
new file mode 100644 (file)
index 0000000..ed88ed1
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+
+/*
+ * note: all filemap functions return negative error codes. These
+ * need to be inverted before returning to the xfs core functions.
+ */
+void
+xfs_tosspages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       /* can't toss partial tail pages, so mask them out */
+       last &= ~(PAGE_SIZE - 1);
+       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
+}
+
+int
+xfs_flushinval_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+
+       trace_xfs_pagecache_inval(ip, first, last);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = filemap_write_and_wait_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (!ret)
+               truncate_inode_pages_range(mapping, first, last);
+       return -ret;
+}
+
+int
+xfs_flush_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       uint64_t        flags,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+       int             ret2;
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = -filemap_fdatawrite_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (flags & XBF_ASYNC)
+               return ret;
+       ret2 = xfs_wait_on_pages(ip, first, last);
+       if (!ret)
+               ret = ret2;
+       return ret;
+}
+
+int
+xfs_wait_on_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               return -filemap_fdatawait_range(mapping, first,
+                                       last == -1 ? ip->i_size - 1 : last);
+       }
+       return 0;
+}
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
new file mode 100644 (file)
index 0000000..76e81cf
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sysctl.h"
+
+/*
+ * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
+ * other XFS code uses these values.  Times are measured in centisecs (i.e.
+ * 100ths of a second).
+ */
+xfs_param_t xfs_params = {
+                         /*    MIN             DFLT            MAX     */
+       .sgid_inherit   = {     0,              0,              1       },
+       .symlink_mode   = {     0,              0,              1       },
+       .panic_mask     = {     0,              0,              255     },
+       .error_level    = {     0,              3,              11      },
+       .syncd_timer    = {     1*100,          30*100,         7200*100},
+       .stats_clear    = {     0,              0,              1       },
+       .inherit_sync   = {     0,              1,              1       },
+       .inherit_nodump = {     0,              1,              1       },
+       .inherit_noatim = {     0,              1,              1       },
+       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
+       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
+       .inherit_nosym  = {     0,              0,              1       },
+       .rotorstep      = {     1,              1,              255     },
+       .inherit_nodfrg = {     0,              1,              1       },
+       .fstrm_timer    = {     1,              30*100,         3600*100},
+};
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
new file mode 100644 (file)
index 0000000..f7ce7de
--- /dev/null
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ioctl.h"
+#include "xfs_rtalloc.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+#include "xfs_vnodeops.h"
+#include "xfs_discard.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
+#include "xfs_export.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/exportfs.h>
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ *    returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ *    returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ *    returns full handle for a path
+ */
+int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       int                     hsize;
+       xfs_handle_t            handle;
+       struct inode            *inode;
+       struct file             *file = NULL;
+       struct path             path;
+       int                     error;
+       struct xfs_inode        *ip;
+
+       if (cmd == XFS_IOC_FD_TO_HANDLE) {
+               file = fget(hreq->fd);
+               if (!file)
+                       return -EBADF;
+               inode = file->f_path.dentry->d_inode;
+       } else {
+               error = user_lpath((const char __user *)hreq->path, &path);
+               if (error)
+                       return error;
+               inode = path.dentry->d_inode;
+       }
+       ip = XFS_I(inode);
+
+       /*
+        * We can only generate handles for inodes residing on a XFS filesystem,
+        * and only for regular files, directories or symbolic links.
+        */
+       error = -EINVAL;
+       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
+               goto out_put;
+
+       error = -EBADF;
+       if (!S_ISREG(inode->i_mode) &&
+           !S_ISDIR(inode->i_mode) &&
+           !S_ISLNK(inode->i_mode))
+               goto out_put;
+
+
+       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
+
+       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
+               /*
+                * This handle only contains an fsid, zero the rest.
+                */
+               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
+               hsize = sizeof(xfs_fsid_t);
+       } else {
+               int             lock_mode;
+
+               lock_mode = xfs_ilock_map_shared(ip);
+               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+                                       sizeof(handle.ha_fid.fid_len);
+               handle.ha_fid.fid_pad = 0;
+               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_ino = ip->i_ino;
+               xfs_iunlock_map_shared(ip, lock_mode);
+
+               hsize = XFS_HSIZE(handle);
+       }
+
+       error = -EFAULT;
+       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
+           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
+               goto out_put;
+
+       error = 0;
+
+ out_put:
+       if (cmd == XFS_IOC_FD_TO_HANDLE)
+               fput(file);
+       else
+               path_put(&path);
+       return error;
+}
+
+/*
+ * No need to do permission checks on the various pathname components
+ * as the handle operations are privileged.
+ */
+STATIC int
+xfs_handle_acceptable(
+       void                    *context,
+       struct dentry           *dentry)
+{
+       return 1;
+}
+
+/*
+ * Convert userspace handle data into a dentry.
+ */
+struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen)
+{
+       xfs_handle_t            handle;
+       struct xfs_fid64        fid;
+
+       /*
+        * Only allow handle opens under a directory.
+        */
+       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
+               return ERR_PTR(-ENOTDIR);
+
+       if (hlen != sizeof(xfs_handle_t))
+               return ERR_PTR(-EINVAL);
+       if (copy_from_user(&handle, uhandle, hlen))
+               return ERR_PTR(-EFAULT);
+       if (handle.ha_fid.fid_len !=
+           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
+               return ERR_PTR(-EINVAL);
+
+       memset(&fid, 0, sizeof(struct fid));
+       fid.ino = handle.ha_fid.fid_ino;
+       fid.gen = handle.ha_fid.fid_gen;
+
+       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
+                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
+                       xfs_handle_acceptable, NULL);
+}
+
+STATIC struct dentry *
+xfs_handlereq_to_dentry(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
+}
+
+int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       const struct cred       *cred = current_cred();
+       int                     error;
+       int                     fd;
+       int                     permflag;
+       struct file             *filp;
+       struct inode            *inode;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       inode = dentry->d_inode;
+
+       /* Restrict xfs_open_by_handle to directories & regular files. */
+       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+#if BITS_PER_LONG != 32
+       hreq->oflags |= O_LARGEFILE;
+#endif
+
+       /* Put open permission in namei format. */
+       permflag = hreq->oflags;
+       if ((permflag+1) & O_ACCMODE)
+               permflag++;
+       if (permflag & O_TRUNC)
+               permflag |= 2;
+
+       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+               error = -XFS_ERROR(EACCES);
+               goto out_dput;
+       }
+
+       /* Can't write directories. */
+       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+               error = -XFS_ERROR(EISDIR);
+               goto out_dput;
+       }
+
+       fd = get_unused_fd();
+       if (fd < 0) {
+               error = fd;
+               goto out_dput;
+       }
+
+       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
+                          hreq->oflags, cred);
+       if (IS_ERR(filp)) {
+               put_unused_fd(fd);
+               return PTR_ERR(filp);
+       }
+
+       if (S_ISREG(inode->i_mode)) {
+               filp->f_flags |= O_NOATIME;
+               filp->f_mode |= FMODE_NOCMTIME;
+       }
+
+       fd_install(fd, filp);
+       return fd;
+
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+/*
+ * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
+ * unused first argument.
+ */
+STATIC int
+do_readlink(
+       char __user             *buffer,
+       int                     buflen,
+       const char              *link)
+{
+        int len;
+
+       len = PTR_ERR(link);
+       if (IS_ERR(link))
+               goto out;
+
+       len = strlen(link);
+       if (len > (unsigned) buflen)
+               len = buflen;
+       if (copy_to_user(buffer, link, len))
+               len = -EFAULT;
+ out:
+       return len;
+}
+
+
+int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       struct dentry           *dentry;
+       __u32                   olen;
+       void                    *link;
+       int                     error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       /* Restrict this handle operation to symlinks only. */
+       if (!S_ISLNK(dentry->d_inode->i_mode)) {
+               error = -XFS_ERROR(EINVAL);
+               goto out_dput;
+       }
+
+       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out_dput;
+       }
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link) {
+               error = -XFS_ERROR(ENOMEM);
+               goto out_dput;
+       }
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (error)
+               goto out_kfree;
+       error = do_readlink(hreq->ohandle, olen, link);
+       if (error)
+               goto out_kfree;
+
+ out_kfree:
+       kfree(link);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+ out:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error = -ENOMEM;
+       attrlist_cursor_kern_t  *cursor;
+       xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (*len > XATTR_SIZE_MAX)
+               return EINVAL;
+       kbuf = kmalloc(*len, GFP_KERNEL);
+       if (!kbuf)
+               return ENOMEM;
+
+       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(ubuf, kbuf, *len))
+               error = EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       if (len > XATTR_SIZE_MAX)
+               return EINVAL;
+
+       kbuf = memdup_user(ubuf, len);
+       if (IS_ERR(kbuf))
+               return PTR_ERR(kbuf);
+
+       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+
+       return error;
+}
+
+int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags)
+{
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       return xfs_attr_remove(XFS_I(inode), name, flags);
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       xfs_attr_multiop_t      *ops;
+       xfs_fsop_attrmulti_handlereq_t am_hreq;
+       struct dentry           *dentry;
+       unsigned int            i, size;
+       unsigned char           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(am_hreq.ops, size);
+       if (IS_ERR(ops)) {
+               error = PTR_ERR(ops);
+               goto out_dput;
+       }
+
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               ops[i].am_attrname, MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = -ERANGE;
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, &ops[i].am_length,
+                                       ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, ops[i].am_length,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(am_hreq.ops, ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
+
+int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf)
+{
+       int                     attr_flags = 0;
+       int                     error;
+
+       /*
+        * Only allow the sys admin to reserve space unless
+        * unwritten extents are enabled.
+        */
+       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+           !capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
+               return -XFS_ERROR(EPERM);
+
+       if (!(filp->f_mode & FMODE_WRITE))
+               return -XFS_ERROR(EBADF);
+
+       if (!S_ISREG(inode->i_mode))
+               return -XFS_ERROR(EINVAL);
+
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               attr_flags |= XFS_ATTR_NONBLOCK;
+
+       if (filp->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       if (ioflags & IO_INVIS)
+               attr_flags |= XFS_ATTR_DMI;
+
+       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+       return -error;
+}
+
+STATIC int
+xfs_ioc_bulkstat(
+       xfs_mount_t             *mp,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /* done = 1 if there are more stats to get and if bulkstat */
+       /* should be called again (unused here, but used in dmapi) */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS)
+               error = xfs_inumbers(mp, &inlast, &count,
+                                       bulkreq.ubuffer, xfs_inumbers_fmt);
+       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+               error = xfs_bulkstat_single(mp, &inlast,
+                                               bulkreq.ubuffer, &done);
+       else    /* XFS_IOC_FSBULKSTAT */
+               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
+                                    &done);
+
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+
+       /*
+        * Caller should have passed an argument of type
+        * xfs_fsop_geom_v1_t.  This is a proper subset of the
+        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+        */
+       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 4);
+       if (error)
+               return -error;
+
+       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/*
+ * Linux extended inode flags interface.
+ */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+       unsigned int    flags,
+       unsigned int    start)
+{
+       unsigned int    xflags = start;
+
+       if (flags & FS_IMMUTABLE_FL)
+               xflags |= XFS_XFLAG_IMMUTABLE;
+       else
+               xflags &= ~XFS_XFLAG_IMMUTABLE;
+       if (flags & FS_APPEND_FL)
+               xflags |= XFS_XFLAG_APPEND;
+       else
+               xflags &= ~XFS_XFLAG_APPEND;
+       if (flags & FS_SYNC_FL)
+               xflags |= XFS_XFLAG_SYNC;
+       else
+               xflags &= ~XFS_XFLAG_SYNC;
+       if (flags & FS_NOATIME_FL)
+               xflags |= XFS_XFLAG_NOATIME;
+       else
+               xflags &= ~XFS_XFLAG_NOATIME;
+       if (flags & FS_NODUMP_FL)
+               xflags |= XFS_XFLAG_NODUMP;
+       else
+               xflags &= ~XFS_XFLAG_NODUMP;
+
+       return xflags;
+}
+
+STATIC unsigned int
+xfs_di2lxflags(
+       __uint16_t      di_flags)
+{
+       unsigned int    flags = 0;
+
+       if (di_flags & XFS_DIFLAG_IMMUTABLE)
+               flags |= FS_IMMUTABLE_FL;
+       if (di_flags & XFS_DIFLAG_APPEND)
+               flags |= FS_APPEND_FL;
+       if (di_flags & XFS_DIFLAG_SYNC)
+               flags |= FS_SYNC_FL;
+       if (di_flags & XFS_DIFLAG_NOATIME)
+               flags |= FS_NOATIME_FL;
+       if (di_flags & XFS_DIFLAG_NODUMP)
+               flags |= FS_NODUMP_FL;
+       return flags;
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+       xfs_inode_t             *ip,
+       int                     attr,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+
+       memset(&fa, 0, sizeof(struct fsxattr));
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       fa.fsx_xflags = xfs_ip2xflags(ip);
+       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+       fa.fsx_projid = xfs_get_projid(ip);
+
+       if (attr) {
+               if (ip->i_afp) {
+                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+                               fa.fsx_nextents = ip->i_afp->if_bytes /
+                                                       sizeof(xfs_bmbt_rec_t);
+                       else
+                               fa.fsx_nextents = ip->i_d.di_anextents;
+               } else
+                       fa.fsx_nextents = 0;
+       } else {
+               if (ip->i_df.if_flags & XFS_IFEXTENTS)
+                       fa.fsx_nextents = ip->i_df.if_bytes /
+                                               sizeof(xfs_bmbt_rec_t);
+               else
+                       fa.fsx_nextents = ip->i_d.di_nextents;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (copy_to_user(arg, &fa, sizeof(fa)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC void
+xfs_set_diflags(
+       struct xfs_inode        *ip,
+       unsigned int            xflags)
+{
+       unsigned int            di_flags;
+
+       /* can't set PREALLOC this way, just preserve it */
+       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               di_flags |= XFS_DIFLAG_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               di_flags |= XFS_DIFLAG_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               di_flags |= XFS_DIFLAG_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               di_flags |= XFS_DIFLAG_NOATIME;
+       if (xflags & XFS_XFLAG_NODUMP)
+               di_flags |= XFS_DIFLAG_NODUMP;
+       if (xflags & XFS_XFLAG_PROJINHERIT)
+               di_flags |= XFS_DIFLAG_PROJINHERIT;
+       if (xflags & XFS_XFLAG_NODEFRAG)
+               di_flags |= XFS_DIFLAG_NODEFRAG;
+       if (xflags & XFS_XFLAG_FILESTREAM)
+               di_flags |= XFS_DIFLAG_FILESTREAM;
+       if (S_ISDIR(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_RTINHERIT)
+                       di_flags |= XFS_DIFLAG_RTINHERIT;
+               if (xflags & XFS_XFLAG_NOSYMLINKS)
+                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
+               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+       } else if (S_ISREG(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_REALTIME)
+                       di_flags |= XFS_DIFLAG_REALTIME;
+               if (xflags & XFS_XFLAG_EXTSIZE)
+                       di_flags |= XFS_DIFLAG_EXTSIZE;
+       }
+
+       ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+       unsigned int            xflags = xfs_ip2xflags(ip);
+
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID     1
+#define FSX_EXTSIZE    2
+#define FSX_XFLAGS     4
+#define FSX_NONBLOCK   8
+
+STATIC int
+xfs_ioctl_setattr(
+       xfs_inode_t             *ip,
+       struct fsxattr          *fa,
+       int                     mask)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       unsigned int            lock_flags = 0;
+       struct xfs_dquot        *udqp = NULL;
+       struct xfs_dquot        *gdqp = NULL;
+       struct xfs_dquot        *olddquot = NULL;
+       int                     code;
+
+       trace_xfs_ioctl_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       /*
+        * Disallow 32bit project ids when projid32bit feature is not enabled.
+        */
+       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+               return XFS_ERROR(EINVAL);
+
+       /*
+        * If disk quotas is on, we make sure that the dquots do exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
+                                        ip->i_d.di_gid, fa->fsx_projid,
+                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+               if (code)
+                       return code;
+       }
+
+       /*
+        * For the other attributes, we acquire the inode lock and
+        * first do an error checking pass.
+        */
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (code)
+               goto error_return;
+
+       lock_flags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * CAP_FOWNER overrides the following restrictions:
+        *
+        * The user ID of the calling process must be equal
+        * to the file owner ID, except in cases where the
+        * CAP_FSETID capability is applicable.
+        */
+       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+               code = XFS_ERROR(EPERM);
+               goto error_return;
+       }
+
+       /*
+        * Do a quota reservation only if projid is actually going to change.
+        */
+       if (mask & FSX_PROJID) {
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   XFS_IS_PQUOTA_ON(mp) &&
+                   xfs_get_projid(ip) != fa->fsx_projid) {
+                       ASSERT(tp);
+                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (code)       /* out of quota */
+                               goto error_return;
+               }
+       }
+
+       if (mask & FSX_EXTSIZE) {
+               /*
+                * Can't change extent size if any extents are allocated.
+                */
+               if (ip->i_d.di_nextents &&
+                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+                    fa->fsx_extsize)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * Extent size must be a multiple of the appropriate block
+                * size, if set at all. It must also be smaller than the
+                * maximum extent size supported by the filesystem.
+                *
+                * Also, for non-realtime files, limit the extent size hint to
+                * half the size of the AGs in the filesystem so alignment
+                * doesn't result in extents larger than an AG.
+                */
+               if (fa->fsx_extsize != 0) {
+                       xfs_extlen_t    size;
+                       xfs_fsblock_t   extsize_fsb;
+
+                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+                       if (extsize_fsb > MAXEXTLEN) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+
+                       if (XFS_IS_REALTIME_INODE(ip) ||
+                           ((mask & FSX_XFLAGS) &&
+                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+                               size = mp->m_sb.sb_rextsize <<
+                                      mp->m_sb.sb_blocklog;
+                       } else {
+                               size = mp->m_sb.sb_blocksize;
+                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+                                       code = XFS_ERROR(EINVAL);
+                                       goto error_return;
+                               }
+                       }
+
+                       if (fa->fsx_extsize % size) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+       }
+
+
+       if (mask & FSX_XFLAGS) {
+               /*
+                * Can't change realtime flag if any extents are allocated.
+                */
+               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+                   (XFS_IS_REALTIME_INODE(ip)) !=
+                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * If realtime flag is set then must have realtime data.
+                */
+               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       if ((mp->m_sb.sb_rblocks == 0) ||
+                           (mp->m_sb.sb_rextsize == 0) ||
+                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+
+               /*
+                * Can't modify an immutable/append-only file unless
+                * we have appropriate permission.
+                */
+               if ((ip->i_d.di_flags &
+                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+                    (fa->fsx_xflags &
+                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+                   !capable(CAP_LINUX_IMMUTABLE)) {
+                       code = XFS_ERROR(EPERM);
+                       goto error_return;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & FSX_PROJID) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (xfs_get_projid(ip) != fa->fsx_projid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+                               olddquot = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       xfs_set_projid(ip, fa->fsx_projid);
+
+                       /*
+                        * We may have to rev the inode as well as
+                        * the superblock version number since projids didn't
+                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+                        */
+                       if (ip->i_d.di_version == 1)
+                               xfs_bump_ino_vers2(tp, ip);
+               }
+
+       }
+
+       if (mask & FSX_EXTSIZE)
+               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+       if (mask & FSX_XFLAGS) {
+               xfs_set_diflags(ip, fa->fsx_xflags);
+               xfs_diflags_to_linux(ip);
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       /*
+        * If this is a synchronous mount, make sure that the
+        * transaction goes to disk before returning to the user.
+        * This is slightly sub-optimal in that truncates require
+        * two sync transactions instead of one for wsync filesystems.
+        * One for the truncate and one for the timestamps since we
+        * don't want to change the timestamps unless we're sure the
+        * truncate worked.  Truncates are less than 1% of the laddis
+        * mix so this probably isn't worth the trouble to optimize.
+        */
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       code = xfs_trans_commit(tp, 0);
+       xfs_iunlock(ip, lock_flags);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       return code;
+
+ error_return:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_trans_cancel(tp, 0);
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return code;
+}
+
+STATIC int
+xfs_ioc_fssetxattr(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            mask;
+
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+
+       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_ioc_getxflags(
+       xfs_inode_t             *ip,
+       void                    __user *arg)
+{
+       unsigned int            flags;
+
+       flags = xfs_di2lxflags(ip->i_d.di_flags);
+       if (copy_to_user(arg, &flags, sizeof(flags)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC int
+xfs_ioc_setxflags(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            flags;
+       unsigned int            mask;
+
+       if (copy_from_user(&flags, arg, sizeof(flags)))
+               return -EFAULT;
+
+       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+                     FS_NOATIME_FL | FS_NODUMP_FL | \
+                     FS_SYNC_FL))
+               return -EOPNOTSUPP;
+
+       mask = FSX_XFLAGS;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmap __user   *base = *ap;
+
+       /* copy only getbmap portion (not getbmapx) */
+       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmap);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmap(
+       struct xfs_inode        *ip,
+       int                     ioflags,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+       if (ioflags & IO_INVIS)
+               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+                           (struct getbmap *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header - only size of getbmap */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmapx __user  *base = *ap;
+
+       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmapx);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmapx(
+       struct xfs_inode        *ip,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(bmx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       if (bmx.bmv_iflags & (~BMV_IF_VALID))
+               return -XFS_ERROR(EINVAL);
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+                           (struct getbmapx *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       return 0;
+}
+
+/*
+ * Note: some of the ioctl's return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_ioctl(ip);
+
+       switch (cmd) {
+       case FITRIM:
+               return xfs_ioc_trim(mp, arg);
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_ZERO_RANGE: {
+               xfs_flock64_t           bf;
+
+               if (copy_from_user(&bf, arg, sizeof(bf)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
+       case XFS_IOC_DIOINFO: {
+               struct dioattr  da;
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+               if (copy_to_user(arg, &da, sizeof(da)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_FSBULKSTAT_SINGLE:
+       case XFS_IOC_FSBULKSTAT:
+       case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_bulkstat(mp, cmd, arg);
+
+       case XFS_IOC_FSGEOMETRY_V1:
+               return xfs_ioc_fsgeometry_v1(mp, arg);
+
+       case XFS_IOC_FSGEOMETRY:
+               return xfs_ioc_fsgeometry(mp, arg);
+
+       case XFS_IOC_GETVERSION:
+               return put_user(inode->i_generation, (int __user *)arg);
+
+       case XFS_IOC_FSGETXATTR:
+               return xfs_ioc_fsgetxattr(ip, 0, arg);
+       case XFS_IOC_FSGETXATTRA:
+               return xfs_ioc_fsgetxattr(ip, 1, arg);
+       case XFS_IOC_FSSETXATTR:
+               return xfs_ioc_fssetxattr(ip, filp, arg);
+       case XFS_IOC_GETXFLAGS:
+               return xfs_ioc_getxflags(ip, arg);
+       case XFS_IOC_SETXFLAGS:
+               return xfs_ioc_setxflags(ip, filp, arg);
+
+       case XFS_IOC_FSSETDM: {
+               struct fsdmidata        dmi;
+
+               if (copy_from_user(&dmi, arg, sizeof(dmi)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+                               dmi.fsd_dmstate);
+               return -error;
+       }
+
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+       case XFS_IOC_GETBMAPX:
+               return xfs_ioc_getbmapx(ip, arg);
+
+       case XFS_IOC_FD_TO_HANDLE:
+       case XFS_IOC_PATH_TO_HANDLE:
+       case XFS_IOC_PATH_TO_FSHANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(hreq)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_FSSETDM_BY_HANDLE:
+               return xfs_fssetdm_by_handle(filp, arg);
+
+       case XFS_IOC_READLINK_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE:
+               return xfs_attrlist_by_handle(filp, arg);
+
+       case XFS_IOC_ATTRMULTI_BY_HANDLE:
+               return xfs_attrmulti_by_handle(filp, arg);
+
+       case XFS_IOC_SWAPEXT: {
+               struct xfs_swapext      sxp;
+
+               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+
+       case XFS_IOC_FSCOUNTS: {
+               xfs_fsop_counts_t out;
+
+               error = xfs_fs_counts(mp, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_SET_RESBLKS: {
+               xfs_fsop_resblks_t inout;
+               __uint64_t         in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (mp->m_flags & XFS_MOUNT_RDONLY)
+                       return -XFS_ERROR(EROFS);
+
+               if (copy_from_user(&inout, arg, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+
+               /* input parameter is passed in resblks field of structure */
+               in = inout.resblks;
+               error = xfs_reserve_blocks(mp, &in, &inout);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &inout, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_GET_RESBLKS: {
+               xfs_fsop_resblks_t out;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_reserve_blocks(mp, NULL, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+
+               return 0;
+       }
+
+       case XFS_IOC_FSGROWFSDATA: {
+               xfs_growfs_data_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSLOG: {
+               xfs_growfs_log_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_log(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSRT: {
+               xfs_growfs_rt_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_GOINGDOWN: {
+               __uint32_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (get_user(in, (__uint32_t __user *)arg))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_fs_goingdown(mp, in);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_INJECTION: {
+               xfs_error_injection_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_errortag_add(in.errtag, mp);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_CLEARALL:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_errortag_clearall(mp, 1);
+               return -error;
+
+       default:
+               return -ENOTTY;
+       }
+}
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
new file mode 100644 (file)
index 0000000..d56173b
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+
+extern int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf);
+
+extern int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags);
+
+extern struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen);
+
+extern long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p);
+
+extern long
+xfs_file_compat_ioctl(
+       struct file             *file,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
new file mode 100644 (file)
index 0000000..54e623b
--- /dev/null
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_dfrag.h"
+#include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
+#include "xfs_ioctl32.h"
+#include "xfs_trace.h"
+
+#define  _NATIVE_IOC(cmd, type) \
+         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
+#ifdef BROKEN_X86_ALIGNMENT
+STATIC int
+xfs_compat_flock64_copyin(
+       xfs_flock64_t           *bf,
+       compat_xfs_flock64_t    __user *arg32)
+{
+       if (get_user(bf->l_type,        &arg32->l_type) ||
+           get_user(bf->l_whence,      &arg32->l_whence) ||
+           get_user(bf->l_start,       &arg32->l_start) ||
+           get_user(bf->l_len,         &arg32->l_len) ||
+           get_user(bf->l_sysid,       &arg32->l_sysid) ||
+           get_user(bf->l_pid,         &arg32->l_pid) ||
+           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_ioc_fsgeometry_v1(
+       struct xfs_mount          *mp,
+       compat_xfs_fsop_geom_v1_t __user *arg32)
+{
+       xfs_fsop_geom_t           fsgeo;
+       int                       error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+       /* The 32-bit variant simply has some padding at the end */
+       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_data_copyin(
+       struct xfs_growfs_data   *in,
+       compat_xfs_growfs_data_t __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->imaxpct,   &arg32->imaxpct))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_rt_copyin(
+       struct xfs_growfs_rt     *in,
+       compat_xfs_growfs_rt_t  __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->extsize,   &arg32->extsize))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt_compat(
+       void                    __user *ubuffer,
+       const xfs_inogrp_t      *buffer,
+       long                    count,
+       long                    *written)
+{
+       compat_xfs_inogrp_t     __user *p32 = ubuffer;
+       long                    i;
+
+       for (i = 0; i < count; i++) {
+               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+                       return -XFS_ERROR(EFAULT);
+       }
+       *written = count * sizeof(*p32);
+       return 0;
+}
+
+#else
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#endif /* BROKEN_X86_ALIGNMENT */
+
+STATIC int
+xfs_ioctl32_bstime_copyin(
+       xfs_bstime_t            *bstime,
+       compat_xfs_bstime_t     __user *bstime32)
+{
+       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
+
+       if (get_user(sec32,             &bstime32->tv_sec)      ||
+           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       bstime->tv_sec = sec32;
+       return 0;
+}
+
+/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+       xfs_bstat_t             *bstat,
+       compat_xfs_bstat_t      __user *bstat32)
+{
+       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
+           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
+           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
+           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
+           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
+           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
+           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
+           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+           get_user(bstat->bs_blocks,  &bstat32->bs_size)      ||
+           get_user(bstat->bs_xflags,  &bstat32->bs_size)      ||
+           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
+           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
+           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
+           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
+           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
+           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+STATIC int
+xfs_bstime_store_compat(
+       compat_xfs_bstime_t     __user *p32,
+       const xfs_bstime_t      *p)
+{
+       __s32                   sec32;
+
+       sec32 = p->tv_sec;
+       if (put_user(sec32, &p32->tv_sec) ||
+           put_user(p->tv_nsec, &p32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
+STATIC int
+xfs_bulkstat_one_fmt_compat(
+       void                    __user *ubuffer,
+       int                     ubsize,
+       int                     *ubused,
+       const xfs_bstat_t       *buffer)
+{
+       compat_xfs_bstat_t      __user *p32 = ubuffer;
+
+       if (ubsize < sizeof(*p32))
+               return XFS_ERROR(ENOMEM);
+
+       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
+           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
+           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
+           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
+           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
+           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
+           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
+           put_user(buffer->bs_size,     &p32->bs_size)        ||
+           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
+           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
+           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
+           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
+           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
+           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
+           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
+           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
+           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
+           put_user(buffer->bs_aextents, &p32->bs_aextents))
+               return XFS_ERROR(EFAULT);
+       if (ubused)
+               *ubused = sizeof(*p32);
+       return 0;
+}
+
+STATIC int
+xfs_bulkstat_one_compat(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* buffer to place output in */
+       int             ubsize,         /* size of buffer */
+       int             *ubused,        /* bytes used by me */
+       int             *stat)          /* BULKSTAT_RV_... */
+{
+       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
+                                   xfs_bulkstat_one_fmt_compat,
+                                   ubused, stat);
+}
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_compat_ioc_bulkstat(
+       xfs_mount_t               *mp,
+       unsigned int              cmd,
+       compat_xfs_fsop_bulkreq_t __user *p32)
+{
+       u32                     addr;
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /* done = 1 if there are more stats to get and if bulkstat */
+       /* should be called again (unused here, but used in dmapi) */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (get_user(addr, &p32->lastip))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.lastip = compat_ptr(addr);
+       if (get_user(bulkreq.icount, &p32->icount) ||
+           get_user(addr, &p32->ubuffer))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ubuffer = compat_ptr(addr);
+       if (get_user(addr, &p32->ocount))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ocount = compat_ptr(addr);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS_32) {
+               error = xfs_inumbers(mp, &inlast, &count,
+                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
+               int res;
+
+               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+                               sizeof(compat_xfs_bstat_t), 0, &res);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
+               error = xfs_bulkstat(mp, &inlast, &count,
+                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+                       bulkreq.ubuffer, &done);
+       } else
+               error = XFS_ERROR(EINVAL);
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_compat_handlereq_copyin(
+       xfs_fsop_handlereq_t            *hreq,
+       compat_xfs_fsop_handlereq_t     __user *arg32)
+{
+       compat_xfs_fsop_handlereq_t     hreq32;
+
+       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       hreq->fd = hreq32.fd;
+       hreq->path = compat_ptr(hreq32.path);
+       hreq->oflags = hreq32.oflags;
+       hreq->ihandle = compat_ptr(hreq32.ihandle);
+       hreq->ihandlen = hreq32.ihandlen;
+       hreq->ohandle = compat_ptr(hreq32.ohandle);
+       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+
+       return 0;
+}
+
+STATIC struct dentry *
+xfs_compat_handlereq_to_dentry(
+       struct file             *parfilp,
+       compat_xfs_fsop_handlereq_t *hreq)
+{
+       return xfs_handle_to_dentry(parfilp,
+                       compat_ptr(hreq->ihandle), hreq->ihandlen);
+}
+
+STATIC int
+xfs_compat_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       attrlist_cursor_kern_t  *cursor;
+       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = -ENOMEM;
+       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_compat_attrmulti_by_handle(
+       struct file                             *parfilp,
+       void                                    __user *arg)
+{
+       int                                     error;
+       compat_xfs_attr_multiop_t               *ops;
+       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
+       struct dentry                           *dentry;
+       unsigned int                            i, size;
+       unsigned char                           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(compat_ptr(am_hreq.ops), size);
+       if (IS_ERR(ops)) {
+               error = PTR_ERR(ops);
+               goto out_dput;
+       }
+
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               compat_ptr(ops[i].am_attrname),
+                               MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = -ERANGE;
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       &ops[i].am_length, ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       ops[i].am_length, ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
+
+STATIC int
+xfs_compat_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       compat_xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg,
+                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+out:
+       dput(dentry);
+       return error;
+}
+
+long
+xfs_file_compat_ioctl(
+       struct file             *filp,
+       unsigned                cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_compat_ioctl(ip);
+
+       switch (cmd) {
+       /* No size or alignment issues on any arch */
+       case XFS_IOC_DIOINFO:
+       case XFS_IOC_FSGEOMETRY:
+       case XFS_IOC_FSGETXATTR:
+       case XFS_IOC_FSSETXATTR:
+       case XFS_IOC_FSGETXATTRA:
+       case XFS_IOC_FSSETDM:
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+       case XFS_IOC_GETBMAPX:
+       case XFS_IOC_FSCOUNTS:
+       case XFS_IOC_SET_RESBLKS:
+       case XFS_IOC_GET_RESBLKS:
+       case XFS_IOC_FSGROWFSLOG:
+       case XFS_IOC_GOINGDOWN:
+       case XFS_IOC_ERROR_INJECTION:
+       case XFS_IOC_ERROR_CLEARALL:
+               return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
+       /* These are handled fine if no alignment issues */
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_FSGEOMETRY_V1:
+       case XFS_IOC_FSGROWFSDATA:
+       case XFS_IOC_FSGROWFSRT:
+       case XFS_IOC_ZERO_RANGE:
+               return xfs_file_ioctl(filp, cmd, p);
+#else
+       case XFS_IOC_ALLOCSP_32:
+       case XFS_IOC_FREESP_32:
+       case XFS_IOC_ALLOCSP64_32:
+       case XFS_IOC_FREESP64_32:
+       case XFS_IOC_RESVSP_32:
+       case XFS_IOC_UNRESVSP_32:
+       case XFS_IOC_RESVSP64_32:
+       case XFS_IOC_UNRESVSP64_32:
+       case XFS_IOC_ZERO_RANGE_32: {
+               struct xfs_flock64      bf;
+
+               if (xfs_compat_flock64_copyin(&bf, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
+       case XFS_IOC_FSGEOMETRY_V1_32:
+               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+       case XFS_IOC_FSGROWFSDATA_32: {
+               struct xfs_growfs_data  in;
+
+               if (xfs_compat_growfs_data_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+       case XFS_IOC_FSGROWFSRT_32: {
+               struct xfs_growfs_rt    in;
+
+               if (xfs_compat_growfs_rt_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+#endif
+       /* long changes size, but xfs only copiese out 32 bits */
+       case XFS_IOC_GETXFLAGS_32:
+       case XFS_IOC_SETXFLAGS_32:
+       case XFS_IOC_GETVERSION_32:
+               cmd = _NATIVE_IOC(cmd, long);
+               return xfs_file_ioctl(filp, cmd, p);
+       case XFS_IOC_SWAPEXT_32: {
+               struct xfs_swapext        sxp;
+               struct compat_xfs_swapext __user *sxu = arg;
+
+               /* Bulk copy in up to the sx_stat field, then copy bstat */
+               if (copy_from_user(&sxp, sxu,
+                                  offsetof(struct xfs_swapext, sx_stat)) ||
+                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+       case XFS_IOC_FSBULKSTAT_32:
+       case XFS_IOC_FSBULKSTAT_SINGLE_32:
+       case XFS_IOC_FSINUMBERS_32:
+               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+       case XFS_IOC_FD_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_FSHANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_READLINK_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
+               return xfs_compat_attrlist_by_handle(filp, arg);
+       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+               return xfs_compat_attrmulti_by_handle(filp, arg);
+       case XFS_IOC_FSSETDM_BY_HANDLE_32:
+               return xfs_compat_fssetdm_by_handle(filp, arg);
+       default:
+               return -XFS_ERROR(ENOIOCTLCMD);
+       }
+}
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
new file mode 100644 (file)
index 0000000..80f4060
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL32_H__
+#define __XFS_IOCTL32_H__
+
+#include <linux/compat.h>
+
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment.  We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
+
+/*
+ * On intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
+
+typedef struct compat_xfs_bstime {
+       compat_time_t   tv_sec;         /* seconds              */
+       __s32           tv_nsec;        /* and nanoseconds      */
+} compat_xfs_bstime_t;
+
+typedef struct compat_xfs_bstat {
+       __u64           bs_ino;         /* inode number                 */
+       __u16           bs_mode;        /* type and mode                */
+       __u16           bs_nlink;       /* number of links              */
+       __u32           bs_uid;         /* user id                      */
+       __u32           bs_gid;         /* group id                     */
+       __u32           bs_rdev;        /* device value                 */
+       __s32           bs_blksize;     /* block size                   */
+       __s64           bs_size;        /* file size                    */
+       compat_xfs_bstime_t bs_atime;   /* access time                  */
+       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
+       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
+       int64_t         bs_blocks;      /* number of blocks             */
+       __u32           bs_xflags;      /* extended flags               */
+       __s32           bs_extsize;     /* extent size                  */
+       __s32           bs_extents;     /* number of extents            */
+       __u32           bs_gen;         /* generation count             */
+       __u16           bs_projid_lo;   /* lower part of project id     */
+#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
+       __u16           bs_projid_hi;   /* high part of project id      */
+       unsigned char   bs_pad[12];     /* pad space, unused            */
+       __u32           bs_dmevmask;    /* DMIG event mask              */
+       __u16           bs_dmstate;     /* DMIG state info              */
+       __u16           bs_aextents;    /* attribute number of extents  */
+} __compat_packed compat_xfs_bstat_t;
+
+typedef struct compat_xfs_fsop_bulkreq {
+       compat_uptr_t   lastip;         /* last inode # pointer         */
+       __s32           icount;         /* count of entries in buffer   */
+       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
+       compat_uptr_t   ocount;         /* output count pointer         */
+} compat_xfs_fsop_bulkreq_t;
+
+#define XFS_IOC_FSBULKSTAT_32 \
+       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+typedef struct compat_xfs_fsop_handlereq {
+       __u32           fd;             /* fd for FD_TO_HANDLE          */
+       compat_uptr_t   path;           /* user pathname                */
+       __u32           oflags;         /* open flags                   */
+       compat_uptr_t   ihandle;        /* user supplied handle         */
+       __u32           ihandlen;       /* user supplied length         */
+       compat_uptr_t   ohandle;        /* user buffer for handle       */
+       compat_uptr_t   ohandlen;       /* user buffer length           */
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+       __int64_t               sx_version;     /* version */
+       __int64_t               sx_fdtarget;    /* fd of target file */
+       __int64_t               sx_fdtmp;       /* fd of tmp file */
+       xfs_off_t               sx_offset;      /* offset into file */
+       xfs_off_t               sx_length;      /* leng from offset */
+       char                    sx_pad[16];     /* pad space, unused */
+       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+
+#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
+
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
+       __u32                           flags;  /* which namespace to use */
+       __u32                           buflen; /* length of buffer supplied */
+       compat_uptr_t                   buffer; /* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+       __u32           am_opcode;
+       __s32           am_error;
+       compat_uptr_t   am_attrname;
+       compat_uptr_t   am_attrvalue;
+       __u32           am_length;
+       __u32           am_flags;
+} compat_xfs_attr_multiop_t;
+
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       __u32                           opcount;/* count of following multiop */
+       /* ptr to compat_xfs_attr_multiop */
+       compat_uptr_t                   ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+
+typedef struct compat_xfs_fsop_setdm_handlereq {
+       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
+       /* ptr to struct fsdmidata */
+       compat_uptr_t                   data;   /* DMAPI data   */
+} compat_xfs_fsop_setdm_handlereq_t;
+
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start __attribute__((packed));
+                       /* len == 0 means until end of file */
+       __s64           l_len __attribute__((packed));
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserve area */
+} compat_xfs_flock64_t;
+
+#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
+
+typedef struct compat_xfs_fsop_geom_v1 {
+       __u32           blocksize;      /* filesystem (data) block size */
+       __u32           rtextsize;      /* realtime extent size         */
+       __u32           agblocks;       /* fsblocks in an AG            */
+       __u32           agcount;        /* number of allocation groups  */
+       __u32           logblocks;      /* fsblocks in the log          */
+       __u32           sectsize;       /* (data) sector size, bytes    */
+       __u32           inodesize;      /* inode size in bytes          */
+       __u32           imaxpct;        /* max allowed inode space(%)   */
+       __u64           datablocks;     /* fsblocks in data subvolume   */
+       __u64           rtblocks;       /* fsblocks in realtime subvol  */
+       __u64           rtextents;      /* rt extents in realtime subvol*/
+       __u64           logstart;       /* starting fsblock of the log  */
+       unsigned char   uuid[16];       /* unique id of the filesystem  */
+       __u32           sunit;          /* stripe unit, fsblocks        */
+       __u32           swidth;         /* stripe width, fsblocks       */
+       __s32           version;        /* structure version            */
+       __u32           flags;          /* superblock version flags     */
+       __u32           logsectsize;    /* log sector size, bytes       */
+       __u32           rtsectsize;     /* realtime sector size, bytes  */
+       __u32           dirblocksize;   /* directory block size, bytes  */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+
+typedef struct compat_xfs_inogrp {
+       __u64           xi_startino;    /* starting inode number        */
+       __s32           xi_alloccount;  /* # bits set in allocmask      */
+       __u64           xi_allocmask;   /* mask of allocated inodes     */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+       __u64           newblocks;      /* new data subvol size, fsblocks */
+       __u32           imaxpct;        /* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+
+typedef struct compat_xfs_growfs_rt {
+       __u64           newblocks;      /* new realtime size, fsblocks */
+       __u32           extsize;        /* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
+
+#endif /* BROKEN_X86_ALIGNMENT */
+
+#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
new file mode 100644 (file)
index 0000000..b9c172b
--- /dev/null
@@ -0,0 +1,1210 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_acl.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/xattr.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/security.h>
+#include <linux/fiemap.h>
+#include <linux/slab.h>
+
+/*
+ * Bring the timestamps in the XFS inode uptodate.
+ *
+ * Used before writing the inode to disk.
+ */
+void
+xfs_synchronize_times(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+}
+
+/*
+ * If the linux inode is valid, mark it dirty.
+ * Used when committing a dirty inode into a transaction so that
+ * the inode will get written back by the linux code
+ */
+void
+xfs_mark_inode_dirty_sync(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty_sync(inode);
+}
+
+void
+xfs_mark_inode_dirty(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty(inode);
+}
+
+/*
+ * Hook in SELinux.  This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+xfs_init_security(
+       struct inode    *inode,
+       struct inode    *dir,
+       const struct qstr *qstr)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       size_t          length;
+       void            *value;
+       unsigned char   *name;
+       int             error;
+
+       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
+                                            &value, &length);
+       if (error) {
+               if (error == -EOPNOTSUPP)
+                       return 0;
+               return -error;
+       }
+
+       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
+
+       kfree(name);
+       kfree(value);
+       return error;
+}
+
+static void
+xfs_dentry_to_name(
+       struct xfs_name *namep,
+       struct dentry   *dentry)
+{
+       namep->name = dentry->d_name.name;
+       namep->len = dentry->d_name.len;
+}
+
+STATIC void
+xfs_cleanup_inode(
+       struct inode    *dir,
+       struct inode    *inode,
+       struct dentry   *dentry)
+{
+       struct xfs_name teardown;
+
+       /* Oh, the horror.
+        * If we can't add the ACL or we fail in
+        * xfs_init_security we must back out.
+        * ENOSPC can hit here, among other things.
+        */
+       xfs_dentry_to_name(&teardown, dentry);
+
+       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+       iput(inode);
+}
+
+STATIC int
+xfs_vn_mknod(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       dev_t           rdev)
+{
+       struct inode    *inode;
+       struct xfs_inode *ip = NULL;
+       struct posix_acl *default_acl = NULL;
+       struct xfs_name name;
+       int             error;
+
+       /*
+        * Irix uses Missed'em'V split, but doesn't want to see
+        * the upper 5 bits of (14bit) major.
+        */
+       if (S_ISCHR(mode) || S_ISBLK(mode)) {
+               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+                       return -EINVAL;
+               rdev = sysv_encode_dev(rdev);
+       } else {
+               rdev = 0;
+       }
+
+       if (IS_POSIXACL(dir)) {
+               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+               if (IS_ERR(default_acl))
+                       return PTR_ERR(default_acl);
+
+               if (!default_acl)
+                       mode &= ~current_umask();
+       }
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+       if (unlikely(error))
+               goto out_free_acl;
+
+       inode = VFS_I(ip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       if (default_acl) {
+               error = -xfs_inherit_acl(inode, default_acl);
+               default_acl = NULL;
+               if (unlikely(error))
+                       goto out_cleanup_inode;
+       }
+
+
+       d_instantiate(dentry, inode);
+       return -error;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out_free_acl:
+       posix_acl_release(default_acl);
+       return -error;
+}
+
+STATIC int
+xfs_vn_create(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       struct nameidata *nd)
+{
+       return xfs_vn_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+xfs_vn_mkdir(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode)
+{
+       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+xfs_vn_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *cip;
+       struct xfs_name name;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               d_add(dentry, NULL);
+               return NULL;
+       }
+
+       return d_splice_alias(VFS_I(cip), dentry);
+}
+
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *ip;
+       struct xfs_name xname;
+       struct xfs_name ci_name;
+       struct qstr     dname;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&xname, dentry);
+       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               /*
+                * call d_add(dentry, NULL) here when d_drop_negative_children
+                * is called in xfs_vn_mknod (ie. allow negative dentries
+                * with CI filesystems).
+                */
+               return NULL;
+       }
+
+       /* if exact match, just splice and exit */
+       if (!ci_name.name)
+               return d_splice_alias(VFS_I(ip), dentry);
+
+       /* else case-insensitive match... */
+       dname.name = ci_name.name;
+       dname.len = ci_name.len;
+       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
+       kmem_free(ci_name.name);
+       return dentry;
+}
+
+STATIC int
+xfs_vn_link(
+       struct dentry   *old_dentry,
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *inode = old_dentry->d_inode;
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
+       if (unlikely(error))
+               return -error;
+
+       ihold(inode);
+       d_instantiate(dentry, inode);
+       return 0;
+}
+
+STATIC int
+xfs_vn_unlink(
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+       if (error)
+               return error;
+
+       /*
+        * With unlink, the VFS makes the dentry "negative": no inode,
+        * but still hashed. This is incompatible with case-insensitive
+        * mode, so invalidate (unhash) the dentry in CI-mode.
+        */
+       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+               d_invalidate(dentry);
+       return 0;
+}
+
+STATIC int
+xfs_vn_symlink(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       const char      *symname)
+{
+       struct inode    *inode;
+       struct xfs_inode *cip = NULL;
+       struct xfs_name name;
+       int             error;
+       mode_t          mode;
+
+       mode = S_IFLNK |
+               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+       if (unlikely(error))
+               goto out;
+
+       inode = VFS_I(cip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       d_instantiate(dentry, inode);
+       return 0;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out:
+       return -error;
+}
+
+STATIC int
+xfs_vn_rename(
+       struct inode    *odir,
+       struct dentry   *odentry,
+       struct inode    *ndir,
+       struct dentry   *ndentry)
+{
+       struct inode    *new_inode = ndentry->d_inode;
+       struct xfs_name oname;
+       struct xfs_name nname;
+
+       xfs_dentry_to_name(&oname, odentry);
+       xfs_dentry_to_name(&nname, ndentry);
+
+       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+                          XFS_I(ndir), &nname, new_inode ?
+                                               XFS_I(new_inode) : NULL);
+}
+
+/*
+ * careful here - this function can get called recursively, so
+ * we need to be very careful about how much stack we use.
+ * uio is kmalloced for this reason...
+ */
+STATIC void *
+xfs_vn_follow_link(
+       struct dentry           *dentry,
+       struct nameidata        *nd)
+{
+       char                    *link;
+       int                     error = -ENOMEM;
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link)
+               goto out_err;
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (unlikely(error))
+               goto out_kfree;
+
+       nd_set_link(nd, link);
+       return NULL;
+
+ out_kfree:
+       kfree(link);
+ out_err:
+       nd_set_link(nd, ERR_PTR(error));
+       return NULL;
+}
+
+STATIC void
+xfs_vn_put_link(
+       struct dentry   *dentry,
+       struct nameidata *nd,
+       void            *p)
+{
+       char            *s = nd_get_link(nd);
+
+       if (!IS_ERR(s))
+               kfree(s);
+}
+
+STATIC int
+xfs_vn_getattr(
+       struct vfsmount         *mnt,
+       struct dentry           *dentry,
+       struct kstat            *stat)
+{
+       struct inode            *inode = dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       trace_xfs_getattr(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       stat->size = XFS_ISIZE(ip);
+       stat->dev = inode->i_sb->s_dev;
+       stat->mode = ip->i_d.di_mode;
+       stat->nlink = ip->i_d.di_nlink;
+       stat->uid = ip->i_d.di_uid;
+       stat->gid = ip->i_d.di_gid;
+       stat->ino = ip->i_ino;
+       stat->atime = inode->i_atime;
+       stat->mtime = inode->i_mtime;
+       stat->ctime = inode->i_ctime;
+       stat->blocks =
+               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               stat->blksize = BLKDEV_IOSIZE;
+               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                                  sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               if (XFS_IS_REALTIME_INODE(ip)) {
+                       /*
+                        * If the file blocks are being allocated from a
+                        * realtime volume, then return the inode's realtime
+                        * extent size or the realtime volume's extent size.
+                        */
+                       stat->blksize =
+                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+               } else
+                       stat->blksize = xfs_preferred_iosize(mp);
+               stat->rdev = 0;
+               break;
+       }
+
+       return 0;
+}
+
+int
+xfs_setattr_nonsize(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       xfs_trans_t             *tp;
+       int                     error;
+       uid_t                   uid = 0, iuid = 0;
+       gid_t                   gid = 0, igid = 0;
+       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
+       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT((mask & ATTR_SIZE) == 0);
+
+       /*
+        * If disk quotas is on, we make sure that the dquots do exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+               uint    qflags = 0;
+
+               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+                       uid = iattr->ia_uid;
+                       qflags |= XFS_QMOPT_UQUOTA;
+               } else {
+                       uid = ip->i_d.di_uid;
+               }
+               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+                       gid = iattr->ia_gid;
+                       qflags |= XFS_QMOPT_GQUOTA;
+               }  else {
+                       gid = ip->i_d.di_gid;
+               }
+
+               /*
+                * We take a reference when we initialize udqp and gdqp,
+                * so it is important that we never blindly double trip on
+                * the same variable. See xfs_create() for an example.
+                */
+               ASSERT(udqp == NULL);
+               ASSERT(gdqp == NULL);
+               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+                                        qflags, &udqp, &gdqp);
+               if (error)
+                       return error;
+       }
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (error)
+               goto out_dqrele;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * These IDs could have changed since we last looked at them.
+                * But, we're assured that if the ownership did change
+                * while we didn't have the inode locked, inode's dquot(s)
+                * would have changed also.
+                */
+               iuid = ip->i_d.di_uid;
+               igid = ip->i_d.di_gid;
+               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+               /*
+                * Do a quota reservation only if uid/gid is actually
+                * going to change.
+                */
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+                       ASSERT(tp);
+                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (error)      /* out of quota */
+                               goto out_trans_cancel;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (iuid != uid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+                               ASSERT(mask & ATTR_UID);
+                               ASSERT(udqp);
+                               olddquot1 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_udquot, udqp);
+                       }
+                       ip->i_d.di_uid = uid;
+                       inode->i_uid = uid;
+               }
+               if (igid != gid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
+                               ASSERT(mask & ATTR_GID);
+                               ASSERT(gdqp);
+                               olddquot2 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       ip->i_d.di_gid = gid;
+                       inode->i_gid = gid;
+               }
+       }
+
+       /*
+        * Change file access modes.
+        */
+       if (mask & ATTR_MODE) {
+               umode_t mode = iattr->ia_mode;
+
+               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                       mode &= ~S_ISGID;
+
+               ip->i_d.di_mode &= S_IFMT;
+               ip->i_d.di_mode |= mode & ~S_IFMT;
+
+               inode->i_mode &= S_IFMT;
+               inode->i_mode |= mode & ~S_IFMT;
+       }
+
+       /*
+        * Change file access or modified times.
+        */
+       if (mask & ATTR_ATIME) {
+               inode->i_atime = iattr->ia_atime;
+               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot1);
+       xfs_qm_dqrele(olddquot2);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       if (error)
+               return XFS_ERROR(error);
+
+       /*
+        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+        *           update.  We could avoid this with linked transactions
+        *           and passing down the transaction pointer all the way
+        *           to attr_set.  No previous user of the generic
+        *           Posix ACL code seems to care about this issue either.
+        */
+       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+               error = -xfs_acl_chmod(inode);
+               if (error)
+                       return XFS_ERROR(error);
+       }
+
+       return 0;
+
+out_trans_cancel:
+       xfs_trans_cancel(tp, 0);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       return error;
+}
+
+/*
+ * Truncate file.  Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       struct xfs_trans        *tp;
+       int                     error;
+       uint                    lock_flags;
+       uint                    commit_flags = 0;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+       lock_flags = XFS_ILOCK_EXCL;
+       if (!(flags & XFS_ATTR_NOLOCK))
+               lock_flags |= XFS_IOLOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * Short circuit the truncate case for zero length files.
+        */
+       if (iattr->ia_size == 0 &&
+           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+                       goto out_unlock;
+
+               /*
+                * Use the regular setattr path to update the timestamps.
+                */
+               xfs_iunlock(ip, lock_flags);
+               iattr->ia_valid &= ~ATTR_SIZE;
+               return xfs_setattr_nonsize(ip, iattr, 0);
+       }
+
+       /*
+        * Make sure that the dquots are attached to the inode.
+        */
+       error = xfs_qm_dqattach_locked(ip, 0);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Now we can make the changes.  Before we join the inode to the
+        * transaction, take care of the part of the truncation that must be
+        * done without the inode lock.  This needs to be done before joining
+        * the inode to the transaction, because the inode cannot be unlocked
+        * once it is a part of the transaction.
+        */
+       if (iattr->ia_size > ip->i_size) {
+               /*
+                * Do the first part of growing a file: zero any data in the
+                * last block that is beyond the old EOF.  We need to do this
+                * before the inode is joined to the transaction to modify
+                * i_size.
+                */
+               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+               if (error)
+                       goto out_unlock;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       lock_flags &= ~XFS_ILOCK_EXCL;
+
+       /*
+        * We are going to log the inode size change in this transaction so
+        * any previous writes that are beyond the on disk EOF and the new
+        * EOF that have not been written out need to be written here.  If we
+        * do not write the data out, we expose ourselves to the null files
+        * problem.
+        *
+        * Only flush from the on disk size to the smaller of the in memory
+        * file size or the new size as that's the range we really care about
+        * here and prevents waiting for other data not within the range we
+        * care about here.
+        */
+       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+                                       XBF_ASYNC, FI_NONE);
+               if (error)
+                       goto out_unlock;
+       }
+
+       /*
+        * Wait for all I/O to complete.
+        */
+       xfs_ioend_wait(ip);
+
+       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+                                    xfs_get_blocks);
+       if (error)
+               goto out_unlock;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                XFS_TRANS_PERM_LOG_RES,
+                                XFS_ITRUNCATE_LOG_COUNT);
+       if (error)
+               goto out_trans_cancel;
+
+       truncate_setsize(inode, iattr->ia_size);
+
+       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+       lock_flags |= XFS_ILOCK_EXCL;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Only change the c/mtime if we are changing the size or we are
+        * explicitly asked to change it.  This handles the semantic difference
+        * between truncate() and ftruncate() as implemented in the VFS.
+        *
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS set these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (iattr->ia_size != ip->i_size &&
+           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+               iattr->ia_ctime = iattr->ia_mtime =
+                       current_fs_time(inode->i_sb);
+               mask |= ATTR_CTIME | ATTR_MTIME;
+       }
+
+       if (iattr->ia_size > ip->i_size) {
+               ip->i_d.di_size = iattr->ia_size;
+               ip->i_size = iattr->ia_size;
+       } else if (iattr->ia_size <= ip->i_size ||
+                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+               if (error)
+                       goto out_trans_abort;
+
+               /*
+                * Truncated "down", so we're removing references to old data
+                * here - if we delay flushing for a long time, we expose
+                * ourselves unduly to the notorious NULL files problem.  So,
+                * we mark this inode and flush it when the file is closed,
+                * and do not wait the usual (long) time for writeout.
+                */
+               xfs_iflags_set(ip, XFS_ITRUNCATED);
+       }
+
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return error;
+
+out_trans_abort:
+       commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+       xfs_trans_cancel(tp, commit_flags);
+       goto out_unlock;
+}
+
+STATIC int
+xfs_vn_setattr(
+       struct dentry   *dentry,
+       struct iattr    *iattr)
+{
+       if (iattr->ia_valid & ATTR_SIZE)
+               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
+}
+
+#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Call fiemap helper to fill in user data.
+ * Returns positive errors to xfs_getbmap.
+ */
+STATIC int
+xfs_fiemap_format(
+       void                    **arg,
+       struct getbmapx         *bmv,
+       int                     *full)
+{
+       int                     error;
+       struct fiemap_extent_info *fieinfo = *arg;
+       u32                     fiemap_flags = 0;
+       u64                     logical, physical, length;
+
+       /* Do nothing for a hole */
+       if (bmv->bmv_block == -1LL)
+               return 0;
+
+       logical = BBTOB(bmv->bmv_offset);
+       physical = BBTOB(bmv->bmv_block);
+       length = BBTOB(bmv->bmv_length);
+
+       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
+               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
+       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
+               physical = 0;   /* no block yet */
+       }
+       if (bmv->bmv_oflags & BMV_OF_LAST)
+               fiemap_flags |= FIEMAP_EXTENT_LAST;
+
+       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+                                       length, fiemap_flags);
+       if (error > 0) {
+               error = 0;
+               *full = 1;      /* user array now full */
+       }
+
+       return -error;
+}
+
+STATIC int
+xfs_vn_fiemap(
+       struct inode            *inode,
+       struct fiemap_extent_info *fieinfo,
+       u64                     start,
+       u64                     length)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+       struct getbmapx         bm;
+       int                     error;
+
+       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+       if (error)
+               return error;
+
+       /* Set up bmap header for xfs internal routine */
+       bm.bmv_offset = BTOBB(start);
+       /* Special case for whole file */
+       if (length == FIEMAP_MAX_OFFSET)
+               bm.bmv_length = -1LL;
+       else
+               bm.bmv_length = BTOBB(length);
+
+       /* We add one because in getbmap world count includes the header */
+       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+                                       fieinfo->fi_extents_max + 1;
+       bm.bmv_count = min_t(__s32, bm.bmv_count,
+                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
+       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+               bm.bmv_iflags |= BMV_IF_ATTRFORK;
+       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+               bm.bmv_iflags |= BMV_IF_DELALLOC;
+
+       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+       if (error)
+               return -error;
+
+       return 0;
+}
+
+static const struct inode_operations xfs_inode_operations = {
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+       .fiemap                 = xfs_vn_fiemap,
+};
+
+static const struct inode_operations xfs_dir_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtile differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_dir_ci_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_ci_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtile differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_symlink_inode_operations = {
+       .readlink               = generic_readlink,
+       .follow_link            = xfs_vn_follow_link,
+       .put_link               = xfs_vn_put_link,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+STATIC void
+xfs_diflags_to_iflags(
+       struct inode            *inode,
+       struct xfs_inode        *ip)
+{
+       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
+ */
+void
+xfs_setup_inode(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = &ip->i_vnode;
+
+       inode->i_ino = ip->i_ino;
+       inode->i_state = I_NEW;
+
+       inode_sb_list_add(inode);
+       /* make the inode look hashed for the writeback code */
+       hlist_add_fake(&inode->i_hash);
+
+       inode->i_mode   = ip->i_d.di_mode;
+       inode->i_nlink  = ip->i_d.di_nlink;
+       inode->i_uid    = ip->i_d.di_uid;
+       inode->i_gid    = ip->i_d.di_gid;
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               inode->i_rdev =
+                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                             sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               inode->i_rdev = 0;
+               break;
+       }
+
+       inode->i_generation = ip->i_d.di_gen;
+       i_size_write(inode, ip->i_d.di_size);
+       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
+       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
+       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
+       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
+       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
+       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
+       xfs_diflags_to_iflags(inode, ip);
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_op = &xfs_inode_operations;
+               inode->i_fop = &xfs_file_operations;
+               inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       case S_IFDIR:
+               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+                       inode->i_op = &xfs_dir_ci_inode_operations;
+               else
+                       inode->i_op = &xfs_dir_inode_operations;
+               inode->i_fop = &xfs_dir_file_operations;
+               break;
+       case S_IFLNK:
+               inode->i_op = &xfs_symlink_inode_operations;
+               if (!(ip->i_df.if_flags & XFS_IFINLINE))
+                       inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       default:
+               inode->i_op = &xfs_inode_operations;
+               init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               break;
+       }
+
+       /*
+        * If there is no attribute fork no ACL can exist on this inode,
+        * and it can't have any file capabilities attached to it either.
+        */
+       if (!XFS_IFORK_Q(ip)) {
+               inode_has_no_xattr(inode);
+               cache_no_acl(inode);
+       }
+
+       xfs_iflags_clear(ip, XFS_INEW);
+       barrier();
+
+       unlock_new_inode(inode);
+}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
new file mode 100644 (file)
index 0000000..ef41c92
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+struct xfs_inode;
+
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+
+extern void xfs_setup_inode(struct xfs_inode *);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
new file mode 100644 (file)
index 0000000..1e8a45e
--- /dev/null
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+
+/*
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS        1
+# define XFS_BIG_INUMS 1
+#else
+# define XFS_BIG_BLKNOS        0
+# define XFS_BIG_INUMS 0
+#endif
+
+#include "xfs_types.h"
+
+#include "kmem.h"
+#include "mrlock.h"
+#include "time.h"
+#include "uuid.h"
+
+#include <linux/semaphore.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/ctype.h>
+#include <linux/writeback.h>
+#include <linux/capability.h>
+#include <linux/list_sort.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include "xfs_vnode.h"
+#include "xfs_stats.h"
+#include "xfs_sysctl.h"
+#include "xfs_iops.h"
+#include "xfs_aops.h"
+#include "xfs_super.h"
+#include "xfs_buf.h"
+#include "xfs_message.h"
+
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+/*
+ * Feature macros (disable/enable)
+ */
+#ifdef CONFIG_SMP
+#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#endif
+
+#define irix_sgid_inherit      xfs_params.sgid_inherit.val
+#define irix_symlink_mode      xfs_params.symlink_mode.val
+#define xfs_panic_mask         xfs_params.panic_mask.val
+#define xfs_error_level                xfs_params.error_level.val
+#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
+#define xfs_stats_clear                xfs_params.stats_clear.val
+#define xfs_inherit_sync       xfs_params.inherit_sync.val
+#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
+#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
+#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
+#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
+#define xfs_rotorstep          xfs_params.rotorstep.val
+#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
+
+#define current_cpu()          (raw_smp_processor_id())
+#define current_pid()          (current->pid)
+#define current_test_flags(f)  (current->flags & (f))
+#define current_set_flags_nested(sp, f)                \
+               (*(sp) = current->flags, current->flags |= (f))
+#define current_clear_flags_nested(sp, f)      \
+               (*(sp) = current->flags, current->flags &= ~(f))
+#define current_restore_flags_nested(sp, f)    \
+               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
+
+#define spinlock_destroy(lock)
+
+#define NBBY           8               /* number of bits per byte */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
+#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
+
+#define ENOATTR                ENODATA         /* Attribute not found */
+#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
+#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
+
+#define SYNCHRONIZE()  barrier()
+#define __return_address __builtin_return_address(0)
+
+#define XFS_PROJID_DEFAULT     0
+#define MAXPATHLEN     1024
+
+#define MIN(a,b)       (min(a,b))
+#define MAX(a,b)       (max(a,b))
+#define howmany(x, y)  (((x)+((y)-1))/(y))
+
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
+#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
+#define xfs_stack_trace()      dump_stack()
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       *(__u64 *)a = c;
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       mod = do_div(*(__u64 *)a, b);
+                       return mod;
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       __u64   c = *(__u64 *)a;
+                       return do_div(c, b);
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
+
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return(x * y);
+}
+
+static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return x;
+}
+
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
+#define ASSERT_ALWAYS(expr)    \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef DEBUG
+#define ASSERT(expr)   ((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#else /* DEBUG */
+
+#define ASSERT(expr)   \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef STATIC
+# define STATIC noinline
+#endif
+
+#endif /* DEBUG */
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
new file mode 100644 (file)
index 0000000..bd672de
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+
+/*
+ * XFS logging functions
+ */
+static void
+__xfs_printk(
+       const char              *level,
+       const struct xfs_mount  *mp,
+       struct va_format        *vaf)
+{
+       if (mp && mp->m_fsname) {
+               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+               return;
+       }
+       printk("%sXFS: %pV\n", level, vaf);
+}
+
+#define define_xfs_printk_level(func, kern_level)              \
+void func(const struct xfs_mount *mp, const char *fmt, ...)    \
+{                                                              \
+       struct va_format        vaf;                            \
+       va_list                 args;                           \
+                                                               \
+       va_start(args, fmt);                                    \
+                                                               \
+       vaf.fmt = fmt;                                          \
+       vaf.va = &args;                                         \
+                                                               \
+       __xfs_printk(kern_level, mp, &vaf);                     \
+       va_end(args);                                           \
+}                                                              \
+
+define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+define_xfs_printk_level(xfs_alert, KERN_ALERT);
+define_xfs_printk_level(xfs_crit, KERN_CRIT);
+define_xfs_printk_level(xfs_err, KERN_ERR);
+define_xfs_printk_level(xfs_warn, KERN_WARNING);
+define_xfs_printk_level(xfs_notice, KERN_NOTICE);
+define_xfs_printk_level(xfs_info, KERN_INFO);
+#ifdef DEBUG
+define_xfs_printk_level(xfs_debug, KERN_DEBUG);
+#endif
+
+void
+xfs_alert_tag(
+       const struct xfs_mount  *mp,
+       int                     panic_tag,
+       const char              *fmt, ...)
+{
+       struct va_format        vaf;
+       va_list                 args;
+       int                     do_panic = 0;
+
+       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+               xfs_alert(mp, "Transforming an alert into a BUG.");
+               do_panic = 1;
+       }
+
+       va_start(args, fmt);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       __xfs_printk(KERN_ALERT, mp, &vaf);
+       va_end(args);
+
+       BUG_ON(do_panic);
+}
+
+void
+assfail(char *expr, char *file, int line)
+{
+       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+               expr, file, line);
+       BUG();
+}
+
+void
+xfs_hex_dump(void *p, int length)
+{
+       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
new file mode 100644 (file)
index 0000000..7fb7ea0
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef __XFS_MESSAGE_H
+#define __XFS_MESSAGE_H 1
+
+struct xfs_mount;
+
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
+                        const char *fmt, ...)
+        __attribute__ ((format (printf, 3, 4)));
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+
+#ifdef DEBUG
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+#else
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
+#endif
+
+extern void assfail(char *expr, char *f, int l);
+
+extern void xfs_hex_dump(void *p, int length);
+
+#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
new file mode 100644 (file)
index 0000000..9a0aa76
--- /dev/null
@@ -0,0 +1,2416 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+struct mutex   xfs_Gqm_lock;
+struct xfs_qm  *xfs_Gqm;
+uint           ndquot;
+
+kmem_zone_t    *qm_dqzone;
+kmem_zone_t    *qm_dqtrxzone;
+
+STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
+
+STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
+
+static struct shrinker xfs_qm_shaker = {
+       .shrink = xfs_qm_shake,
+       .seeks = DEFAULT_SEEKS,
+};
+
+/*
+ * Initialize the XQM structure.
+ * Note that there is not one quota manager per file system.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+       xfs_dqhash_t    *udqhash, *gdqhash;
+       xfs_qm_t        *xqm;
+       size_t          hsize;
+       uint            i;
+
+       /*
+        * Initialize the dquot hash tables.
+        */
+       udqhash = kmem_zalloc_greedy(&hsize,
+                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
+       if (!udqhash)
+               goto out;
+
+       gdqhash = kmem_zalloc_large(hsize);
+       if (!gdqhash)
+               goto out_free_udqhash;
+
+       hsize /= sizeof(xfs_dqhash_t);
+       ndquot = hsize << 8;
+
+       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+       xqm->qm_dqhashmask = hsize - 1;
+       xqm->qm_usr_dqhtable = udqhash;
+       xqm->qm_grp_dqhtable = gdqhash;
+       ASSERT(xqm->qm_usr_dqhtable != NULL);
+       ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+       }
+
+       /*
+        * Freelist of all dquots of all file systems
+        */
+       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
+       xqm->qm_dqfrlist_cnt = 0;
+       mutex_init(&xqm->qm_dqfrlist_lock);
+
+       /*
+        * dquot zone. we register our own low-memory callback.
+        */
+       if (!qm_dqzone) {
+               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+                                               "xfs_dquots");
+               qm_dqzone = xqm->qm_dqzone;
+       } else
+               xqm->qm_dqzone = qm_dqzone;
+
+       register_shrinker(&xfs_qm_shaker);
+
+       /*
+        * The t_dqinfo portion of transactions.
+        */
+       if (!qm_dqtrxzone) {
+               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+                                                  "xfs_dqtrx");
+               qm_dqtrxzone = xqm->qm_dqtrxzone;
+       } else
+               xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+       atomic_set(&xqm->qm_totaldquots, 0);
+       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+       xqm->qm_nrefs = 0;
+       return xqm;
+
+ out_free_udqhash:
+       kmem_free_large(udqhash);
+ out:
+       return NULL;
+}
+
+/*
+ * Destroy the global quota manager when its reference count goes to zero.
+ */
+STATIC void
+xfs_qm_destroy(
+       struct xfs_qm   *xqm)
+{
+       struct xfs_dquot *dqp, *n;
+       int             hsize, i;
+
+       ASSERT(xqm != NULL);
+       ASSERT(xqm->qm_nrefs == 0);
+       unregister_shrinker(&xfs_qm_shaker);
+       hsize = xqm->qm_dqhashmask + 1;
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
+               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+       }
+       kmem_free_large(xqm->qm_usr_dqhtable);
+       kmem_free_large(xqm->qm_grp_dqhtable);
+       xqm->qm_usr_dqhtable = NULL;
+       xqm->qm_grp_dqhtable = NULL;
+       xqm->qm_dqhashmask = 0;
+
+       /* frlist cleanup */
+       mutex_lock(&xqm->qm_dqfrlist_lock);
+       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               xfs_dqunlock(dqp);
+               xfs_qm_dqdestroy(dqp);
+       }
+       mutex_unlock(&xqm->qm_dqfrlist_lock);
+       mutex_destroy(&xqm->qm_dqfrlist_lock);
+       kmem_free(xqm);
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas. This isn't crucial information as the individual mount
+ * structures are pretty independent, but it helps the XQM keep a
+ * global view of what's going on.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       /*
+        * Need to lock the xfs_Gqm structure for things like this. For example,
+        * the structure could disappear between the entry to this routine and
+        * a HOLD operation if not locked.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+
+       if (!xfs_Gqm) {
+               xfs_Gqm = xfs_Gqm_init();
+               if (!xfs_Gqm) {
+                       mutex_unlock(&xfs_Gqm_lock);
+                       return ENOMEM;
+               }
+       }
+
+       /*
+        * We can keep a list of all filesystems with quotas mounted for
+        * debugging and statistical purposes, but ...
+        * Just take a reference and get out.
+        */
+       xfs_Gqm->qm_nrefs++;
+       mutex_unlock(&xfs_Gqm_lock);
+
+       return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time,
+ * so that we know when we need to destroy the entire quota manager.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       xfs_dquot_t     *dqp, *n;
+
+       ASSERT(xfs_Gqm);
+       ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+       /*
+        * Go thru the freelist and destroy all inactive dquots.
+        */
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(dqp->q_mount == NULL);
+                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       xfs_dqunlock(dqp);
+                       xfs_qm_dqdestroy(dqp);
+               } else {
+                       xfs_dqunlock(dqp);
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       /*
+        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
+        * be restarted.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+       if (--xfs_Gqm->qm_nrefs == 0) {
+               xfs_qm_destroy(xfs_Gqm);
+               xfs_Gqm = NULL;
+       }
+       mutex_unlock(&xfs_Gqm_lock);
+}
+
+/*
+ * Just destroy the quotainfo structure.
+ */
+void
+xfs_qm_unmount(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_quotainfo) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               xfs_qm_destroy_quotainfo(mp);
+       }
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo.  This is also responsible for
+ * running a quotacheck as necessary.  We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to inidicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+       xfs_mount_t     *mp)
+{
+       int             error = 0;
+       uint            sbf;
+
+       /*
+        * If quotas on realtime volumes is not supported, we disable
+        * quotas immediately.
+        */
+       if (mp->m_sb.sb_rextents) {
+               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Allocate the quotainfo structure inside the mount struct, and
+        * create quotainode(s), and change/rev superblock if necessary.
+        */
+       error = xfs_qm_init_quotainfo(mp);
+       if (error) {
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo == NULL);
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+       /*
+        * If any of the quotas are not consistent, do a quotacheck.
+        */
+       if (XFS_QM_NEED_QUOTACHECK(mp)) {
+               error = xfs_qm_quotacheck(mp);
+               if (error) {
+                       /* Quotacheck failed and disabled quotas. */
+                       return;
+               }
+       }
+       /* 
+        * If one type of quotas is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       if (!XFS_IS_UQUOTA_ON(mp))
+               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
+               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
+
+ write_changes:
+       /*
+        * We actually don't have to acquire the m_sb_lock at all.
+        * This can only be called from mount, and that's single threaded. XXX
+        */
+       spin_lock(&mp->m_sb_lock);
+       sbf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+                       /*
+                        * We could only have been turning quotas off.
+                        * We aren't in very good shape actually because
+                        * the incore structures are convinced that quotas are
+                        * off, but the on disk superblock doesn't know that !
+                        */
+                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+                       xfs_alert(mp, "%s: Superblock update failed!",
+                               __func__);
+               }
+       }
+
+       if (error) {
+               xfs_warn(mp, "Failed to initialize disk quotas.");
+               return;
+       }
+}
+
+/*
+ * Called from the vfsops layer.
+ */
+void
+xfs_qm_unmount_quotas(
+       xfs_mount_t     *mp)
+{
+       /*
+        * Release the dquots that root inode, et al might be holding,
+        * before we flush quotas and blow away the quotainfo structure.
+        */
+       ASSERT(mp->m_rootip);
+       xfs_qm_dqdetach(mp->m_rootip);
+       if (mp->m_rbmip)
+               xfs_qm_dqdetach(mp->m_rbmip);
+       if (mp->m_rsumip)
+               xfs_qm_dqdetach(mp->m_rsumip);
+
+       /*
+        * Release the quota inodes.
+        */
+       if (mp->m_quotainfo) {
+               if (mp->m_quotainfo->qi_uquotaip) {
+                       IRELE(mp->m_quotainfo->qi_uquotaip);
+                       mp->m_quotainfo->qi_uquotaip = NULL;
+               }
+               if (mp->m_quotainfo->qi_gquotaip) {
+                       IRELE(mp->m_quotainfo->qi_gquotaip);
+                       mp->m_quotainfo->qi_gquotaip = NULL;
+               }
+       }
+}
+
+/*
+ * Flush all dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+STATIC int
+xfs_qm_dqflush_all(
+       struct xfs_mount        *mp,
+       int                     sync_mode)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!q)
+               return 0;
+again:
+       mutex_lock(&q->qi_dqlist_lock);
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if (! XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       /*
+                        * If we can't grab the flush lock then check
+                        * to see if the dquot has been flushed delayed
+                        * write.  If so, grab its buffer and send it
+                        * out immediately.  We'll be able to acquire
+                        * the flush lock when the I/O completes.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write.
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, sync_mode);
+               xfs_dqunlock(dqp);
+               if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       /* XXX restart limit */
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       /* return ! busy */
+       return 0;
+}
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+       struct xfs_mount        *mp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *gdqp;
+       int                     nrecl;
+
+ again:
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if ((gdqp = dqp->q_gdquot)) {
+                       xfs_dqlock(gdqp);
+                       dqp->q_gdquot = NULL;
+               }
+               xfs_dqunlock(dqp);
+
+               if (gdqp) {
+                       /*
+                        * Can't hold the mplist lock across a dqput.
+                        * XXXmust convert to marker based iterations here.
+                        */
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       xfs_qm_dqput(gdqp);
+
+                       mutex_lock(&q->qi_dqlist_lock);
+                       if (nrecl != q->qi_dqreclaims)
+                               goto again;
+               }
+       }
+}
+
+/*
+ * Go through all the incore dquots of this file system and take them
+ * off the mplist and hashlist, if the dquot type matches the dqtype
+ * parameter. This is used when turning off quota accounting for
+ * users and/or groups, as well as when the filesystem is unmounting.
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+       struct xfs_mount        *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *n;
+       uint                    dqtype;
+       int                     nrecl;
+       int                     nmisses;
+
+       if (!q)
+               return 0;
+
+       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
+       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+       mutex_lock(&q->qi_dqlist_lock);
+
+       /*
+        * In the first pass through all incore dquots of this filesystem,
+        * we release the group dquot pointers the user dquots may be
+        * carrying around as a hint. We need to do this irrespective of
+        * what's being turned off.
+        */
+       xfs_qm_detach_gdquots(mp);
+
+      again:
+       nmisses = 0;
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       /*
+        * Try to get rid of all of the unwanted dquots. The idea is to
+        * get them off mplist and hashlist, but leave them on freelist.
+        */
+       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
+               /*
+                * It's OK to look at the type without taking dqlock here.
+                * We're holding the mplist lock here, and that's needed for
+                * a dqreclaim.
+                */
+               if ((dqp->dq_flags & dqtype) == 0)
+                       continue;
+
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       mutex_lock(&dqp->q_hash->qh_lock);
+                       mutex_lock(&q->qi_dqlist_lock);
+
+                       /*
+                        * XXXTheoretically, we can get into a very long
+                        * ping pong game here.
+                        * No one can be adding dquots to the mplist at
+                        * this point, but somebody might be taking things off.
+                        */
+                       if (nrecl != q->qi_dqreclaims) {
+                               mutex_unlock(&dqp->q_hash->qh_lock);
+                               goto again;
+                       }
+               }
+
+               /*
+                * Take the dquot off the mplist and hashlist. It may remain on
+                * freelist in INACTIVE state.
+                */
+               nmisses += xfs_qm_dqpurge(dqp);
+       }
+       mutex_unlock(&q->qi_dqlist_lock);
+       return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             ndquots;
+
+       /*
+        * Purge the dquot cache.
+        * None of the dquots should really be busy at this point.
+        */
+       if (mp->m_quotainfo) {
+               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
+                       delay(ndquots * 10);
+               }
+       }
+       return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+       xfs_inode_t     *ip,
+       xfs_dqid_t      id,
+       uint            type,
+       uint            doalloc,
+       xfs_dquot_t     *udqhint, /* hint */
+       xfs_dquot_t     **IO_idqpp)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       error = 0;
+
+       /*
+        * See if we already have it in the inode itself. IO_idqpp is
+        * &i_udquot or &i_gdquot. This made the code look weird, but
+        * made the logic a lot simpler.
+        */
+       dqp = *IO_idqpp;
+       if (dqp) {
+               trace_xfs_dqattach_found(dqp);
+               return 0;
+       }
+
+       /*
+        * udqhint is the i_udquot field in inode, and is non-NULL only
+        * when the type arg is group/project. Its purpose is to save a
+        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+        * the user dquot.
+        */
+       if (udqhint) {
+               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
+               xfs_dqlock(udqhint);
+
+               /*
+                * No need to take dqlock to look at the id.
+                *
+                * The ID can't change until it gets reclaimed, and it won't
+                * be reclaimed as long as we have a ref from inode and we
+                * hold the ilock.
+                */
+               dqp = udqhint->q_gdquot;
+               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
+                       xfs_dqlock(dqp);
+                       XFS_DQHOLD(dqp);
+                       ASSERT(*IO_idqpp == NULL);
+                       *IO_idqpp = dqp;
+
+                       xfs_dqunlock(dqp);
+                       xfs_dqunlock(udqhint);
+                       return 0;
+               }
+
+               /*
+                * We can't hold a dquot lock when we call the dqget code.
+                * We'll deadlock in no time, because of (not conforming to)
+                * lock ordering - the inodelock comes before any dquot lock,
+                * and we may drop and reacquire the ilock in xfs_qm_dqget().
+                */
+               xfs_dqunlock(udqhint);
+       }
+
+       /*
+        * Find the dquot from somewhere. This bumps the
+        * reference count of dquot and returns it locked.
+        * This can return ENOENT if dquot didn't exist on
+        * disk and we didn't ask it to allocate;
+        * ESRCH if quotas got turned off suddenly.
+        */
+       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+       if (error)
+               return error;
+
+       trace_xfs_dqattach_get(dqp);
+
+       /*
+        * dqget may have dropped and re-acquired the ilock, but it guarantees
+        * that the dquot returned is the one that should go in the inode.
+        */
+       *IO_idqpp = dqp;
+       xfs_dqunlock(dqp);
+       return 0;
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering constraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+       xfs_dquot_t     *udq,
+       xfs_dquot_t     *gdq)
+{
+       xfs_dquot_t     *tmp;
+
+       xfs_dqlock(udq);
+
+       if ((tmp = udq->q_gdquot)) {
+               if (tmp == gdq) {
+                       xfs_dqunlock(udq);
+                       return;
+               }
+
+               udq->q_gdquot = NULL;
+               /*
+                * We can't keep any dqlocks when calling dqrele,
+                * because the freelist lock comes before dqlocks.
+                */
+               xfs_dqunlock(udq);
+               /*
+                * we took a hard reference once upon a time in dqget,
+                * so give it back when the udquot no longer points at it
+                * dqput() does the unlocking of the dquot.
+                */
+               xfs_qm_dqrele(tmp);
+
+               xfs_dqlock(udq);
+               xfs_dqlock(gdq);
+
+       } else {
+               ASSERT(XFS_DQ_IS_LOCKED(udq));
+               xfs_dqlock(gdq);
+       }
+
+       ASSERT(XFS_DQ_IS_LOCKED(udq));
+       ASSERT(XFS_DQ_IS_LOCKED(gdq));
+       /*
+        * Somebody could have attached a gdquot here,
+        * when we dropped the uqlock. If so, just do nothing.
+        */
+       if (udq->q_gdquot == NULL) {
+               XFS_DQHOLD(gdq);
+               udq->q_gdquot = gdq;
+       }
+
+       xfs_dqunlock(gdq);
+       xfs_dqunlock(udq);
+}
+
+
+/*
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * Inode may get unlocked and relocked in here, and the caller must deal with
+ * the consequences.
+ */
+int
+xfs_qm_dqattach_locked(
+       xfs_inode_t     *ip,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            nquotas = 0;
+       int             error = 0;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           !XFS_NOT_DQATTACHED(mp, ip) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return 0;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               NULL, &ip->i_udquot);
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       if (XFS_IS_OQUOTA_ON(mp)) {
+               error = XFS_IS_GQUOTA_ON(mp) ?
+                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot) :
+                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot);
+               /*
+                * Don't worry about the udquot that we may have
+                * attached above. It'll get detached, if not already.
+                */
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       /*
+        * Attach this group quota to the user quota as a hint.
+        * This WON'T, in general, result in a thrash.
+        */
+       if (nquotas == 2) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               ASSERT(ip->i_udquot);
+               ASSERT(ip->i_gdquot);
+
+               /*
+                * We may or may not have the i_udquot locked at this point,
+                * but this check is OK since we don't depend on the i_gdquot to
+                * be accurate 100% all the time. It is just a hint, and this
+                * will succeed in general.
+                */
+               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+                       goto done;
+               /*
+                * Attach i_gdquot to the gdquot hint inside the i_udquot.
+                */
+               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+       }
+
+ done:
+#ifdef DEBUG
+       if (!error) {
+               if (XFS_IS_UQUOTA_ON(mp))
+                       ASSERT(ip->i_udquot);
+               if (XFS_IS_OQUOTA_ON(mp))
+                       ASSERT(ip->i_gdquot);
+       }
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+#endif
+       return error;
+}
+
+int
+xfs_qm_dqattach(
+       struct xfs_inode        *ip,
+       uint                    flags)
+{
+       int                     error;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       error = xfs_qm_dqattach_locked(ip, flags);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
+/*
+ * Release dquots (and their references) if any.
+ * The inode should be locked EXCL except when this's called by
+ * xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+       xfs_inode_t     *ip)
+{
+       if (!(ip->i_udquot || ip->i_gdquot))
+               return;
+
+       trace_xfs_dquot_dqdetach(ip);
+
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+       if (ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+}
+
+int
+xfs_qm_sync(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl, restarts;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       restarts = 0;
+
+  again:
+       mutex_lock(&q->qi_dqlist_lock);
+       /*
+        * dqpurge_all() also takes the mplist lock and iterate thru all dquots
+        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+        * when we have the mplist lock, we know that dquots will be consistent
+        * as long as we have it locked.
+        */
+       if (!XFS_IS_QUOTA_ON(mp)) {
+               mutex_unlock(&q->qi_dqlist_lock);
+               return 0;
+       }
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               /*
+                * If this is vfs_sync calling, then skip the dquots that
+                * don't 'seem' to be dirty. ie. don't acquire dqlock.
+                * This is very similar to what xfs_sync does with inodes.
+                */
+               if (flags & SYNC_TRYLOCK) {
+                       if (!XFS_DQ_IS_DIRTY(dqp))
+                               continue;
+                       if (!xfs_qm_dqlock_nowait(dqp))
+                               continue;
+               } else {
+                       xfs_dqlock(dqp);
+               }
+
+               /*
+                * Now, find out for sure if this dquot is dirty or not.
+                */
+               if (! XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       if (flags & SYNC_TRYLOCK) {
+                               xfs_dqunlock(dqp);
+                               continue;
+                       }
+                       /*
+                        * If we can't grab the flush lock then if the caller
+                        * really wanted us to give this our best shot, so
+                        * see if we can give a push to the buffer before we wait
+                        * on the flush lock. At this point, we know that
+                        * even though the dquot is being flushed,
+                        * it has (new) dirty data.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, flags);
+               xfs_dqunlock(dqp);
+               if (error && XFS_FORCED_SHUTDOWN(mp))
+                       return 0;       /* Need to prevent umount failure */
+               else if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+                               break;
+
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       return 0;
+}
+
+/*
+ * The hash chains and the mplist use the same xfs_dqhash structure as
+ * their list head, but we can take the mplist qh_lock and one of the
+ * hash qh_locks at the same time without any problem as they aren't
+ * related.
+ */
+static struct lock_class_key xfs_quota_mplist_class;
+
+/*
+ * This initializes all the quota information that's kept in the
+ * mount structure
+ */
+STATIC int
+xfs_qm_init_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qinf;
+       int             error;
+       xfs_dquot_t     *dqp;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Tell XQM that we exist as soon as possible.
+        */
+       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+               return error;
+       }
+
+       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+       /*
+        * See if quotainodes are setup, and if not, allocate them,
+        * and change the superblock accordingly.
+        */
+       if ((error = xfs_qm_init_quotainos(mp))) {
+               kmem_free(qinf);
+               mp->m_quotainfo = NULL;
+               return error;
+       }
+
+       INIT_LIST_HEAD(&qinf->qi_dqlist);
+       mutex_init(&qinf->qi_dqlist_lock);
+       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
+
+       qinf->qi_dqreclaims = 0;
+
+       /* mutex used to serialize quotaoffs */
+       mutex_init(&qinf->qi_quotaofflock);
+
+       /* Precalc some constants */
+       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(qinf->qi_dqchunklen);
+       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
+
+       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+       /*
+        * We try to get the limits from the superuser's limits fields.
+        * This is quite hacky, but it is standard quota practice.
+        * We look at the USR dquot with id == 0 first, but if user quotas
+        * are not enabled we goto the GRP dquot with id == 0.
+        * We don't really care to keep separate default limits for user
+        * and group quotas, at least not at this point.
+        */
+       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+                               XFS_DQ_PROJ),
+                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+                            &dqp);
+       if (! error) {
+               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+
+               /*
+                * The warnings and timers set the grace period given to
+                * a user or group before he or she can not perform any
+                * more writing. If it is zero, a default is used.
+                */
+               qinf->qi_btimelimit = ddqp->d_btimer ?
+                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = ddqp->d_itimer ?
+                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
+                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
+                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
+                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
+                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
+               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+               /*
+                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+                * we don't want this dquot cached. We haven't done a
+                * quotacheck yet, and quotacheck doesn't like incore dquots.
+                */
+               xfs_qm_dqdestroy(dqp);
+       } else {
+               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+       }
+
+       return 0;
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off.
+ * This purges the quota inodes, destroys locks and frees itself.
+ */
+void
+xfs_qm_destroy_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qi;
+
+       qi = mp->m_quotainfo;
+       ASSERT(qi != NULL);
+       ASSERT(xfs_Gqm != NULL);
+
+       /*
+        * Release the reference that XQM kept, so that we know
+        * when the XQM structure should be freed. We cannot assume
+        * that xfs_Gqm is non-null after this point.
+        */
+       xfs_qm_rele_quotafs_ref(mp);
+
+       ASSERT(list_empty(&qi->qi_dqlist));
+       mutex_destroy(&qi->qi_dqlist_lock);
+
+       if (qi->qi_uquotaip) {
+               IRELE(qi->qi_uquotaip);
+               qi->qi_uquotaip = NULL; /* paranoia */
+       }
+       if (qi->qi_gquotaip) {
+               IRELE(qi->qi_gquotaip);
+               qi->qi_gquotaip = NULL;
+       }
+       mutex_destroy(&qi->qi_quotaofflock);
+       kmem_free(qi);
+       mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+       xfs_dqlist_t    *list,
+       char            *str,
+       int             n)
+{
+       mutex_init(&list->qh_lock);
+       INIT_LIST_HEAD(&list->qh_list);
+       list->qh_version = 0;
+       list->qh_nelems = 0;
+}
+
+STATIC void
+xfs_qm_list_destroy(
+       xfs_dqlist_t    *list)
+{
+       mutex_destroy(&(list->qh_lock));
+}
+
+/*
+ * Create an inode and return with a reference already taken, but unlocked
+ * This is how we create quota inodes
+ */
+STATIC int
+xfs_qm_qino_alloc(
+       xfs_mount_t     *mp,
+       xfs_inode_t     **ip,
+       __int64_t       sbfields,
+       uint            flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+       int             committed;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
+       if ((error = xfs_trans_reserve(tp,
+                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
+                                     XFS_CREATE_LOG_RES(mp), 0,
+                                     XFS_TRANS_PERM_LOG_RES,
+                                     XFS_CREATE_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                XFS_TRANS_ABORT);
+               return error;
+       }
+
+       /*
+        * Make the changes in the superblock, and log those too.
+        * sbfields arg may contain fields other than *QUOTINO;
+        * VERSIONNUM for example.
+        */
+       spin_lock(&mp->m_sb_lock);
+       if (flags & XFS_QMOPT_SBVERSION) {
+               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
+               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+               xfs_sb_version_addquota(&mp->m_sb);
+               mp->m_sb.sb_uquotino = NULLFSINO;
+               mp->m_sb.sb_gquotino = NULLFSINO;
+
+               /* qflags will get updated _after_ quotacheck */
+               mp->m_sb.sb_qflags = 0;
+       }
+       if (flags & XFS_QMOPT_UQUOTA)
+               mp->m_sb.sb_uquotino = (*ip)->i_ino;
+       else
+               mp->m_sb.sb_gquotino = (*ip)->i_ino;
+       spin_unlock(&mp->m_sb_lock);
+       xfs_mod_sb(tp, sbfields);
+
+       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
+               return error;
+       }
+       return 0;
+}
+
+
+STATIC void
+xfs_qm_reset_dqcounts(
+       xfs_mount_t     *mp,
+       xfs_buf_t       *bp,
+       xfs_dqid_t      id,
+       uint            type)
+{
+       xfs_disk_dquot_t        *ddq;
+       int                     j;
+
+       trace_xfs_reset_dqcounts(bp, _RET_IP_);
+
+       /*
+        * Reset all counters and timers. They'll be
+        * started afresh by xfs_qm_quotacheck.
+        */
+#ifdef DEBUG
+       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       do_div(j, sizeof(xfs_dqblk_t));
+       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
+#endif
+       ddq = bp->b_addr;
+       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+               /*
+                * Do a sanity check, and if needed, repair the dqblk. Don't
+                * output any warnings because it's perfectly possible to
+                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
+                */
+               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+                                     "xfs_quotacheck");
+               ddq->d_bcount = 0;
+               ddq->d_icount = 0;
+               ddq->d_rtbcount = 0;
+               ddq->d_btimer = 0;
+               ddq->d_itimer = 0;
+               ddq->d_rtbtimer = 0;
+               ddq->d_bwarns = 0;
+               ddq->d_iwarns = 0;
+               ddq->d_rtbwarns = 0;
+               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+       }
+}
+
+STATIC int
+xfs_qm_dqiter_bufs(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      firstid,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   blkcnt,
+       uint            flags)
+{
+       xfs_buf_t       *bp;
+       int             error;
+       int             type;
+
+       ASSERT(blkcnt > 0);
+       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
+       error = 0;
+
+       /*
+        * Blkcnt arg can be a very big number, and might even be
+        * larger than the log itself. So, we have to break it up into
+        * manageable-sized transactions.
+        * Note that we don't start a permanent transaction here; we might
+        * not be able to get a log reservation for the whole thing up front,
+        * and we don't really care to either, because we just discard
+        * everything if we were to crash in the middle of this loop.
+        */
+       while (blkcnt--) {
+               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                             XFS_FSB_TO_DADDR(mp, bno),
+                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+               if (error)
+                       break;
+
+               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
+               xfs_bdwrite(mp, bp);
+               /*
+                * goto the next block.
+                */
+               bno++;
+               firstid += mp->m_quotainfo->qi_dqperchunk;
+       }
+       return error;
+}
+
+/*
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
+ * caller supplied function for every chunk of dquots that we find.
+ */
+STATIC int
+xfs_qm_dqiterate(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *qip,
+       uint            flags)
+{
+       xfs_bmbt_irec_t         *map;
+       int                     i, nmaps;       /* number of map entries */
+       int                     error;          /* return value */
+       xfs_fileoff_t           lblkno;
+       xfs_filblks_t           maxlblkcnt;
+       xfs_dqid_t              firstid;
+       xfs_fsblock_t           rablkno;
+       xfs_filblks_t           rablkcnt;
+
+       error = 0;
+       /*
+        * This looks racy, but we can't keep an inode lock across a
+        * trans_reserve. But, this gets called during quotacheck, and that
+        * happens only at mount time which is single threaded.
+        */
+       if (qip->i_d.di_nblocks == 0)
+               return 0;
+
+       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+       lblkno = 0;
+       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+       do {
+               nmaps = XFS_DQITER_MAP_SIZE;
+               /*
+                * We aren't changing the inode itself. Just changing
+                * some of its data. No new blocks are added here, and
+                * the inode is never added to the transaction.
+                */
+               xfs_ilock(qip, XFS_ILOCK_SHARED);
+               error = xfs_bmapi(NULL, qip, lblkno,
+                                 maxlblkcnt - lblkno,
+                                 XFS_BMAPI_METADATA,
+                                 NULL,
+                                 0, map, &nmaps, NULL);
+               xfs_iunlock(qip, XFS_ILOCK_SHARED);
+               if (error)
+                       break;
+
+               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+               for (i = 0; i < nmaps; i++) {
+                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+                       ASSERT(map[i].br_blockcount);
+
+
+                       lblkno += map[i].br_blockcount;
+
+                       if (map[i].br_startblock == HOLESTARTBLOCK)
+                               continue;
+
+                       firstid = (xfs_dqid_t) map[i].br_startoff *
+                               mp->m_quotainfo->qi_dqperchunk;
+                       /*
+                        * Do a read-ahead on the next extent.
+                        */
+                       if ((i+1 < nmaps) &&
+                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+                               rablkcnt =  map[i+1].br_blockcount;
+                               rablkno = map[i+1].br_startblock;
+                               while (rablkcnt--) {
+                                       xfs_buf_readahead(mp->m_ddev_targp,
+                                              XFS_FSB_TO_DADDR(mp, rablkno),
+                                              mp->m_quotainfo->qi_dqchunklen);
+                                       rablkno++;
+                               }
+                       }
+                       /*
+                        * Iterate thru all the blks in the extent and
+                        * reset the counters of all the dquots inside them.
+                        */
+                       if ((error = xfs_qm_dqiter_bufs(mp,
+                                                      firstid,
+                                                      map[i].br_startblock,
+                                                      map[i].br_blockcount,
+                                                      flags))) {
+                               break;
+                       }
+               }
+
+               if (error)
+                       break;
+       } while (nmaps > 0);
+
+       kmem_free(map);
+
+       return error;
+}
+
+/*
+ * Called by dqusage_adjust in doing a quotacheck.
+ *
+ * Given the inode, and a dquot id this updates both the incore dqout as well
+ * as the buffer copy. This is so that once the quotacheck is done, we can
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
+ */
+STATIC int
+xfs_qm_quotacheck_dqadjust(
+       struct xfs_inode        *ip,
+       xfs_dqid_t              id,
+       uint                    type,
+       xfs_qcnt_t              nblks,
+       xfs_qcnt_t              rtblks)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       error = xfs_qm_dqget(mp, ip, id, type,
+                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+       if (error) {
+               /*
+                * Shouldn't be able to turn off quotas here.
+                */
+               ASSERT(error != ESRCH);
+               ASSERT(error != ENOENT);
+               return error;
+       }
+
+       trace_xfs_dqadjust(dqp);
+
+       /*
+        * Adjust the inode count and the block count to reflect this inode's
+        * resource usage.
+        */
+       be64_add_cpu(&dqp->q_core.d_icount, 1);
+       dqp->q_res_icount++;
+       if (nblks) {
+               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
+               dqp->q_res_bcount += nblks;
+       }
+       if (rtblks) {
+               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
+               dqp->q_res_rtbcount += rtblks;
+       }
+
+       /*
+        * Set default limits, adjust timers (since we changed usages)
+        *
+        * There are no timers for the default values set in the root dquot.
+        */
+       if (dqp->q_core.d_id) {
+               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
+               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+       }
+
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_qm_dqput(dqp);
+       return 0;
+}
+
+STATIC int
+xfs_qm_get_rtblks(
+       xfs_inode_t     *ip,
+       xfs_qcnt_t      *O_rtblks)
+{
+       xfs_filblks_t   rtblks;                 /* total rt blks */
+       xfs_extnum_t    idx;                    /* extent record index */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+       int             error;
+
+       ASSERT(XFS_IS_REALTIME_INODE(ip));
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+                       return error;
+       }
+       rtblks = 0;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       for (idx = 0; idx < nextents; idx++)
+               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+       *O_rtblks = (xfs_qcnt_t)rtblks;
+       return 0;
+}
+
+/*
+ * callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* not used */
+       int             ubsize,         /* not used */
+       int             *ubused,        /* not used */
+       int             *res)           /* result code value */
+{
+       xfs_inode_t     *ip;
+       xfs_qcnt_t      nblks, rtblks = 0;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * rootino must have its resources accounted for, not so with the quota
+        * inodes.
+        */
+       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+               *res = BULKSTAT_RV_NOTHING;
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+        * interface expects the inode to be exclusively locked because that's
+        * the case in all other instances. It's OK that we do this because
+        * quotacheck is done only at mount time.
+        */
+       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+       if (error) {
+               *res = BULKSTAT_RV_NOTHING;
+               return error;
+       }
+
+       ASSERT(ip->i_delayed_blks == 0);
+
+       if (XFS_IS_REALTIME_INODE(ip)) {
+               /*
+                * Walk thru the extent list and count the realtime blocks.
+                */
+               error = xfs_qm_get_rtblks(ip, &rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+
+       /*
+        * Add the (disk blocks and inode) resources occupied by this
+        * inode to its dquots. We do this adjustment in the incore dquot,
+        * and also copy the changes to its buffer.
+        * We don't care about putting these changes in a transaction
+        * envelope because if we crash in the middle of a 'quotacheck'
+        * we have to start from the beginning anyway.
+        * Once we're done, we'll log all the dquot bufs.
+        *
+        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
+        * and quotaoffs don't race. (Quotachecks happen at mount time only).
+        */
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+                                                  XFS_DQ_USER, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_GQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+                                                  XFS_DQ_GROUP, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_PQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
+                                                  XFS_DQ_PROJ, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_DIDONE;
+       return 0;
+
+error0:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_GIVEUP;
+       return error;
+}
+
+/*
+ * Walk thru all the filesystem inodes and construct a consistent view
+ * of the disk quota world. If the quotacheck fails, disable quotas.
+ */
+int
+xfs_qm_quotacheck(
+       xfs_mount_t     *mp)
+{
+       int             done, count, error;
+       xfs_ino_t       lastino;
+       size_t          structsz;
+       xfs_inode_t     *uip, *gip;
+       uint            flags;
+
+       count = INT_MAX;
+       structsz = 1;
+       lastino = 0;
+       flags = 0;
+
+       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * There should be no cached dquots. The (simplistic) quotacheck
+        * algorithm doesn't like that.
+        */
+       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
+
+       xfs_notice(mp, "Quotacheck needed: Please wait.");
+
+       /*
+        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
+        * their counters to zero. We need a clean slate.
+        * We don't log our changes till later.
+        */
+       uip = mp->m_quotainfo->qi_uquotaip;
+       if (uip) {
+               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_UQUOTA_CHKD;
+       }
+
+       gip = mp->m_quotainfo->qi_gquotaip;
+       if (gip) {
+               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_OQUOTA_CHKD;
+       }
+
+       do {
+               /*
+                * Iterate thru all the inodes in the file system,
+                * adjusting the corresponding dquot counters in core.
+                */
+               error = xfs_bulkstat(mp, &lastino, &count,
+                                    xfs_qm_dqusage_adjust,
+                                    structsz, NULL, &done);
+               if (error)
+                       break;
+
+       } while (!done);
+
+       /*
+        * We've made all the changes that we need to make incore.
+        * Flush them down to disk buffers if everything was updated
+        * successfully.
+        */
+       if (!error)
+               error = xfs_qm_dqflush_all(mp, 0);
+
+       /*
+        * We can get this error if we couldn't do a dquot allocation inside
+        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+        * dirty dquots that might be cached, we just want to get rid of them
+        * and turn quotaoff. The dquots won't be attached to any of the inodes
+        * at this point (because we intentionally didn't in dqget_noattach).
+        */
+       if (error) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               goto error_return;
+       }
+
+       /*
+        * We didn't log anything, because if we crashed, we'll have to
+        * start the quotacheck from scratch anyway. However, we must make
+        * sure that our dquot changes are secure before we put the
+        * quotacheck'd stamp on the superblock. So, here we do a synchronous
+        * flush.
+        */
+       XFS_bflush(mp->m_ddev_targp);
+
+       /*
+        * If one type of quotas is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
+       mp->m_qflags |= flags;
+
+ error_return:
+       if (error) {
+               xfs_warn(mp,
+       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
+                       error);
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo != NULL);
+               ASSERT(xfs_Gqm != NULL);
+               xfs_qm_destroy_quotainfo(mp);
+               if (xfs_mount_reset_sbqflags(mp)) {
+                       xfs_warn(mp,
+                               "Quotacheck: Failed to reset quota flags.");
+               }
+       } else
+               xfs_notice(mp, "Quotacheck: Done.");
+       return (error);
+}
+
+/*
+ * This is called after the superblock has been read in and we're ready to
+ * iget the quota inodes.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+       xfs_mount_t     *mp)
+{
+       xfs_inode_t     *uip, *gip;
+       int             error;
+       __int64_t       sbflags;
+       uint            flags;
+
+       ASSERT(mp->m_quotainfo);
+       uip = gip = NULL;
+       sbflags = 0;
+       flags = 0;
+
+       /*
+        * Get the uquota and gquota inodes
+        */
+       if (xfs_sb_version_hasquota(&mp->m_sb)) {
+               if (XFS_IS_UQUOTA_ON(mp) &&
+                   mp->m_sb.sb_uquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_uquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                            0, 0, &uip)))
+                               return XFS_ERROR(error);
+               }
+               if (XFS_IS_OQUOTA_ON(mp) &&
+                   mp->m_sb.sb_gquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_gquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                            0, 0, &gip))) {
+                               if (uip)
+                                       IRELE(uip);
+                               return XFS_ERROR(error);
+                       }
+               }
+       } else {
+               flags |= XFS_QMOPT_SBVERSION;
+               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+       }
+
+       /*
+        * Create the two inodes, if they don't exist already. The changes
+        * made above will get added to a transaction and logged in one of
+        * the qino_alloc calls below.  If the device is readonly,
+        * temporarily switch to read-write to do this.
+        */
+       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+               if ((error = xfs_qm_qino_alloc(mp, &uip,
+                                             sbflags | XFS_SB_UQUOTINO,
+                                             flags | XFS_QMOPT_UQUOTA)))
+                       return XFS_ERROR(error);
+
+               flags &= ~XFS_QMOPT_SBVERSION;
+       }
+       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+               flags |= (XFS_IS_GQUOTA_ON(mp) ?
+                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               error = xfs_qm_qino_alloc(mp, &gip,
+                                         sbflags | XFS_SB_GQUOTINO, flags);
+               if (error) {
+                       if (uip)
+                               IRELE(uip);
+
+                       return XFS_ERROR(error);
+               }
+       }
+
+       mp->m_quotainfo->qi_uquotaip = uip;
+       mp->m_quotainfo->qi_gquotaip = gip;
+
+       return 0;
+}
+
+
+
+/*
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+       xfs_dquot_t     *dqpout;
+       xfs_dquot_t     *dqp;
+       int             restarts;
+       int             startagain;
+
+       restarts = 0;
+       dqpout = NULL;
+
+       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+again:
+       startagain = 0;
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               struct xfs_mount *mp = dqp->q_mount;
+               xfs_dqlock(dqp);
+
+               /*
+                * We are racing with dqlookup here. Naturally we don't
+                * want to reclaim a dquot that lookup wants. We release the
+                * freelist lock and start over, so that lookup will grab
+                * both the dquot and the freelistlock.
+                */
+               if (dqp->dq_flags & XFS_DQ_WANT) {
+                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+
+                       trace_xfs_dqreclaim_want(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+                       restarts++;
+                       startagain = 1;
+                       goto dqunlock;
+               }
+
+               /*
+                * If the dquot is inactive, we are assured that it is
+                * not on the mplist or the hashlist, and that makes our
+                * life easier.
+                */
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(mp == NULL);
+                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       dqpout = dqp;
+                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+                       goto dqunlock;
+               }
+
+               ASSERT(dqp->q_hash);
+               ASSERT(!list_empty(&dqp->q_mplist));
+
+               /*
+                * Try to grab the flush lock. If this dquot is in the process
+                * of getting flushed to disk, we don't want to reclaim it.
+                */
+               if (!xfs_dqflock_nowait(dqp))
+                       goto dqunlock;
+
+               /*
+                * We have the flush lock so we know that this is not in the
+                * process of being flushed. So, if this is dirty, flush it
+                * DELWRI so that we don't get a freelist infested with
+                * dirty dquots.
+                */
+               if (XFS_DQ_IS_DIRTY(dqp)) {
+                       int     error;
+
+                       trace_xfs_dqreclaim_dirty(dqp);
+
+                       /*
+                        * We flush it delayed write, so don't bother
+                        * releasing the freelist lock.
+                        */
+                       error = xfs_qm_dqflush(dqp, 0);
+                       if (error) {
+                               xfs_warn(mp, "%s: dquot %p flush failed",
+                                       __func__, dqp);
+                       }
+                       goto dqunlock;
+               }
+
+               /*
+                * We're trying to get the hashlock out of order. This races
+                * with dqlookup; so, we giveup and goto the next dquot if
+                * we couldn't get the hashlock. This way, we won't starve
+                * a dqlookup process that holds the hashlock that is
+                * waiting for the freelist lock.
+                */
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       restarts++;
+                       goto dqfunlock;
+               }
+
+               /*
+                * This races with dquot allocation code as well as dqflush_all
+                * and reclaim code. So, if we failed to grab the mplist lock,
+                * giveup everything and start over.
+                */
+               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
+                       restarts++;
+                       startagain = 1;
+                       goto qhunlock;
+               }
+
+               ASSERT(dqp->q_nrefs == 0);
+               list_del_init(&dqp->q_mplist);
+               mp->m_quotainfo->qi_dquots--;
+               mp->m_quotainfo->qi_dqreclaims++;
+               list_del_init(&dqp->q_hashlist);
+               dqp->q_hash->qh_version++;
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               dqpout = dqp;
+               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+qhunlock:
+               mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
+               xfs_dqfunlock(dqp);
+dqunlock:
+               xfs_dqunlock(dqp);
+               if (dqpout)
+                       break;
+               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+                       break;
+               if (startagain) {
+                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       goto again;
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       return dqpout;
+}
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function ...
+ */
+STATIC int
+xfs_qm_shake_freelist(
+       int     howmany)
+{
+       int             nreclaimed = 0;
+       xfs_dquot_t     *dqp;
+
+       if (howmany <= 0)
+               return 0;
+
+       while (nreclaimed < howmany) {
+               dqp = xfs_qm_dqreclaim_one();
+               if (!dqp)
+                       return nreclaimed;
+               xfs_qm_dqdestroy(dqp);
+               nreclaimed++;
+       }
+       return nreclaimed;
+}
+
+/*
+ * The kmem_shake interface is invoked when memory is running low.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_shake(
+       struct shrinker *shrink,
+       struct shrink_control *sc)
+{
+       int     ndqused, nfree, n;
+       gfp_t gfp_mask = sc->gfp_mask;
+
+       if (!kmem_shake_allow(gfp_mask))
+               return 0;
+       if (!xfs_Gqm)
+               return 0;
+
+       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
+       /* incore dquots in all f/s's */
+       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+       ASSERT(ndqused >= 0);
+
+       if (nfree <= ndqused && nfree < ndquot)
+               return 0;
+
+       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
+       n = nfree - ndqused - ndquot;           /* # over target */
+
+       return xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Return a new incore dquot. Depending on the number of
+ * dquots in the system, we either allocate a new one on the kernel heap,
+ * or reclaim a free one.
+ * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
+ * to reclaim an existing one from the freelist.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+       xfs_dquot_t **O_dqpp)
+{
+       xfs_dquot_t     *dqp;
+
+       /*
+        * Check against high water mark to see if we want to pop
+        * a nincompoop dquot off the freelist.
+        */
+       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+               /*
+                * Try to recycle a dquot from the freelist.
+                */
+               if ((dqp = xfs_qm_dqreclaim_one())) {
+                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+                       /*
+                        * Just zero the core here. The rest will get
+                        * reinitialized by caller. XXX we shouldn't even
+                        * do this zero ...
+                        */
+                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+                       *O_dqpp = dqp;
+                       return B_FALSE;
+               }
+               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+       }
+
+       /*
+        * Allocate a brand new dquot on the kernel heap and return it
+        * to the caller to initialize.
+        */
+       ASSERT(xfs_Gqm->qm_dqzone != NULL);
+       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+       atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+       return B_TRUE;
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to
+ * disk. flags parameter indicates which fields have changed.
+ */
+int
+xfs_qm_write_sb_changes(
+       xfs_mount_t     *mp,
+       __int64_t       flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     mp->m_sb.sb_sectsize + 128, 0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_mod_sb(tp, flags);
+       error = xfs_trans_commit(tp, 0);
+
+       return error;
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid, gid and prid make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file.
+ * This also attaches dquot(s) to the given inode after locking it,
+ * and returns the dquots corresponding to the uid and/or gid.
+ *
+ * in  : inode (unlocked)
+ * out : udquot, gdquot with references taken and unlocked
+ */
+int
+xfs_qm_vop_dqalloc(
+       struct xfs_inode        *ip,
+       uid_t                   uid,
+       gid_t                   gid,
+       prid_t                  prid,
+       uint                    flags,
+       struct xfs_dquot        **O_udqpp,
+       struct xfs_dquot        **O_gdqpp)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *uq, *gq;
+       int                     error;
+       uint                    lockflags;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       lockflags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lockflags);
+
+       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
+               gid = ip->i_d.di_gid;
+
+       /*
+        * Attach the dquot(s) to this inode, doing a dquot allocation
+        * if necessary. The dquot(s) will not be locked.
+        */
+       if (XFS_NOT_DQATTACHED(mp, ip)) {
+               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+               if (error) {
+                       xfs_iunlock(ip, lockflags);
+                       return error;
+               }
+       }
+
+       uq = gq = NULL;
+       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+               if (ip->i_d.di_uid != uid) {
+                       /*
+                        * What we need is the dquot that has this uid, and
+                        * if we send the inode to dqget, the uid of the inode
+                        * takes priority over what's sent in the uid argument.
+                        * We must unlock inode here before calling dqget if
+                        * we're not sending the inode, because otherwise
+                        * we'll deadlock by doing trans_reserve while
+                        * holding ilock.
+                        */
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+                                                XFS_DQ_USER,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &uq))) {
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       /*
+                        * Get the ilock in the right order.
+                        */
+                       xfs_dqunlock(uq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       /*
+                        * Take an extra reference, because we'll return
+                        * this to caller
+                        */
+                       ASSERT(ip->i_udquot);
+                       uq = ip->i_udquot;
+                       xfs_dqlock(uq);
+                       XFS_DQHOLD(uq);
+                       xfs_dqunlock(uq);
+               }
+       }
+       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+               if (ip->i_d.di_gid != gid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+                                                XFS_DQ_GROUP,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+               if (xfs_get_projid(ip) != prid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+                                                XFS_DQ_PROJ,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return (error);
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       }
+       if (uq)
+               trace_xfs_dquot_dqalloc(ip);
+
+       xfs_iunlock(ip, lockflags);
+       if (O_udqpp)
+               *O_udqpp = uq;
+       else if (uq)
+               xfs_qm_dqrele(uq);
+       if (O_gdqpp)
+               *O_gdqpp = gq;
+       else if (gq)
+               xfs_qm_dqrele(gq);
+       return 0;
+}
+
+/*
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     **IO_olddq,
+       xfs_dquot_t     *newdq)
+{
+       xfs_dquot_t     *prevdq;
+       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
+                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+       /* old dquot */
+       prevdq = *IO_olddq;
+       ASSERT(prevdq);
+       ASSERT(prevdq != newdq);
+
+       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
+
+       /* the sparkling new dquot */
+       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+       /*
+        * Take an extra reference, because the inode
+        * is going to keep this dquot pointer even
+        * after the trans_commit.
+        */
+       xfs_dqlock(newdq);
+       XFS_DQHOLD(newdq);
+       xfs_dqunlock(newdq);
+       *IO_olddq = newdq;
+
+       return prevdq;
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
+ */
+int
+xfs_qm_vop_chown_reserve(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            delblks, blkflags, prjflags = 0;
+       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+       int             error;
+
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       delblks = ip->i_delayed_blks;
+       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+       blkflags = XFS_IS_REALTIME_INODE(ip) ?
+                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+
+       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
+               delblksudq = udqp;
+               /*
+                * If there are delayed allocation blocks, then we have to
+                * unreserve those from the old dquot, and add them to the
+                * new dquot.
+                */
+               if (delblks) {
+                       ASSERT(ip->i_udquot);
+                       unresudq = ip->i_udquot;
+               }
+       }
+       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
+                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
+                       prjflags = XFS_QMOPT_ENOSPC;
+
+               if (prjflags ||
+                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
+                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
+                       delblksgdq = gdqp;
+                       if (delblks) {
+                               ASSERT(ip->i_gdquot);
+                               unresgdq = ip->i_gdquot;
+                       }
+               }
+       }
+
+       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+                               flags | blkflags | prjflags)))
+               return (error);
+
+       /*
+        * Do the delayed blks reservations/unreservations now. Since, these
+        * are done without the help of a transaction, if a reservation fails
+        * its previous reservations won't be automatically undone by trans
+        * code. So, we have to do it manually here.
+        */
+       if (delblks) {
+               /*
+                * Do the reservations first. Unreservation can't fail.
+                */
+               ASSERT(delblksudq || delblksgdq);
+               ASSERT(unresudq || unresgdq);
+               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+                               flags | blkflags | prjflags)))
+                       return (error);
+               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+                               blkflags);
+       }
+
+       return (0);
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+       struct xfs_inode        **i_tab)
+{
+       struct xfs_mount        *mp = i_tab[0]->i_mount;
+       int                     i;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       for (i = 0; (i < 4 && i_tab[i]); i++) {
+               struct xfs_inode        *ip = i_tab[i];
+               int                     error;
+
+               /*
+                * Watch out for duplicate entries in the table.
+                */
+               if (i == 0 || ip != i_tab[i-1]) {
+                       if (XFS_NOT_DQATTACHED(mp, ip)) {
+                               error = xfs_qm_dqattach(ip, 0);
+                               if (error)
+                                       return error;
+                       }
+               }
+       }
+       return 0;
+}
+
+void
+xfs_qm_vop_create_dqattach(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       struct xfs_dquot        *udqp,
+       struct xfs_dquot        *gdqp)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       if (udqp) {
+               xfs_dqlock(udqp);
+               XFS_DQHOLD(udqp);
+               xfs_dqunlock(udqp);
+               ASSERT(ip->i_udquot == NULL);
+               ip->i_udquot = udqp;
+               ASSERT(XFS_IS_UQUOTA_ON(mp));
+               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+       if (gdqp) {
+               xfs_dqlock(gdqp);
+               XFS_DQHOLD(gdqp);
+               xfs_dqunlock(gdqp);
+               ASSERT(ip->i_gdquot == NULL);
+               ip->i_gdquot = gdqp;
+               ASSERT(XFS_IS_OQUOTA_ON(mp));
+               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
+                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
+                               be32_to_cpu(gdqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+}
+
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
new file mode 100644 (file)
index 0000000..43b9abe
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern uint            ndquot;
+extern struct mutex    xfs_Gqm_lock;
+extern struct xfs_qm   *xfs_Gqm;
+extern kmem_zone_t     *qm_dqzone;
+extern kmem_zone_t     *qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
+ * iterate over the mountpt's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS       7
+
+/*
+ * Ditto, for xfs_qm_dqreclaim_one.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS    4
+
+/*
+ * Ideal ratio of free to in use dquots. Quota manager makes an attempt
+ * to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO            2
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K blk contains 30
+ * (136 * 30 = 4080) dquots. It's probably not worth trying to make
+ * this more dynamic.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
+
+typedef xfs_dqhash_t   xfs_dqlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
+       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
+       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
+       struct list_head qm_dqfrlist;    /* freelist of dquots */
+       struct mutex     qm_dqfrlist_lock;
+       int              qm_dqfrlist_cnt;
+       atomic_t         qm_totaldquots; /* total incore dquots */
+       uint             qm_nrefs;       /* file systems with quota on */
+       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
+       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
+       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems.
+ * The mount structure keeps a pointer to this.
+ */
+typedef struct xfs_quotainfo {
+       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
+       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
+       struct list_head qi_dqlist;      /* all dquots in filesys */
+       struct mutex     qi_dqlist_lock;
+       int              qi_dquots;
+       int              qi_dqreclaims;  /* a change here indicates
+                                           a removal in the dqlist */
+       time_t           qi_btimelimit;  /* limit for blks timer */
+       time_t           qi_itimelimit;  /* limit for inodes timer */
+       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
+       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
+       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
+       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
+       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
+       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
+       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
+       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
+       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
+       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
+       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
+       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
+       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+} xfs_quotainfo_t;
+
+
+extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS            2
+typedef struct xfs_dquot_acct {
+       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
+
+#define XFS_QM_BWARNLIMIT      5
+#define XFS_QM_IWARNLIMIT      5
+#define XFS_QM_RTBWARNLIMIT    5
+
+extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int             xfs_qm_quotacheck(xfs_mount_t *);
+extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+
+/* dquot stuff */
+extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* quota ops */
+extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
+
+#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
new file mode 100644 (file)
index 0000000..a0a829a
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+
+STATIC void
+xfs_fill_statvfs_from_dquot(
+       struct kstatfs          *statp,
+       xfs_disk_dquot_t        *dp)
+{
+       __uint64_t              limit;
+
+       limit = dp->d_blk_softlimit ?
+               be64_to_cpu(dp->d_blk_softlimit) :
+               be64_to_cpu(dp->d_blk_hardlimit);
+       if (limit && statp->f_blocks > limit) {
+               statp->f_blocks = limit;
+               statp->f_bfree = statp->f_bavail =
+                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
+                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
+       }
+
+       limit = dp->d_ino_softlimit ?
+               be64_to_cpu(dp->d_ino_softlimit) :
+               be64_to_cpu(dp->d_ino_hardlimit);
+       if (limit && statp->f_files > limit) {
+               statp->f_files = limit;
+               statp->f_ffree =
+                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
+                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
+       }
+}
+
+
+/*
+ * Directory tree accounting is implemented using project quotas, where
+ * the project identifier is inherited from parent directories.
+ * A statvfs (df, etc.) of a directory that is using project quota should
+ * return a statvfs of the project, not the entire filesystem.
+ * This makes such trees appear as if they are filesystems in themselves.
+ */
+void
+xfs_qm_statvfs(
+       xfs_inode_t             *ip,
+       struct kstatfs          *statp)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       xfs_dquot_t             *dqp;
+
+       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
+               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
+               xfs_qm_dqput(dqp);
+       }
+}
+
+int
+xfs_qm_newmount(
+       xfs_mount_t     *mp,
+       uint            *needquotamount,
+       uint            *quotaflags)
+{
+       uint            quotaondisk;
+       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
+
+       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
+                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
+
+       if (quotaondisk) {
+               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
+               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+       }
+
+       /*
+        * If the device itself is read-only, we can't allow
+        * the user to change the state of quota on the mount -
+        * this would generate a transaction on the ro device,
+        * which would lead to an I/O error and shutdown
+        */
+
+       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
+            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
+           xfs_dev_is_read_only(mp, "changing quota state")) {
+               xfs_warn(mp, "please mount with%s%s%s%s.",
+                       (!quotaondisk ? "out quota" : ""),
+                       (uquotaondisk ? " usrquota" : ""),
+                       (pquotaondisk ? " prjquota" : ""),
+                       (gquotaondisk ? " grpquota" : ""));
+               return XFS_ERROR(EPERM);
+       }
+
+       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+               /*
+                * Call mount_quotas at this point only if we won't have to do
+                * a quotacheck.
+                */
+               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+                       /*
+                        * If an error occurred, qm_mount_quotas code
+                        * has already disabled quotas. So, just finish
+                        * mounting, and get on with the boring life
+                        * without disk quotas.
+                        */
+                       xfs_qm_mount_quotas(mp);
+               } else {
+                       /*
+                        * Clear the quota flags, but remember them. This
+                        * is so that the quota code doesn't get invoked
+                        * before we're ready. This can happen when an
+                        * inode goes inactive and wants to free blocks,
+                        * or via xfs_log_mount_finish.
+                        */
+                       *needquotamount = B_TRUE;
+                       *quotaflags = mp->m_qflags;
+                       mp->m_qflags = 0;
+               }
+       }
+
+       return 0;
+}
+
+void __init
+xfs_qm_init(void)
+{
+       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
+       mutex_init(&xfs_Gqm_lock);
+       xfs_qm_init_procfs();
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+       xfs_qm_cleanup_procfs();
+       if (qm_dqzone)
+               kmem_zone_destroy(qm_dqzone);
+       if (qm_dqtrxzone)
+               kmem_zone_destroy(qm_dqtrxzone);
+}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
new file mode 100644 (file)
index 0000000..8671a0b
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+static int xqm_proc_show(struct seq_file *m, void *v)
+{
+       /* maximum; incore; ratio free to inuse; freelist */
+       seq_printf(m, "%d\t%d\t%d\t%u\n",
+                       ndquot,
+                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+                       xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
+       return 0;
+}
+
+static int xqm_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqm_proc_show, NULL);
+}
+
+static const struct file_operations xqm_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqm_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
+       /* quota performance statistics */
+       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
+                       xqmstats.xs_qm_dqreclaims,
+                       xqmstats.xs_qm_dqreclaim_misses,
+                       xqmstats.xs_qm_dquot_dups,
+                       xqmstats.xs_qm_dqcachemisses,
+                       xqmstats.xs_qm_dqcachehits,
+                       xqmstats.xs_qm_dqwants,
+                       xqmstats.xs_qm_dqshake_reclaims,
+                       xqmstats.xs_qm_dqinact_reclaims);
+       return 0;
+}
+
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqmstat_proc_show, NULL);
+}
+
+static const struct file_operations xqmstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqmstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+void
+xfs_qm_init_procfs(void)
+{
+       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
+       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
+}
+
+void
+xfs_qm_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/xqm", NULL);
+       remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
new file mode 100644 (file)
index 0000000..5b964fc
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics
+ */
+struct xqmstats {
+       __uint32_t              xs_qm_dqreclaims;
+       __uint32_t              xs_qm_dqreclaim_misses;
+       __uint32_t              xs_qm_dquot_dups;
+       __uint32_t              xs_qm_dqcachemisses;
+       __uint32_t              xs_qm_dqcachehits;
+       __uint32_t              xs_qm_dqwants;
+       __uint32_t              xs_qm_dqshake_reclaims;
+       __uint32_t              xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count)  ( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+
+# define XQM_STATS_INC(count)  do { } while (0)
+
+static inline void xfs_qm_init_procfs(void) { };
+static inline void xfs_qm_cleanup_procfs(void) { };
+
+#endif
+
+#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
new file mode 100644 (file)
index 0000000..609246f
--- /dev/null
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/capability.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+                                       uint);
+STATIC uint    xfs_qm_export_flags(uint);
+STATIC uint    xfs_qm_export_qtype_flags(uint);
+STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+                                       fs_disk_quota_t *);
+
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots. Called only at unmount time.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+int
+xfs_qm_scall_quotaoff(
+       xfs_mount_t             *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       uint                    dqtype;
+       int                     error;
+       uint                    inactivate_flags;
+       xfs_qoff_logitem_t      *qoffstart;
+       int                     nculprits;
+
+       /*
+        * No file system can have quotas enabled on disk but not in core.
+        * Note that quota utilities (like quotaoff) _expect_
+        * errno == EEXIST here.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               return XFS_ERROR(EEXIST);
+       error = 0;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+       /*
+        * We don't want to deal with two quotaoffs messing up each other,
+        * so we're going to serialize it. quotaoff isn't exactly a performance
+        * critical thing.
+        * If quotaoff, then we must be dealing with the root filesystem.
+        */
+       ASSERT(q);
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * If we're just turning off quota enforcement, change mp and go.
+        */
+       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+               mp->m_qflags &= ~(flags);
+
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = mp->m_qflags;
+               spin_unlock(&mp->m_sb_lock);
+               mutex_unlock(&q->qi_quotaofflock);
+
+               /* XXX what to do if error ? Revert back to old vals incore ? */
+               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+               return (error);
+       }
+
+       dqtype = 0;
+       inactivate_flags = 0;
+       /*
+        * If accounting is off, we must turn enforcement off, clear the
+        * quota 'CHKD' certificate to make it known that we have to
+        * do a quotacheck the next time this quota is turned on.
+        */
+       if (flags & XFS_UQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_UQUOTA;
+               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+               inactivate_flags |= XFS_UQUOTA_ACTIVE;
+       }
+       if (flags & XFS_GQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_GQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_GQUOTA_ACTIVE;
+       } else if (flags & XFS_PQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_PQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_PQUOTA_ACTIVE;
+       }
+
+       /*
+        * Nothing to do?  Don't complain. This happens when we're just
+        * turning off quota enforcement.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               goto out_unlock;
+
+       /*
+        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+        * and synchronously. If we fail to write, we should abort the
+        * operation as it cannot be recovered safely if we crash.
+        */
+       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+        * to take care of the race between dqget and quotaoff. We don't take
+        * any special locks to reset these bits. All processes need to check
+        * these bits *after* taking inode lock(s) to see if the particular
+        * quota type is in the process of being turned off. If *ACTIVE, it is
+        * guaranteed that all dquot structures and all quotainode ptrs will all
+        * stay valid as long as that inode is kept locked.
+        *
+        * There is no turning back after this.
+        */
+       mp->m_qflags &= ~inactivate_flags;
+
+       /*
+        * Give back all the dquot reference(s) held by inodes.
+        * Here we go thru every single incore inode in this file system, and
+        * do a dqrele on the i_udquot/i_gdquot that it may have.
+        * Essentially, as long as somebody has an inode locked, this guarantees
+        * that quotas will not be turned off. This is handy because in a
+        * transaction once we lock the inode(s) and check for quotaon, we can
+        * depend on the quota inodes (and other things) being valid as long as
+        * we keep the lock(s).
+        */
+       xfs_qm_dqrele_all_inodes(mp, flags);
+
+       /*
+        * Next we make the changes in the quota flag in the mount struct.
+        * This isn't protected by a particular lock directly, because we
+        * don't want to take a mrlock every time we depend on quotas being on.
+        */
+       mp->m_qflags &= ~(flags);
+
+       /*
+        * Go through all the dquots of this file system and purge them,
+        * according to what was turned off. We may not be able to get rid
+        * of all dquots, because dquots can have temporary references that
+        * are not attached to inodes. eg. xfs_setattr, xfs_create.
+        * So, if we couldn't purge all the dquots from the filesystem,
+        * we can't get rid of the incore data structures.
+        */
+       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
+               delay(10 * nculprits);
+
+       /*
+        * Transactions that had started before ACTIVE state bit was cleared
+        * could have logged many dquots, so they'd have higher LSNs than
+        * the first QUOTAOFF log record does. If we happen to crash when
+        * the tail of the log has gone past the QUOTAOFF record, but
+        * before the last dquot modification, those dquots __will__
+        * recover, and that's not good.
+        *
+        * So, we have QUOTAOFF start and end logitems; the start
+        * logitem won't get overwritten until the end logitem appears...
+        */
+       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+       if (error) {
+               /* We're screwed now. Shutdown is the only option. */
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               goto out_unlock;
+       }
+
+       /*
+        * If quotas is completely disabled, close shop.
+        */
+       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
+               mutex_unlock(&q->qi_quotaofflock);
+               xfs_qm_destroy_quotainfo(mp);
+               return (0);
+       }
+
+       /*
+        * Release our quotainode references if we don't need them anymore.
+        */
+       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
+               IRELE(q->qi_uquotaip);
+               q->qi_uquotaip = NULL;
+       }
+       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
+               IRELE(q->qi_gquotaip);
+               q->qi_gquotaip = NULL;
+       }
+
+out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+STATIC int
+xfs_qm_scall_trunc_qfile(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino)
+{
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       if (ino == NULLFSINO)
+               return 0;
+
+       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                 XFS_TRANS_PERM_LOG_RES,
+                                 XFS_ITRUNCATE_LOG_COUNT);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+               goto out_put;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip);
+
+       error = xfs_itruncate_data(&tp, ip, 0);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                    XFS_TRANS_ABORT);
+               goto out_unlock;
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+out_put:
+       IRELE(ip);
+       return error;
+}
+
+int
+xfs_qm_scall_trunc_qfiles(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error = 0, error2 = 0;
+
+       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
+                       __func__, flags, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (flags & XFS_DQ_USER)
+               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
+       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+
+       return error ? error : error2;
+}
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem.  This takes
+ * effect immediately.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+int
+xfs_qm_scall_quotaon(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error;
+       uint            qf;
+       __int64_t       sbflags;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+       /*
+        * Switching on quota accounting must be done at mount time.
+        */
+       flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+       sbflags = 0;
+
+       if (flags == 0) {
+               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
+                       __func__, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       /* No fs can turn on quotas with a delayed effect */
+       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+       /*
+        * Can't enforce without accounting. We check the superblock
+        * qflags here instead of m_qflags because rootfs can have
+        * quota acct on ondisk without m_qflags' knowing.
+        */
+       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+           (flags & XFS_UQUOTA_ENFD))
+           ||
+           ((flags & XFS_PQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+           (flags & XFS_GQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+           (flags & XFS_OQUOTA_ENFD))) {
+               xfs_debug(mp,
+                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
+                       __func__, flags, mp->m_sb.sb_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+       /*
+        * If everything's up to-date incore, then don't waste time.
+        */
+       if ((mp->m_qflags & flags) == flags)
+               return XFS_ERROR(EEXIST);
+
+       /*
+        * Change sb_qflags on disk but not incore mp->qflags
+        * if this is the root filesystem.
+        */
+       spin_lock(&mp->m_sb_lock);
+       qf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = qf | flags;
+       spin_unlock(&mp->m_sb_lock);
+
+       /*
+        * There's nothing to change if it's the same.
+        */
+       if ((qf & flags) == flags && sbflags == 0)
+               return XFS_ERROR(EEXIST);
+       sbflags |= XFS_SB_QFLAGS;
+
+       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+               return (error);
+       /*
+        * If we aren't trying to switch on quota enforcement, we are done.
+        */
+       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
+           (flags & XFS_ALL_QUOTA_ENFD) == 0)
+               return (0);
+
+       if (! XFS_IS_QUOTA_RUNNING(mp))
+               return XFS_ERROR(ESRCH);
+
+       /*
+        * Switch on quota enforcement in core.
+        */
+       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+
+       return (0);
+}
+
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ */
+int
+xfs_qm_scall_getqstat(
+       struct xfs_mount        *mp,
+       struct fs_quota_stat    *out)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_inode        *uip, *gip;
+       boolean_t               tempuqip, tempgqip;
+
+       uip = gip = NULL;
+       tempuqip = tempgqip = B_FALSE;
+       memset(out, 0, sizeof(fs_quota_stat_t));
+
+       out->qs_version = FS_QSTAT_VERSION;
+       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
+               out->qs_uquota.qfs_ino = NULLFSINO;
+               out->qs_gquota.qfs_ino = NULLFSINO;
+               return (0);
+       }
+       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+                                                       (XFS_ALL_QUOTA_ACCT|
+                                                        XFS_ALL_QUOTA_ENFD));
+       out->qs_pad = 0;
+       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+       if (q) {
+               uip = q->qi_uquotaip;
+               gip = q->qi_gquotaip;
+       }
+       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                       0, 0, &uip) == 0)
+                       tempuqip = B_TRUE;
+       }
+       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                       0, 0, &gip) == 0)
+                       tempgqip = B_TRUE;
+       }
+       if (uip) {
+               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+               if (tempuqip)
+                       IRELE(uip);
+       }
+       if (gip) {
+               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+               if (tempgqip)
+                       IRELE(gip);
+       }
+       if (q) {
+               out->qs_incoredqs = q->qi_dquots;
+               out->qs_btimelimit = q->qi_btimelimit;
+               out->qs_itimelimit = q->qi_itimelimit;
+               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+               out->qs_bwarnlimit = q->qi_bwarnlimit;
+               out->qs_iwarnlimit = q->qi_iwarnlimit;
+       }
+       return 0;
+}
+
+#define XFS_DQ_MASK \
+       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
+/*
+ * Adjust quota limits, and start/stop timers accordingly.
+ */
+int
+xfs_qm_scall_setqlim(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       uint                    type,
+       fs_disk_quota_t         *newlim)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_disk_dquot_t        *ddq;
+       xfs_dquot_t             *dqp;
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qcnt_t              hard, soft;
+
+       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+               return EINVAL;
+       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+               return 0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       /*
+        * We don't want to race with a quotaoff so take the quotaoff lock.
+        * (We don't hold an inode lock, so there's nothing else to stop
+        * a quotaoff from happening). (XXXThis doesn't currently happen
+        * because we take the vfslock before calling xfs_qm_sysent).
+        */
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * Get the dquot (locked), and join it to the transaction.
+        * Allocate the dquot if this doesn't exist.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+               ASSERT(error != ENOENT);
+               goto out_unlock;
+       }
+       xfs_trans_dqjoin(tp, dqp);
+       ddq = &dqp->q_core;
+
+       /*
+        * Make sure that hardlimits are >= soft limits before changing.
+        */
+       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+                       be64_to_cpu(ddq->d_blk_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+                       be64_to_cpu(ddq->d_blk_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_blk_hardlimit = cpu_to_be64(hard);
+               ddq->d_blk_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_bhardlimit = hard;
+                       q->qi_bsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
+       }
+       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+                       be64_to_cpu(ddq->d_rtb_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+                       be64_to_cpu(ddq->d_rtb_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
+               ddq->d_rtb_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_rtbhardlimit = hard;
+                       q->qi_rtbsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+       }
+
+       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+               (xfs_qcnt_t) newlim->d_ino_hardlimit :
+                       be64_to_cpu(ddq->d_ino_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+               (xfs_qcnt_t) newlim->d_ino_softlimit :
+                       be64_to_cpu(ddq->d_ino_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_ino_hardlimit = cpu_to_be64(hard);
+               ddq->d_ino_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_ihardlimit = hard;
+                       q->qi_isoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
+       }
+
+       /*
+        * Update warnings counter(s) if requested
+        */
+       if (newlim->d_fieldmask & FS_DQ_BWARNS)
+               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
+       if (newlim->d_fieldmask & FS_DQ_IWARNS)
+               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
+       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
+
+       if (id == 0) {
+               /*
+                * Timelimits for the super user set the relative time
+                * the other users can be over quota for this file system.
+                * If it is zero a default is used.  Ditto for the default
+                * soft and hard limit values (already done, above), and
+                * for warnings.
+                */
+               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+                       q->qi_btimelimit = newlim->d_btimer;
+                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+                       q->qi_itimelimit = newlim->d_itimer;
+                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
+                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_BWARNS)
+                       q->qi_bwarnlimit = newlim->d_bwarns;
+               if (newlim->d_fieldmask & FS_DQ_IWARNS)
+                       q->qi_iwarnlimit = newlim->d_iwarns;
+               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
+       } else {
+               /*
+                * If the user is now over quota, start the timelimit.
+                * The user will not be 'warned'.
+                * Note that we keep the timers ticking, whether enforcement
+                * is on or off. We don't really want to bother with iterating
+                * over all ondisk dquots and turning the timers on/off.
+                */
+               xfs_qm_adjust_dqtimers(mp, ddq);
+       }
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_trans_log_dquot(tp, dqp);
+
+       error = xfs_trans_commit(tp, 0);
+       xfs_qm_dqrele(dqp);
+
+ out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+int
+xfs_qm_scall_getquota(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       fs_disk_quota_t *out)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       /*
+        * Try to get the dquot. We don't want it allocated on disk, so
+        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+        * exist, we'll get ENOENT back.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
+               return (error);
+       }
+
+       /*
+        * If everything's NULL, this dquot doesn't quite exist as far as
+        * our utility programs are concerned.
+        */
+       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+               xfs_qm_dqput(dqp);
+               return XFS_ERROR(ENOENT);
+       }
+       /*
+        * Convert the disk dquot to the exportable format
+        */
+       xfs_qm_export_dquot(mp, &dqp->q_core, out);
+       xfs_qm_dqput(dqp);
+       return (error ? XFS_ERROR(EFAULT) : 0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+       xfs_mount_t             *mp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qoff_logitem_t      *qoffi;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+                                       flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+       return (error);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff(
+       xfs_mount_t            *mp,
+       xfs_qoff_logitem_t     **qoffstartp,
+       uint                   flags)
+{
+       xfs_trans_t            *tp;
+       int                     error;
+       xfs_qoff_logitem_t     *qoffi=NULL;
+       uint                    oldsbqflag=0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     sizeof(xfs_qoff_logitem_t) * 2 +
+                                     mp->m_sb.sb_sectsize + 128,
+                                     0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               goto error0;
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       spin_lock(&mp->m_sb_lock);
+       oldsbqflag = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+error0:
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /*
+                * No one else is modifying sb_qflags, so this is OK.
+                * We still hold the quotaofflock.
+                */
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = oldsbqflag;
+               spin_unlock(&mp->m_sb_lock);
+       }
+       *qoffstartp = qoffi;
+       return (error);
+}
+
+
+/*
+ * Translate an internal style on-disk-dquot to the exportable format.
+ * The main differences are that the counters/limits are all in Basic
+ * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
+ * to be converted to the native endianness.
+ */
+STATIC void
+xfs_qm_export_dquot(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *src,
+       struct fs_disk_quota    *dst)
+{
+       memset(dst, 0, sizeof(*dst));
+       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
+       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
+       dst->d_id = be32_to_cpu(src->d_id);
+       dst->d_blk_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+       dst->d_blk_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
+       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
+       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
+       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
+       dst->d_icount = be64_to_cpu(src->d_icount);
+       dst->d_btimer = be32_to_cpu(src->d_btimer);
+       dst->d_itimer = be32_to_cpu(src->d_itimer);
+       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
+       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+       dst->d_rtb_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+       dst->d_rtb_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
+       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
+       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
+       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
+
+       /*
+        * Internally, we don't reset all the timers when quota enforcement
+        * gets turned off. No need to confuse the user level code,
+        * so return zeroes in that case.
+        */
+       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
+           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
+               dst->d_btimer = 0;
+               dst->d_itimer = 0;
+               dst->d_rtbtimer = 0;
+       }
+
+#ifdef DEBUG
+       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
+            (XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
+           dst->d_id != 0) {
+               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+                   (dst->d_blk_softlimit > 0)) {
+                       ASSERT(dst->d_btimer != 0);
+               }
+               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+                   (dst->d_ino_softlimit > 0)) {
+                       ASSERT(dst->d_itimer != 0);
+               }
+       }
+#endif
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+       uint flags)
+{
+       /*
+        * Can't be more than one, or none.
+        */
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_USER_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_USER_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
+
+       return (flags & XFS_DQ_USER) ?
+               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
+}
+
+STATIC uint
+xfs_qm_export_flags(
+       uint flags)
+{
+       uint uflags;
+
+       uflags = 0;
+       if (flags & XFS_UQUOTA_ACCT)
+               uflags |= FS_QUOTA_UDQ_ACCT;
+       if (flags & XFS_PQUOTA_ACCT)
+               uflags |= FS_QUOTA_PDQ_ACCT;
+       if (flags & XFS_GQUOTA_ACCT)
+               uflags |= FS_QUOTA_GDQ_ACCT;
+       if (flags & XFS_UQUOTA_ENFD)
+               uflags |= FS_QUOTA_UDQ_ENFD;
+       if (flags & (XFS_OQUOTA_ENFD)) {
+               uflags |= (flags & XFS_GQUOTA_ACCT) ?
+                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
+       }
+       return (uflags);
+}
+
+
+STATIC int
+xfs_dqrele_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       /* skip quota inodes */
+       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
+           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
+               ASSERT(ip->i_udquot == NULL);
+               ASSERT(ip->i_gdquot == NULL);
+               return 0;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ *
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+       struct xfs_mount *mp,
+       uint             flags)
+{
+       ASSERT(mp->m_quotainfo);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+}
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
new file mode 100644 (file)
index 0000000..94a3d92
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE    10
+
+/*
+ * Hash into a bucket in the dquot hash table, based on <mp, id>.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+                                (__psunsigned_t)(id)) & \
+                               (xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
+                                    (xfs_Gqm->qm_usr_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)) : \
+                                    (xfs_Gqm->qm_grp_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+       !dqp->q_core.d_blk_hardlimit && \
+       !dqp->q_core.d_blk_softlimit && \
+       !dqp->q_core.d_rtb_hardlimit && \
+       !dqp->q_core.d_rtb_softlimit && \
+       !dqp->q_core.d_ino_hardlimit && \
+       !dqp->q_core.d_ino_softlimit && \
+       !dqp->q_core.d_bcount && \
+       !dqp->q_core.d_rtbcount && \
+       !dqp->q_core.d_icount)
+
+#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
+
+#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
new file mode 100644 (file)
index 0000000..7e76f53
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_qm.h"
+#include <linux/quota.h>
+
+
+STATIC int
+xfs_quota_type(int type)
+{
+       switch (type) {
+       case USRQUOTA:
+               return XFS_DQ_USER;
+       case GRPQUOTA:
+               return XFS_DQ_GROUP;
+       default:
+               return XFS_DQ_PROJ;
+       }
+}
+
+STATIC int
+xfs_fs_get_xstate(
+       struct super_block      *sb,
+       struct fs_quota_stat    *fqs)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       return -xfs_qm_scall_getqstat(mp, fqs);
+}
+
+STATIC int
+xfs_fs_set_xstate(
+       struct super_block      *sb,
+       unsigned int            uflags,
+       int                     op)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       unsigned int            flags = 0;
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+
+       if (uflags & FS_QUOTA_UDQ_ACCT)
+               flags |= XFS_UQUOTA_ACCT;
+       if (uflags & FS_QUOTA_PDQ_ACCT)
+               flags |= XFS_PQUOTA_ACCT;
+       if (uflags & FS_QUOTA_GDQ_ACCT)
+               flags |= XFS_GQUOTA_ACCT;
+       if (uflags & FS_QUOTA_UDQ_ENFD)
+               flags |= XFS_UQUOTA_ENFD;
+       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
+               flags |= XFS_OQUOTA_ENFD;
+
+       switch (op) {
+       case Q_XQUOTAON:
+               return -xfs_qm_scall_quotaon(mp, flags);
+       case Q_XQUOTAOFF:
+               if (!XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_quotaoff(mp, flags);
+       case Q_XQUOTARM:
+               if (XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_trunc_qfiles(mp, flags);
+       }
+
+       return -EINVAL;
+}
+
+STATIC int
+xfs_fs_get_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
+}
+
+STATIC int
+xfs_fs_set_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
+}
+
+const struct quotactl_ops xfs_quotactl_operations = {
+       .get_xstate             = xfs_fs_get_xstate,
+       .set_xstate             = xfs_fs_set_xstate,
+       .get_dqblk              = xfs_fs_get_dqblk,
+       .set_dqblk              = xfs_fs_set_dqblk,
+};
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
new file mode 100644 (file)
index 0000000..76fdc58
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+static int xfs_stat_proc_show(struct seq_file *m, void *v)
+{
+       int             c, i, j, val;
+       __uint64_t      xs_xstrat_bytes = 0;
+       __uint64_t      xs_write_bytes = 0;
+       __uint64_t      xs_read_bytes = 0;
+
+       static const struct xstats_entry {
+               char    *desc;
+               int     endpoint;
+       } xstats[] = {
+               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
+               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
+               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
+               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
+               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
+               { "trans",              XFSSTAT_END_TRANSACTIONS        },
+               { "ig",                 XFSSTAT_END_INODE_OPS           },
+               { "log",                XFSSTAT_END_LOG_OPS             },
+               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
+               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
+               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
+               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
+               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
+               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
+               { "buf",                XFSSTAT_END_BUF                 },
+               { "abtb2",              XFSSTAT_END_ABTB_V2             },
+               { "abtc2",              XFSSTAT_END_ABTC_V2             },
+               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
+               { "ibt2",               XFSSTAT_END_IBT_V2              },
+       };
+
+       /* Loop over all stats groups */
+       for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
+               seq_printf(m, "%s", xstats[i].desc);
+               /* inner loop does each group */
+               while (j < xstats[i].endpoint) {
+                       val = 0;
+                       /* sum over all cpus */
+                       for_each_possible_cpu(c)
+                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+                       seq_printf(m, " %u", val);
+                       j++;
+               }
+               seq_putc(m, '\n');
+       }
+       /* extra precision counters */
+       for_each_possible_cpu(i) {
+               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+       }
+
+       seq_printf(m, "xpc %Lu %Lu %Lu\n",
+                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+       seq_printf(m, "debug %u\n",
+#if defined(DEBUG)
+               1);
+#else
+               0);
+#endif
+       return 0;
+}
+
+static int xfs_stat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xfs_stat_proc_show, NULL);
+}
+
+static const struct file_operations xfs_stat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xfs_stat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+int
+xfs_init_procfs(void)
+{
+       if (!proc_mkdir("fs/xfs", NULL))
+               goto out;
+
+       if (!proc_create("fs/xfs/stat", 0, NULL,
+                        &xfs_stat_proc_fops))
+               goto out_remove_entry;
+       return 0;
+
+ out_remove_entry:
+       remove_proc_entry("fs/xfs", NULL);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/stat", NULL);
+       remove_proc_entry("fs/xfs", NULL);
+}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
new file mode 100644 (file)
index 0000000..736854b
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC      4
+       __uint32_t              xs_allocx;
+       __uint32_t              xs_allocb;
+       __uint32_t              xs_freex;
+       __uint32_t              xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
+       __uint32_t              xs_abt_lookup;
+       __uint32_t              xs_abt_compare;
+       __uint32_t              xs_abt_insrec;
+       __uint32_t              xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
+       __uint32_t              xs_blk_mapr;
+       __uint32_t              xs_blk_mapw;
+       __uint32_t              xs_blk_unmap;
+       __uint32_t              xs_add_exlist;
+       __uint32_t              xs_del_exlist;
+       __uint32_t              xs_look_exlist;
+       __uint32_t              xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
+       __uint32_t              xs_bmbt_lookup;
+       __uint32_t              xs_bmbt_compare;
+       __uint32_t              xs_bmbt_insrec;
+       __uint32_t              xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+       __uint32_t              xs_dir_lookup;
+       __uint32_t              xs_dir_create;
+       __uint32_t              xs_dir_remove;
+       __uint32_t              xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
+       __uint32_t              xs_trans_sync;
+       __uint32_t              xs_trans_async;
+       __uint32_t              xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
+       __uint32_t              xs_ig_attempts;
+       __uint32_t              xs_ig_found;
+       __uint32_t              xs_ig_frecycle;
+       __uint32_t              xs_ig_missed;
+       __uint32_t              xs_ig_dup;
+       __uint32_t              xs_ig_reclaims;
+       __uint32_t              xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
+       __uint32_t              xs_log_writes;
+       __uint32_t              xs_log_blocks;
+       __uint32_t              xs_log_noiclogs;
+       __uint32_t              xs_log_force;
+       __uint32_t              xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
+       __uint32_t              xs_try_logspace;
+       __uint32_t              xs_sleep_logspace;
+       __uint32_t              xs_push_ail;
+       __uint32_t              xs_push_ail_success;
+       __uint32_t              xs_push_ail_pushbuf;
+       __uint32_t              xs_push_ail_pinned;
+       __uint32_t              xs_push_ail_locked;
+       __uint32_t              xs_push_ail_flushing;
+       __uint32_t              xs_push_ail_restarts;
+       __uint32_t              xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
+       __uint32_t              xs_xstrat_quick;
+       __uint32_t              xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
+       __uint32_t              xs_write_calls;
+       __uint32_t              xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
+       __uint32_t              xs_attr_get;
+       __uint32_t              xs_attr_set;
+       __uint32_t              xs_attr_remove;
+       __uint32_t              xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
+       __uint32_t              xs_iflush_count;
+       __uint32_t              xs_icluster_flushcnt;
+       __uint32_t              xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
+       __uint32_t              vn_active;      /* # vnodes not on free lists */
+       __uint32_t              vn_alloc;       /* # times vn_alloc called */
+       __uint32_t              vn_get;         /* # times vn_get called */
+       __uint32_t              vn_hold;        /* # times vn_hold called */
+       __uint32_t              vn_rele;        /* # times vn_rele called */
+       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
+       __uint32_t              vn_remove;      /* # times vn_remove called */
+       __uint32_t              vn_free;        /* # times vn_free called */
+#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
+       __uint32_t              xb_get;
+       __uint32_t              xb_create;
+       __uint32_t              xb_get_locked;
+       __uint32_t              xb_get_locked_waited;
+       __uint32_t              xb_busy_locked;
+       __uint32_t              xb_miss_locked;
+       __uint32_t              xb_page_retries;
+       __uint32_t              xb_page_found;
+       __uint32_t              xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
+       __uint32_t              xs_abtb_2_lookup;
+       __uint32_t              xs_abtb_2_compare;
+       __uint32_t              xs_abtb_2_insrec;
+       __uint32_t              xs_abtb_2_delrec;
+       __uint32_t              xs_abtb_2_newroot;
+       __uint32_t              xs_abtb_2_killroot;
+       __uint32_t              xs_abtb_2_increment;
+       __uint32_t              xs_abtb_2_decrement;
+       __uint32_t              xs_abtb_2_lshift;
+       __uint32_t              xs_abtb_2_rshift;
+       __uint32_t              xs_abtb_2_split;
+       __uint32_t              xs_abtb_2_join;
+       __uint32_t              xs_abtb_2_alloc;
+       __uint32_t              xs_abtb_2_free;
+       __uint32_t              xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
+       __uint32_t              xs_abtc_2_lookup;
+       __uint32_t              xs_abtc_2_compare;
+       __uint32_t              xs_abtc_2_insrec;
+       __uint32_t              xs_abtc_2_delrec;
+       __uint32_t              xs_abtc_2_newroot;
+       __uint32_t              xs_abtc_2_killroot;
+       __uint32_t              xs_abtc_2_increment;
+       __uint32_t              xs_abtc_2_decrement;
+       __uint32_t              xs_abtc_2_lshift;
+       __uint32_t              xs_abtc_2_rshift;
+       __uint32_t              xs_abtc_2_split;
+       __uint32_t              xs_abtc_2_join;
+       __uint32_t              xs_abtc_2_alloc;
+       __uint32_t              xs_abtc_2_free;
+       __uint32_t              xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
+       __uint32_t              xs_bmbt_2_lookup;
+       __uint32_t              xs_bmbt_2_compare;
+       __uint32_t              xs_bmbt_2_insrec;
+       __uint32_t              xs_bmbt_2_delrec;
+       __uint32_t              xs_bmbt_2_newroot;
+       __uint32_t              xs_bmbt_2_killroot;
+       __uint32_t              xs_bmbt_2_increment;
+       __uint32_t              xs_bmbt_2_decrement;
+       __uint32_t              xs_bmbt_2_lshift;
+       __uint32_t              xs_bmbt_2_rshift;
+       __uint32_t              xs_bmbt_2_split;
+       __uint32_t              xs_bmbt_2_join;
+       __uint32_t              xs_bmbt_2_alloc;
+       __uint32_t              xs_bmbt_2_free;
+       __uint32_t              xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
+       __uint32_t              xs_ibt_2_lookup;
+       __uint32_t              xs_ibt_2_compare;
+       __uint32_t              xs_ibt_2_insrec;
+       __uint32_t              xs_ibt_2_delrec;
+       __uint32_t              xs_ibt_2_newroot;
+       __uint32_t              xs_ibt_2_killroot;
+       __uint32_t              xs_ibt_2_increment;
+       __uint32_t              xs_ibt_2_decrement;
+       __uint32_t              xs_ibt_2_lshift;
+       __uint32_t              xs_ibt_2_rshift;
+       __uint32_t              xs_ibt_2_split;
+       __uint32_t              xs_ibt_2_join;
+       __uint32_t              xs_ibt_2_alloc;
+       __uint32_t              xs_ibt_2_free;
+       __uint32_t              xs_ibt_2_moves;
+/* Extra precision counters */
+       __uint64_t              xs_xstrat_bytes;
+       __uint64_t              xs_write_bytes;
+       __uint64_t              xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/*
+ * We don't disable preempt, not too worried about poking the
+ * wrong CPU's stat for now (also aggregated before reporting).
+ */
+#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
+#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
+#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
+
+extern int xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else  /* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static inline int xfs_init_procfs(void)
+{
+       return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
+
+#endif /* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
new file mode 100644 (file)
index 0000000..9a72dda
--- /dev/null
@@ -0,0 +1,1773 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_fsops.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
+#include "xfs_sync.h"
+#include "xfs_trace.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/mempool.h>
+#include <linux/writeback.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/parser.h>
+
+static const struct super_operations xfs_super_operations;
+static kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
+
+#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
+#define MNTOPT_LOGDEV  "logdev"        /* log device */
+#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
+#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
+#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
+#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
+#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
+#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
+#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
+#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
+#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
+#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
+                                        * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
+#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
+#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
+                                        * in stat(). */
+#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
+#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
+#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
+#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
+#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
+#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
+#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
+#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
+#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
+#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
+#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
+#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
+
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+       Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static const match_table_t tokens = {
+       {Opt_barrier, "barrier"},
+       {Opt_nobarrier, "nobarrier"},
+       {Opt_err, NULL}
+};
+
+
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+       int     last, shift_left_factor = 0;
+       char    *value = s;
+
+       last = strlen(value) - 1;
+       if (value[last] == 'K' || value[last] == 'k') {
+               shift_left_factor = 10;
+               value[last] = '\0';
+       }
+       if (value[last] == 'M' || value[last] == 'm') {
+               shift_left_factor = 20;
+               value[last] = '\0';
+       }
+       if (value[last] == 'G' || value[last] == 'g') {
+               shift_left_factor = 30;
+               value[last] = '\0';
+       }
+
+       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
+STATIC int
+xfs_parseargs(
+       struct xfs_mount        *mp,
+       char                    *options)
+{
+       struct super_block      *sb = mp->m_super;
+       char                    *this_char, *value, *eov;
+       int                     dsunit = 0;
+       int                     dswidth = 0;
+       int                     iosize = 0;
+       __uint8_t               iosizelog = 0;
+
+       /*
+        * set up the mount name first so all the errors will refer to the
+        * correct device.
+        */
+       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+       if (!mp->m_fsname)
+               return ENOMEM;
+       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+       /*
+        * Copy binary VFS mount flags we are interested in.
+        */
+       if (sb->s_flags & MS_RDONLY)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       if (sb->s_flags & MS_DIRSYNC)
+               mp->m_flags |= XFS_MOUNT_DIRSYNC;
+       if (sb->s_flags & MS_SYNCHRONOUS)
+               mp->m_flags |= XFS_MOUNT_WSYNC;
+
+       /*
+        * Set some default flags that could be cleared by the mount option
+        * parsing.
+        */
+       mp->m_flags |= XFS_MOUNT_BARRIER;
+       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+
+       /*
+        * These can be overridden by the mount option parsing.
+        */
+       mp->m_logbufs = -1;
+       mp->m_logbsize = -1;
+
+       if (!options)
+               goto done;
+
+       while ((this_char = strsep(&options, ",")) != NULL) {
+               if (!*this_char)
+                       continue;
+               if ((value = strchr(this_char, '=')) != NULL)
+                       *value++ = 0;
+
+               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_logname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_rtname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = simple_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = suffix_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
+                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       mp->m_flags |= XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       mp->m_flags &= ~XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       mp->m_flags |= XFS_MOUNT_WSYNC;
+               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
+               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       mp->m_flags |= XFS_MOUNT_NOALIGN;
+               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       mp->m_flags |= XFS_MOUNT_SWALLOC;
+               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dsunit = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dswidth = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+#if !XFS_BIG_INUMS
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+#endif
+               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       mp->m_flags |= XFS_MOUNT_NOUUID;
+               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       mp->m_flags |= XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       mp->m_flags |= XFS_MOUNT_ATTR2;
+               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
+                       mp->m_flags |= XFS_MOUNT_NOATTR2;
+               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+                          !strcmp(this_char, MNTOPT_UQUOTA) ||
+                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                        XFS_UQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
+                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
+                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       mp->m_flags |= XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, "ihashsize")) {
+                       xfs_warn(mp,
+       "ihashsize no longer used, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisdsync")) {
+                       xfs_warn(mp,
+       "osyncisdsync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisosync")) {
+                       xfs_warn(mp,
+       "osyncisosync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "irixsgid")) {
+                       xfs_warn(mp,
+       "irixsgid is now a sysctl(2) variable, option is deprecated.");
+               } else {
+                       xfs_warn(mp, "unknown mount option [%s].", this_char);
+                       return EINVAL;
+               }
+       }
+
+       /*
+        * no recovery flag requires a read-only mount
+        */
+       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               xfs_warn(mp, "no-recovery mounts must be read-only.");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
+               xfs_warn(mp,
+       "sunit and swidth options incompatible with the noalign option");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
+           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
+               xfs_warn(mp,
+       "the discard option is incompatible with the nodelaylog option");
+               return EINVAL;
+       }
+
+#ifndef CONFIG_XFS_QUOTA
+       if (XFS_IS_QUOTA_RUNNING(mp)) {
+               xfs_warn(mp, "quota support not available in this kernel.");
+               return EINVAL;
+       }
+#endif
+
+       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
+               xfs_warn(mp, "cannot mount with both project and group quota");
+               return EINVAL;
+       }
+
+       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+               xfs_warn(mp, "sunit and swidth must be specified together");
+               return EINVAL;
+       }
+
+       if (dsunit && (dswidth % dsunit != 0)) {
+               xfs_warn(mp,
+       "stripe width (%d) must be a multiple of the stripe unit (%d)",
+                       dswidth, dsunit);
+               return EINVAL;
+       }
+
+done:
+       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+               /*
+                * At this point the superblock has not been read
+                * in, therefore we do not know the block size.
+                * Before the mount call ends we will convert
+                * these to FSBs.
+                */
+               if (dsunit) {
+                       mp->m_dalign = dsunit;
+                       mp->m_flags |= XFS_MOUNT_RETERR;
+               }
+
+               if (dswidth)
+                       mp->m_swidth = dswidth;
+       }
+
+       if (mp->m_logbufs != -1 &&
+           mp->m_logbufs != 0 &&
+           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+               return XFS_ERROR(EINVAL);
+       }
+       if (mp->m_logbsize != -1 &&
+           mp->m_logbsize !=  0 &&
+           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+            !is_power_of_2(mp->m_logbsize))) {
+               xfs_warn(mp,
+                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                       mp->m_logbsize);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (iosizelog) {
+               if (iosizelog > XFS_MAX_IO_LOG ||
+                   iosizelog < XFS_MIN_IO_LOG) {
+                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+                               iosizelog, XFS_MIN_IO_LOG,
+                               XFS_MAX_IO_LOG);
+                       return XFS_ERROR(EINVAL);
+               }
+
+               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+               mp->m_readio_log = iosizelog;
+               mp->m_writeio_log = iosizelog;
+       }
+
+       return 0;
+}
+
+struct proc_xfs_info {
+       int     flag;
+       char    *str;
+};
+
+STATIC int
+xfs_showargs(
+       struct xfs_mount        *mp,
+       struct seq_file         *m)
+{
+       static struct proc_xfs_info xfs_info_set[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
+               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
+               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
+               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
+               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
+               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
+               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
+               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
+               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
+               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
+               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
+               { 0, NULL }
+       };
+       static struct proc_xfs_info xfs_info_unset[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
+               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
+               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { 0, NULL }
+       };
+       struct proc_xfs_info    *xfs_infop;
+
+       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+               if (mp->m_flags & xfs_infop->flag)
+                       seq_puts(m, xfs_infop->str);
+       }
+       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+               if (!(mp->m_flags & xfs_infop->flag))
+                       seq_puts(m, xfs_infop->str);
+       }
+
+       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+                               (int)(1 << mp->m_writeio_log) >> 10);
+
+       if (mp->m_logbufs > 0)
+               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+       if (mp->m_logbsize > 0)
+               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+       if (mp->m_logname)
+               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+       if (mp->m_rtname)
+               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+       if (mp->m_dalign > 0)
+               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+       if (mp->m_swidth > 0)
+               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+               seq_puts(m, "," MNTOPT_USRQUOTA);
+       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+               seq_puts(m, "," MNTOPT_UQUOTANOENF);
+
+       /* Either project or group quotas can be active, not both */
+
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
+
+       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+               seq_puts(m, "," MNTOPT_NOQUOTA);
+
+       return 0;
+}
+__uint64_t
+xfs_max_file_offset(
+       unsigned int            blockshift)
+{
+       unsigned int            pagefactor = 1;
+       unsigned int            bitshift = BITS_PER_LONG - 1;
+
+       /* Figure out maximum filesize, on Linux this can depend on
+        * the filesystem blocksize (on 32 bit platforms).
+        * __block_write_begin does this in an [unsigned] long...
+        *      page->index << (PAGE_CACHE_SHIFT - bbits)
+        * So, for page sized blocks (4K on 32 bit platforms),
+        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+        * but for smaller blocksizes it is less (bbits = log2 bsize).
+        * Note1: get_block_t takes a long (implicit cast from above)
+        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+        * can optionally convert the [unsigned] long from above into
+        * an [unsigned] long long.
+        */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBDAF)
+       ASSERT(sizeof(sector_t) == 8);
+       pagefactor = PAGE_CACHE_SIZE;
+       bitshift = BITS_PER_LONG;
+# else
+       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+       return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
+
+STATIC int
+xfs_blkdev_get(
+       xfs_mount_t             *mp,
+       const char              *name,
+       struct block_device     **bdevp)
+{
+       int                     error = 0;
+
+       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                   mp);
+       if (IS_ERR(*bdevp)) {
+               error = PTR_ERR(*bdevp);
+               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
+       }
+
+       return -error;
+}
+
+STATIC void
+xfs_blkdev_put(
+       struct block_device     *bdev)
+{
+       if (bdev)
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+}
+
+void
+xfs_blkdev_issue_flush(
+       xfs_buftarg_t           *buftarg)
+{
+       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
+}
+
+STATIC void
+xfs_close_devices(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_logdev_targp);
+               xfs_blkdev_put(logdev);
+       }
+       if (mp->m_rtdev_targp) {
+               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+               xfs_blkdev_put(rtdev);
+       }
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *     (1) device (partition) with data and internal log
+ *     (2) logical volume with data and log subvolumes.
+ *     (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+       struct xfs_mount        *mp)
+{
+       struct block_device     *ddev = mp->m_super->s_bdev;
+       struct block_device     *logdev = NULL, *rtdev = NULL;
+       int                     error;
+
+       /*
+        * Open real time and log devices - order is important.
+        */
+       if (mp->m_logname) {
+               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+               if (error)
+                       goto out;
+       }
+
+       if (mp->m_rtname) {
+               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+               if (error)
+                       goto out_close_logdev;
+
+               if (rtdev == ddev || rtdev == logdev) {
+                       xfs_warn(mp,
+       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
+                       error = EINVAL;
+                       goto out_close_rtdev;
+               }
+       }
+
+       /*
+        * Setup xfs_mount buffer target pointers
+        */
+       error = ENOMEM;
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+       if (!mp->m_ddev_targp)
+               goto out_close_rtdev;
+
+       if (rtdev) {
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_rtdev_targp)
+                       goto out_free_ddev_targ;
+       }
+
+       if (logdev && logdev != ddev) {
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_logdev_targp)
+                       goto out_free_rtdev_targ;
+       } else {
+               mp->m_logdev_targp = mp->m_ddev_targp;
+       }
+
+       return 0;
+
+ out_free_rtdev_targ:
+       if (mp->m_rtdev_targp)
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ out_free_ddev_targ:
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+ out_close_rtdev:
+       if (rtdev)
+               xfs_blkdev_put(rtdev);
+ out_close_logdev:
+       if (logdev && logdev != ddev)
+               xfs_blkdev_put(logdev);
+ out:
+       return error;
+}
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+                                   mp->m_sb.sb_sectsize);
+       if (error)
+               return error;
+
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               unsigned int    log_sector_size = BBSIZE;
+
+               if (xfs_sb_version_hassector(&mp->m_sb))
+                       log_sector_size = mp->m_sb.sb_logsectsize;
+               error = xfs_setsize_buftarg(mp->m_logdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           log_sector_size);
+               if (error)
+                       return error;
+       }
+       if (mp->m_rtdev_targp) {
+               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           mp->m_sb.sb_sectsize);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/* Catch misguided souls that try to use this interface on XFS */
+STATIC struct inode *
+xfs_fs_alloc_inode(
+       struct super_block      *sb)
+{
+       BUG();
+       return NULL;
+}
+
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
+STATIC void
+xfs_fs_destroy_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_destroy_inode(ip);
+
+       XFS_STATS_INC(vn_reclaim);
+
+       /* bad inode, get out here ASAP */
+       if (is_bad_inode(inode))
+               goto out_reclaim;
+
+       xfs_ioend_wait(ip);
+
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+       /*
+        * We should never get here with one of the reclaim flags already set.
+        */
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+       /*
+        * We always use background reclaim here because even if the
+        * inode is clean, it still may be under IO and hence we have
+        * to take the flush lock. The background reclaim path handles
+        * this more efficiently than we can here, so simply let background
+        * reclaim tear down all inodes.
+        */
+out_reclaim:
+       xfs_inode_set_reclaim_tag(ip);
+}
+
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
+STATIC void
+xfs_fs_inode_init_once(
+       void                    *inode)
+{
+       struct xfs_inode        *ip = inode;
+
+       memset(ip, 0, sizeof(struct xfs_inode));
+
+       /* vfs inode */
+       inode_init_once(VFS_I(ip));
+
+       /* xfs inode */
+       atomic_set(&ip->i_iocount, 0);
+       atomic_set(&ip->i_pincount, 0);
+       spin_lock_init(&ip->i_flags_lock);
+       init_waitqueue_head(&ip->i_ipin_wait);
+       /*
+        * Because we want to use a counting completion, complete
+        * the flush completion once to allow a single access to
+        * the flush completion without blocking.
+        */
+       init_completion(&ip->i_flush);
+       complete(&ip->i_flush);
+
+       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
+}
+
+/*
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
+ */
+STATIC void
+xfs_fs_dirty_inode(
+       struct inode    *inode,
+       int             flags)
+{
+       barrier();
+       XFS_I(inode)->i_update_core = 1;
+}
+
+STATIC int
+xfs_log_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /* we need to return with the lock hold shared */
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Note - it's possible that we might have pushed ourselves out of the
+        * way during trans_reserve which would flush the inode.  But there's
+        * no guarantee that the inode buffer has actually gone out yet (it's
+        * delwri).  Plus the buffer could be pinned anyway if it's part of
+        * an inode in another recent transaction.  So we play it safe and
+        * fire off the transaction anyway.
+        */
+       xfs_trans_ijoin(tp, ip);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       error = xfs_trans_commit(tp, 0);
+       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
+STATIC int
+xfs_fs_write_inode(
+       struct inode            *inode,
+       struct writeback_control *wbc)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = EAGAIN;
+
+       trace_xfs_write_inode(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       if (wbc->sync_mode == WB_SYNC_ALL) {
+               /*
+                * Make sure the inode has made it it into the log.  Instead
+                * of forcing it all the way to stable storage using a
+                * synchronous transaction we let the log force inside the
+                * ->sync_fs call do that for thus, which reduces the number
+                * of synchronous log foces dramatically.
+                */
+               xfs_ioend_wait(ip);
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               if (ip->i_update_core) {
+                       error = xfs_log_inode(ip);
+                       if (error)
+                               goto out_unlock;
+               }
+       } else {
+               /*
+                * We make this non-blocking if the inode is contended, return
+                * EAGAIN to indicate to the caller that they did not succeed.
+                * This prevents the flush path from blocking on inodes inside
+                * another operation right now, they get caught later by
+                * xfs_sync.
+                */
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+                       goto out;
+
+               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+                       goto out_unlock;
+
+               /*
+                * Now we have the flush lock and the inode is not pinned, we
+                * can check if the inode is really clean as we know that
+                * there are no pending transaction completions, it is not
+                * waiting on the delayed write queue and there is no IO in
+                * progress.
+                */
+               if (xfs_inode_clean(ip)) {
+                       xfs_ifunlock(ip);
+                       error = 0;
+                       goto out_unlock;
+               }
+               error = xfs_iflush(ip, SYNC_TRYLOCK);
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
+       /*
+        * if we failed to write out the inode then mark
+        * it dirty again so we'll try again later.
+        */
+       if (error)
+               xfs_mark_inode_dirty_sync(ip);
+       return -error;
+}
+
+STATIC void
+xfs_fs_evict_inode(
+       struct inode            *inode)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+
+       trace_xfs_evict_inode(ip);
+
+       truncate_inode_pages(&inode->i_data, 0);
+       end_writeback(inode);
+       XFS_STATS_INC(vn_rele);
+       XFS_STATS_INC(vn_remove);
+       XFS_STATS_DEC(vn_active);
+
+       /*
+        * The iolock is used by the file system to coordinate reads,
+        * writes, and block truncates.  Up to this point the lock
+        * protected concurrent accesses by users of the inode.  But
+        * from here forward we're doing some final processing of the
+        * inode because we're done with it, and although we reuse the
+        * iolock for protection it is really a distinct lock class
+        * (in the lockdep sense) from before.  To keep lockdep happy
+        * (and basically indicate what we are doing), we explicitly
+        * re-init the iolock here.
+        */
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
+
+       xfs_inactive(ip);
+}
+
+STATIC void
+xfs_free_fsname(
+       struct xfs_mount        *mp)
+{
+       kfree(mp->m_fsname);
+       kfree(mp->m_rtname);
+       kfree(mp->m_logname);
+}
+
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_syncd_stop(mp);
+
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       xfs_freesb(mp);
+       xfs_icsb_destroy_counters(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
+STATIC int
+xfs_fs_sync_fs(
+       struct super_block      *sb,
+       int                     wait)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       int                     error;
+
+       /*
+        * Not much we can do for the first async pass.  Writing out the
+        * superblock would be counter-productive as we are going to redirty
+        * when writing out other data and metadata (and writing out a single
+        * block is quite fast anyway).
+        *
+        * Try to asynchronously kick off quota syncing at least.
+        */
+       if (!wait) {
+               xfs_qm_sync(mp, SYNC_TRYLOCK);
+               return 0;
+       }
+
+       error = xfs_quiesce_data(mp);
+       if (error)
+               return -error;
+
+       if (laptop_mode) {
+               /*
+                * The disk must be active because we're syncing.
+                * We schedule xfssyncd now (now that the disk is
+                * active) instead of later (when it might not be).
+                */
+               flush_delayed_work_sync(&mp->m_sync_work);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_statfs(
+       struct dentry           *dentry,
+       struct kstatfs          *statp)
+{
+       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
+       xfs_sb_t                *sbp = &mp->m_sb;
+       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       __uint64_t              fakeinos, id;
+       xfs_extlen_t            lsize;
+       __int64_t               ffree;
+
+       statp->f_type = XFS_SB_MAGIC;
+       statp->f_namelen = MAXNAMELEN - 1;
+
+       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+       statp->f_fsid.val[0] = (u32)id;
+       statp->f_fsid.val[1] = (u32)(id >> 32);
+
+       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+
+       spin_lock(&mp->m_sb_lock);
+       statp->f_bsize = sbp->sb_blocksize;
+       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+       statp->f_blocks = sbp->sb_dblocks - lsize;
+       statp->f_bfree = statp->f_bavail =
+                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       fakeinos = statp->f_bfree << sbp->sb_inopblog;
+       statp->f_files =
+           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       if (mp->m_maxicount)
+               statp->f_files = min_t(typeof(statp->f_files),
+                                       statp->f_files,
+                                       mp->m_maxicount);
+
+       /* make sure statp->f_ffree does not underflow */
+       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       statp->f_ffree = max_t(__int64_t, ffree, 0);
+
+       spin_unlock(&mp->m_sb_lock);
+
+       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+               xfs_qm_statvfs(ip, statp);
+       return 0;
+}
+
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks = 0;
+
+       mp->m_resblks_save = mp->m_resblks;
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks;
+
+       if (mp->m_resblks_save) {
+               resblks = mp->m_resblks_save;
+               mp->m_resblks_save = 0;
+       } else
+               resblks = xfs_default_resblks(mp);
+
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC int
+xfs_fs_remount(
+       struct super_block      *sb,
+       int                     *flags,
+       char                    *options)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       substring_t             args[MAX_OPT_ARGS];
+       char                    *p;
+       int                     error;
+
+       while ((p = strsep(&options, ",")) != NULL) {
+               int token;
+
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_barrier:
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+                       break;
+               case Opt_nobarrier:
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                       break;
+               default:
+                       /*
+                        * Logically we would return an error here to prevent
+                        * users from believing they might have changed
+                        * mount options using remount which can't be changed.
+                        *
+                        * But unfortunately mount(8) adds all options from
+                        * mtab and fstab to the mount arguments in some cases
+                        * so we can't blindly reject options, but have to
+                        * check for each specified option if it actually
+                        * differs from the currently set option and only
+                        * reject it if that's the case.
+                        *
+                        * Until that is implemented we return success for
+                        * every remount request, and silently ignore all
+                        * options that we can't actually change.
+                        */
+#if 0
+                       xfs_info(mp,
+               "mount option \"%s\" not supported for remount\n", p);
+                       return -EINVAL;
+#else
+                       break;
+#endif
+               }
+       }
+
+       /* ro -> rw */
+       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+               /*
+                * If this is the first remount to writeable state we
+                * might have some superblock changes to update.
+                */
+               if (mp->m_update_flags) {
+                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
+                       if (error) {
+                               xfs_warn(mp, "failed to write sb changes");
+                               return error;
+                       }
+                       mp->m_update_flags = 0;
+               }
+
+               /*
+                * Fill out the reserve pool if it is empty. Use the stashed
+                * value if it is non-zero, otherwise go with the default.
+                */
+               xfs_restore_resvblks(mp);
+       }
+
+       /* rw -> ro */
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+               /*
+                * After we have synced the data but before we sync the
+                * metadata, we need to free up the reserve block pool so that
+                * the used block count in the superblock on disk is correct at
+                * the end of the remount. Stash the current reserve pool size
+                * so that if we get remounted rw, we can return it to the same
+                * size.
+                */
+
+               xfs_quiesce_data(mp);
+               xfs_save_resvblks(mp);
+               xfs_quiesce_attr(mp);
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       }
+
+       return 0;
+}
+
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of the metadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
+STATIC int
+xfs_fs_freeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_save_resvblks(mp);
+       xfs_quiesce_attr(mp);
+       return -xfs_fs_log_dummy(mp);
+}
+
+STATIC int
+xfs_fs_unfreeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_restore_resvblks(mp);
+       return 0;
+}
+
+STATIC int
+xfs_fs_show_options(
+       struct seq_file         *m,
+       struct vfsmount         *mnt)
+{
+       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+       struct xfs_mount        *mp)
+{
+       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+       /* Fail a mount where the logbuf is smaller than the log stripe */
+       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+               if (mp->m_logbsize <= 0 &&
+                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
+                       mp->m_logbsize = mp->m_sb.sb_logsunit;
+               } else if (mp->m_logbsize > 0 &&
+                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
+                       xfs_warn(mp,
+               "logbuf size must be greater than or equal to log stripe size");
+                       return XFS_ERROR(EINVAL);
+               }
+       } else {
+               /* Fail a mount if the logbuf is larger than 32K */
+               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
+                       xfs_warn(mp,
+               "logbuf size for version 1 logs must be 16K or 32K");
+                       return XFS_ERROR(EINVAL);
+               }
+       }
+
+       /*
+        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+        * told by noattr2 to turn it off
+        */
+       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+           !(mp->m_flags & XFS_MOUNT_NOATTR2))
+               mp->m_flags |= XFS_MOUNT_ATTR2;
+
+       /*
+        * prohibit r/w mounts of read-only filesystems
+        */
+       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+               xfs_warn(mp,
+                       "cannot mount a read-only filesystem as read-write");
+               return XFS_ERROR(EROFS);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_fill_super(
+       struct super_block      *sb,
+       void                    *data,
+       int                     silent)
+{
+       struct inode            *root;
+       struct xfs_mount        *mp = NULL;
+       int                     flags = 0, error = ENOMEM;
+
+       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+       if (!mp)
+               goto out;
+
+       spin_lock_init(&mp->m_sb_lock);
+       mutex_init(&mp->m_growlock);
+       atomic_set(&mp->m_active_trans, 0);
+
+       mp->m_super = sb;
+       sb->s_fs_info = mp;
+
+       error = xfs_parseargs(mp, (char *)data);
+       if (error)
+               goto out_free_fsname;
+
+       sb_min_blocksize(sb, BBSIZE);
+       sb->s_xattr = xfs_xattr_handlers;
+       sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
+       sb->s_qcop = &xfs_quotactl_operations;
+#endif
+       sb->s_op = &xfs_super_operations;
+
+       if (silent)
+               flags |= XFS_MFSI_QUIET;
+
+       error = xfs_open_devices(mp);
+       if (error)
+               goto out_free_fsname;
+
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
+
+       error = xfs_readsb(mp, flags);
+       if (error)
+               goto out_destroy_counters;
+
+       error = xfs_finish_flags(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_setup_devices(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_filestream_mount(mp);
+       if (error)
+               goto out_free_sb;
+
+       /*
+        * we must configure the block size in the superblock before we run the
+        * full mount process as the mount process can lookup and cache inodes.
+        * For the same reason we must also initialise the syncd and register
+        * the inode cache shrinker so that inodes can be reclaimed during
+        * operations like a quotacheck that iterate all inodes in the
+        * filesystem.
+        */
+       sb->s_magic = XFS_SB_MAGIC;
+       sb->s_blocksize = mp->m_sb.sb_blocksize;
+       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
+       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+       sb->s_time_gran = 1;
+       set_posix_acl_flag(sb);
+
+       error = xfs_mountfs(mp);
+       if (error)
+               goto out_filestream_unmount;
+
+       error = xfs_syncd_init(mp);
+       if (error)
+               goto out_unmount;
+
+       root = igrab(VFS_I(mp->m_rootip));
+       if (!root) {
+               error = ENOENT;
+               goto out_syncd_stop;
+       }
+       if (is_bad_inode(root)) {
+               error = EINVAL;
+               goto out_syncd_stop;
+       }
+       sb->s_root = d_alloc_root(root);
+       if (!sb->s_root) {
+               error = ENOMEM;
+               goto out_iput;
+       }
+
+       return 0;
+
+ out_filestream_unmount:
+       xfs_filestream_unmount(mp);
+ out_free_sb:
+       xfs_freesb(mp);
+ out_destroy_counters:
+       xfs_icsb_destroy_counters(mp);
+ out_close_devices:
+       xfs_close_devices(mp);
+ out_free_fsname:
+       xfs_free_fsname(mp);
+       kfree(mp);
+ out:
+       return -error;
+
+ out_iput:
+       iput(root);
+ out_syncd_stop:
+       xfs_syncd_stop(mp);
+ out_unmount:
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       goto out_free_sb;
+}
+
+STATIC struct dentry *
+xfs_fs_mount(
+       struct file_system_type *fs_type,
+       int                     flags,
+       const char              *dev_name,
+       void                    *data)
+{
+       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+}
+
+static int
+xfs_fs_nr_cached_objects(
+       struct super_block      *sb)
+{
+       return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+       struct super_block      *sb,
+       int                     nr_to_scan)
+{
+       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
+static const struct super_operations xfs_super_operations = {
+       .alloc_inode            = xfs_fs_alloc_inode,
+       .destroy_inode          = xfs_fs_destroy_inode,
+       .dirty_inode            = xfs_fs_dirty_inode,
+       .write_inode            = xfs_fs_write_inode,
+       .evict_inode            = xfs_fs_evict_inode,
+       .put_super              = xfs_fs_put_super,
+       .sync_fs                = xfs_fs_sync_fs,
+       .freeze_fs              = xfs_fs_freeze,
+       .unfreeze_fs            = xfs_fs_unfreeze,
+       .statfs                 = xfs_fs_statfs,
+       .remount_fs             = xfs_fs_remount,
+       .show_options           = xfs_fs_show_options,
+       .nr_cached_objects      = xfs_fs_nr_cached_objects,
+       .free_cached_objects    = xfs_fs_free_cached_objects,
+};
+
+static struct file_system_type xfs_fs_type = {
+       .owner                  = THIS_MODULE,
+       .name                   = "xfs",
+       .mount                  = xfs_fs_mount,
+       .kill_sb                = kill_block_super,
+       .fs_flags               = FS_REQUIRES_DEV,
+};
+
+STATIC int __init
+xfs_init_zones(void)
+{
+
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out;
+
+       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+                                                 xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_destroy_ioend_zone;
+
+       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                               "xfs_log_ticket");
+       if (!xfs_log_ticket_zone)
+               goto out_destroy_ioend_pool;
+
+       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+                                               "xfs_bmap_free_item");
+       if (!xfs_bmap_free_item_zone)
+               goto out_destroy_log_ticket_zone;
+
+       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+                                               "xfs_btree_cur");
+       if (!xfs_btree_cur_zone)
+               goto out_destroy_bmap_free_item_zone;
+
+       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+                                               "xfs_da_state");
+       if (!xfs_da_state_zone)
+               goto out_destroy_btree_cur_zone;
+
+       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+       if (!xfs_dabuf_zone)
+               goto out_destroy_da_state_zone;
+
+       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+       if (!xfs_ifork_zone)
+               goto out_destroy_dabuf_zone;
+
+       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+       if (!xfs_trans_zone)
+               goto out_destroy_ifork_zone;
+
+       xfs_log_item_desc_zone =
+               kmem_zone_init(sizeof(struct xfs_log_item_desc),
+                              "xfs_log_item_desc");
+       if (!xfs_log_item_desc_zone)
+               goto out_destroy_trans_zone;
+
+       /*
+        * The size of the zone allocated buf log item is the maximum
+        * size possible under XFS.  This wastes a little bit of memory,
+        * but it is much faster.
+        */
+       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
+                                 NBWORD) * sizeof(int))), "xfs_buf_item");
+       if (!xfs_buf_item_zone)
+               goto out_destroy_log_item_desc_zone;
+
+       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+                                sizeof(xfs_extent_t))), "xfs_efd_item");
+       if (!xfs_efd_zone)
+               goto out_destroy_buf_item_zone;
+
+       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+                               sizeof(xfs_extent_t))), "xfs_efi_item");
+       if (!xfs_efi_zone)
+               goto out_destroy_efd_zone;
+
+       xfs_inode_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+                       xfs_fs_inode_init_once);
+       if (!xfs_inode_zone)
+               goto out_destroy_efi_zone;
+
+       xfs_ili_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+                                       KM_ZONE_SPREAD, NULL);
+       if (!xfs_ili_zone)
+               goto out_destroy_inode_zone;
+
+       return 0;
+
+ out_destroy_inode_zone:
+       kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+       kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+       kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+       kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+ out_destroy_trans_zone:
+       kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+       kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+       kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+       kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+       kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+       kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+       mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+       kmem_zone_destroy(xfs_ili_zone);
+       kmem_zone_destroy(xfs_inode_zone);
+       kmem_zone_destroy(xfs_efi_zone);
+       kmem_zone_destroy(xfs_efd_zone);
+       kmem_zone_destroy(xfs_buf_item_zone);
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+       kmem_zone_destroy(xfs_trans_zone);
+       kmem_zone_destroy(xfs_ifork_zone);
+       kmem_zone_destroy(xfs_dabuf_zone);
+       kmem_zone_destroy(xfs_da_state_zone);
+       kmem_zone_destroy(xfs_btree_cur_zone);
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+       kmem_zone_destroy(xfs_log_ticket_zone);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_ioend_zone);
+
+}
+
+STATIC int __init
+xfs_init_workqueues(void)
+{
+       /*
+        * max_active is set to 8 to give enough concurency to allow
+        * multiple work operations on each CPU to run. This allows multiple
+        * filesystems to be running sync work concurrently, and scales with
+        * the number of CPUs in the system.
+        */
+       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_syncd_wq)
+               goto out;
+
+       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_ail_wq)
+               goto out_destroy_syncd;
+
+       return 0;
+
+out_destroy_syncd:
+       destroy_workqueue(xfs_syncd_wq);
+out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+       destroy_workqueue(xfs_ail_wq);
+       destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
+init_xfs_fs(void)
+{
+       int                     error;
+
+       printk(KERN_INFO XFS_VERSION_STRING " with "
+                        XFS_BUILD_OPTIONS " enabled\n");
+
+       xfs_ioend_init();
+       xfs_dir_startup();
+
+       error = xfs_init_zones();
+       if (error)
+               goto out;
+
+       error = xfs_init_workqueues();
+       if (error)
+               goto out_destroy_zones;
+
+       error = xfs_mru_cache_init();
+       if (error)
+               goto out_destroy_wq;
+
+       error = xfs_filestream_init();
+       if (error)
+               goto out_mru_cache_uninit;
+
+       error = xfs_buf_init();
+       if (error)
+               goto out_filestream_uninit;
+
+       error = xfs_init_procfs();
+       if (error)
+               goto out_buf_terminate;
+
+       error = xfs_sysctl_register();
+       if (error)
+               goto out_cleanup_procfs;
+
+       vfs_initquota();
+
+       error = register_filesystem(&xfs_fs_type);
+       if (error)
+               goto out_sysctl_unregister;
+       return 0;
+
+ out_sysctl_unregister:
+       xfs_sysctl_unregister();
+ out_cleanup_procfs:
+       xfs_cleanup_procfs();
+ out_buf_terminate:
+       xfs_buf_terminate();
+ out_filestream_uninit:
+       xfs_filestream_uninit();
+ out_mru_cache_uninit:
+       xfs_mru_cache_uninit();
+ out_destroy_wq:
+       xfs_destroy_workqueues();
+ out_destroy_zones:
+       xfs_destroy_zones();
+ out:
+       return error;
+}
+
+STATIC void __exit
+exit_xfs_fs(void)
+{
+       vfs_exitquota();
+       unregister_filesystem(&xfs_fs_type);
+       xfs_sysctl_unregister();
+       xfs_cleanup_procfs();
+       xfs_buf_terminate();
+       xfs_filestream_uninit();
+       xfs_mru_cache_uninit();
+       xfs_destroy_workqueues();
+       xfs_destroy_zones();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
new file mode 100644 (file)
index 0000000..50a3266
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#include <linux/exportfs.h>
+
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota()       xfs_qm_init()
+# define vfs_exitquota()       xfs_qm_exit()
+#else
+# define vfs_initquota()       do { } while (0)
+# define vfs_exitquota()       do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING                "ACLs, "
+# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)        do { } while (0)
+#endif
+
+#define XFS_SECURITY_STRING    "security attributes, "
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING   "realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING     "large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING     "large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef DEBUG
+# define XFS_DBG_STRING                "debug"
+#else
+# define XFS_DBG_STRING                "no debug"
+#endif
+
+#define XFS_VERSION_STRING     "SGI XFS"
+#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
+                               XFS_SECURITY_STRING \
+                               XFS_REALTIME_STRING \
+                               XFS_BIGFS_STRING \
+                               XFS_DBG_STRING /* DBG must be last */
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+
+extern const struct export_operations xfs_export_operations;
+extern const struct xattr_handler *xfs_xattr_handlers[];
+extern const struct quotactl_ops xfs_quotactl_operations;
+
+#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
+
+#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
new file mode 100644 (file)
index 0000000..4604f90
--- /dev/null
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_fsops.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
+
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH       32
+
+STATIC int
+xfs_inode_ag_walk_grab(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+
+       ASSERT(rcu_read_lock_held());
+
+       /*
+        * check for stale RCU freed inode
+        *
+        * If the inode has been reallocated, it doesn't matter if it's not in
+        * the AG we are walking - we are walking for writeback, so if it
+        * passes all the "valid inode" checks and is dirty, then we'll write
+        * it back anyway.  If it has been reallocated and still being
+        * initialised, the XFS_INEW check below will catch it.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!ip->i_ino)
+               goto out_unlock_noent;
+
+       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+               goto out_unlock_noent;
+       spin_unlock(&ip->i_flags_lock);
+
+       /* nothing to sync during shutdown */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return EFSCORRUPTED;
+
+       /* If we can't grab the inode, it must on it's way to reclaim. */
+       if (!igrab(inode))
+               return ENOENT;
+
+       if (is_bad_inode(inode)) {
+               IRELE(ip);
+               return ENOENT;
+       }
+
+       /* inode is valid */
+       return 0;
+
+out_unlock_noent:
+       spin_unlock(&ip->i_flags_lock);
+       return ENOENT;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       uint32_t                first_index;
+       int                     last_error = 0;
+       int                     skipped;
+       int                     done;
+       int                     nr_found;
+
+restart:
+       done = 0;
+       skipped = 0;
+       first_index = 0;
+       nr_found = 0;
+       do {
+               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+               int             error = 0;
+               int             i;
+
+               rcu_read_lock();
+               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH);
+               if (!nr_found) {
+                       rcu_read_unlock();
+                       break;
+               }
+
+               /*
+                * Grab the inodes before we drop the lock. if we found
+                * nothing, nr == 0 and the loop will be skipped.
+                */
+               for (i = 0; i < nr_found; i++) {
+                       struct xfs_inode *ip = batch[i];
+
+                       if (done || xfs_inode_ag_walk_grab(ip))
+                               batch[i] = NULL;
+
+                       /*
+                        * Update the index for the next lookup. Catch
+                        * overflows into the next AG range which can occur if
+                        * we have inodes in the last block of the AG and we
+                        * are currently pointing to the last inode.
+                        *
+                        * Because we may see inodes that are from the wrong AG
+                        * due to RCU freeing and reallocation, only update the
+                        * index if it lies in this AG. It was a race that lead
+                        * us to see this inode, so another lookup from the
+                        * same index will not find it again.
+                        */
+                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+                               continue;
+                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                               done = 1;
+               }
+
+               /* unlock now we've grabbed the inodes. */
+               rcu_read_unlock();
+
+               for (i = 0; i < nr_found; i++) {
+                       if (!batch[i])
+                               continue;
+                       error = execute(batch[i], pag, flags);
+                       IRELE(batch[i]);
+                       if (error == EAGAIN) {
+                               skipped++;
+                               continue;
+                       }
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
+               }
+
+               /* bail out if the filesystem is corrupted.  */
+               if (error == EFSCORRUPTED)
+                       break;
+
+               cond_resched();
+
+       } while (nr_found && !done);
+
+       if (skipped) {
+               delay(1);
+               goto restart;
+       }
+       return last_error;
+}
+
+int
+xfs_inode_ag_iterator(
+       struct xfs_mount        *mp,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       ag = 0;
+       while ((pag = xfs_perag_get(mp, ag))) {
+               ag = pag->pag_agno + 1;
+               error = xfs_inode_ag_walk(mp, pag, execute, flags);
+               xfs_perag_put(pag);
+               if (error) {
+                       last_error = error;
+                       if (error == EFSCORRUPTED)
+                               break;
+               }
+       }
+       return XFS_ERROR(last_error);
+}
+
+STATIC int
+xfs_sync_inode_data(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       struct address_space *mapping = inode->i_mapping;
+       int                     error = 0;
+
+       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               goto out_wait;
+
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+               if (flags & SYNC_TRYLOCK)
+                       goto out_wait;
+               xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       }
+
+       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+                               0 : XBF_ASYNC, FI_NONE);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+       if (flags & SYNC_WAIT)
+               xfs_ioend_wait(ip);
+       return error;
+}
+
+STATIC int
+xfs_sync_inode_attr(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_inode_clean(ip))
+               goto out_unlock;
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(flags & SYNC_WAIT))
+                       goto out_unlock;
+               xfs_iflock(ip);
+       }
+
+       if (xfs_inode_clean(ip)) {
+               xfs_ifunlock(ip);
+               goto out_unlock;
+       }
+
+       error = xfs_iflush(ip, flags);
+
+       /*
+        * We don't want to try again on non-blocking flushes that can't run
+        * again immediately. If an inode really must be written, then that's
+        * what the SYNC_WAIT flag is for.
+        */
+       if (error == EAGAIN) {
+               ASSERT(!(flags & SYNC_WAIT));
+               error = 0;
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+STATIC int
+xfs_sync_data(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       int                     error;
+
+       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+       if (error)
+               return XFS_ERROR(error);
+
+       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
+       return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+STATIC int
+xfs_sync_attr(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       ASSERT((flags & ~SYNC_WAIT) == 0);
+
+       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
+}
+
+STATIC int
+xfs_sync_fsdata(
+       struct xfs_mount        *mp)
+{
+       struct xfs_buf          *bp;
+
+       /*
+        * If the buffer is pinned then push on the log so we won't get stuck
+        * waiting in the write for someone, maybe ourselves, to flush the log.
+        *
+        * Even though we just pushed the log above, we did not have the
+        * superblock buffer locked at that point so it can become pinned in
+        * between there and here.
+        */
+       bp = xfs_getsb(mp, 0);
+       if (xfs_buf_ispinned(bp))
+               xfs_log_force(mp, 0);
+
+       return xfs_bwrite(mp, bp);
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete.  Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+       struct xfs_mount        *mp)
+{
+       int                     error, error2 = 0;
+
+       xfs_qm_sync(mp, SYNC_TRYLOCK);
+       xfs_qm_sync(mp, SYNC_WAIT);
+
+       /* force out the newly dirtied log buffers */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /* write superblock and hoover up shutdown errors */
+       error = xfs_sync_fsdata(mp);
+
+       /* make sure all delwri buffers are written out */
+       xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+       /* mark the log as covered if needed */
+       if (xfs_log_need_covered(mp))
+               error2 = xfs_fs_log_dummy(mp);
+
+       /* flush data-only devices */
+       if (mp->m_rtdev_targp)
+               XFS_bflush(mp->m_rtdev_targp);
+
+       return error ? error : error2;
+}
+
+STATIC void
+xfs_quiesce_fs(
+       struct xfs_mount        *mp)
+{
+       int     count = 0, pincount;
+
+       xfs_reclaim_inodes(mp, 0);
+       xfs_flush_buftarg(mp->m_ddev_targp, 0);
+
+       /*
+        * This loop must run at least twice.  The first instance of the loop
+        * will flush most meta data but that will generate more meta data
+        * (typically directory updates).  Which then must be flushed and
+        * logged before we can write the unmount record. We also so sync
+        * reclaim of inodes to catch any that the above delwri flush skipped.
+        */
+       do {
+               xfs_reclaim_inodes(mp, SYNC_WAIT);
+               xfs_sync_attr(mp, SYNC_WAIT);
+               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+               if (!pincount) {
+                       delay(50);
+                       count++;
+               }
+       } while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
+void
+xfs_quiesce_attr(
+       struct xfs_mount        *mp)
+{
+       int     error = 0;
+
+       /* wait for all modifications to complete */
+       while (atomic_read(&mp->m_active_trans) > 0)
+               delay(100);
+
+       /* flush inodes and push all remaining buffers out to disk */
+       xfs_quiesce_fs(mp);
+
+       /*
+        * Just warn here till VFS can correctly support
+        * read-only remount without racing.
+        */
+       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+       /* Push the superblock and write an unmount record */
+       error = xfs_log_sbcount(mp);
+       if (error)
+               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
+                               "Frozen image may not be consistent.");
+       xfs_log_unmount_write(mp);
+       xfs_unmountfs_writesb(mp);
+}
+
+static void
+xfs_syncd_queue_sync(
+       struct xfs_mount        *mp)
+{
+       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle and not frozen.
+ */
+STATIC void
+xfs_sync_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_sync_work);
+       int             error;
+
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               /* dgc: errors ignored here */
+               if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                   xfs_log_need_covered(mp))
+                       error = xfs_fs_log_dummy(mp);
+               else
+                       xfs_log_force(mp, 0);
+               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+               /* start pushing all the metadata that is currently dirty */
+               xfs_ail_push_all(mp->m_ail);
+       }
+
+       /* queue us up again */
+       xfs_syncd_queue_sync(mp);
+}
+
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+       struct xfs_mount        *mp)
+{
+
+       /*
+        * We can have inodes enter reclaim after we've shut down the syncd
+        * workqueue during unmount, so don't allow reclaim work to be queued
+        * during unmount.
+        */
+       if (!(mp->m_super->s_flags & MS_ACTIVE))
+               return;
+
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_reclaim_work);
+
+       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+       xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       queue_work(xfs_syncd_wq, &mp->m_flush_work);
+       flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(work,
+                                       struct xfs_mount, m_flush_work);
+
+       xfs_sync_data(mp, SYNC_TRYLOCK);
+       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
+int
+xfs_syncd_init(
+       struct xfs_mount        *mp)
+{
+       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+       xfs_syncd_queue_sync(mp);
+       xfs_syncd_queue_reclaim(mp);
+
+       return 0;
+}
+
+void
+xfs_syncd_stop(
+       struct xfs_mount        *mp)
+{
+       cancel_delayed_work_sync(&mp->m_sync_work);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
+       cancel_work_sync(&mp->m_flush_work);
+}
+
+void
+__xfs_inode_set_reclaim_tag(
+       struct xfs_perag        *pag,
+       struct xfs_inode        *ip)
+{
+       radix_tree_tag_set(&pag->pag_ici_root,
+                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+                          XFS_ICI_RECLAIM_TAG);
+
+       if (!pag->pag_ici_reclaimable) {
+               /* propagate the reclaim tag up into the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+
+               /* schedule periodic background inode reclaim */
+               xfs_syncd_queue_reclaim(ip->i_mount);
+
+               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+       pag->pag_ici_reclaimable++;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+       xfs_inode_t     *ip)
+{
+       struct xfs_mount *mp = ip->i_mount;
+       struct xfs_perag *pag;
+
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+       spin_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+       __xfs_inode_set_reclaim_tag(pag, ip);
+       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+       spin_unlock(&ip->i_flags_lock);
+       spin_unlock(&pag->pag_ici_lock);
+       xfs_perag_put(pag);
+}
+
+STATIC void
+__xfs_inode_clear_reclaim(
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       pag->pag_ici_reclaimable--;
+       if (!pag->pag_ici_reclaimable) {
+               /* clear the reclaim tag from the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+       xfs_mount_t     *mp,
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       radix_tree_tag_clear(&pag->pag_ici_root,
+                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+       __xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       ASSERT(rcu_read_lock_held());
+
+       /* quick check for stale RCU freed inode */
+       if (!ip->i_ino)
+               return 1;
+
+       /*
+        * do some unlocked checks first to avoid unnecessary lock traffic.
+        * The first is a flush lock check, the second is a already in reclaim
+        * check. Only do these checks if we are not going to block on locks.
+        */
+       if ((flags & SYNC_TRYLOCK) &&
+           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+               return 1;
+       }
+
+       /*
+        * The radix tree lock here protects a thread in xfs_iget from racing
+        * with us starting reclaim on the inode.  Once we have the
+        * XFS_IRECLAIM flag set it will not touch us.
+        *
+        * Due to RCU lookup, we may find inodes that have been freed and only
+        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
+        * aren't candidates for reclaim at all, so we must check the
+        * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+               /* not a reclaim candidate. */
+               spin_unlock(&ip->i_flags_lock);
+               return 1;
+       }
+       __xfs_iflags_set(ip, XFS_IRECLAIM);
+       spin_unlock(&ip->i_flags_lock);
+       return 0;
+}
+
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *
+ *     inode state          iflush ret         required action
+ *      ---------------      ----------         ---------------
+ *     bad                     -               reclaim
+ *     shutdown                EIO             unpin and reclaim
+ *     clean, unpinned         0               reclaim
+ *     stale, unpinned         0               reclaim
+ *     clean, pinned(*)        0               requeue
+ *     stale, pinned           EAGAIN          requeue
+ *     dirty, delwri ok        0               requeue
+ *     dirty, delwri blocked   EAGAIN          requeue
+ *     dirty, sync flush       0               reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background relaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *     bad             => reclaim
+ *     shutdown        => unpin and reclaim
+ *     pinned, delwri  => requeue
+ *     pinned, sync    => unpin
+ *     stale           => reclaim
+ *     clean           => reclaim
+ *     dirty, delwri   => flush and requeue
+ *     dirty, sync     => flush, wait and reclaim
+ */
+STATIC int
+xfs_reclaim_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     sync_mode)
+{
+       int     error;
+
+restart:
+       error = 0;
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(sync_mode & SYNC_WAIT))
+                       goto out;
+               xfs_iflock(ip);
+       }
+
+       if (is_bad_inode(VFS_I(ip)))
+               goto reclaim;
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_iunpin_wait(ip);
+               goto reclaim;
+       }
+       if (xfs_ipincount(ip)) {
+               if (!(sync_mode & SYNC_WAIT)) {
+                       xfs_ifunlock(ip);
+                       goto out;
+               }
+               xfs_iunpin_wait(ip);
+       }
+       if (xfs_iflags_test(ip, XFS_ISTALE))
+               goto reclaim;
+       if (xfs_inode_clean(ip))
+               goto reclaim;
+
+       /*
+        * Now we have an inode that needs flushing.
+        *
+        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+        * reclaim as we can deadlock with inode cluster removal.
+        * xfs_ifree_cluster() can lock the inode buffer before it locks the
+        * ip->i_lock, and we are doing the exact opposite here. As a result,
+        * doing a blocking xfs_itobp() to get the cluster buffer will result
+        * in an ABBA deadlock with xfs_ifree_cluster().
+        *
+        * As xfs_ifree_cluser() must gather all inodes that are active in the
+        * cache to mark them stale, if we hit this case we don't actually want
+        * to do IO here - we want the inode marked stale so we can simply
+        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+        * just unlock the inode, back off and try again. Hopefully the next
+        * pass through will see the stale flag set on the inode.
+        */
+       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
+       if (sync_mode & SYNC_WAIT) {
+               if (error == EAGAIN) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       /* backoff longer than in xfs_ifree_cluster */
+                       delay(2);
+                       goto restart;
+               }
+               xfs_iflock(ip);
+               goto reclaim;
+       }
+
+       /*
+        * When we have to flush an inode but don't have SYNC_WAIT set, we
+        * flush the inode out using a delwri buffer and wait for the next
+        * call into reclaim to find it in a clean state instead of waiting for
+        * it now. We also don't return errors here - if the error is transient
+        * then the next reclaim pass will flush the inode, and if the error
+        * is permanent then the next sync reclaim will reclaim the inode and
+        * pass on the error.
+        */
+       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_warn(ip->i_mount,
+                       "inode 0x%llx background reclaim flush failed with %d",
+                       (long long)ip->i_ino, error);
+       }
+out:
+       xfs_iflags_clear(ip, XFS_IRECLAIM);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * We could return EAGAIN here to make reclaim rescan the inode tree in
+        * a short while. However, this just burns CPU time scanning the tree
+        * waiting for IO to complete and xfssyncd never goes back to the idle
+        * state. Instead, return 0 to let the next scheduled background reclaim
+        * attempt to reclaim the inode again.
+        */
+       return 0;
+
+reclaim:
+       xfs_ifunlock(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       XFS_STATS_INC(xs_ig_reclaims);
+       /*
+        * Remove the inode from the per-AG radix tree.
+        *
+        * Because radix_tree_delete won't complain even if the item was never
+        * added to the tree assert that it's been there before to catch
+        * problems with the inode life time early on.
+        */
+       spin_lock(&pag->pag_ici_lock);
+       if (!radix_tree_delete(&pag->pag_ici_root,
+                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+               ASSERT(0);
+       __xfs_inode_clear_reclaim(pag, ip);
+       spin_unlock(&pag->pag_ici_lock);
+
+       /*
+        * Here we do an (almost) spurious inode lock in order to coordinate
+        * with inode cache radix tree lookups.  This is because the lookup
+        * can reference the inodes in the cache without taking references.
+        *
+        * We make that OK here by ensuring that we wait until the inode is
+        * unlocked after the lookup before we go ahead and free it.  We get
+        * both the ilock and the iolock because the code may need to drop the
+        * ilock one but will still hold the iolock.
+        */
+       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_qm_dqdetach(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       xfs_inode_free(ip);
+       return error;
+
+}
+
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+       struct xfs_mount        *mp,
+       int                     flags,
+       int                     *nr_to_scan)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+       int                     trylock = flags & SYNC_TRYLOCK;
+       int                     skipped;
+
+restart:
+       ag = 0;
+       skipped = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               unsigned long   first_index = 0;
+               int             done = 0;
+               int             nr_found = 0;
+
+               ag = pag->pag_agno + 1;
+
+               if (trylock) {
+                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                               skipped++;
+                               xfs_perag_put(pag);
+                               continue;
+                       }
+                       first_index = pag->pag_ici_reclaim_cursor;
+               } else
+                       mutex_lock(&pag->pag_ici_reclaim_lock);
+
+               do {
+                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                       int     i;
+
+                       rcu_read_lock();
+                       nr_found = radix_tree_gang_lookup_tag(
+                                       &pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH,
+                                       XFS_ICI_RECLAIM_TAG);
+                       if (!nr_found) {
+                               done = 1;
+                               rcu_read_unlock();
+                               break;
+                       }
+
+                       /*
+                        * Grab the inodes before we drop the lock. if we found
+                        * nothing, nr == 0 and the loop will be skipped.
+                        */
+                       for (i = 0; i < nr_found; i++) {
+                               struct xfs_inode *ip = batch[i];
+
+                               if (done || xfs_reclaim_inode_grab(ip, flags))
+                                       batch[i] = NULL;
+
+                               /*
+                                * Update the index for the next lookup. Catch
+                                * overflows into the next AG range which can
+                                * occur if we have inodes in the last block of
+                                * the AG and we are currently pointing to the
+                                * last inode.
+                                *
+                                * Because we may see inodes that are from the
+                                * wrong AG due to RCU freeing and
+                                * reallocation, only update the index if it
+                                * lies in this AG. It was a race that lead us
+                                * to see this inode, so another lookup from
+                                * the same index will not find it again.
+                                */
+                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+                                                               pag->pag_agno)
+                                       continue;
+                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                       done = 1;
+                       }
+
+                       /* unlock now we've grabbed the inodes. */
+                       rcu_read_unlock();
+
+                       for (i = 0; i < nr_found; i++) {
+                               if (!batch[i])
+                                       continue;
+                               error = xfs_reclaim_inode(batch[i], pag, flags);
+                               if (error && last_error != EFSCORRUPTED)
+                                       last_error = error;
+                       }
+
+                       *nr_to_scan -= XFS_LOOKUP_BATCH;
+
+                       cond_resched();
+
+               } while (nr_found && !done && *nr_to_scan > 0);
+
+               if (trylock && !done)
+                       pag->pag_ici_reclaim_cursor = first_index;
+               else
+                       pag->pag_ici_reclaim_cursor = 0;
+               mutex_unlock(&pag->pag_ici_reclaim_lock);
+               xfs_perag_put(pag);
+       }
+
+       /*
+        * if we skipped any AG, and we still have scan count remaining, do
+        * another pass this time using blocking reclaim semantics (i.e
+        * waiting on the reclaim locks and ignoring the reclaim cursors). This
+        * ensure that when we get more reclaimers than AGs we block rather
+        * than spin trying to execute reclaim.
+        */
+       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
+               trylock = 0;
+               goto restart;
+       }
+       return XFS_ERROR(last_error);
+}
+
+int
+xfs_reclaim_inodes(
+       xfs_mount_t     *mp,
+       int             mode)
+{
+       int             nr_to_scan = INT_MAX;
+
+       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+}
+
+/*
+ * Scan a certain number of inodes for reclaim.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
+ */
+void
+xfs_reclaim_inodes_nr(
+       struct xfs_mount        *mp,
+       int                     nr_to_scan)
+{
+       /* kick background reclaimer and push the AIL */
+       xfs_syncd_queue_reclaim(mp);
+       xfs_ail_push_all(mp->m_ail);
+
+       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
+
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+       struct xfs_mount        *mp)
+{
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          ag = 0;
+       int                     reclaimable = 0;
+
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               ag = pag->pag_agno + 1;
+               reclaimable += pag->pag_ici_reclaimable;
+               xfs_perag_put(pag);
+       }
+       return reclaimable;
+}
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
new file mode 100644 (file)
index 0000000..941202e
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+struct xfs_perag;
+
+#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
+#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
+
+extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inodes(struct xfs_inode *ip);
+
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+                               struct xfs_inode *ip);
+
+int xfs_sync_inode_grab(struct xfs_inode *ip);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+       int flags);
+
+#endif
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
new file mode 100644 (file)
index 0000000..ee2d2ad
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include "xfs_error.h"
+
+static struct ctl_table_header *xfs_table_header;
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             c, ret, *valp = ctl->data;
+       __uint32_t      vn_active;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+
+       if (!ret && write && *valp) {
+               xfs_notice(NULL, "Clearing xfsstats");
+               for_each_possible_cpu(c) {
+                       preempt_disable();
+                       /* save vn_active, it's a universal truth! */
+                       vn_active = per_cpu(xfsstats, c).vn_active;
+                       memset(&per_cpu(xfsstats, c), 0,
+                              sizeof(struct xfsstats));
+                       per_cpu(xfsstats, c).vn_active = vn_active;
+                       preempt_enable();
+               }
+               xfs_stats_clear = 0;
+       }
+
+       return ret;
+}
+
+STATIC int
+xfs_panic_mask_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             ret, *valp = ctl->data;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+       if (!ret && write) {
+               xfs_panic_mask = *valp;
+#ifdef DEBUG
+               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
+#endif
+       }
+       return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+static ctl_table xfs_table[] = {
+       {
+               .procname       = "irix_sgid_inherit",
+               .data           = &xfs_params.sgid_inherit.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.sgid_inherit.min,
+               .extra2         = &xfs_params.sgid_inherit.max
+       },
+       {
+               .procname       = "irix_symlink_mode",
+               .data           = &xfs_params.symlink_mode.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.symlink_mode.min,
+               .extra2         = &xfs_params.symlink_mode.max
+       },
+       {
+               .procname       = "panic_mask",
+               .data           = &xfs_params.panic_mask.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_panic_mask_proc_handler,
+               .extra1         = &xfs_params.panic_mask.min,
+               .extra2         = &xfs_params.panic_mask.max
+       },
+
+       {
+               .procname       = "error_level",
+               .data           = &xfs_params.error_level.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.error_level.min,
+               .extra2         = &xfs_params.error_level.max
+       },
+       {
+               .procname       = "xfssyncd_centisecs",
+               .data           = &xfs_params.syncd_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.syncd_timer.min,
+               .extra2         = &xfs_params.syncd_timer.max
+       },
+       {
+               .procname       = "inherit_sync",
+               .data           = &xfs_params.inherit_sync.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_sync.min,
+               .extra2         = &xfs_params.inherit_sync.max
+       },
+       {
+               .procname       = "inherit_nodump",
+               .data           = &xfs_params.inherit_nodump.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodump.min,
+               .extra2         = &xfs_params.inherit_nodump.max
+       },
+       {
+               .procname       = "inherit_noatime",
+               .data           = &xfs_params.inherit_noatim.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_noatim.min,
+               .extra2         = &xfs_params.inherit_noatim.max
+       },
+       {
+               .procname       = "xfsbufd_centisecs",
+               .data           = &xfs_params.xfs_buf_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_timer.min,
+               .extra2         = &xfs_params.xfs_buf_timer.max
+       },
+       {
+               .procname       = "age_buffer_centisecs",
+               .data           = &xfs_params.xfs_buf_age.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_age.min,
+               .extra2         = &xfs_params.xfs_buf_age.max
+       },
+       {
+               .procname       = "inherit_nosymlinks",
+               .data           = &xfs_params.inherit_nosym.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nosym.min,
+               .extra2         = &xfs_params.inherit_nosym.max
+       },
+       {
+               .procname       = "rotorstep",
+               .data           = &xfs_params.rotorstep.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.rotorstep.min,
+               .extra2         = &xfs_params.rotorstep.max
+       },
+       {
+               .procname       = "inherit_nodefrag",
+               .data           = &xfs_params.inherit_nodfrg.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodfrg.min,
+               .extra2         = &xfs_params.inherit_nodfrg.max
+       },
+       {
+               .procname       = "filestream_centisecs",
+               .data           = &xfs_params.fstrm_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.fstrm_timer.min,
+               .extra2         = &xfs_params.fstrm_timer.max,
+       },
+       /* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+       {
+               .procname       = "stats_clear",
+               .data           = &xfs_params.stats_clear.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_stats_clear_proc_handler,
+               .extra1         = &xfs_params.stats_clear.min,
+               .extra2         = &xfs_params.stats_clear.max
+       },
+#endif /* CONFIG_PROC_FS */
+
+       {}
+};
+
+static ctl_table xfs_dir_table[] = {
+       {
+               .procname       = "xfs",
+               .mode           = 0555,
+               .child          = xfs_table
+       },
+       {}
+};
+
+static ctl_table xfs_root_table[] = {
+       {
+               .procname       = "fs",
+               .mode           = 0555,
+               .child          = xfs_dir_table
+       },
+       {}
+};
+
+int
+xfs_sysctl_register(void)
+{
+       xfs_table_header = register_sysctl_table(xfs_root_table);
+       if (!xfs_table_header)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+       unregister_sysctl_table(xfs_table_header);
+}
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
new file mode 100644 (file)
index 0000000..b9937d4
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+       int min;
+       int val;
+       int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
+                                        * not a member of parent dir GID. */
+       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
+       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
+       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
+       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
+       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
+       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
+       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
+       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
+       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
+       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
+       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered.  These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0   No error reports
+ * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *     additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+       /* XFS_REFCACHE_SIZE = 1 */
+       /* XFS_REFCACHE_PURGE = 2 */
+       /* XFS_RESTRICT_CHOWN = 3 */
+       XFS_SGID_INHERIT = 4,
+       XFS_SYMLINK_MODE = 5,
+       XFS_PANIC_MASK = 6,
+       XFS_ERRLEVEL = 7,
+       XFS_SYNCD_TIMER = 8,
+       /* XFS_PROBE_DMAPI = 9 */
+       /* XFS_PROBE_IOOPS = 10 */
+       /* XFS_PROBE_QUOTA = 11 */
+       XFS_STATS_CLEAR = 12,
+       XFS_INHERIT_SYNC = 13,
+       XFS_INHERIT_NODUMP = 14,
+       XFS_INHERIT_NOATIME = 15,
+       XFS_BUF_TIMER = 16,
+       XFS_BUF_AGE = 17,
+       /* XFS_IO_BYPASS = 18 */
+       XFS_INHERIT_NOSYM = 19,
+       XFS_ROTORSTEP = 20,
+       XFS_INHERIT_NODFRG = 21,
+       XFS_FILESTREAM_TIMER = 22,
+};
+
+extern xfs_param_t     xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern int xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()         (0)
+# define xfs_sysctl_unregister()       do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
new file mode 100644 (file)
index 0000000..9010ce8
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_mount.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_quota.h"
+#include "xfs_iomap.h"
+#include "xfs_aops.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_inode_item.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
new file mode 100644 (file)
index 0000000..690fc7a
--- /dev/null
@@ -0,0 +1,1746 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xfs
+
+#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XFS_H
+
+#include <linux/tracepoint.h>
+
+struct xfs_agf;
+struct xfs_alloc_arg;
+struct xfs_attr_list_context;
+struct xfs_buf_log_item;
+struct xfs_da_args;
+struct xfs_da_node_entry;
+struct xfs_dquot;
+struct xlog_ticket;
+struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
+
+DECLARE_EVENT_CLASS(xfs_attr_list_class,
+       TP_PROTO(struct xfs_attr_list_context *ctx),
+       TP_ARGS(ctx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
+       )
+)
+
+#define DEFINE_ATTR_LIST_EVENT(name) \
+DEFINE_EVENT(xfs_attr_list_class, name, \
+       TP_PROTO(struct xfs_attr_list_context *ctx), \
+       TP_ARGS(ctx))
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+
+DECLARE_EVENT_CLASS(xfs_perag_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agno, refcount, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, refcount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->refcount = refcount;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->refcount,
+                 (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name)   \
+DEFINE_EVENT(xfs_perag_class, name,    \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
+                unsigned long caller_ip),                                      \
+       TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+
+TRACE_EVENT(xfs_attr_list_node_descend,
+       TP_PROTO(struct xfs_attr_list_context *ctx,
+                struct xfs_da_node_entry *btree),
+       TP_ARGS(ctx, btree),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+               __field(u32, bt_hashval)
+               __field(u32, bt_before)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+               __entry->bt_hashval = be32_to_cpu(btree->hashval);
+               __entry->bt_before = be32_to_cpu(btree->before);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s "
+                 "node hashval %u, node before %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
+                  __entry->bt_hashval,
+                  __entry->bt_before)
+);
+
+TRACE_EVENT(xfs_iext_insert,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
+                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
+       TP_ARGS(ip, idx, r, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r->br_startoff;
+               __entry->startblock = r->br_startblock;
+               __entry->blockcount = r->br_blockcount;
+               __entry->state = r->br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_bmap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+                unsigned long caller_ip),
+       TP_ARGS(ip, idx, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
+                                               ip->i_afp : &ip->i_df;
+               struct xfs_bmbt_irec    r;
+
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r.br_startoff;
+               __entry->startblock = r.br_startblock;
+               __entry->blockcount = r.br_blockcount;
+               __entry->state = r.br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_BMAP_EVENT(name) \
+DEFINE_EVENT(xfs_bmap_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip, idx, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_remove);
+DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
+DEFINE_BMAP_EVENT(xfs_bmap_post_update);
+DEFINE_BMAP_EVENT(xfs_extlist);
+
+DECLARE_EVENT_CLASS(xfs_buf_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+       TP_ARGS(bp, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_EVENT(name) \
+DEFINE_EVENT(xfs_buf_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
+       TP_ARGS(bp, caller_ip))
+DEFINE_BUF_EVENT(xfs_buf_init);
+DEFINE_BUF_EVENT(xfs_buf_free);
+DEFINE_BUF_EVENT(xfs_buf_hold);
+DEFINE_BUF_EVENT(xfs_buf_rele);
+DEFINE_BUF_EVENT(xfs_buf_iodone);
+DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_bawrite);
+DEFINE_BUF_EVENT(xfs_buf_bdwrite);
+DEFINE_BUF_EVENT(xfs_buf_lock);
+DEFINE_BUF_EVENT(xfs_buf_lock_done);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
+DEFINE_BUF_EVENT(xfs_buf_unlock);
+DEFINE_BUF_EVENT(xfs_buf_iowait);
+DEFINE_BUF_EVENT(xfs_buf_iowait_done);
+DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
+DEFINE_BUF_EVENT(xfs_bdstrat_shut);
+DEFINE_BUF_EVENT(xfs_buf_item_relse);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
+
+/* not really buffer traces, but the buf provides useful information */
+DEFINE_BUF_EVENT(xfs_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_reset_dqcounts);
+DEFINE_BUF_EVENT(xfs_inode_item_push);
+
+/* pass flags explicitly */
+DECLARE_EVENT_CLASS(xfs_buf_flags_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
+       TP_ARGS(bp, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->flags = flags;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_FLAGS_EVENT(name) \
+DEFINE_EVENT(xfs_buf_flags_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
+       TP_ARGS(bp, flags, caller_ip))
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
+
+TRACE_EVENT(xfs_buf_ioerror,
+       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+       TP_ARGS(bp, error, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(unsigned, flags)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(int, error)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->error = error;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d error %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __entry->error,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_buf_item_class,
+       TP_PROTO(struct xfs_buf_log_item *bip),
+       TP_ARGS(bip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, buf_bno)
+               __field(size_t, buf_len)
+               __field(int, buf_hold)
+               __field(int, buf_pincount)
+               __field(int, buf_lockval)
+               __field(unsigned, buf_flags)
+               __field(unsigned, bli_recur)
+               __field(int, bli_refcount)
+               __field(unsigned, bli_flags)
+               __field(void *, li_desc)
+               __field(unsigned, li_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = bip->bli_buf->b_target->bt_dev;
+               __entry->bli_flags = bip->bli_flags;
+               __entry->bli_recur = bip->bli_recur;
+               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
+               __entry->buf_bno = bip->bli_buf->b_bn;
+               __entry->buf_len = bip->bli_buf->b_buffer_length;
+               __entry->buf_flags = bip->bli_buf->b_flags;
+               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
+               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
+               __entry->buf_lockval = bip->bli_buf->b_sema.count;
+               __entry->li_desc = bip->bli_item.li_desc;
+               __entry->li_flags = bip->bli_item.li_flags;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s recur %d refcount %d bliflags %s "
+                 "lidesc 0x%p liflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->buf_bno,
+                 __entry->buf_len,
+                 __entry->buf_hold,
+                 __entry->buf_pincount,
+                 __entry->buf_lockval,
+                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
+                 __entry->bli_recur,
+                 __entry->bli_refcount,
+                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
+                 __entry->li_desc,
+                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_BUF_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_buf_item_class, name, \
+       TP_PROTO(struct xfs_buf_log_item *bip), \
+       TP_ARGS(bip))
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
+
+DECLARE_EVENT_CLASS(xfs_lock_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
+                unsigned long caller_ip),
+       TP_ARGS(ip,  lock_flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, lock_flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lock_flags = lock_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_LOCK_EVENT(name) \
+DEFINE_EVENT(xfs_lock_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip,  lock_flags, caller_ip))
+DEFINE_LOCK_EVENT(xfs_ilock);
+DEFINE_LOCK_EVENT(xfs_ilock_nowait);
+DEFINE_LOCK_EVENT(xfs_ilock_demote);
+DEFINE_LOCK_EVENT(xfs_iunlock);
+
+DECLARE_EVENT_CLASS(xfs_inode_class,
+       TP_PROTO(struct xfs_inode *ip),
+       TP_ARGS(ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino)
+)
+
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
+       TP_PROTO(struct xfs_inode *ip), \
+       TP_ARGS(ip))
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_get_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
+       TP_ARGS(ip, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, count)
+               __field(int, pincount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->count = atomic_read(&VFS_I(ip)->i_count);
+               __entry->pincount = atomic_read(&ip->i_pincount);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->count,
+                 __entry->pincount,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
+       TP_ARGS(ip, caller_ip))
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+       TP_ARGS(dp, name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, dp_ino)
+               __dynamic_array(char, name, name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(dp)->i_sb->s_dev;
+               __entry->dp_ino = dp->i_ino;
+               memcpy(__get_str(name), name->name, name->len);
+       ),
+       TP_printk("dev %d:%d dp ino 0x%llx name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->dp_ino,
+                 __get_str(name))
+)
+
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+       TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+                struct xfs_name *src_name, struct xfs_name *target_name),
+       TP_ARGS(src_dp, target_dp, src_name, target_name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, src_dp_ino)
+               __field(xfs_ino_t, target_dp_ino)
+               __dynamic_array(char, src_name, src_name->len)
+               __dynamic_array(char, target_name, target_name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+               __entry->src_dp_ino = src_dp->i_ino;
+               __entry->target_dp_ino = target_dp->i_ino;
+               memcpy(__get_str(src_name), src_name->name, src_name->len);
+               memcpy(__get_str(target_name), target_name->name, target_name->len);
+       ),
+       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+                 " src name %s target name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->src_dp_ino,
+                 __entry->target_dp_ino,
+                 __get_str(src_name),
+                 __get_str(target_name))
+)
+
+DECLARE_EVENT_CLASS(xfs_dquot_class,
+       TP_PROTO(struct xfs_dquot *dqp),
+       TP_ARGS(dqp),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(u32, id)
+               __field(unsigned, flags)
+               __field(unsigned, nrefs)
+               __field(unsigned long long, res_bcount)
+               __field(unsigned long long, bcount)
+               __field(unsigned long long, icount)
+               __field(unsigned long long, blk_hardlimit)
+               __field(unsigned long long, blk_softlimit)
+               __field(unsigned long long, ino_hardlimit)
+               __field(unsigned long long, ino_softlimit)
+       ), \
+       TP_fast_assign(
+               __entry->dev = dqp->q_mount->m_super->s_dev;
+               __entry->id = be32_to_cpu(dqp->q_core.d_id);
+               __entry->flags = dqp->dq_flags;
+               __entry->nrefs = dqp->q_nrefs;
+               __entry->res_bcount = dqp->q_res_bcount;
+               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
+               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
+               __entry->blk_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               __entry->blk_softlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               __entry->ino_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+               __entry->ino_softlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
+       ),
+       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
+                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
+                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->id,
+                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
+                 __entry->nrefs,
+                 __entry->res_bcount,
+                 __entry->bcount,
+                 __entry->blk_hardlimit,
+                 __entry->blk_softlimit,
+                 __entry->icount,
+                 __entry->ino_hardlimit,
+                 __entry->ino_softlimit)
+)
+
+#define DEFINE_DQUOT_EVENT(name) \
+DEFINE_EVENT(xfs_dquot_class, name, \
+       TP_PROTO(struct xfs_dquot *dqp), \
+       TP_ARGS(dqp))
+DEFINE_DQUOT_EVENT(xfs_dqadjust);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqattach_found);
+DEFINE_DQUOT_EVENT(xfs_dqattach_get);
+DEFINE_DQUOT_EVENT(xfs_dqinit);
+DEFINE_DQUOT_EVENT(xfs_dqreuse);
+DEFINE_DQUOT_EVENT(xfs_dqalloc);
+DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
+DEFINE_DQUOT_EVENT(xfs_dqread);
+DEFINE_DQUOT_EVENT(xfs_dqread_fail);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
+DEFINE_DQUOT_EVENT(xfs_dqget_hit);
+DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqput);
+DEFINE_DQUOT_EVENT(xfs_dqput_wait);
+DEFINE_DQUOT_EVENT(xfs_dqput_free);
+DEFINE_DQUOT_EVENT(xfs_dqrele);
+DEFINE_DQUOT_EVENT(xfs_dqflush);
+DEFINE_DQUOT_EVENT(xfs_dqflush_force);
+DEFINE_DQUOT_EVENT(xfs_dqflush_done);
+
+DECLARE_EVENT_CLASS(xfs_loggrant_class,
+       TP_PROTO(struct log *log, struct xlog_ticket *tic),
+       TP_ARGS(log, tic),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned, trans_type)
+               __field(char, ocnt)
+               __field(char, cnt)
+               __field(int, curr_res)
+               __field(int, unit_res)
+               __field(unsigned int, flags)
+               __field(int, reserveq)
+               __field(int, writeq)
+               __field(int, grant_reserve_cycle)
+               __field(int, grant_reserve_bytes)
+               __field(int, grant_write_cycle)
+               __field(int, grant_write_bytes)
+               __field(int, curr_cycle)
+               __field(int, curr_block)
+               __field(xfs_lsn_t, tail_lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->trans_type = tic->t_trans_type;
+               __entry->ocnt = tic->t_ocnt;
+               __entry->cnt = tic->t_cnt;
+               __entry->curr_res = tic->t_curr_res;
+               __entry->unit_res = tic->t_unit_res;
+               __entry->flags = tic->t_flags;
+               __entry->reserveq = list_empty(&log->l_reserveq);
+               __entry->writeq = list_empty(&log->l_writeq);
+               xlog_crack_grant_head(&log->l_grant_reserve_head,
+                               &__entry->grant_reserve_cycle,
+                               &__entry->grant_reserve_bytes);
+               xlog_crack_grant_head(&log->l_grant_write_head,
+                               &__entry->grant_write_cycle,
+                               &__entry->grant_write_bytes);
+               __entry->curr_cycle = log->l_curr_cycle;
+               __entry->curr_block = log->l_curr_block;
+               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
+       ),
+       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+                 "t_unit_res %u t_flags %s reserveq %s "
+                 "writeq %s grant_reserve_cycle %d "
+                 "grant_reserve_bytes %d grant_write_cycle %d "
+                 "grant_write_bytes %d curr_cycle %d curr_block %d "
+                 "tail_cycle %d tail_block %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
+                 __entry->ocnt,
+                 __entry->cnt,
+                 __entry->curr_res,
+                 __entry->unit_res,
+                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
+                 __entry->reserveq ? "empty" : "active",
+                 __entry->writeq ? "empty" : "active",
+                 __entry->grant_reserve_cycle,
+                 __entry->grant_reserve_bytes,
+                 __entry->grant_write_cycle,
+                 __entry->grant_write_bytes,
+                 __entry->curr_cycle,
+                 __entry->curr_block,
+                 CYCLE_LSN(__entry->tail_lsn),
+                 BLOCK_LSN(__entry->tail_lsn)
+       )
+)
+
+#define DEFINE_LOGGRANT_EVENT(name) \
+DEFINE_EVENT(xfs_loggrant_class, name, \
+       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+       TP_ARGS(log, tic))
+DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
+DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
+DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
+
+DECLARE_EVENT_CLASS(xfs_file_class,
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+       TP_ARGS(ip, count, offset, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count 0x%zx ioflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
+)
+
+#define DEFINE_RW_EVENT(name)          \
+DEFINE_EVENT(xfs_file_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+       TP_ARGS(ip, count, offset, flags))
+DEFINE_RW_EVENT(xfs_file_read);
+DEFINE_RW_EVENT(xfs_file_buffered_write);
+DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_splice_read);
+DEFINE_RW_EVENT(xfs_file_splice_write);
+
+DECLARE_EVENT_CLASS(xfs_page_class,
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+       TP_ARGS(inode, page, off),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(pgoff_t, pgoff)
+               __field(loff_t, size)
+               __field(unsigned long, offset)
+               __field(int, delalloc)
+               __field(int, unwritten)
+       ),
+       TP_fast_assign(
+               int delalloc = -1, unwritten = -1;
+
+               if (page_has_buffers(page))
+                       xfs_count_page_state(page, &delalloc, &unwritten);
+               __entry->dev = inode->i_sb->s_dev;
+               __entry->ino = XFS_I(inode)->i_ino;
+               __entry->pgoff = page_offset(page);
+               __entry->size = i_size_read(inode);
+               __entry->offset = off;
+               __entry->delalloc = delalloc;
+               __entry->unwritten = unwritten;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+                 "delalloc %d unwritten %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->pgoff,
+                 __entry->size,
+                 __entry->offset,
+                 __entry->delalloc,
+                 __entry->unwritten)
+)
+
+#define DEFINE_PAGE_EVENT(name)                \
+DEFINE_EVENT(xfs_page_class, name,     \
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
+       TP_ARGS(inode, page, off))
+DEFINE_PAGE_EVENT(xfs_writepage);
+DEFINE_PAGE_EVENT(xfs_releasepage);
+DEFINE_PAGE_EVENT(xfs_invalidatepage);
+
+DECLARE_EVENT_CLASS(xfs_imap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+                int type, struct xfs_bmbt_irec *irec),
+       TP_ARGS(ip, offset, count, type, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, size)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, type)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->type = type;
+               __entry->startoff = irec ? irec->br_startoff : 0;
+               __entry->startblock = irec ? irec->br_startblock : 0;
+               __entry->blockcount = irec ? irec->br_blockcount : 0;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd type %s "
+                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_symbolic(__entry->type, XFS_IO_TYPES),
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount)
+)
+
+#define DEFINE_IOMAP_EVENT(name)       \
+DEFINE_EVENT(xfs_imap_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+                int type, struct xfs_bmbt_irec *irec),         \
+       TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+       TP_ARGS(ip, offset, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, isize)
+               __field(loff_t, disize)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->isize = ip->i_size;
+               __entry->disize = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->isize,
+                 __entry->disize,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count)
+);
+
+#define DEFINE_SIMPLE_IO_EVENT(name)   \
+DEFINE_EVENT(xfs_simple_io_class, name,        \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
+       TP_ARGS(ip, offset, count))
+DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
+DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+
+DECLARE_EVENT_CLASS(xfs_itrunc_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+       TP_ARGS(ip, new_size),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = new_size;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size)
+)
+
+#define DEFINE_ITRUNC_EVENT(name) \
+DEFINE_EVENT(xfs_itrunc_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
+       TP_ARGS(ip, new_size))
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+
+TRACE_EVENT(xfs_pagecache_inval,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
+       TP_ARGS(ip, start, finish),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_off_t, start)
+               __field(xfs_off_t, finish)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->start = start;
+               __entry->finish = finish;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->start,
+                 __entry->finish)
+);
+
+TRACE_EVENT(xfs_bunmap,
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
+                int flags, unsigned long caller_ip),
+       TP_ARGS(ip, bno, len, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fileoff_t, bno)
+               __field(xfs_filblks_t, len)
+               __field(unsigned long, caller_ip)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->bno = bno;
+               __entry->len = len;
+               __entry->caller_ip = caller_ip;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
+                 "flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->bno,
+                 __entry->len,
+                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
+                 (void *)__entry->caller_ip)
+
+);
+
+DECLARE_EVENT_CLASS(xfs_busy_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
+
+TRACE_EVENT(xfs_alloc_busy_trim,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len,
+                xfs_agblock_t tbno, xfs_extlen_t tlen),
+       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(xfs_agblock_t, tbno)
+               __field(xfs_extlen_t, tlen)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->tbno = tbno;
+               __entry->tlen = tlen;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->tbno,
+                 __entry->tlen)
+);
+
+TRACE_EVENT(xfs_trans_commit_lsn,
+       TP_PROTO(struct xfs_trans *trans),
+       TP_ARGS(trans),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(struct xfs_trans *, tp)
+               __field(xfs_lsn_t, lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = trans->t_mountp->m_super->s_dev;
+               __entry->tp = trans;
+               __entry->lsn = trans->t_commit_lsn;
+       ),
+       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tp,
+                 __entry->lsn)
+);
+
+TRACE_EVENT(xfs_agf,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agf, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, flags)
+               __field(__u32, length)
+               __field(__u32, bno_root)
+               __field(__u32, cnt_root)
+               __field(__u32, bno_level)
+               __field(__u32, cnt_level)
+               __field(__u32, flfirst)
+               __field(__u32, fllast)
+               __field(__u32, flcount)
+               __field(__u32, freeblks)
+               __field(__u32, longest)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = be32_to_cpu(agf->agf_seqno),
+               __entry->flags = flags;
+               __entry->length = be32_to_cpu(agf->agf_length),
+               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
+               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
+               __entry->bno_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
+               __entry->cnt_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
+               __entry->flfirst = be32_to_cpu(agf->agf_flfirst),
+               __entry->fllast = be32_to_cpu(agf->agf_fllast),
+               __entry->flcount = be32_to_cpu(agf->agf_flcount),
+               __entry->freeblks = be32_to_cpu(agf->agf_freeblks),
+               __entry->longest = be32_to_cpu(agf->agf_longest);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
+                 "levels b %u c %u flfirst %u fllast %u flcount %u "
+                 "freeblks %u longest %u caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
+                 __entry->length,
+                 __entry->bno_root,
+                 __entry->cnt_root,
+                 __entry->bno_level,
+                 __entry->cnt_level,
+                 __entry->flfirst,
+                 __entry->fllast,
+                 __entry->flcount,
+                 __entry->freeblks,
+                 __entry->longest,
+                 (void *)__entry->caller_ip)
+);
+
+TRACE_EVENT(xfs_free_extent,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
+       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(int, isfl)
+               __field(int, haveleft)
+               __field(int, haveright)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->isfl = isfl;
+               __entry->haveleft = haveleft;
+               __entry->haveright = haveright;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->isfl,
+                 __entry->haveleft ?
+                       (__entry->haveright ? "both" : "left") :
+                       (__entry->haveright ? "right" : "none"))
+
+);
+
+DECLARE_EVENT_CLASS(xfs_alloc_class,
+       TP_PROTO(struct xfs_alloc_arg *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, minlen)
+               __field(xfs_extlen_t, maxlen)
+               __field(xfs_extlen_t, mod)
+               __field(xfs_extlen_t, prod)
+               __field(xfs_extlen_t, minleft)
+               __field(xfs_extlen_t, total)
+               __field(xfs_extlen_t, alignment)
+               __field(xfs_extlen_t, minalignslop)
+               __field(xfs_extlen_t, len)
+               __field(short, type)
+               __field(short, otype)
+               __field(char, wasdel)
+               __field(char, wasfromfl)
+               __field(char, isfl)
+               __field(char, userdata)
+               __field(xfs_fsblock_t, firstblock)
+       ),
+       TP_fast_assign(
+               __entry->dev = args->mp->m_super->s_dev;
+               __entry->agno = args->agno;
+               __entry->agbno = args->agbno;
+               __entry->minlen = args->minlen;
+               __entry->maxlen = args->maxlen;
+               __entry->mod = args->mod;
+               __entry->prod = args->prod;
+               __entry->minleft = args->minleft;
+               __entry->total = args->total;
+               __entry->alignment = args->alignment;
+               __entry->minalignslop = args->minalignslop;
+               __entry->len = args->len;
+               __entry->type = args->type;
+               __entry->otype = args->otype;
+               __entry->wasdel = args->wasdel;
+               __entry->wasfromfl = args->wasfromfl;
+               __entry->isfl = args->isfl;
+               __entry->userdata = args->userdata;
+               __entry->firstblock = args->firstblock;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+                 "prod %u minleft %u total %u alignment %u minalignslop %u "
+                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
+                 "userdata %d firstblock 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->minlen,
+                 __entry->maxlen,
+                 __entry->mod,
+                 __entry->prod,
+                 __entry->minleft,
+                 __entry->total,
+                 __entry->alignment,
+                 __entry->minalignslop,
+                 __entry->len,
+                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
+                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
+                 __entry->wasdel,
+                 __entry->wasfromfl,
+                 __entry->isfl,
+                 __entry->userdata,
+                 (unsigned long long)__entry->firstblock)
+)
+
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_alloc_class, name, \
+       TP_PROTO(struct xfs_alloc_arg *args), \
+       TP_ARGS(args))
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
+
+DECLARE_EVENT_CLASS(xfs_dir2_class,
+       TP_PROTO(struct xfs_da_args *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __dynamic_array(char, name, args->namelen)
+               __field(int, namelen)
+               __field(xfs_dahash_t, hashval)
+               __field(xfs_ino_t, inumber)
+               __field(int, op_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               if (args->namelen)
+                       memcpy(__get_str(name), args->name, args->namelen);
+               __entry->namelen = args->namelen;
+               __entry->hashval = args->hashval;
+               __entry->inumber = args->inumber;
+               __entry->op_flags = args->op_flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
+                 "inumber 0x%llx op_flags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->namelen,
+                 __entry->namelen ? __get_str(name) : NULL,
+                 __entry->namelen,
+                 __entry->hashval,
+                 __entry->inumber,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
+)
+
+#define DEFINE_DIR2_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_class, name, \
+       TP_PROTO(struct xfs_da_args *args), \
+       TP_ARGS(args))
+DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+       TP_PROTO(struct xfs_da_args *args, int idx),
+       TP_ARGS(args, idx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, idx)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->idx = idx;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->idx)
+)
+
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
+       TP_PROTO(struct xfs_da_args *args, int idx), \
+       TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
+
+TRACE_EVENT(xfs_dir2_leafn_moveents,
+       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
+       TP_ARGS(args, src_idx, dst_idx, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, src_idx)
+               __field(int, dst_idx)
+               __field(int, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->src_idx = src_idx;
+               __entry->dst_idx = dst_idx;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
+                 "src_idx %d dst_idx %d count %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->src_idx,
+                 __entry->dst_idx,
+                 __entry->count)
+);
+
+#define XFS_SWAPEXT_INODES \
+       { 0,    "target" }, \
+       { 1,    "temp" }
+
+#define XFS_INODE_FORMAT_STR \
+       { 0,    "invalid" }, \
+       { 1,    "local" }, \
+       { 2,    "extent" }, \
+       { 3,    "btree" }
+
+DECLARE_EVENT_CLASS(xfs_swap_extent_class,
+       TP_PROTO(struct xfs_inode *ip, int which),
+       TP_ARGS(ip, which),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, which)
+               __field(xfs_ino_t, ino)
+               __field(int, format)
+               __field(int, nex)
+               __field(int, max_nex)
+               __field(int, broot_size)
+               __field(int, fork_off)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->which = which;
+               __entry->ino = ip->i_ino;
+               __entry->format = ip->i_d.di_format;
+               __entry->nex = ip->i_d.di_nextents;
+               __entry->max_nex = ip->i_df.if_ext_max;
+               __entry->broot_size = ip->i_df.if_broot_bytes;
+               __entry->fork_off = XFS_IFORK_BOFF(ip);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
+                 "Max in-fork extents %d, broot size %d, fork offset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
+                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
+                 __entry->nex,
+                 __entry->max_nex,
+                 __entry->broot_size,
+                 __entry->fork_off)
+)
+
+#define DEFINE_SWAPEXT_EVENT(name) \
+DEFINE_EVENT(xfs_swap_extent_class, name, \
+       TP_PROTO(struct xfs_inode *ip, int which), \
+       TP_ARGS(ip, which))
+
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+       TP_PROTO(struct log *log, struct xlog_recover *trans,
+               struct xlog_recover_item *item, int pass),
+       TP_ARGS(log, trans, item, pass),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned long, item)
+               __field(xlog_tid_t, tid)
+               __field(int, type)
+               __field(int, pass)
+               __field(int, count)
+               __field(int, total)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->item = (unsigned long)item;
+               __entry->tid = trans->r_log_tid;
+               __entry->type = ITEM_TYPE(item);
+               __entry->pass = pass;
+               __entry->count = item->ri_cnt;
+               __entry->total = item->ri_total;
+       ),
+       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+                 "item region count/total %d/%d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tid,
+                 __entry->pass,
+                 (void *)__entry->item,
+                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+                 __entry->count,
+                 __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+       TP_PROTO(struct log *log, struct xlog_recover *trans, \
+               struct xlog_recover_item *item, int pass), \
+       TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+       TP_ARGS(log, buf_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(__int64_t, blkno)
+               __field(unsigned short, len)
+               __field(unsigned short, flags)
+               __field(unsigned short, size)
+               __field(unsigned int, map_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->blkno = buf_f->blf_blkno;
+               __entry->len = buf_f->blf_len;
+               __entry->flags = buf_f->blf_flags;
+               __entry->size = buf_f->blf_size;
+               __entry->map_size = buf_f->blf_map_size;
+       ),
+       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+                       "map_size %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->flags,
+                 __entry->size,
+                 __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+       TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+       TP_ARGS(log, in_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(unsigned short, size)
+               __field(int, fields)
+               __field(unsigned short, asize)
+               __field(unsigned short, dsize)
+               __field(__int64_t, blkno)
+               __field(int, len)
+               __field(int, boffset)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->ino = in_f->ilf_ino;
+               __entry->size = in_f->ilf_size;
+               __entry->fields = in_f->ilf_fields;
+               __entry->asize = in_f->ilf_asize;
+               __entry->dsize = in_f->ilf_dsize;
+               __entry->blkno = in_f->ilf_blkno;
+               __entry->len = in_f->ilf_len;
+               __entry->boffset = in_f->ilf_boffset;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->fields,
+                 __entry->asize,
+                 __entry->dsize,
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+       TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
+DECLARE_EVENT_CLASS(xfs_discard_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
+#endif /* _TRACE_XFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE xfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
new file mode 100644 (file)
index 0000000..4d00ee6
--- /dev/null
@@ -0,0 +1,890 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, and it cannot be associated with any
+ * transaction.
+ */
+void
+xfs_trans_dqjoin(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp != tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(dqp->q_logitem.qli_dquot == dqp);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
+
+       /*
+        * Initialize d_transp so we can later determine if this dquot is
+        * associated with this transaction.
+        */
+       dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed.  The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp == tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+/*
+ * Carry forward whatever is left of the quota blk reservation to
+ * the spanky new transaction
+ */
+void
+xfs_trans_dup_dqinfo(
+       xfs_trans_t     *otp,
+       xfs_trans_t     *ntp)
+{
+       xfs_dqtrx_t     *oq, *nq;
+       int             i,j;
+       xfs_dqtrx_t     *oqa, *nqa;
+
+       if (!otp->t_dqinfo)
+               return;
+
+       xfs_trans_alloc_dqinfo(ntp);
+       oqa = otp->t_dqinfo->dqa_usrdquots;
+       nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+       /*
+        * Because the quota blk reservation is carried forward,
+        * it is also necessary to carry forward the DQ_DIRTY flag.
+        */
+       if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       if (oqa[i].qt_dquot == NULL)
+                               break;
+                       oq = &oqa[i];
+                       nq = &nqa[i];
+
+                       nq->qt_dquot = oq->qt_dquot;
+                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+                       nq->qt_rtbcount_delta = 0;
+
+                       /*
+                        * Transfer whatever is left of the reservations.
+                        */
+                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+                       oq->qt_blk_res = oq->qt_blk_res_used;
+
+                       nq->qt_rtblk_res = oq->qt_rtblk_res -
+                               oq->qt_rtblk_res_used;
+                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+                       oq->qt_ino_res = oq->qt_ino_res_used;
+
+               }
+               oqa = otp->t_dqinfo->dqa_grpdquots;
+               nqa = ntp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            field,
+       long            delta)
+{
+       xfs_mount_t     *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+}
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       int             i;
+       xfs_dqtrx_t     *qa;
+
+       qa = XFS_QM_ISUDQ(dqp) ?
+               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
+
+       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+               if (qa[i].qt_dquot == NULL ||
+                   qa[i].qt_dquot == dqp)
+                       return &qa[i];
+       }
+
+       return NULL;
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp,
+       uint            field,
+       long            delta)
+{
+       xfs_dqtrx_t     *qtrx;
+
+       ASSERT(tp);
+       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+       qtrx = NULL;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+       /*
+        * Find either the first free slot or the slot that belongs
+        * to this dquot.
+        */
+       qtrx = xfs_trans_get_dqtrx(tp, dqp);
+       ASSERT(qtrx);
+       if (qtrx->qt_dquot == NULL)
+               qtrx->qt_dquot = dqp;
+
+       switch (field) {
+
+               /*
+                * regular disk blk reservation
+                */
+             case XFS_TRANS_DQ_RES_BLKS:
+               qtrx->qt_blk_res += (ulong)delta;
+               break;
+
+               /*
+                * inode reservation
+                */
+             case XFS_TRANS_DQ_RES_INOS:
+               qtrx->qt_ino_res += (ulong)delta;
+               break;
+
+               /*
+                * disk blocks used.
+                */
+             case XFS_TRANS_DQ_BCOUNT:
+               if (qtrx->qt_blk_res && delta > 0) {
+                       qtrx->qt_blk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+               }
+               qtrx->qt_bcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELBCOUNT:
+               qtrx->qt_delbcnt_delta += delta;
+               break;
+
+               /*
+                * Inode Count
+                */
+             case XFS_TRANS_DQ_ICOUNT:
+               if (qtrx->qt_ino_res && delta > 0) {
+                       qtrx->qt_ino_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+               }
+               qtrx->qt_icount_delta += delta;
+               break;
+
+               /*
+                * rtblk reservation
+                */
+             case XFS_TRANS_DQ_RES_RTBLKS:
+               qtrx->qt_rtblk_res += (ulong)delta;
+               break;
+
+               /*
+                * rtblk count
+                */
+             case XFS_TRANS_DQ_RTBCOUNT:
+               if (qtrx->qt_rtblk_res && delta > 0) {
+                       qtrx->qt_rtblk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+               }
+               qtrx->qt_rtbcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELRTBCOUNT:
+               qtrx->qt_delrtb_delta += delta;
+               break;
+
+             default:
+               ASSERT(0);
+       }
+       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots (of one type - usr OR grp),
+ * involved in a transaction is 2 and that both usr and grp combined - 3.
+ * So, we don't attempt to make this very generic.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+       xfs_trans_t     *tp,
+       xfs_dqtrx_t     *q)
+{
+       ASSERT(q[0].qt_dquot != NULL);
+       if (q[1].qt_dquot == NULL) {
+               xfs_dqlock(q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+       } else {
+               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[1].qt_dquot);
+       }
+}
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go thru all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       xfs_disk_dquot_t        *d;
+       long                    totalbdelta;
+       long                    totalrtbdelta;
+
+       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       ASSERT(tp->t_dqinfo);
+       qa = tp->t_dqinfo->dqa_usrdquots;
+       for (j = 0; j < 2; j++) {
+               if (qa[0].qt_dquot == NULL) {
+                       qa = tp->t_dqinfo->dqa_grpdquots;
+                       continue;
+               }
+
+               /*
+                * Lock all of the dquots and join them to the transaction.
+                */
+               xfs_trans_dqlockedjoin(tp, qa);
+
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * The array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+
+                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+                       ASSERT(dqp->q_transp == tp);
+
+                       /*
+                        * adjust the actual number of blocks used
+                        */
+                       d = &dqp->q_core;
+
+                       /*
+                        * The issue here is - sometimes we don't make a blkquota
+                        * reservation intentionally to be fair to users
+                        * (when the amount is small). On the other hand,
+                        * delayed allocs do make reservations, but that's
+                        * outside of a transaction, so we have no
+                        * idea how much was really reserved.
+                        * So, here we've accumulated delayed allocation blks and
+                        * non-delay blks. The assumption is that the
+                        * delayed ones are always reserved (outside of a
+                        * transaction), and the others may or may not have
+                        * quota reservations.
+                        */
+                       totalbdelta = qtrx->qt_bcount_delta +
+                               qtrx->qt_delbcnt_delta;
+                       totalrtbdelta = qtrx->qt_rtbcount_delta +
+                               qtrx->qt_delrtb_delta;
+#ifdef DEBUG
+                       if (totalbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_bcount) >=
+                                      -totalbdelta);
+
+                       if (totalrtbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
+                                      -totalrtbdelta);
+
+                       if (qtrx->qt_icount_delta < 0)
+                               ASSERT(be64_to_cpu(d->d_icount) >=
+                                      -qtrx->qt_icount_delta);
+#endif
+                       if (totalbdelta)
+                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
+
+                       if (qtrx->qt_icount_delta)
+                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+                       if (totalrtbdelta)
+                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
+
+                       /*
+                        * Get any default limits in use.
+                        * Start/reset the timer(s) if needed.
+                        */
+                       if (d->d_id) {
+                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
+                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+                       }
+
+                       dqp->dq_flags |= XFS_DQ_DIRTY;
+                       /*
+                        * add this to the list of items to get logged
+                        */
+                       xfs_trans_log_dquot(tp, dqp);
+                       /*
+                        * Take off what's left of the original reservation.
+                        * In case of delayed allocations, there's no
+                        * reservation that a transaction structure knows of.
+                        */
+                       if (qtrx->qt_blk_res != 0) {
+                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+                                       if (qtrx->qt_blk_res >
+                                           qtrx->qt_blk_res_used)
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res -
+                                                        qtrx->qt_blk_res_used);
+                                       else
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res_used -
+                                                        qtrx->qt_blk_res);
+                               }
+                       } else {
+                               /*
+                                * These blks were never reserved, either inside
+                                * a transaction or outside one (in a delayed
+                                * allocation). Also, this isn't always a
+                                * negative number since we sometimes
+                                * deliberately skip quota reservations.
+                                */
+                               if (qtrx->qt_bcount_delta) {
+                                       dqp->q_res_bcount +=
+                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
+                               }
+                       }
+                       /*
+                        * Adjust the RT reservation.
+                        */
+                       if (qtrx->qt_rtblk_res != 0) {
+                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
+                                       if (qtrx->qt_rtblk_res >
+                                           qtrx->qt_rtblk_res_used)
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res -
+                                                       qtrx->qt_rtblk_res_used);
+                                       else
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res_used -
+                                                       qtrx->qt_rtblk_res);
+                               }
+                       } else {
+                               if (qtrx->qt_rtbcount_delta)
+                                       dqp->q_res_rtbcount +=
+                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+                       }
+
+                       /*
+                        * Adjust the inode reservation.
+                        */
+                       if (qtrx->qt_ino_res != 0) {
+                               ASSERT(qtrx->qt_ino_res >=
+                                      qtrx->qt_ino_res_used);
+                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+                                       dqp->q_res_icount -= (xfs_qcnt_t)
+                                               (qtrx->qt_ino_res -
+                                                qtrx->qt_ino_res_used);
+                       } else {
+                               if (qtrx->qt_icount_delta)
+                                       dqp->q_res_icount +=
+                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
+                       }
+
+                       ASSERT(dqp->q_res_bcount >=
+                               be64_to_cpu(dqp->q_core.d_bcount));
+                       ASSERT(dqp->q_res_icount >=
+                               be64_to_cpu(dqp->q_core.d_icount));
+                       ASSERT(dqp->q_res_rtbcount >=
+                               be64_to_cpu(dqp->q_core.d_rtbcount));
+               }
+               /*
+                * Do the group quotas next
+                */
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (ie. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+void
+xfs_trans_unreserve_and_mod_dquots(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       boolean_t               locked;
+
+       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       qa = tp->t_dqinfo->dqa_usrdquots;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * We assume that the array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+                       /*
+                        * Unreserve the original reservation. We don't care
+                        * about the number of blocks used field, or deltas.
+                        * Also we don't bother to zero the fields.
+                        */
+                       locked = B_FALSE;
+                       if (qtrx->qt_blk_res) {
+                               xfs_dqlock(dqp);
+                               locked = B_TRUE;
+                               dqp->q_res_bcount -=
+                                       (xfs_qcnt_t)qtrx->qt_blk_res;
+                       }
+                       if (qtrx->qt_ino_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_icount -=
+                                       (xfs_qcnt_t)qtrx->qt_ino_res;
+                       }
+
+                       if (qtrx->qt_rtblk_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_rtbcount -=
+                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
+                       }
+                       if (locked)
+                               xfs_dqunlock(dqp);
+
+               }
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+STATIC void
+xfs_quota_warn(
+       struct xfs_mount        *mp,
+       struct xfs_dquot        *dqp,
+       int                     type)
+{
+       /* no warnings for project quotas - we just return ENOSPC later */
+       if (dqp->dq_flags & XFS_DQ_PROJ)
+               return;
+       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
+                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
+                          type);
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * Flags indicate if the dquot is to be locked here and also
+ * if the blk reservation is for RT or regular blocks.
+ * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
+ */
+STATIC int
+xfs_trans_dqresv(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       xfs_qcnt_t      hardlimit;
+       xfs_qcnt_t      softlimit;
+       time_t          timer;
+       xfs_qwarncnt_t  warns;
+       xfs_qwarncnt_t  warnlimit;
+       xfs_qcnt_t      count;
+       xfs_qcnt_t      *resbcountp;
+       xfs_quotainfo_t *q = mp->m_quotainfo;
+
+
+       xfs_dqlock(dqp);
+
+       if (flags & XFS_TRANS_DQ_RES_BLKS) {
+               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_bhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_bsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_btimer);
+               warns = be16_to_cpu(dqp->q_core.d_bwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+               resbcountp = &dqp->q_res_bcount;
+       } else {
+               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_rtbhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_rtbsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+               resbcountp = &dqp->q_res_rtbcount;
+       }
+
+       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+           dqp->q_core.d_id &&
+           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
+            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
+             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
+               if (nblks > 0) {
+                       /*
+                        * dquot is locked already. See if we'd go over the
+                        * hardlimit or exceed the timelimit if we allocate
+                        * nblks.
+                        */
+                       if (hardlimit > 0ULL &&
+                           hardlimit <= nblks + *resbcountp) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL &&
+                           softlimit <= nblks + *resbcountp) {
+                               if ((timer != 0 && get_seconds() > timer) ||
+                                   (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_BSOFTLONGWARN);
+                                       goto error_return;
+                               }
+
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
+                       }
+               }
+               if (ninos > 0) {
+                       count = be64_to_cpu(dqp->q_core.d_icount);
+                       timer = be32_to_cpu(dqp->q_core.d_itimer);
+                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
+                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+                       if (!hardlimit)
+                               hardlimit = q->qi_ihardlimit;
+                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+                       if (!softlimit)
+                               softlimit = q->qi_isoftlimit;
+
+                       if (hardlimit > 0ULL && count >= hardlimit) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL && count >= softlimit) {
+                               if  ((timer != 0 && get_seconds() > timer) ||
+                                    (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_ISOFTLONGWARN);
+                                       goto error_return;
+                               }
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
+                       }
+               }
+       }
+
+       /*
+        * Change the reservation, but not the actual usage.
+        * Note that q_res_bcount = q_core.d_bcount + resv
+        */
+       (*resbcountp) += (xfs_qcnt_t)nblks;
+       if (ninos != 0)
+               dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+       /*
+        * note the reservation amt in the trans struct too,
+        * so that the transaction knows how much was reserved by
+        * it against this particular dquot.
+        * We don't do this when we are reserving for a delayed allocation,
+        * because we don't have the luxury of a transaction envelope then.
+        */
+       if (tp) {
+               ASSERT(tp->t_dqinfo);
+               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+               if (nblks != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           flags & XFS_QMOPT_RESBLK_MASK,
+                                           nblks);
+               if (ninos != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           XFS_TRANS_DQ_RES_INOS,
+                                           ninos);
+       }
+       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
+       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
+       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
+
+       xfs_dqunlock(dqp);
+       return 0;
+
+error_return:
+       xfs_dqunlock(dqp);
+       if (flags & XFS_QMOPT_ENOSPC)
+               return ENOSPC;
+       return EDQUOT;
+}
+
+
+/*
+ * Given dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp/prj quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
+ *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       int             resvd = 0, error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       if (tp && tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+
+       if (udqp) {
+               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
+                                       (flags & ~XFS_QMOPT_ENOSPC));
+               if (error)
+                       return error;
+               resvd = 1;
+       }
+
+       if (gdqp) {
+               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
+               if (error) {
+                       /*
+                        * can't do it, so backout previous reservation
+                        */
+                       if (resvd) {
+                               flags |= XFS_QMOPT_FORCE_RES;
+                               xfs_trans_dqresv(tp, mp, udqp,
+                                                -nblks, -ninos, flags);
+                       }
+                       return error;
+               }
+       }
+
+       /*
+        * Didn't change anything critical, so, no need to log
+        */
+       return 0;
+}
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ */
+int
+xfs_trans_reserve_quota_nblks(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       long                    nblks,
+       long                    ninos,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+       if (XFS_IS_PQUOTA_ON(mp))
+               flags |= XFS_QMOPT_ENOSPC;
+
+       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_RTBLKS ||
+              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_BLKS);
+
+       /*
+        * Reserve nblks against these dquots, with trans as the mediator.
+        */
+       return xfs_trans_reserve_quota_bydquots(tp, mp,
+                                               ip->i_udquot, ip->i_gdquot,
+                                               nblks, ninos, flags);
+}
+
+/*
+ * This routine is called to allocate a quotaoff log item.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_qoff_logitem_t      *q;
+
+       ASSERT(tp != NULL);
+
+       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+       ASSERT(q != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &q->qql_item);
+       return q;
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed.  The logitem must
+ * already be associated with the given transaction.
+ */
+void
+xfs_trans_log_quotaoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *qlp)
+{
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+       xfs_trans_t     *tp)
+{
+       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+void
+xfs_trans_free_dqinfo(
+       xfs_trans_t     *tp)
+{
+       if (!tp->t_dqinfo)
+               return;
+       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+       tp->t_dqinfo = NULL;
+}
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
new file mode 100644 (file)
index 0000000..7c220b4
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+#include "xfs_fs.h"
+
+struct file;
+struct xfs_inode;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Return values for xfs_inactive.  A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define        VN_INACTIVE_CACHE       0
+#define        VN_INACTIVE_NOCACHE     1
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISDIRECT    0x00004         /* bypass page cache */
+#define IO_INVIS       0x00020         /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+       { IO_ISDIRECT,  "DIRECT" }, \
+       { IO_INVIS,     "INVIS"}
+
+/*
+ * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
+ */
+#define FI_NONE                        0       /* none */
+#define FI_REMAPF              1       /* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
+                                          Prevent VM access to the pages until
+                                          the operation completes. */
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
+#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
+                                       PAGECACHE_TAG_DIRTY)
+
+
+#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
new file mode 100644 (file)
index 0000000..87d3e03
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+static int
+xfs_xattr_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       int error, asize = size;
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (!size) {
+               xflags |= ATTR_KERNOVAL;
+               value = NULL;
+       }
+
+       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+       if (error)
+               return error;
+       return asize;
+}
+
+static int
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (flags & XATTR_CREATE)
+               xflags |= ATTR_CREATE;
+       if (flags & XATTR_REPLACE)
+               xflags |= ATTR_REPLACE;
+
+       if (!value)
+               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
+       return -xfs_attr_set(ip, (unsigned char *)name,
+                               (void *)value, size, xflags);
+}
+
+static const struct xattr_handler xfs_xattr_user_handler = {
+       .prefix = XATTR_USER_PREFIX,
+       .flags  = 0, /* no flags implies user namespace */
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_trusted_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .flags  = ATTR_ROOT,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_security_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .flags  = ATTR_SECURE,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+const struct xattr_handler *xfs_xattr_handlers[] = {
+       &xfs_xattr_user_handler,
+       &xfs_xattr_trusted_handler,
+       &xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
+       &xfs_xattr_acl_access_handler,
+       &xfs_xattr_acl_default_handler,
+#endif
+       NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return sizeof("security");
+       else if (flags & XFS_ATTR_ROOT)
+               return sizeof("trusted");
+       else
+               return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return xfs_xattr_security_handler.prefix;
+       else if (flags & XFS_ATTR_ROOT)
+               return xfs_xattr_trusted_handler.prefix;
+       else
+               return xfs_xattr_user_handler.prefix;
+}
+
+static int
+xfs_xattr_put_listent(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+       char *offset;
+       int arraytop;
+
+       ASSERT(context->count >= 0);
+
+       /*
+        * Only show root namespace entries if we are actually allowed to
+        * see them.
+        */
+       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+               return 0;
+
+       arraytop = context->count + prefix_len + namelen + 1;
+       if (arraytop > context->firstu) {
+               context->count = -1;    /* insufficient space */
+               return 1;
+       }
+       offset = (char *)context->alist + context->count;
+       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+       offset += prefix_len;
+       strncpy(offset, (char *)name, namelen);                 /* real name */
+       offset += namelen;
+       *offset = '\0';
+       context->count += prefix_len + namelen + 1;
+       return 0;
+}
+
+static int
+xfs_xattr_put_listent_sizes(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+       return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+               size_t size, ssize_t *result)
+{
+       char *p = data + *result;
+
+       *result += len;
+       if (!size)
+               return 0;
+       if (*result > size)
+               return -ERANGE;
+
+       strcpy(p, name);
+       return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+       struct xfs_attr_list_context context;
+       struct attrlist_cursor_kern cursor = { 0 };
+       struct inode            *inode = dentry->d_inode;
+       int                     error;
+
+       /*
+        * First read the regular on-disk attributes.
+        */
+       memset(&context, 0, sizeof(context));
+       context.dp = XFS_I(inode);
+       context.cursor = &cursor;
+       context.resynch = 1;
+       context.alist = data;
+       context.bufsize = size;
+       context.firstu = context.bufsize;
+
+       if (size)
+               context.put_listent = xfs_xattr_put_listent;
+       else
+               context.put_listent = xfs_xattr_put_listent_sizes;
+
+       xfs_attr_list_int(&context);
+       if (context.count < 0)
+               return -ERANGE;
+
+       /*
+        * Then add the two synthetic ACL attributes.
+        */
+       if (posix_acl_access_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       if (posix_acl_default_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       return context.count;
+}
index 0c69ad8..3c9c54f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     connector.h
  * 
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004-2005 Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
  * All rights reserved.
  * 
  * This program is free software; you can redistribute it and/or modify
index 178cdb4..c2bd68f 100644 (file)
@@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te
 extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+#else
+static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
+#endif
 extern void unlock_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 
index d464de5..464cff5 100644 (file)
@@ -47,6 +47,9 @@
  *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
  *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
  *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
  */
 
 #ifndef _LINUX_FUSE_H
@@ -78,7 +81,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 16
+#define FUSE_KERNEL_MINOR_VERSION 17
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -153,8 +156,10 @@ struct fuse_file_lock {
 /**
  * INIT request/reply flags
  *
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -163,6 +168,7 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT    (1 << 4)
 #define FUSE_BIG_WRITES                (1 << 5)
 #define FUSE_DONT_MASK         (1 << 6)
+#define FUSE_FLOCK_LOCKS       (1 << 10)
 
 /**
  * CUSE INIT request/reply flags
@@ -175,6 +181,7 @@ struct fuse_file_lock {
  * Release flags
  */
 #define FUSE_RELEASE_FLUSH     (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK      (1 << 1)
 
 /**
  * Getattr flags
index eec3bae..8fc7dd1 100644 (file)
@@ -22,6 +22,7 @@ extern int            __set_personality(unsigned int);
  * These occupy the top three bytes.
  */
 enum {
+       UNAME26 =               0x0020000,
        ADDR_NO_RANDOMIZE =     0x0040000,      /* disable randomization of VA space */
        FDPIC_FUNCPTRS =        0x0080000,      /* userspace function ptrs point to descriptors
                                                 * (signal handling)
index 5e3e25a..63d2df4 100644 (file)
@@ -14,6 +14,7 @@ struct platform_pwm_backlight_data {
        unsigned int pwm_period_ns;
        int (*init)(struct device *dev);
        int (*notify)(struct device *dev, int brightness);
+       void (*notify_after)(struct device *dev, int brightness);
        void (*exit)(struct device *dev);
        int (*check_fb)(struct device *dev, struct fb_info *info);
 };
index 9026b30..218168a 100644 (file)
 #define  RIO_PEF_PROCESSOR             0x20000000      /* [I] Processor */
 #define  RIO_PEF_SWITCH                        0x10000000      /* [I] Switch */
 #define  RIO_PEF_MULTIPORT             0x08000000      /* [VI, 2.1] Multiport */
-#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II] Mailboxes */
-#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II] Mailbox 0 */
-#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II] Mailbox 1 */
-#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II] Mailbox 2 */
-#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II] Mailbox 3 */
-#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II] Doorbells */
+#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II, <= 1.2] Mailboxes */
+#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II, <= 1.2] Mailbox 0 */
+#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II, <= 1.2] Mailbox 1 */
+#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II, <= 1.2] Mailbox 2 */
+#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II, <= 1.2] Mailbox 3 */
+#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II, <= 1.2] Doorbells */
 #define  RIO_PEF_EXT_RT                        0x00000200      /* [III, 1.3] Extended route table support */
 #define  RIO_PEF_STD_RT                        0x00000100      /* [III, 1.3] Standard route table support */
 #define  RIO_PEF_CTLS                  0x00000010      /* [III] CTLS */
 #define        RIO_SWITCH_RT_LIMIT     0x34    /* [III, 1.3] Switch Route Table Destination ID Limit CAR */
 #define         RIO_RT_MAX_DESTID              0x0000ffff
 
-#define RIO_MBOX_CSR           0x40    /* [II] Mailbox CSR */
+#define RIO_MBOX_CSR           0x40    /* [II, <= 1.2] Mailbox CSR */
 #define  RIO_MBOX0_AVAIL               0x80000000      /* [II] Mbox 0 avail */
 #define  RIO_MBOX0_FULL                        0x40000000      /* [II] Mbox 0 full */
 #define  RIO_MBOX0_EMPTY               0x20000000      /* [II] Mbox 0 empty */
 #define  RIO_MBOX3_FAIL                        0x00000008      /* [II] Mbox 3 fail */
 #define  RIO_MBOX3_ERROR               0x00000004      /* [II] Mbox 3 error */
 
-#define RIO_WRITE_PORT_CSR     0x44    /* [I] Write Port CSR */
-#define RIO_DOORBELL_CSR       0x44    /* [II] Doorbell CSR */
+#define RIO_WRITE_PORT_CSR     0x44    /* [I, <= 1.2] Write Port CSR */
+#define RIO_DOORBELL_CSR       0x44    /* [II, <= 1.2] Doorbell CSR */
 #define  RIO_DOORBELL_AVAIL            0x80000000      /* [II] Doorbell avail */
 #define  RIO_DOORBELL_FULL             0x40000000      /* [II] Doorbell full */
 #define  RIO_DOORBELL_EMPTY            0x20000000      /* [II] Doorbell empty */
index f1bfa12..2b8963f 100644 (file)
  *
  *     (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
  *
- * The 1/16 region above the global dirty limit will be put to maximum pauses:
- *
- *     (limit, limit + limit/DIRTY_MAXPAUSE_AREA)
- *
- * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put
- * to loops:
- *
- *     (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA)
- *
  * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
  * time) for the dirty pages to drop, unless written enough pages.
  *
@@ -31,8 +22,6 @@
  */
 #define DIRTY_SCOPE            8
 #define DIRTY_FULL_SCOPE       (DIRTY_SCOPE / 2)
-#define DIRTY_MAXPAUSE_AREA            16
-#define DIRTY_PASSGOOD_AREA            8
 
 /*
  * 4MB minimal write chunk size
index 2de8fe9..126c675 100644 (file)
@@ -27,6 +27,12 @@ struct target_core_fabric_ops {
        int (*tpg_check_demo_mode_cache)(struct se_portal_group *);
        int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *);
        int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *);
+       /*
+        * Optionally used by fabrics to allow demo-mode login, but not
+        * expose any TPG LUNs, and return 'not connected' in standard
+        * inquiry response
+        */
+       int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
        struct se_node_acl *(*tpg_alloc_fabric_acl)(
                                        struct se_portal_group *);
        void (*tpg_release_fabric_acl)(struct se_portal_group *,
index 2e94258..9b956fa 100644 (file)
@@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
                if (!thread_fn)
                        return -EINVAL;
                handler = irq_default_primary_handler;
-               irqflags |= IRQF_ONESHOT;
        }
 
        action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
index 836a2ae..28a40d8 100644 (file)
@@ -1604,7 +1604,7 @@ static int __init printk_late_init(void)
        struct console *con;
 
        for_each_console(con) {
-               if (con->flags & CON_BOOT) {
+               if (!keep_bootcon && con->flags & CON_BOOT) {
                        printk(KERN_INFO "turn off boot console %s%d\n",
                                con->name, con->index);
                        unregister_console(con);
index dd948a1..18ee1d2 100644 (file)
@@ -37,6 +37,8 @@
 #include <linux/fs_struct.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -44,6 +46,8 @@
 #include <linux/user_namespace.h>
 
 #include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1161,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
 #define override_architecture(name)    0
 #endif
 
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+       int ret = 0;
+       char buf[len];
+
+       if (current->personality & UNAME26) {
+               char *rest = UTS_RELEASE;
+               int ndots = 0;
+               unsigned v;
+
+               while (*rest) {
+                       if (*rest == '.' && ++ndots >= 3)
+                               break;
+                       if (!isdigit(*rest) && *rest != '.')
+                               break;
+                       rest++;
+               }
+               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+               snprintf(buf, len, "2.6.%u%s", v, rest);
+               ret = copy_to_user(release, buf, len);
+       }
+       return ret;
+}
+
 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 {
        int errno = 0;
@@ -1170,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
                errno = -EFAULT;
        up_read(&uts_sem);
 
+       if (!errno && override_release(name->release, sizeof(name->release)))
+               errno = -EFAULT;
        if (!errno && override_architecture(name))
                errno = -EFAULT;
        return errno;
@@ -1191,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
                error = -EFAULT;
        up_read(&uts_sem);
 
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
        if (!error && override_architecture(name))
                error = -EFAULT;
        return error;
@@ -1225,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 
        if (!error && override_architecture(name))
                error = -EFAULT;
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
        return error ? -EFAULT : 0;
 }
 #endif
index 3b8e028..e8bffbe 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
index 4e4932a..362da65 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
index 930de94..ebd1e86 100644 (file)
@@ -1841,29 +1841,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-       int lock_count = -1;
        struct mem_cgroup *iter, *failed = NULL;
        bool cond = true;
 
        for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-               bool locked = iter->oom_lock;
-
-               iter->oom_lock = true;
-               if (lock_count == -1)
-                       lock_count = iter->oom_lock;
-               else if (lock_count != locked) {
+               if (iter->oom_lock) {
                        /*
                         * this subtree of our hierarchy is already locked
                         * so we cannot give a lock.
                         */
-                       lock_count = 0;
                        failed = iter;
                        cond = false;
-               }
+               } else
+                       iter->oom_lock = true;
        }
 
        if (!failed)
-               goto done;
+               return true;
 
        /*
         * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1871,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
                }
                iter->oom_lock = false;
        }
-done:
-       return lock_count;
+       return false;
 }
 
 /*
@@ -2169,13 +2162,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 
        /* Notify other cpus that system-wide "drain" is running */
        get_online_cpus();
-       /*
-        * Get a hint for avoiding draining charges on the current cpu,
-        * which must be exhausted by our charging.  It is not required that
-        * this be a precise check, so we use raw_smp_processor_id() instead of
-        * getcpu()/putcpu().
-        */
-       curcpu = raw_smp_processor_id();
+       curcpu = get_cpu();
        for_each_online_cpu(cpu) {
                struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                struct mem_cgroup *mem;
@@ -2192,6 +2179,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
                                schedule_work_on(cpu, &stock->work);
                }
        }
+       put_cpu();
 
        if (!sync)
                goto out;
index d196074..0e309cd 100644 (file)
@@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * 200ms is typically more than enough to curb heavy dirtiers;
                 * (b) the pause time limit makes the dirtiers more responsive.
                 */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_MAXPAUSE_AREA &&
+               if (nr_dirty < dirty_thresh &&
+                   bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 &&
                    time_after(jiffies, start_time + MAX_PAUSE))
                        break;
-               /*
-                * pass-good area. When some bdi gets blocked (eg. NFS server
-                * not responding), or write bandwidth dropped dramatically due
-                * to concurrent reads, or dirty threshold suddenly dropped and
-                * the dirty pages cannot be brought down anytime soon (eg. on
-                * slow USB stick), at least let go of the good bdi's.
-                */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_PASSGOOD_AREA &&
-                   bdi_dirty < bdi_thresh)
-                       break;
 
                /*
                 * Increase the delay for each loop, up to our previous
index 7ef6912..b7719ec 100644 (file)
@@ -2283,7 +2283,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                .mem_cgroup = mem,
                .memcg_record = rec,
        };
-       unsigned long start, end;
+       ktime_t start, end;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2292,7 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                                                      sc.may_writepage,
                                                      sc.gfp_mask);
 
-       start = sched_clock();
+       start = ktime_get();
        /*
         * NOTE: Although we can get the priority field, using it
         * here is not a good idea, since it limits the pages we can scan.
@@ -2301,10 +2301,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
         * the priority and make it zero.
         */
        shrink_zone(0, zone, &sc);
-       end = sched_clock();
+       end = ktime_get();
 
        if (rec)
-               rec->elapsed += end - start;
+               rec->elapsed += ktime_to_ns(ktime_sub(end, start));
        *scanned = sc.nr_scanned;
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2319,7 +2319,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 {
        struct zonelist *zonelist;
        unsigned long nr_reclaimed;
-       unsigned long start, end;
+       ktime_t start, end;
        int nid;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
@@ -2337,7 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                .gfp_mask = sc.gfp_mask,
        };
 
-       start = sched_clock();
+       start = ktime_get();
        /*
         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
         * take care of from where we get pages. So the node where we start the
@@ -2352,9 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                            sc.gfp_mask);
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
-       end = sched_clock();
+       end = ktime_get();
        if (rec)
-               rec->elapsed += end - start;
+               rec->elapsed += ktime_to_ns(ktime_sub(end, start));
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -2529,6 +2529,9 @@ loop_again:
                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
                                break;
+                       } else {
+                               /* If balanced, clear the congested flag */
+                               zone_clear_flag(zone, ZONE_CONGESTED);
                        }
                }
                if (i < 0)
index 5f27f8e..f1f2f7b 100644 (file)
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
        if (unlikely(!skb))
                goto err_free;
 
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
        return skb;
 
 err_free:
index 52cfd0c..d07223c 100644 (file)
@@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
        spin_unlock_irqrestore(&rq->lock, flags);
 
        skb_queue_walk_safe(&queue, skb, tmp) {
-               struct net_device *dev = skb->dev;
+               struct net_device *dev;
+
+               br2684_push(atmvcc, skb);
+               dev = skb->dev;
 
                dev->stats.rx_bytes -= skb->len;
                dev->stats.rx_packets--;
-
-               br2684_push(atmvcc, skb);
        }
 
        /* initialize netdev carrier state */
index 2cdf007..e738154 100644 (file)
@@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 int br_add_bridge(struct net *net, const char *name)
 {
        struct net_device *dev;
+       int res;
 
        dev = alloc_netdev(sizeof(struct net_bridge), name,
                           br_dev_setup);
@@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name)
 
        dev_net_set(dev, net);
 
-       return register_netdev(dev);
+       res = register_netdev(dev);
+       if (res)
+               free_netdev(dev);
+       return res;
 }
 
 int br_del_bridge(struct net *net, const char *name)
index 9cb191e..147ede3 100644 (file)
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 }
 
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
-                   char __user *optval, int __user *optlen)
+                   char __user *optval, int __user *optlen, unsigned flags)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 
                msg.msg_control = optval;
                msg.msg_controllen = len;
-               msg.msg_flags = 0;
+               msg.msg_flags = flags;
 
                lock_sock(sk);
                skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
        if(level != SOL_IPV6)
                return -ENOPROTOOPT;
 
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
                return compat_mc_getsockopt(sk, level, optname, optval, optlen,
                        ipv6_getsockopt);
 
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+                                MSG_CMSG_COMPAT);
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
index 07bf108..00b15ac 100644 (file)
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
        if (skb->protocol != htons(ETH_P_IPV6))
                goto tx_error;
 
+       if (tos == 1)
+               tos = ipv6_get_dsfield(iph6);
+
        /* ISATAP (RFC4214) - must come before 6to4 */
        if (dev->priv_flags & IFF_ISATAP) {
                struct neighbour *neigh = NULL;
index 102fc21..e051398 100644 (file)
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
        skb2->skb_iif = skb->dev->ifindex;
        skb2->dev = dev;
-       dev_queue_xmit(skb2);
-       err = 0;
+       err = dev_queue_xmit(skb2);
 
 out:
        if (err) {
index 9d761c9..3dfc471 100755 (executable)
@@ -2574,7 +2574,8 @@ sub process {
                                } else {
                                        $cast = $cast2;
                                }
-                               WARN("$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
+                               WARN("MINMAX",
+                                    "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
                        }
                }
 
index eb2f1e6..4594f33 100755 (executable)
@@ -1389,7 +1389,7 @@ sub vcs_exists {
        warn("$P: No supported VCS found.  Add --nogit to options?\n");
        warn("Using a git repository produces better results.\n");
        warn("Try Linus Torvalds' latest git repository using:\n");
-       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git\n");
+       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
        $printed_novcs = 1;
     }
     return 0;
index 502fc94..7696d05 100644 (file)
@@ -3348,6 +3348,8 @@ static hda_nid_t get_unassigned_dac(struct hda_codec *codec, hda_nid_t pin,
 
 #define MAX_AUTO_DACS  5
 
+#define DAC_SLAVE_FLAG 0x8000  /* filled dac is a slave */
+
 /* fill analog DAC list from the widget tree */
 static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 {
@@ -3370,16 +3372,26 @@ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 /* fill pin_dac_pair list from the pin and dac list */
 static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins,
                              int num_pins, hda_nid_t *dacs, int *rest,
-                             struct pin_dac_pair *filled, int type)
+                             struct pin_dac_pair *filled, int nums, 
+                             int type)
 {
-       int i, nums;
+       int i, start = nums;
 
-       nums = 0;
-       for (i = 0; i < num_pins; i++) {
+       for (i = 0; i < num_pins; i++, nums++) {
                filled[nums].pin = pins[i];
                filled[nums].type = type;
                filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest);
-               nums++;
+               if (filled[nums].dac) 
+                       continue;
+               if (filled[start].dac && get_connection_index(codec, pins[i], filled[start].dac) >= 0) {
+                       filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG;
+                       continue;
+               }
+               if (filled[0].dac && get_connection_index(codec, pins[i], filled[0].dac) >= 0) {
+                       filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG;
+                       continue;
+               }
+               snd_printdd("Failed to find a DAC for pin 0x%x", pins[i]);
        }
        return nums;
 }
@@ -3395,19 +3407,19 @@ static void cx_auto_parse_output(struct hda_codec *codec)
        rest = fill_cx_auto_dacs(codec, dacs);
        /* parse all analog output pins */
        nums = fill_dacs_for_pins(codec, cfg->line_out_pins, cfg->line_outs,
-                                 dacs, &rest, spec->dac_info,
-                                 AUTO_PIN_LINE_OUT);
-       nums += fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
-                                 dacs, &rest, spec->dac_info + nums,
-                                 AUTO_PIN_HP_OUT);
-       nums += fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
-                                 dacs, &rest, spec->dac_info + nums,
-                                 AUTO_PIN_SPEAKER_OUT);
+                         dacs, &rest, spec->dac_info, 0,
+                         AUTO_PIN_LINE_OUT);
+       nums = fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
+                         dacs, &rest, spec->dac_info, nums,
+                         AUTO_PIN_HP_OUT);
+       nums = fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
+                         dacs, &rest, spec->dac_info, nums,
+                         AUTO_PIN_SPEAKER_OUT);
        spec->dac_info_filled = nums;
        /* fill multiout struct */
        for (i = 0; i < nums; i++) {
                hda_nid_t dac = spec->dac_info[i].dac;
-               if (!dac)
+               if (!dac || (dac & DAC_SLAVE_FLAG))
                        continue;
                switch (spec->dac_info[i].type) {
                case AUTO_PIN_LINE_OUT:
@@ -3862,7 +3874,7 @@ static void cx_auto_parse_input(struct hda_codec *codec)
        }
        if (imux->num_items >= 2 && cfg->num_inputs == imux->num_items)
                cx_auto_check_auto_mic(codec);
-       if (imux->num_items > 1 && !spec->auto_mic) {
+       if (imux->num_items > 1) {
                for (i = 1; i < imux->num_items; i++) {
                        if (spec->imux_info[i].adc != spec->imux_info[0].adc) {
                                spec->adc_switching = 1;
@@ -4035,6 +4047,8 @@ static void cx_auto_init_output(struct hda_codec *codec)
                nid = spec->dac_info[i].dac;
                if (!nid)
                        nid = spec->multiout.dac_nids[0];
+               else if (nid & DAC_SLAVE_FLAG)
+                       nid &= ~DAC_SLAVE_FLAG;
                select_connection(codec, spec->dac_info[i].pin, nid);
        }
        if (spec->auto_mute) {
@@ -4167,9 +4181,11 @@ static int try_add_pb_volume(struct hda_codec *codec, hda_nid_t dac,
                             hda_nid_t pin, const char *name, int idx)
 {
        unsigned int caps;
-       caps = query_amp_caps(codec, dac, HDA_OUTPUT);
-       if (caps & AC_AMPCAP_NUM_STEPS)
-               return cx_auto_add_pb_volume(codec, dac, name, idx);
+       if (dac && !(dac & DAC_SLAVE_FLAG)) {
+               caps = query_amp_caps(codec, dac, HDA_OUTPUT);
+               if (caps & AC_AMPCAP_NUM_STEPS)
+                       return cx_auto_add_pb_volume(codec, dac, name, idx);
+       }
        caps = query_amp_caps(codec, pin, HDA_OUTPUT);
        if (caps & AC_AMPCAP_NUM_STEPS)
                return cx_auto_add_pb_volume(codec, pin, name, idx);
@@ -4191,8 +4207,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
        for (i = 0; i < spec->dac_info_filled; i++) {
                const char *label;
                int idx, type;
-               if (!spec->dac_info[i].dac)
-                       continue;
+               hda_nid_t dac = spec->dac_info[i].dac;
                type = spec->dac_info[i].type;
                if (type == AUTO_PIN_LINE_OUT)
                        type = spec->autocfg.line_out_type;
@@ -4211,7 +4226,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
                        idx = num_spk++;
                        break;
                }
-               err = try_add_pb_volume(codec, spec->dac_info[i].dac,
+               err = try_add_pb_volume(codec, dac,
                                        spec->dac_info[i].pin,
                                        label, idx);
                if (err < 0)
index fcb11af..7cabd73 100644 (file)
@@ -565,11 +565,11 @@ static void alc_hp_automute(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
 
-       if (!spec->automute)
-               return;
        spec->jack_present =
                detect_jacks(codec, ARRAY_SIZE(spec->autocfg.hp_pins),
                             spec->autocfg.hp_pins);
+       if (!spec->automute)
+               return;
        update_speakers(codec);
 }
 
@@ -578,11 +578,11 @@ static void alc_line_automute(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
 
-       if (!spec->automute || !spec->detect_line)
-               return;
        spec->line_jack_present =
                detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins),
                             spec->autocfg.line_out_pins);
+       if (!spec->automute || !spec->detect_line)
+               return;
        update_speakers(codec);
 }
 
@@ -3083,16 +3083,22 @@ static void alc_auto_init_multi_out(struct hda_codec *codec)
 static void alc_auto_init_extra_out(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t pin;
+       hda_nid_t pin, dac;
 
        pin = spec->autocfg.hp_pins[0];
-       if (pin)
-               alc_auto_set_output_and_unmute(codec, pin, PIN_HP,
-                                                 spec->multiout.hp_nid);
+       if (pin) {
+               dac = spec->multiout.hp_nid;
+               if (!dac)
+                       dac = spec->multiout.dac_nids[0];
+               alc_auto_set_output_and_unmute(codec, pin, PIN_HP, dac);
+       }
        pin = spec->autocfg.speaker_pins[0];
-       if (pin)
-               alc_auto_set_output_and_unmute(codec, pin, PIN_OUT,
-                                       spec->multiout.extra_out_nid[0]);
+       if (pin) {
+               dac = spec->multiout.extra_out_nid[0];
+               if (!dac)
+                       dac = spec->multiout.dac_nids[0];
+               alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, dac);
+       }
 }
 
 /*
index d6651c0..a118a0f 100644 (file)
@@ -56,7 +56,7 @@ static int bf5xx_ad193x_hw_params(struct snd_pcm_substream *substream,
 
        switch (params_rate(params)) {
        case 48000:
-               clk = 12288000;
+               clk = 24576000;
                break;
        }
 
index 2374ca5..eedb6f5 100644 (file)
@@ -27,11 +27,6 @@ struct ad193x_priv {
        int sysclk;
 };
 
-/* ad193x register cache & default register settings */
-static const u8 ad193x_reg[AD193X_NUM_REGS] = {
-       0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0,
-};
-
 /*
  * AD193X volume/mute/de-emphasis etc. controls
  */
@@ -307,7 +302,8 @@ static int ad193x_hw_params(struct snd_pcm_substream *substream,
        snd_soc_write(codec, AD193X_PLL_CLK_CTRL0, reg);
 
        reg = snd_soc_read(codec, AD193X_DAC_CTRL2);
-       reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) | word_len;
+       reg = (reg & (~AD193X_DAC_WORD_LEN_MASK))
+               | (word_len << AD193X_DAC_WORD_LEN_SHFT);
        snd_soc_write(codec, AD193X_DAC_CTRL2, reg);
 
        reg = snd_soc_read(codec, AD193X_ADC_CTRL1);
@@ -389,9 +385,6 @@ static int ad193x_probe(struct snd_soc_codec *codec)
 
 static struct snd_soc_codec_driver soc_codec_dev_ad193x = {
        .probe =        ad193x_probe,
-       .reg_cache_default = ad193x_reg,
-       .reg_cache_size = AD193X_NUM_REGS,
-       .reg_word_size = sizeof(u16),
 };
 
 #if defined(CONFIG_SPI_MASTER)
index 9747b54..cccc2e8 100644 (file)
@@ -34,7 +34,8 @@
 #define AD193X_DAC_LEFT_HIGH    (1 << 3)
 #define AD193X_DAC_BCLK_INV     (1 << 7)
 #define AD193X_DAC_CTRL2        0x804
-#define AD193X_DAC_WORD_LEN_MASK       0xC
+#define AD193X_DAC_WORD_LEN_SHFT        3
+#define AD193X_DAC_WORD_LEN_MASK        0x18
 #define AD193X_DAC_MASTER_MUTE  1
 #define AD193X_DAC_CHNL_MUTE    0x805
 #define AD193X_DACL1_MUTE       0
@@ -63,7 +64,7 @@
 #define AD193X_ADC_CTRL1        0x80f
 #define AD193X_ADC_SERFMT_MASK         0x60
 #define AD193X_ADC_SERFMT_STEREO       (0 << 5)
-#define AD193X_ADC_SERFMT_TDM          (1 << 2)
+#define AD193X_ADC_SERFMT_TDM          (1 << 5)
 #define AD193X_ADC_SERFMT_AUX          (2 << 5)
 #define AD193X_ADC_WORD_LEN_MASK       0x3
 #define AD193X_ADC_CTRL2        0x810
index 409d89d..fbd7eb9 100644 (file)
@@ -857,6 +857,7 @@ static __devinit int sta32x_i2c_probe(struct i2c_client *i2c,
        ret = snd_soc_register_codec(&i2c->dev, &sta32x_codec, &sta32x_dai, 1);
        if (ret != 0) {
                dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret);
+               kfree(sta32x);
                return ret;
        }
 
index 60d740e..1725550 100644 (file)
@@ -2221,6 +2221,8 @@ static int sysclk_event(struct snd_soc_dapm_widget *w,
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                if (fll) {
+                       try_wait_for_completion(&wm8962->fll_lock);
+
                        snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
                                            WM8962_FLL_ENA, WM8962_FLL_ENA);
                        if (wm8962->irq) {
@@ -2927,10 +2929,6 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec,
                                            WM8962_BIAS_ENA | 0x180);
 
                        msleep(5);
-
-                       snd_soc_update_bits(codec, WM8962_CLOCKING2,
-                                           WM8962_CLKREG_OVD,
-                                           WM8962_CLKREG_OVD);
                }
 
                /* VMID 2*250k */
@@ -3288,6 +3286,8 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
        snd_soc_write(codec, WM8962_FLL_CONTROL_7, fll_div.lambda);
        snd_soc_write(codec, WM8962_FLL_CONTROL_8, fll_div.n);
 
+       try_wait_for_completion(&wm8962->fll_lock);
+
        snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
                            WM8962_FLL_FRAC | WM8962_FLL_REFCLK_SRC_MASK |
                            WM8962_FLL_ENA, fll1);
@@ -3868,6 +3868,10 @@ static int wm8962_probe(struct snd_soc_codec *codec)
         */
        snd_soc_update_bits(codec, WM8962_CLOCKING2, WM8962_SYSCLK_ENA, 0);
 
+       /* Ensure we have soft control over all registers */
+       snd_soc_update_bits(codec, WM8962_CLOCKING2,
+                           WM8962_CLKREG_OVD, WM8962_CLKREG_OVD);
+
        regulator_bulk_disable(ARRAY_SIZE(wm8962->supplies), wm8962->supplies);
 
        if (pdata) {
index ab8e9d1..0cdb9d1 100644 (file)
@@ -420,7 +420,7 @@ static const char *sidetone_hpf_text[] = {
 };
 
 static const struct soc_enum sidetone_hpf =
-       SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 6, sidetone_hpf_text);
+       SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 7, sidetone_hpf_text);
 
 static const char *hpf_mode_text[] = {
        "HiFi", "Custom", "Voice"
@@ -988,15 +988,10 @@ SND_SOC_DAPM_MICBIAS("MICB1", WM8996_POWER_MANAGEMENT_1, 8, 0),
 SND_SOC_DAPM_PGA("IN1L PGA", WM8996_POWER_MANAGEMENT_2, 5, 0, NULL, 0),
 SND_SOC_DAPM_PGA("IN1R PGA", WM8996_POWER_MANAGEMENT_2, 4, 0, NULL, 0),
 
-SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN2L Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-SND_SOC_DAPM_MUX("IN2R Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-
-SND_SOC_DAPM_PGA("IN1L", WM8996_POWER_MANAGEMENT_7, 2, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN1R", WM8996_POWER_MANAGEMENT_7, 3, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2L", WM8996_POWER_MANAGEMENT_7, 6, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2R", WM8996_POWER_MANAGEMENT_7, 7, 0, NULL, 0),
+SND_SOC_DAPM_MUX("IN1L Mux", WM8996_POWER_MANAGEMENT_7, 2, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN1R Mux", WM8996_POWER_MANAGEMENT_7, 3, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN2L Mux", WM8996_POWER_MANAGEMENT_7, 6, 0, &in2_mux),
+SND_SOC_DAPM_MUX("IN2R Mux", WM8996_POWER_MANAGEMENT_7, 7, 0, &in2_mux),
 
 SND_SOC_DAPM_SUPPLY("DMIC2", WM8996_POWER_MANAGEMENT_7, 9, 0, NULL, 0),
 SND_SOC_DAPM_SUPPLY("DMIC1", WM8996_POWER_MANAGEMENT_7, 8, 0, NULL, 0),
@@ -1213,6 +1208,16 @@ static const struct snd_soc_dapm_route wm8996_dapm_routes[] = {
        { "AIF2RX0", NULL, "AIFCLK" },
        { "AIF2RX1", NULL, "AIFCLK" },
 
+       { "AIF1TX0", NULL, "AIFCLK" },
+       { "AIF1TX1", NULL, "AIFCLK" },
+       { "AIF1TX2", NULL, "AIFCLK" },
+       { "AIF1TX3", NULL, "AIFCLK" },
+       { "AIF1TX4", NULL, "AIFCLK" },
+       { "AIF1TX5", NULL, "AIFCLK" },
+
+       { "AIF2TX0", NULL, "AIFCLK" },
+       { "AIF2TX1", NULL, "AIFCLK" },
+
        { "DSP1RXL", NULL, "SYSDSPCLK" },
        { "DSP1RXR", NULL, "SYSDSPCLK" },
        { "DSP2RXL", NULL, "SYSDSPCLK" },
@@ -2106,6 +2111,9 @@ static int wm8996_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
 
        snd_soc_write(codec, WM8996_FLL_EFS_1, fll_div.lambda);
 
+       /* Clear any pending completions (eg, from failed startups) */
+       try_wait_for_completion(&wm8996->fll_lock);
+
        snd_soc_update_bits(codec, WM8996_FLL_CONTROL_1,
                            WM8996_FLL_ENA, WM8996_FLL_ENA);
 
index 56efa0c..099614e 100644 (file)
@@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev)
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res) {
                err = -ENODEV;
-               goto fail;
+               goto fail_free_info;
        }
 
        info->mem = request_mem_region(res->start, resource_size(res),
                                       pdev->name);
        if (!info->mem) {
                err = -EBUSY;
-               goto fail;
+               goto fail_free_info;
        }
 
        info->regs = ioremap(info->mem->start, resource_size(info->mem));
@@ -435,6 +435,7 @@ fail_unmap_mem:
        iounmap(info->regs);
 fail_release_mem:
        release_mem_region(info->mem->start, resource_size(info->mem));
+fail_free_info:
        kfree(info);
 fail:
        return err;
index 732208c..cb50598 100644 (file)
@@ -879,10 +879,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np)
                 * assume that device_node pointers are a valid comparison.
                 */
                np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0);
+               of_node_put(np);
                if (np == dma_channel_np)
                        return ssi_np;
 
                np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0);
+               of_node_put(np);
                if (np == dma_channel_np)
                        return ssi_np;
        }
index a192979..358f0ba 100644 (file)
@@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
        }
 
        machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL);
-       if (!machine_data)
-               return -ENOMEM;
+       if (!machine_data) {
+               ret = -ENOMEM;
+               goto error_alloc;
+       }
 
        machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev);
        machine_data->dai[0].ops = &mpc8610_hpcd_ops;
@@ -494,7 +496,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
        ret = platform_device_add(sound_device);
        if (ret) {
                dev_err(&pdev->dev, "platform device add failed\n");
-               goto error;
+               goto error_sound;
        }
        dev_set_drvdata(&pdev->dev, sound_device);
 
@@ -502,14 +504,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
 
        return 0;
 
+error_sound:
+       platform_device_unregister(sound_device);
 error:
-       of_node_put(codec_np);
-
-       if (sound_device)
-               platform_device_unregister(sound_device);
-
        kfree(machine_data);
-
+error_alloc:
+       of_node_put(codec_np);
        return ret;
 }
 
index 8fa4d5f..fcb862e 100644 (file)
@@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np,
         * dai->platform name should already point to an allocated buffer.
         */
        ret = of_address_to_resource(dma_channel_np, 0, &res);
-       if (ret)
+       if (ret) {
+               of_node_put(dma_channel_np);
                return ret;
+       }
        snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s",
                 (unsigned long long) res.start, dma_channel_np->name);
 
index a33fc51..8f16cd3 100644 (file)
@@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev)
        if (!priv->mem) {
                dev_err(&pdev->dev, "request_mem_region failed\n");
                err = -EBUSY;
-               goto error;
+               goto error_alloc;
        }
 
        priv->io = ioremap(priv->mem->start, SZ_16K);
index 30fe0d0..0aa475f 100644 (file)
@@ -514,7 +514,7 @@ static int ams_delta_cx20442_init(struct snd_soc_pcm_runtime *rtd)
        }
 
        /* Set codec bias level */
-       ams_delta_set_bias_level(card, SND_SOC_BIAS_STANDBY);
+       ams_delta_set_bias_level(card, dapm, SND_SOC_BIAS_STANDBY);
 
        /* Add hook switch - can be used to control the codec from userspace
         * even if line discipline fails */
@@ -649,7 +649,9 @@ static void __exit ams_delta_module_exit(void)
                        ams_delta_hook_switch_gpios);
 
        /* Keep modem power on */
-       ams_delta_set_bias_level(&ams_delta_audio_card, SND_SOC_BIAS_STANDBY);
+       ams_delta_set_bias_level(&ams_delta_audio_card,
+                                &ams_delta_audio_card.rtd[0].codec->dapm,
+                                SND_SOC_BIAS_STANDBY);
 
        platform_device_unregister(cx20442_platform_device);
        platform_device_unregister(ams_delta_audio_platform_device);
index b99091f..65f980e 100644 (file)
@@ -185,6 +185,7 @@ config SND_SOC_SPEYSIDE
        select SND_SAMSUNG_I2S
        select SND_SOC_WM8996
        select SND_SOC_WM9081
+       select SND_SOC_WM1250_EV1
 
 config SND_SOC_SPEYSIDE_WM8962
        tristate "Audio support for Wolfson Speyside with WM8962"
index 241f55d..c6c6589 100644 (file)
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
index 1e574a5..bc8c167 100644 (file)
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
index 0b9eb5f..72535f2 100644 (file)
@@ -23,6 +23,9 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card,
        struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
        int ret;
 
+       if (dapm->dev != codec_dai->dev)
+               return 0;
+
        switch (level) {
        case SND_SOC_BIAS_PREPARE:
                if (dapm->bias_level == SND_SOC_BIAS_STANDBY) {
@@ -57,6 +60,9 @@ static int speyside_wm8962_set_bias_level_post(struct snd_soc_card *card,
        struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
        int ret;
 
+       if (dapm->dev != codec_dai->dev)
+               return 0;
+
        switch (level) {
        case SND_SOC_BIAS_STANDBY:
                ret = snd_soc_dai_set_sysclk(codec_dai, WM8962_SYSCLK_MCLK,
index 83ad8ca..b085d8e 100644 (file)
@@ -1913,7 +1913,7 @@ struct snd_kcontrol *snd_soc_cnew(const struct snd_kcontrol_new *_template,
 
        if (prefix) {
                name_len = strlen(long_name) + strlen(prefix) + 2;
-               name = kmalloc(name_len, GFP_ATOMIC);
+               name = kmalloc(name_len, GFP_KERNEL);
                if (!name)
                        return NULL;
 
index cca490c..a62f7dd 100644 (file)
@@ -205,6 +205,25 @@ static unsigned int snd_soc_16_8_read_i2c(struct snd_soc_codec *codec,
 #define snd_soc_16_8_read_i2c NULL
 #endif
 
+#if defined(CONFIG_SPI_MASTER)
+static unsigned int snd_soc_16_8_read_spi(struct snd_soc_codec *codec,
+                                         unsigned int r)
+{
+       struct spi_device *spi = codec->control_data;
+
+       const u16 reg = cpu_to_be16(r | 0x100);
+       u8 data;
+       int ret;
+
+       ret = spi_write_then_read(spi, &reg, 2, &data, 1);
+       if (ret < 0)
+               return 0;
+       return data;
+}
+#else
+#define snd_soc_16_8_read_spi NULL
+#endif
+
 static int snd_soc_16_8_write(struct snd_soc_codec *codec, unsigned int reg,
                              unsigned int value)
 {
@@ -295,6 +314,7 @@ static struct {
        int (*write)(struct snd_soc_codec *codec, unsigned int, unsigned int);
        unsigned int (*read)(struct snd_soc_codec *, unsigned int);
        unsigned int (*i2c_read)(struct snd_soc_codec *, unsigned int);
+       unsigned int (*spi_read)(struct snd_soc_codec *, unsigned int);
 } io_types[] = {
        {
                .addr_bits = 4, .data_bits = 12,
@@ -318,6 +338,7 @@ static struct {
                .addr_bits = 16, .data_bits = 8,
                .write = snd_soc_16_8_write,
                .i2c_read = snd_soc_16_8_read_i2c,
+               .spi_read = snd_soc_16_8_read_spi,
        },
        {
                .addr_bits = 16, .data_bits = 16,
@@ -383,6 +404,8 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec,
 #ifdef CONFIG_SPI_MASTER
                codec->hw_write = do_spi_write;
 #endif
+               if (io_types[i].spi_read)
+                       codec->hw_read = io_types[i].spi_read;
 
                codec->control_data = container_of(codec->dev,
                                                   struct spi_device,
index 7c17b98..38b0013 100644 (file)
@@ -327,7 +327,7 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count,
                                              IRQF_TRIGGER_FALLING,
                                              gpios[i].name,
                                              &gpios[i]);
-               if (ret)
+               if (ret < 0)
                        goto err;
 
                if (gpios[i].wake) {
index b575939..2879c88 100644 (file)
@@ -290,6 +290,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
        codec_dai->active--;
        codec->active--;
 
+       if (!cpu_dai->active && !codec_dai->active)
+               rtd->rate = 0;
+
        /* Muting the DAC suppresses artifacts caused during digital
         * shutdown, for example from stopping clocks.
         */
index 661373c..be27f1d 100644 (file)
@@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd)
        snd_soc_dapm_force_enable_pin(dapm, "Mic Bias");
 
        /* FIXME: Calculate automatically based on DAPM routes? */
-       if (!machine_is_harmony() && !machine_is_ventana())
+       if (!machine_is_harmony())
                snd_soc_dapm_nc_pin(dapm, "IN1L");
        if (!machine_is_seaboard() && !machine_is_aebl())
                snd_soc_dapm_nc_pin(dapm, "IN1R");
@@ -395,7 +395,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, card);
        snd_soc_card_set_drvdata(card, machine);
 
-       if (machine_is_harmony() || machine_is_ventana()) {
+       if (machine_is_harmony()) {
                card->dapm_routes = harmony_audio_map;
                card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map);
        } else if (machine_is_seaboard()) {
index 94c2cf0..e8a03ac 100644 (file)
@@ -24,7 +24,7 @@
 
 # Set the following to `true' to make a unstripped, unoptimized
 # binary. Leave this set to `false' for production use.
-DEBUG ?=       false
+DEBUG ?=       true
 
 # make the build silent. Set this to something else to make it noisy again.
 V ?=           false
@@ -35,7 +35,7 @@ NLS ?=                true
 
 # Set the following to 'true' to build/install the
 # cpufreq-bench benchmarking tool
-CPUFRQ_BENCH ?= true
+CPUFREQ_BENCH ?= true
 
 # Prefix to the directories we're installing to
 DESTDIR ?=
@@ -137,9 +137,10 @@ CFLAGS +=  -pipe
 ifeq ($(strip $(NLS)),true)
        INSTALL_NLS += install-gmo
        COMPILE_NLS += create-gmo
+       CFLAGS += -DNLS
 endif
 
-ifeq ($(strip $(CPUFRQ_BENCH)),true)
+ifeq ($(strip $(CPUFREQ_BENCH)),true)
        INSTALL_BENCH += install-bench
        COMPILE_BENCH += compile-bench
 endif
index dbf1399..3326217 100644 (file)
@@ -1,10 +1,10 @@
 default: all
 
-centrino-decode: centrino-decode.c
-       $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c
+centrino-decode: ../i386/centrino-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
 
-powernow-k8-decode: powernow-k8-decode.c
-       $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c
+powernow-k8-decode: ../i386/powernow-k8-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
 
 all: centrino-decode powernow-k8-decode
 
diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c
deleted file mode 120000 (symlink)
index 26fb3f1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../i386/centrino-decode.c
\ No newline at end of file
diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c
deleted file mode 120000 (symlink)
index eb30c79..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../i386/powernow-k8-decode.c
\ No newline at end of file
index 3194811..bb60a8d 100644 (file)
@@ -1,10 +1,10 @@
-.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-info \- Utility to retrieve cpufreq kernel information
+cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
 .SH "SYNTAX"
 .LP 
-cpufreq\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
 A small tool which prints out cpufreq information helpful to developers and interested users.
index 26e3e13..685f469 100644 (file)
@@ -1,13 +1,13 @@
-.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-set \- A small tool which allows to modify cpufreq settings.
+cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
 .SH "SYNTAX"
 .LP 
-cpufreq\-set [\fIoptions\fP]
+cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
-cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
+cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
 .SH "OPTIONS"
 .LP 
 .TP 
index 78c20fe..baf741d 100644 (file)
@@ -3,7 +3,7 @@
 cpupower \- Shows and sets processor power related values
 .SH SYNOPSIS
 .ft B
-.B cpupower [ \-c cpulist ] subcommand [ARGS]
+.B cpupower [ \-c cpulist ] <command> [ARGS]
 
 .B cpupower \-v|\-\-version
 
@@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values
 \fBcpupower \fP is a collection of tools to examine and tune power saving
 related features of your processor.
 
-The manpages of the subcommands (cpupower\-<subcommand>(1)) provide detailed
+The manpages of the commands (cpupower\-<command>(1)) provide detailed
 descriptions of supported features. Run \fBcpupower help\fP to get an overview
-of supported subcommands.
+of supported commands.
 
 .SH Options
 .PP
 \-\-help, \-h
 .RS 4
-Shows supported subcommands and general usage.
+Shows supported commands and general usage.
 .RE
 .PP
 \-\-cpu cpulist,  \-c cpulist
 .RS 4
 Only show or set values for specific cores.
-This option is not supported by all subcommands, details can be found in the
-manpages of the subcommands.
+This option is not supported by all commands, details can be found in the
+manpages of the commands.
 
-Some subcommands access all cores (typically the *\-set commands), some only
+Some commands access all cores (typically the *\-set commands), some only
 the first core (typically the *\-info commands) by default.
 
 The syntax for <cpulist> is based on how the kernel exports CPU bitmasks via
index c870ffb..c10496f 100644 (file)
@@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv);
 extern int cmd_idle_info(int argc, const char **argv);
 extern int cmd_monitor(int argc, const char **argv);
 
-extern void set_help(void);
-extern void info_help(void);
-extern void freq_set_help(void);
-extern void freq_info_help(void);
-extern void idle_info_help(void);
-extern void monitor_help(void);
-
 #endif
index 5a1d25f..28953c9 100644 (file)
@@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human)
        return 0;
 }
 
-void freq_info_help(void)
-{
-       printf(_("Usage: cpupower freqinfo [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -e, --debug          Prints out debug information [default]\n"));
-       printf(_("  -f, --freq           Get frequency the CPU currently runs at, according\n"
-              "                       to the cpufreq core *\n"));
-       printf(_("  -w, --hwfreq         Get frequency the CPU currently runs at, by reading\n"
-              "                       it from hardware (only available to root) *\n"));
-       printf(_("  -l, --hwlimits       Determine the minimum and maximum CPU frequency allowed *\n"));
-       printf(_("  -d, --driver         Determines the used cpufreq kernel driver *\n"));
-       printf(_("  -p, --policy         Gets the currently used cpufreq policy *\n"));
-       printf(_("  -g, --governors      Determines available cpufreq governors *\n"));
-       printf(_("  -r, --related-cpus   Determines which CPUs run at the same hardware frequency *\n"));
-       printf(_("  -a, --affected-cpus  Determines which CPUs need to have their frequency\n"
-                       "                       coordinated by software *\n"));
-       printf(_("  -s, --stats          Shows cpufreq statistics if available\n"));
-       printf(_("  -y, --latency        Determines the maximum latency on CPU frequency changes *\n"));
-       printf(_("  -b, --boost          Checks for turbo or boost modes  *\n"));
-       printf(_("  -o, --proc           Prints out information like provided by the /proc/cpufreq\n"
-              "                       interface in 2.4. and early 2.6. kernels\n"));
-       printf(_("  -m, --human          human-readable output for the -f, -w, -s and -y parameters\n"));
-       printf(_("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-       printf(_("If no argument is given, full output about\n"
-              "cpufreq is printed which is useful e.g. for reporting bugs.\n\n"));
-       printf(_("By default info of CPU 0 is shown which can be overridden\n"
-                "with the cpupower --cpu main command option.\n"));
-}
-
 static struct option info_opts[] = {
        { .name = "debug",      .has_arg = no_argument,         .flag = NULL,   .val = 'e'},
        { .name = "boost",      .has_arg = no_argument,         .flag = NULL,   .val = 'b'},
@@ -556,7 +525,6 @@ static struct option info_opts[] = {
        { .name = "latency",    .has_arg = no_argument,         .flag = NULL,   .val = 'y'},
        { .name = "proc",       .has_arg = no_argument,         .flag = NULL,   .val = 'o'},
        { .name = "human",      .has_arg = no_argument,         .flag = NULL,   .val = 'm'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
@@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv)
        int output_param = 0;
 
        do {
-               ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL);
+               ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL);
                switch (ret) {
                case '?':
                        output_param = '?';
                        cont = 0;
                        break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                case -1:
                        cont = 0;
                        break;
@@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv)
                return -EINVAL;
        case '?':
                printf(_("invalid or unknown argument\n"));
-               freq_info_help();
                return -EINVAL;
-       case 'h':
-               freq_info_help();
-               return EXIT_SUCCESS;
        case 'o':
                proc_cpufreq_output();
                return EXIT_SUCCESS;
index 5f78362..dd1539e 100644 (file)
 
 #define NORM_FREQ_LEN 32
 
-void freq_set_help(void)
-{
-       printf(_("Usage: cpupower frequency-set [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -d FREQ, --min FREQ      new minimum CPU frequency the governor may select\n"));
-       printf(_("  -u FREQ, --max FREQ      new maximum CPU frequency the governor may select\n"));
-       printf(_("  -g GOV, --governor GOV   new cpufreq governor\n"));
-       printf(_("  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
-              "                           governor to be available and loaded\n"));
-       printf(_("  -r, --related            Switches all hardware-related CPUs\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-       printf(_("Notes:\n"
-              "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"));
-       printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n"
-              "   except the -c CPU, --cpu CPU parameter\n"
-              "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
-              "   by postfixing the value with the wanted unit name, without any space\n"
-              "   (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"));
-
-}
-
 static struct option set_opts[] = {
        { .name = "min",        .has_arg = required_argument,   .flag = NULL,   .val = 'd'},
        { .name = "max",        .has_arg = required_argument,   .flag = NULL,   .val = 'u'},
        { .name = "governor",   .has_arg = required_argument,   .flag = NULL,   .val = 'g'},
        { .name = "freq",       .has_arg = required_argument,   .flag = NULL,   .val = 'f'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { .name = "related",    .has_arg = no_argument,         .flag = NULL,   .val='r'},
        { },
 };
@@ -80,7 +57,6 @@ const struct freq_units def_units[] = {
 static void print_unknown_arg(void)
 {
        printf(_("invalid or unknown argument\n"));
-       freq_set_help();
 }
 
 static unsigned long string_to_frequency(const char *str)
@@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv)
 
        /* parameter parsing */
        do {
-               ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL);
+               ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL);
                switch (ret) {
                case '?':
                        print_unknown_arg();
                        return -EINVAL;
-               case 'h':
-                       freq_set_help();
-                       return 0;
                case -1:
                        cont = 0;
                        break;
index 70da357..b028267 100644 (file)
@@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
        }
 }
 
-/* --freq / -f */
-
-void idle_info_help(void)
-{
-       printf(_ ("Usage: cpupower idleinfo [options]\n"));
-       printf(_ ("Options:\n"));
-       printf(_ ("  -s, --silent         Only show general C-state information\n"));
-       printf(_ ("  -o, --proc           Prints out information like provided by the /proc/acpi/processor/*/power\n"
-              "                       interface in older kernels\n"));
-       printf(_ ("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-}
-
 static struct option info_opts[] = {
        { .name = "silent",     .has_arg = no_argument, .flag = NULL,   .val = 's'},
        { .name = "proc",       .has_arg = no_argument, .flag = NULL,   .val = 'o'},
-       { .name = "help",       .has_arg = no_argument, .flag = NULL,   .val = 'h'},
        { },
 };
 
 static inline void cpuidle_exit(int fail)
 {
-       idle_info_help();
        exit(EXIT_FAILURE);
 }
 
@@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv)
        unsigned int cpu = 0;
 
        do {
-               ret = getopt_long(argc, argv, "hos", info_opts, NULL);
+               ret = getopt_long(argc, argv, "os", info_opts, NULL);
                if (ret == -1)
                        break;
                switch (ret) {
@@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv)
                        output_param = '?';
                        cont = 0;
                        break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                case 's':
                        verbose = 0;
                        break;
@@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv)
        case '?':
                printf(_("invalid or unknown argument\n"));
                cpuidle_exit(EXIT_FAILURE);
-       case 'h':
-               cpuidle_exit(EXIT_SUCCESS);
        }
 
        /* Default is: show output of CPU 0 only */
index 85253cb..3f68632 100644 (file)
 #include "helpers/helpers.h"
 #include "helpers/sysfs.h"
 
-void info_help(void)
-{
-       printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias    Gets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc     Gets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt    Gets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf(_("\nPassing no option will show all info, by default only on core 0\n"));
-       printf("\n");
-}
-
 static struct option set_opts[] = {
        { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
        { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
        { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
 static void print_wrong_arg_exit(void)
 {
        printf(_("invalid or unknown argument\n"));
-       info_help();
        exit(EXIT_FAILURE);
 }
 
@@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv)
        textdomain(PACKAGE);
 
        /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) {
+       while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) {
                switch (ret) {
-               case 'h':
-                       info_help();
-                       return 0;
                case 'b':
                        if (params.perf_bias)
                                print_wrong_arg_exit();
index bc1b391..dc4de37 100644 (file)
 #include "helpers/sysfs.h"
 #include "helpers/bitmask.h"
 
-void set_help(void)
-{
-       printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias [VAL]    Sets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc  [VAL]    Sets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt [VAL]    Sets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-}
-
 static struct option set_opts[] = {
        { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
        { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
        { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
 static void print_wrong_arg_exit(void)
 {
        printf(_("invalid or unknown argument\n"));
-       set_help();
        exit(EXIT_FAILURE);
 }
 
@@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv)
 
        params.params = 0;
        /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "m:s:b:h",
+       while ((ret = getopt_long(argc, argv, "m:s:b:",
                                                set_opts, NULL)) != -1) {
                switch (ret) {
-               case 'h':
-                       set_help();
-                       return 0;
                case 'b':
                        if (params.perf_bias)
                                print_wrong_arg_exit();
@@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv)
                }
        };
 
-       if (!params.params) {
-               set_help();
-               return -EINVAL;
-       }
+       if (!params.params)
+               print_wrong_arg_exit();
 
        if (params.sched_mc) {
                ret = sysfs_set_sched("mc", sched_mc);
index 5844ae0..52bee59 100644 (file)
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include "builtin.h"
 #include "helpers/helpers.h"
 struct cmd_struct {
        const char *cmd;
        int (*main)(int, const char **);
-       void (*usage)(void);
        int needs_root;
 };
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
-int cmd_help(int argc, const char **argv);
+static int cmd_help(int argc, const char **argv);
 
 /* Global cpu_info object available for all binaries
  * Info only retrieved from CPU 0
@@ -44,55 +44,66 @@ int be_verbose;
 static void print_help(void);
 
 static struct cmd_struct commands[] = {
-       { "frequency-info",     cmd_freq_info,  freq_info_help, 0       },
-       { "frequency-set",      cmd_freq_set,   freq_set_help,  1       },
-       { "idle-info",          cmd_idle_info,  idle_info_help, 0       },
-       { "set",                cmd_set,        set_help,       1       },
-       { "info",               cmd_info,       info_help,      0       },
-       { "monitor",            cmd_monitor,    monitor_help,   0       },
-       { "help",               cmd_help,       print_help,     0       },
-       /*      { "bench",      cmd_bench,      NULL,           1       }, */
+       { "frequency-info",     cmd_freq_info,  0       },
+       { "frequency-set",      cmd_freq_set,   1       },
+       { "idle-info",          cmd_idle_info,  0       },
+       { "set",                cmd_set,        1       },
+       { "info",               cmd_info,       0       },
+       { "monitor",            cmd_monitor,    0       },
+       { "help",               cmd_help,       0       },
+       /*      { "bench",      cmd_bench,      1       }, */
 };
 
-int cmd_help(int argc, const char **argv)
-{
-       unsigned int i;
-
-       if (argc > 1) {
-               for (i = 0; i < ARRAY_SIZE(commands); i++) {
-                       struct cmd_struct *p = commands + i;
-                       if (strcmp(p->cmd, argv[1]))
-                               continue;
-                       if (p->usage) {
-                               p->usage();
-                               return EXIT_SUCCESS;
-                       }
-               }
-       }
-       print_help();
-       if (argc == 1)
-               return EXIT_SUCCESS; /* cpupower help */
-       return EXIT_FAILURE;
-}
-
 static void print_help(void)
 {
        unsigned int i;
 
 #ifdef DEBUG
-       printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n"));
-       printf(_("  -d, --debug      May increase output (stderr) on some subcommands\n"));
+       printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n"));
 #else
-       printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n"));
+       printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n"));
 #endif
-       printf(_("cpupower --version\n"));
-       printf(_("Supported subcommands are:\n"));
+       printf(_("Supported commands are:\n"));
        for (i = 0; i < ARRAY_SIZE(commands); i++)
                printf("\t%s\n", commands[i].cmd);
-       printf(_("\nSome subcommands can make use of the -c cpulist option.\n"));
-       printf(_("Look at the general cpupower manpage how to use it\n"));
-       printf(_("and read up the subcommand's manpage whether it is supported.\n"));
-       printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n"));
+       printf(_("\nNot all commands can make use of the -c cpulist option.\n"));
+       printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n"));
+}
+
+static int print_man_page(const char *subpage)
+{
+       int len;
+       char *page;
+
+       len = 10; /* enough for "cpupower-" */
+       if (subpage != NULL)
+               len += strlen(subpage);
+
+       page = malloc(len);
+       if (!page)
+               return -ENOMEM;
+
+       sprintf(page, "cpupower");
+       if ((subpage != NULL) && strcmp(subpage, "help")) {
+               strcat(page, "-");
+               strcat(page, subpage);
+       }
+
+       execlp("man", "man", page, NULL);
+
+       /* should not be reached */
+       return -EINVAL;
+}
+
+static int cmd_help(int argc, const char **argv)
+{
+       if (argc > 1) {
+               print_man_page(argv[1]); /* exits within execlp() */
+               return EXIT_FAILURE;
+       }
+
+       print_help();
+       return EXIT_SUCCESS;
 }
 
 static void print_version(void)
index 592ee36..2747e73 100644 (file)
 #include "helpers/bitmask.h"
 
 /* Internationalization ****************************/
+#ifdef NLS
+
 #define _(String) gettext(String)
 #ifndef gettext_noop
 #define gettext_noop(String) String
 #endif
 #define N_(String) gettext_noop(String)
+
+#else /* !NLS */
+
+#define _(String) String
+#define N_(String) String
+
+#endif
 /* Internationalization ****************************/
 
 extern int run_as_root;
@@ -96,6 +105,9 @@ struct cpupower_topology {
                int pkg;
                int core;
                int cpu;
+
+               /* flags */
+               unsigned int is_online:1;
        } *core_info;
 };
 
index 55e2466..c634302 100644 (file)
@@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path,
        return (unsigned int) numwrite;
 }
 
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     negative errno values in error case
+ */
+int sysfs_is_cpu_online(unsigned int cpu)
+{
+       char path[SYSFS_PATH_MAX];
+       int fd;
+       ssize_t numread;
+       unsigned long long value;
+       char linebuf[MAX_LINE_LEN];
+       char *endp;
+       struct stat statbuf;
+
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+       if (stat(path, &statbuf) != 0)
+               return 0;
+
+       /*
+        * kernel without CONFIG_HOTPLUG_CPU
+        * -> cpuX directory exists, but not cpuX/online file
+        */
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+       if (stat(path, &statbuf) != 0)
+               return 1;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+               return -errno;
+
+       numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+       if (numread < 1) {
+               close(fd);
+               return -EIO;
+       }
+       linebuf[numread] = '\0';
+       close(fd);
+
+       value = strtoull(linebuf, &endp, 0);
+       if (value > 1 || value < 0)
+               return -EINVAL;
+
+       return value;
+}
+
 /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
 
 /*
index f9373e0..8cb797b 100644 (file)
@@ -7,6 +7,8 @@
 
 extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
 
+extern int sysfs_is_cpu_online(unsigned int cpu);
+
 extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
                                                unsigned int idlestate);
 extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
index 385ee5c..4eae2c4 100644 (file)
@@ -41,6 +41,8 @@ struct cpuid_core_info {
        unsigned int pkg;
        unsigned int thread;
        unsigned int cpu;
+       /* flags */
+       unsigned int is_online:1;
 };
 
 static int __compare(const void *t1, const void *t2)
@@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                return -ENOMEM;
        cpu_top->pkgs = cpu_top->cores = 0;
        for (cpu = 0; cpu < cpus; cpu++) {
+               cpu_top->core_info[cpu].cpu = cpu;
+               cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
                cpu_top->core_info[cpu].pkg =
                        sysfs_topology_read_file(cpu, "physical_package_id");
                if ((int)cpu_top->core_info[cpu].pkg != -1 &&
@@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                        cpu_top->pkgs = cpu_top->core_info[cpu].pkg;
                cpu_top->core_info[cpu].core =
                        sysfs_topology_read_file(cpu, "core_id");
-               cpu_top->core_info[cpu].cpu = cpu;
        }
        cpu_top->pkgs++;
 
index d048b96..bcd22a1 100644 (file)
@@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
        /* Assume idle state count is the same for all CPUs */
        cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
 
-       if (cpuidle_sysfs_monitor.hw_states_num == 0)
+       if (cpuidle_sysfs_monitor.hw_states_num <= 0)
                return NULL;
 
        for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
index ba4bf06..0d6571e 100644 (file)
@@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top;
 /* ToDo: Document this in the manpage */
 static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
 
+static void print_wrong_arg_exit(void)
+{
+       printf(_("invalid or unknown argument\n"));
+       exit(EXIT_FAILURE);
+}
+
 long long timespec_diff_us(struct timespec start, struct timespec end)
 {
        struct timespec temp;
@@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end)
        return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
 }
 
-void monitor_help(void)
-{
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n"));
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n"));
-       printf(_("cpupower monitor: -l\n"));
-       printf(_("\t command: pass an arbitrary command to measure specific workload\n"));
-       printf(_("\t -i: time intervall to measure for in seconds (default 1)\n"));
-       printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n"));
-       printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n"));
-       printf(_("\t -h: print this help\n"));
-       printf("\n");
-       printf(_("only one of: -l, -m are allowed\nIf none of them is passed,"));
-       printf(_(" all supported monitors are shown\n"));
-}
-
 void print_n_spaces(int n)
 {
        int x;
@@ -149,6 +140,10 @@ void print_results(int topology_depth, int cpu)
        unsigned long long result;
        cstate_t s;
 
+       /* Be careful CPUs may got resorted for pkg value do not just use cpu */
+       if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+               return;
+
        if (topology_depth > 2)
                printf("%4d|", cpu_top.core_info[cpu].pkg);
        if (topology_depth > 1)
@@ -190,9 +185,13 @@ void print_results(int topology_depth, int cpu)
                        }
                }
        }
-       /* cpu offline */
-       if (cpu_top.core_info[cpu].pkg == -1 ||
-           cpu_top.core_info[cpu].core == -1) {
+       /*
+        * The monitor could still provide useful data, for example
+        * AMD HW counters partly sit in PCI config space.
+        * It's up to the monitor plug-in to check .is_online, this one
+        * is just for additional info.
+        */
+       if (!cpu_top.core_info[cpu].is_online) {
                printf(_(" *is offline\n"));
                return;
        } else
@@ -238,7 +237,6 @@ static void parse_monitor_param(char *param)
        if (hits == 0) {
                printf(_("No matching monitor found in %s, "
                         "try -l option\n"), param);
-               monitor_help();
                exit(EXIT_FAILURE);
        }
        /* Override detected/registerd monitors array with requested one */
@@ -335,37 +333,27 @@ static void cmdline(int argc, char *argv[])
        int opt;
        progname = basename(argv[0]);
 
-       while ((opt = getopt(argc, argv, "+hli:m:")) != -1) {
+       while ((opt = getopt(argc, argv, "+li:m:")) != -1) {
                switch (opt) {
-               case 'h':
-                       monitor_help();
-                       exit(EXIT_SUCCESS);
                case 'l':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                        mode = list;
                        break;
                case 'i':
                        /* only allow -i with -m or no option */
-                       if (mode && mode != show) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode && mode != show)
+                               print_wrong_arg_exit();
                        interval = atoi(optarg);
                        break;
                case 'm':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                        mode = show;
                        show_monitors_param = optarg;
                        break;
                default:
-                       monitor_help();
-                       exit(EXIT_FAILURE);
+                       print_wrong_arg_exit();
                }
        }
        if (!mode)
@@ -385,6 +373,10 @@ int cmd_monitor(int argc, char **argv)
                return EXIT_FAILURE;
        }
 
+       /* Default is: monitor all CPUs */
+       if (bitmask_isallclear(cpus_chosen))
+               bitmask_setall(cpus_chosen);
+
        dprint("System has up to %d CPU cores\n", cpu_count);
 
        for (num = 0; all_monitors[num]; num++) {
index 63ca87a..5650ab5 100644 (file)
 
 #define MSR_TSC        0x10
 
+#define MSR_AMD_HWCR 0xc0010015
+
 enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
 
 static int mperf_get_count_percent(unsigned int self_id, double *percent,
                                   unsigned int cpu);
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                unsigned int cpu);
+static struct timespec time_start, time_end;
 
 static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
        {
@@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
        },
 };
 
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The max frequency mperf is ticking at (in C0), either retrieved via:
+ *   1) calculated after measurements if we know TSC ticks at mperf/P0 frequency
+ *   2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
+ * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen)
+ */
+static unsigned long max_frequency;
+
 static unsigned long long tsc_at_measure_start;
 static unsigned long long tsc_at_measure_end;
-static unsigned long max_frequency;
 static unsigned long long *mperf_previous_count;
 static unsigned long long *aperf_previous_count;
 static unsigned long long *mperf_current_count;
 static unsigned long long *aperf_current_count;
+
 /* valid flag for all CPUs. If a MSR read failed it will be zero */
 static int *is_valid;
 
 static int mperf_get_tsc(unsigned long long *tsc)
 {
-       return read_msr(0, MSR_TSC, tsc);
+       int ret;
+       ret = read_msr(0, MSR_TSC, tsc);
+       if (ret)
+               dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+       return ret;
 }
 
 static int mperf_init_stats(unsigned int cpu)
@@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu)
        return 0;
 }
 
-/*
- * get_average_perf()
- *
- * Returns the average performance (also considers boosted frequencies)
- *
- * Input:
- *   aperf_diff: Difference of the aperf register over a time period
- *   mperf_diff: Difference of the mperf register over the same time period
- *   max_freq:   Maximum frequency (P0)
- *
- * Returns:
- *   Average performance over the time period
- */
-static unsigned long get_average_perf(unsigned long long aperf_diff,
-                                     unsigned long long mperf_diff)
-{
-       unsigned int perf_percent = 0;
-       if (((unsigned long)(-1) / 100) < aperf_diff) {
-               int shift_count = 7;
-               aperf_diff >>= shift_count;
-               mperf_diff >>= shift_count;
-       }
-       perf_percent = (aperf_diff * 100) / mperf_diff;
-       return (max_frequency * perf_percent) / 100;
-}
-
 static int mperf_get_count_percent(unsigned int id, double *percent,
                                   unsigned int cpu)
 {
        unsigned long long aperf_diff, mperf_diff, tsc_diff;
+       unsigned long long timediff;
 
        if (!is_valid[cpu])
                return -1;
@@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 
        mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
        aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
-       tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
 
-       *percent = 100.0 * mperf_diff / tsc_diff;
-       dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
-              mperf_cstates[id].name, mperf_diff, tsc_diff);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               *percent = 100.0 * mperf_diff / tsc_diff;
+               dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, tsc_diff);
+       } else if (max_freq_mode == MAX_FREQ_SYSFS) {
+               timediff = timespec_diff_us(time_start, time_end);
+               *percent = 100.0 * mperf_diff / timediff;
+               dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, timediff);
+       } else
+               return -1;
 
        if (id == Cx)
                *percent = 100.0 - *percent;
@@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                unsigned int cpu)
 {
-       unsigned long long aperf_diff, mperf_diff;
+       unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
 
        if (id != AVG_FREQ)
                return 1;
@@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
        mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
        aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
 
-       /* Return MHz for now, might want to return KHz if column width is more
-          generic */
-       *count = get_average_perf(aperf_diff, mperf_diff) / 1000;
-       dprint("%s: %llu\n", mperf_cstates[id].name, *count);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               /* Calculate max_freq from TSC count */
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               time_diff = timespec_diff_us(time_start, time_end);
+               max_frequency = tsc_diff / time_diff;
+       }
 
+       *count = max_frequency * ((double)aperf_diff / mperf_diff);
+       dprint("%s: Average freq based on %s maximum frequency:\n",
+              mperf_cstates[id].name,
+              (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+       dprint("%max_frequency: %lu", max_frequency);
+       dprint("aperf_diff: %llu\n", aperf_diff);
+       dprint("mperf_diff: %llu\n", mperf_diff);
+       dprint("avg freq:   %llu\n", *count);
        return 0;
 }
 
@@ -178,6 +188,7 @@ static int mperf_start(void)
        int cpu;
        unsigned long long dbg;
 
+       clock_gettime(CLOCK_REALTIME, &time_start);
        mperf_get_tsc(&tsc_at_measure_start);
 
        for (cpu = 0; cpu < cpu_count; cpu++)
@@ -193,32 +204,104 @@ static int mperf_stop(void)
        unsigned long long dbg;
        int cpu;
 
-       mperf_get_tsc(&tsc_at_measure_end);
-
        for (cpu = 0; cpu < cpu_count; cpu++)
                mperf_measure_stats(cpu);
 
+       mperf_get_tsc(&tsc_at_measure_end);
+       clock_gettime(CLOCK_REALTIME, &time_end);
+
        mperf_get_tsc(&dbg);
        dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
 
        return 0;
 }
 
-struct cpuidle_monitor mperf_monitor;
-
-struct cpuidle_monitor *mperf_register(void)
+/*
+ * Mperf register is defined to tick at P0 (maximum) frequency
+ *
+ * Instead of reading out P0 which can be tricky to read out from HW,
+ * we use TSC counter if it reliably ticks at P0/mperf frequency.
+ *
+ * Still try to fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without invariant TSC feature.
+ * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but
+ * it's still double checked (MSR_AMD_HWCR)).
+ *
+ * On these machines the user would still get useful mperf
+ * stats when acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
 {
+       int ret;
+       unsigned long long hwcr;
        unsigned long min;
 
-       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
-               return NULL;
-
-       /* Assume min/max all the same on all cores */
+       if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)
+               goto use_sysfs;
+
+       if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+               /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
+                * freq.
+                * A test whether hwcr is accessable/available would be:
+                * (cpupower_cpu_info.family > 0x10 ||
+                *   cpupower_cpu_info.family == 0x10 &&
+                *   cpupower_cpu_info.model >= 0x2))
+                * This should be the case for all aperf/mperf
+                * capable AMD machines and is therefore safe to test here.
+                * Compare with Linus kernel git commit: acf01734b1747b1ec4
+                */
+               ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+               /*
+                * If the MSR read failed, assume a Xen system that did
+                * not explicitly provide access to it and assume TSC works
+               */
+               if (ret != 0) {
+                       dprint("TSC read 0x%x failed - assume TSC working\n",
+                              MSR_AMD_HWCR);
+                       return 0;
+               } else if (1 & (hwcr >> 24)) {
+                       max_freq_mode = MAX_FREQ_TSC_REF;
+                       return 0;
+               } else { /* Use sysfs max frequency if available */ }
+       } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+               /*
+                * On Intel we assume mperf (in C0) is ticking at same
+                * rate than TSC
+                */
+               max_freq_mode = MAX_FREQ_TSC_REF;
+               return 0;
+       }
+use_sysfs:
        if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
                dprint("Cannot retrieve max freq from cpufreq kernel "
                       "subsystem\n");
-               return NULL;
+               return -1;
        }
+       max_freq_mode = MAX_FREQ_SYSFS;
+       return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) Average frequency a CPU resided in
+ *    This always works if the CPU has aperf/mperf capabilities
+ *
+ * 2) C0 and Cx (any sleep state) time a CPU resided in
+ *    Works if mperf timer stops ticking in sleep states which
+ *    seem to be the case on all current HW.
+ * Both is directly retrieved from HW registers and is independent
+ * from kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+               return NULL;
+
+       if (init_maxfreq_mode())
+               return NULL;
 
        /* Free this at program termination */
        is_valid = calloc(cpu_count, sizeof(int));