operations" subsection for information on where to use these.
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+ These are for use with consistent memory to guarantee the ordering
+ of writes or reads of shared memory accessible to both the CPU and a
+ DMA capable device.
+
+ For example, consider a device driver that shares memory with a device
+ and uses a descriptor status value to indicate if the descriptor belongs
+ to the device or the CPU, and a doorbell to notify the device when new
+ descriptors are available:
+
+ if (desc->status != DEVICE_OWN) {
+ /* do not read data until we own descriptor */
+ dma_rmb();
+
+ /* read/modify data */
+ read_data = desc->data;
+ desc->data = write_data;
+
+ /* flush modifications before status update */
+ dma_wmb();
+
+ /* assign ownership */
+ desc->status = DEVICE_OWN;
+
+ /* force memory to sync before notifying device via MMIO */
+ wmb();
+
+ /* notify device of new descriptors */
+ writel(DESC_NOTIFY, doorbell);
+ }
+
+ The dma_rmb() allows us to guarantee the device has released ownership
+ before we read the data from the descriptor, and the dma_wmb() allows
+ us to guarantee the data is written to the descriptor before the device
+ can see it now has ownership. The wmb() is needed to guarantee that the
+ cache coherent memory writes have completed before attempting a write to
+ the cache incoherent MMIO region.
+
+ See Documentation/DMA-API.txt for more information on consistent memory.
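+
+ For the complementary direction, where the CPU consumes descriptors that
+ the device has written back, dma_rmb() alone orders the ownership/status
+ check against the subsequent data reads. An illustrative sketch (the
+ field names here are hypothetical):
+
+ if (desc->status & DEVICE_DONE) {
+ /* do not read data until the device's write-back is visible */
+ dma_rmb();
+
+ /* read data written back by the device */
+ len = desc->len;
+ process(desc->data, len);
+ }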
+
MMIO WRITE BARRIER
------------------
#define rmb() __asm__ __volatile__("mb": : :"memory")
#define wmb() __asm__ __volatile__("wmb": : :"memory")
+/**
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
#ifdef CONFIG_SMP
#define mb() do { dsb(); outer_sync(); } while (0)
#define rmb() dsb()
#define wmb() do { dsb(st); outer_sync(); } while (0)
+#define dma_rmb() dmb(osh)
+#define dma_wmb() dmb(oshst)
#else
#define mb() barrier()
#define rmb() barrier()
#define wmb() barrier()
+#define dma_rmb() barrier()
+#define dma_wmb() barrier()
#endif
#ifndef CONFIG_SMP
#define rmb() dsb(ld)
#define wmb() dsb(st)
+#define dma_rmb() dmb(oshld)
+#define dma_wmb() dmb(oshst)
+
#ifndef CONFIG_SMP
#define smp_mb() barrier()
#define smp_rmb() barrier()
# define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
# define rmb() do { barrier(); smp_check_barrier(); } while (0)
# define wmb() do { barrier(); smp_mark_barrier(); } while (0)
+/*
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0)
#endif
* it's (presumably) much slower than mf and (b) mf.a is supported for
* sequential memory pages only.
*/
-#define mb() ia64_mf()
-#define rmb() mb()
-#define wmb() mb()
-#define read_barrier_depends() do { } while(0)
+#define mb() ia64_mf()
+#define rmb() mb()
+#define wmb() mb()
+
+#define dma_rmb() mb()
+#define dma_wmb() mb()
#ifdef CONFIG_SMP
# define smp_mb() mb()
-# define smp_rmb() rmb()
-# define smp_wmb() wmb()
-# define smp_read_barrier_depends() read_barrier_depends()
-
#else
-
# define smp_mb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
-# define smp_read_barrier_depends() do { } while(0)
-
#endif
+#define smp_rmb() smp_mb()
+#define smp_wmb() smp_mb()
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#define smp_mb__before_atomic() barrier()
#define smp_mb__after_atomic() barrier()
#include <asm/metag_mem.h>
#define nop() asm volatile ("NOP")
-#define mb() wmb()
-#define rmb() barrier()
#ifdef CONFIG_METAG_META21
#endif /* !CONFIG_METAG_META21 */
-static inline void wmb(void)
-{
- /* flush writes through the write combiner */
- wr_fence();
-}
+/* flush writes through the write combiner */
+#define mb() wr_fence()
+#define rmb() barrier()
+#define wmb() mb()
-#define read_barrier_depends() do { } while (0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
#ifndef CONFIG_SMP
#define fence() do { } while (0)
#define smp_wmb() barrier()
#endif
#endif
-#define smp_read_barrier_depends() do { } while (0)
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
#define smp_store_release(p, v) \
#include <asm/addrspace.h>
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
#include <asm/wbflush.h>
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
#define mb() wbflush()
#define iob() wbflush()
#else /* !CONFIG_CPU_HAS_WB */
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
#define mb() fast_mb()
#define iob() fast_iob()
#endif /* !CONFIG_CPU_HAS_WB */
+#define wmb() fast_wmb()
+#define rmb() fast_rmb()
+#define dma_wmb() fast_wmb()
+#define dma_rmb() fast_rmb()
+
#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
# define smp_mb() __sync()
#define mb() __asm__ __volatile__ ("sync" : : : "memory")
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; mb(); } while (0)
-#ifdef CONFIG_SMP
-
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
#else
#endif
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define dma_rmb() __lwsync()
+#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_lwsync() __lwsync()
#define smp_mb() mb()
#define smp_rmb() __lwsync()
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_read_barrier_depends() read_barrier_depends()
#else
-#define __lwsync() barrier()
+#define smp_lwsync() barrier()
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
#endif /* CONFIG_SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
/*
* This is a barrier which prevents following instructions from being
* started until the value of the argument x is known. For example, if
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
+ smp_lwsync(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
+ smp_lwsync(); \
___p1; \
})
#define rmb() mb()
#define wmb() mb()
-#define read_barrier_depends() do { } while(0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
+
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
#define rmb() __asm__ __volatile__("":::"memory")
#define wmb() __asm__ __volatile__("":::"memory")
-#define read_barrier_depends() do { } while(0)
+#define dma_rmb() rmb()
+#define dma_wmb() wmb()
+
#define set_mb(__var, __value) \
do { __var = __value; membar_safe("#StoreLoad"); } while(0)
#define smp_wmb() __asm__ __volatile__("":::"memory")
#endif
-#define smp_read_barrier_depends() do { } while(0)
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
#define smp_store_release(p, v) \
do { \
#define wmb() asm volatile("sfence" ::: "memory")
#endif
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb() rmb()
+#define dma_rmb() rmb()
#else
-# define smp_rmb() barrier()
+#define dma_rmb() barrier()
#endif
+#define dma_wmb() barrier()
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#if defined(CONFIG_X86_PPRO_FENCE)
/*
#endif /* CONFIG_X86_32 */
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-
-#define smp_mb() mb()
#ifdef CONFIG_X86_PPRO_FENCE
-#define smp_rmb() rmb()
+#define dma_rmb() rmb()
#else /* CONFIG_X86_PPRO_FENCE */
-#define smp_rmb() barrier()
+#define dma_rmb() barrier()
#endif /* CONFIG_X86_PPRO_FENCE */
+#define dma_wmb() barrier()
-#define smp_wmb() barrier()
+#ifdef CONFIG_SMP
-#define smp_read_barrier_depends() read_barrier_depends()
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
+#define smp_wmb() barrier()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* CONFIG_SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* CONFIG_SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
/*
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+ if (!rx_desc->d.staterr)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STATUS_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.status_error)
break;
- /*
- * This memory barrier is needed to keep us from reading
+ /* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
+ * descriptor has been written back
*/
- rmb();
+ dma_rmb();
/* retrieve a buffer from the ring */
skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
{
u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
+
desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
}
u32 rx_buf_sz)
{
desc->addr = cpu_to_le64(mapping);
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
skb_tx_timestamp(skb);
- wmb();
+ /* Force memory writes to complete before releasing descriptor */
+ dma_wmb();
/* Anti gcc 2.95.3 bugware (sic) */
status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
txd->opts1 = cpu_to_le32(status);
- tp->cur_tx += frags + 1;
-
+ /* Force all memory writes to complete before notifying device */
wmb();
+ tp->cur_tx += frags + 1;
+
RTL_W8(TxPoll, NPQ);
mmiowb();
struct ring_info *tx_skb = tp->tx_skb + entry;
u32 status;
- rmb();
status = le32_to_cpu(tp->TxDescArray[entry].opts1);
if (status & DescOwn)
break;
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Tx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
struct RxDesc *desc = tp->RxDescArray + entry;
u32 status;
- rmb();
status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
-
if (status & DescOwn)
break;
+
+ /* This barrier is needed to keep us from reading
+ * any other fields out of the Rx descriptor until
+ * we know the status of DescOwn
+ */
+ dma_rmb();
+
if (unlikely(status & RxRES)) {
netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
status);
}
release_descriptor:
desc->opts2 = 0;
- wmb();
rtl8169_mark_to_asic(desc, rx_buf_sz);
}
#define wmb() mb()
#endif
+#ifndef dma_rmb
+#define dma_rmb() rmb()
+#endif
+
+#ifndef dma_wmb
+#define dma_wmb() wmb()
+#endif
+
#ifndef read_barrier_depends
#define read_barrier_depends() do { } while (0)
#endif
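
Where an architecture does not provide its own dma_rmb()/dma_wmb(), the
fallbacks above map them onto rmb()/wmb(), so a driver can use them
unconditionally. A minimal, illustrative sketch of that usage (every name
below is hypothetical, not taken from any in-tree driver):

	#include <linux/types.h>
	#include <asm/byteorder.h>
	#include <asm/barrier.h>

	/* Hypothetical descriptor layout in consistent (coherent) DMA memory. */
	struct demo_rx_desc {
		__le32	status;		/* non-zero once the device has written back */
		__le32	len;
	};

	/* Return the length of a completed descriptor, or -1 if none is ready. */
	static int demo_poll_desc(struct demo_rx_desc *desc)
	{
		if (!desc->status)
			return -1;	/* device still owns the descriptor */

		/* Do not read any other fields until the write-back has been seen. */
		dma_rmb();

		return le32_to_cpu(desc->len);
	}

	/* Hand the descriptor back to the device for reuse. */
	static void demo_give_back(struct demo_rx_desc *desc)
	{
		desc->len = 0;

		/* Flush the field updates before clearing the status/ownership word. */
		dma_wmb();
		desc->status = 0;
	}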