/****************************************************************
 *								*
 *	Copyright 2003, 2010 Fidelity Information Services, Inc *
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#ifndef MEMCOHERENCY_H_INCLUDED
#define MEMCOHERENCY_H_INCLUDED

/* On uniprocessor systems, there is no need for a "memory barrier" since memory is always coherent.
 * But we almost always expect to be running on a multi-processor system, so we avoid the cost
 * of the if-check and do the memory barrier ALWAYS.
 */

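/* Illustrative usage sketch (not part of this header; the structure and field names are hypothetical):
 * a writer publishes a payload and then sets a ready flag; a reader spins on the flag and then reads
 * the payload. The write barrier keeps the two stores ordered as seen by other processors, and the
 * matching read barrier keeps the two loads ordered:
 *
 *	writer:	shared->payload = value;
 *		SHM_WRITE_MEMORY_BARRIER;
 *		shared->ready = TRUE;
 *
 *	reader:	while (!shared->ready)
 *			;
 *		SHM_READ_MEMORY_BARRIER;
 *		use(shared->payload);
 */
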
#ifdef __alpha

#include <c_asm.h>

/* See the Alpha Architecture Reference Manual, edited by Richard L. Sites,
 * chapter "System Architecture and Programming Implications", for memory
 * coherency issues and the behavior of the "mb" (memory barrier) instruction.
 */

/* NOTES about Alpha (pp. 5-20, section 5.6.5 Implications for Hardware, Chapter 5 System Architecture and Programming
 * Implications, Alpha Architecture Reference Manual, edited by Richard L. Sites):
 *
 * MB and IMB force all preceding writes to at least reach their respective coherency points. This does not mean that
 * main-memory writes have been done, just that the order of the eventual writes is committed. MB and IMB also force all
 * queued cache invalidates to be delivered to the local caches before starting any subsequent reads (that may otherwise
 * cache hit on stale data) or writes (that may otherwise write the cache, only to have the write effectively overwritten
 * by a late-delivered invalidate).
 */
#define SHM_WRITE_MEMORY_BARRIER	asm("mb")

#define SHM_READ_MEMORY_BARRIER		SHM_WRITE_MEMORY_BARRIER	/* same MB instruction for both read and write barriers */

#ifdef __vms
#define SECSHR_SHM_WRITE_MEMORY_BARRIER	asm("mb")
#define SECSHR_SHM_READ_MEMORY_BARRIER	SECSHR_SHM_WRITE_MEMORY_BARRIER
#endif /* __vms */

#elif defined(POWER) || defined(PWRPC) /* GT.M defines POWER and PWRPC if _AIX is defined, see sr_rs6000/mdefsp.h */

/* Refer to the article "POWER4 and shared memory synchronization" by R. William Hay and Gary R. Hook, available at
 * http://www-106.ibm.com/developerworks/eserver/articles/power4_mem.html
 */

/* prototypes */
void do_sync(void);
void do_lwsync(void);
void do_eieio(void);
void do_isync(void);

/* The machine codes below were fetched from http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html */

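/* Note on the mechanism: with the AIX xlc compiler, #pragma mc_func defines each do_*() routine as
 * the raw machine code given in hex (so each "call" inlines to that single instruction), and
 * #pragma reg_killed_by with an empty register list tells the compiler the routine clobbers no registers.
 */
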
/* sync : Creates a memory barrier. On a given processor, any load or store instructions ahead of the sync instruction
 * in the program sequence must complete their accesses to memory first, and then any load or store instructions after
 * sync can begin.
 */
#pragma mc_func do_sync{"7c0004ac"}
#pragma reg_killed_by do_sync

/* lwsync : Creates a memory barrier that provides the same ordering function as the sync instruction, except that a
 * load caused by an instruction following the lwsync may be performed before a store caused by an instruction that
 * precedes the lwsync, and the ordering does not apply to accesses to I/O memory (memory-mapped I/O).
 * lwsync is a new variant of the sync instruction and is interpreted by older processors as a sync. The instruction,
 * as its name implies, has much less performance impact than sync, and is recommended for synchronization of most
 * memory (but not I/O) references.
 */
#pragma mc_func do_lwsync{"7c2004ac"}
#pragma reg_killed_by do_lwsync

/* eieio : Creates a memory barrier that provides the same ordering function as the sync instruction, except that
 * ordering applies only to accesses to I/O memory.
 */
#pragma mc_func do_eieio{"7c0006ac"}
#pragma reg_killed_by do_eieio

/* isync : Causes the processor to discard any prefetched (and possibly speculatively executed) instructions and
 * refetch the next following instructions. It is used in locking code (e.g. __check_lock()) to ensure that no
 * loads following entry into a critical section can access data (because of aggressive out-of-order and speculative
 * execution in the processor) before the lock is acquired.
 */
#pragma mc_func do_isync{"4c00012c"}
#pragma reg_killed_by do_isync

#define SHM_WRITE_MEMORY_BARRIER										\
{	/* Ensure that code does not rely on ordering of "loads" following lwsync in programming sequence to occur */	\
	/* after "stores" before lwsync. Use do_sync() if such ordering is required. Replication code (t_end.c,	*/	\
	/* tp_end.c) does not rely on store-load order across the memory barrier. Note that grab/rel_lock() perform	*/	\
	/* "sync" (via a call to _clear_lock()), and so we are guaranteed strict ordering of loads and stores in	*/	\
	/* code that reads/writes the journal pool in transaction logic						*/	\
	do_lwsync();												\
}

#define SHM_READ_MEMORY_BARRIER	\
{					\
	do_isync();			\
}

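/* Note (a PowerPC architecture detail, stated here as an assumption about intended use): isync by itself
 * does not order storage accesses; it acts as an acquire barrier only when it follows a load whose value
 * has been tested by a conditional branch (load / compare / branch / isync). That is the pattern in which
 * SHM_READ_MEMORY_BARRIER is meant to be used, e.g. spin on a shared flag, then isync, then read the data.
 */
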
#elif defined(__hppa)
/* For _PA_RISC1_0 and _PA_RISC1_1, accesses to the address space (both to memory and I/O) through load, store and
 * semaphore instructions are strongly ordered. This means that accesses appear to software to be done in program order.
 * For _PA_RISC2_0, accesses could be "strongly ordered", "ordered", or "weakly ordered" (read PA-RISC 2.0 ARCHITECTURE
 * by Gerry Kane, appendix "Memory Ordering Model").
 *
 * For all PA-RISC architectures, cache flush operations are weakly ordered. Flushes may be delayed or held pending, and
 * a sequence of flush operations may be executed in any order.
 *
 * SYNC : Enforces program order of memory references.
 * Any load, store, semaphore, cache flush, or cache purge instructions that follow the SYNC instruction get executed
 * only after all such instructions prior to the SYNC instruction have completed executing. On implementations which
 * execute such instructions out of sequence, this instruction enforces program ordering. In systems in which all memory
 * references are performed in order, this instruction executes as a null instruction.
 *
 * IMPORTANT: the SYNC instruction enforces ordering of only those accesses caused by instructions executed on the
 * same processor that executes the SYNC instruction.
 *
 * [Vinaya] Research results: Accesses to fields (globals) that are defined volatile are ordered (the compiler generates
 * the LDW (or STW) instruction with the O (ordered) completer, i.e., the instructions generated are LDW,O (STW,O)).
 * Depending on the requirements, it may be sufficient to define shared fields as volatile to enforce ordering. With
 * replication, though, it is important that pending cache flushes are completed so that the source server sees the
 * transaction data in its entirety.
 */

#define SHM_WRITE_MEMORY_BARRIER	(void)_asm("SYNC")

#define SHM_READ_MEMORY_BARRIER		SHM_WRITE_MEMORY_BARRIER	/* same SYNC instruction for both read and write barriers.
									 * For read, we want all cache purges to be completed before
									 * we load shared fields */

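/* Illustrative note (hypothetical names, following the research note above): declaring a shared field
 * volatile gives ordered LDW,O/STW,O accesses to that field, but does not complete pending cache flushes;
 * a writer publishing a whole buffer therefore still needs the SYNC-based barrier:
 *
 *	memcpy(jnlbuff, rec, reclen);	   writes to the buffer may still be pending in cache
 *	SHM_WRITE_MEMORY_BARRIER;	   SYNC: prior accesses/flushes complete first
 *	jnlhdr->freeaddr += reclen;	   only now expose the new data via the (volatile) field
 */
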
#elif defined(__ia64)

#if defined(__hpux)

#include <machine/sys/kern_inline.h>
#define SHM_WRITE_MEMORY_BARRIER	_MF()

#elif defined(__linux__) && defined(__INTEL_COMPILER)

# define SHM_WRITE_MEMORY_BARRIER	__mf()

#elif defined(__linux__)	/* gcc */

# define SHM_WRITE_MEMORY_BARRIER	__asm__ __volatile__ ("mf" ::: "memory")
#endif	/* __linux__ */

/* On IA64, cross-processor notifications of write barriers are automatic, so no read barrier is necessary */
#define SHM_READ_MEMORY_BARRIER

#else /* SPARC, I386, S390 */

/* Although the SPARC architecture allows out-of-order memory accesses, Solaris forces strong ordering on memory
 * accesses, so we do not need memory barrier primitives on Solaris/SPARC.
 */

/* Memory accesses in the Intel x86 and IBM S390 architectures are strongly ordered */

#define SHM_WRITE_MEMORY_BARRIER
#define SHM_READ_MEMORY_BARRIER

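/* Caveat (an added observation, not from the original comments): the empty definitions rely entirely on the
 * hardware's strong ordering; they do not restrain compiler reordering, which callers must constrain separately
 * (e.g. by declaring the relevant shared fields volatile, as discussed in the PA-RISC note above).
 */
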
#endif

#if !defined(SECSHR_SHM_WRITE_MEMORY_BARRIER)
#define SECSHR_SHM_WRITE_MEMORY_BARRIER	SHM_WRITE_MEMORY_BARRIER	/* default definition */
#endif

#if !defined(SECSHR_SHM_READ_MEMORY_BARRIER)
#define SECSHR_SHM_READ_MEMORY_BARRIER	SHM_READ_MEMORY_BARRIER		/* default definition */
#endif

#endif /* MEMCOHERENCY_H_INCLUDED */