/****************************************************************
 *                                                              *
 *    Copyright 2003, 2010 Fidelity Information Services, Inc   *
 *                                                              *
 *    This source code contains the intellectual property       *
 *    of its copyright holder(s), and is made available         *
 *    under a license.  If you do not know the terms of         *
 *    the license, please stop and do not read further.         *
 *                                                              *
 ****************************************************************/

#ifndef MEMCOHERENCY_H_INCLUDED
#define MEMCOHERENCY_H_INCLUDED

/* On uniprocessor systems there is no need for a "memory barrier" as memory is always coherent.
 * But we almost always expect to be running on a multi-processor system, so we avoid the cost
 * of the if check and do the memory barrier ALWAYS.
 */
#ifdef __alpha
#include <c_asm.h>
/* Read the Alpha Architecture Reference Manual, edited by Richard L. Sites,
 * chapter "System Architecture and Programming Implications", for memory
 * coherency issues and the behavior of the "mb" (memory barrier) instruction.
 */
/* NOTES about Alpha (pp. 5-20, section 5.6.5 Implications for Hardware, Chapter 5 System Architecture and Programming
 * Implications, Alpha Architecture Reference Manual, edited by Richard L. Sites):
 *
 * MB and IMB force all preceding writes to at least reach their respective coherency points. This does not mean that
 * main-memory writes have been done, just that the order of the eventual writes is committed. MB and IMB also force all
 * queued cache invalidates to be delivered to the local caches before starting any subsequent reads (that may otherwise
 * cache hit on stale data) or writes (that may otherwise write the cache, only to have the write effectively overwritten
 * by a late-delivered invalidate).
 */
#define SHM_WRITE_MEMORY_BARRIER	asm("mb")
#define SHM_READ_MEMORY_BARRIER		SHM_WRITE_MEMORY_BARRIER	/* same MB instruction for both read and write barriers */
#ifdef __vms
#define SECSHR_SHM_WRITE_MEMORY_BARRIER	asm("mb")
#define SECSHR_SHM_READ_MEMORY_BARRIER	SECSHR_SHM_WRITE_MEMORY_BARRIER
#endif /* __vms */
#elif defined(POWER) || defined(PWRPC)
/* GT.M defines POWER and PWRPC if _AIX is defined; see sr_rs6000/mdefsp.h */
/* Refer to the article "POWER4 and shared memory synchronization" by R. William Hay and Gary R. Hook, available at
 * http://www-106.ibm.com/developerworks/eserver/articles/power4_mem.html
 */
/* prototypes */
void do_sync(void);
void do_lwsync(void);
void do_eieio(void);
void do_isync(void);

/* The machine codes were fetched from http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html */

/* sync : Creates a memory barrier. On a given processor, any load or store instructions ahead of the sync instruction
 * in the program sequence must complete their accesses to memory first, and then any load or store instructions after
 * sync can begin.
 */
#pragma mc_func do_sync{"7c0004ac"}
#pragma reg_killed_by do_sync

/* lwsync : Creates a memory barrier that provides the same ordering function as the sync instruction, except that a
 * load caused by an instruction following the lwsync may be performed before a store caused by an instruction that
 * precedes the lwsync, and the ordering does not apply to accesses to I/O memory (memory-mapped I/O).
 * lwsync is a new variant of the sync instruction and is interpreted by older processors as a sync. The instruction,
 * as its name implies, has much less performance impact than sync, and is recommended for synchronization of most
 * memory (but not I/O) references.
 */
#pragma mc_func do_lwsync{"7c2004ac"}
#pragma reg_killed_by do_lwsync

/* eieio : Creates a memory barrier that provides the same ordering function as the sync instruction, except that
 * ordering applies only to accesses to I/O memory.
 */
#pragma mc_func do_eieio{"7c0006ac"}
#pragma reg_killed_by do_eieio

/* isync : Causes the processor to discard any prefetched (and possibly speculatively executed) instructions and
 * refetch the next following instructions. It is used in locking code (e.g. __check_lock()) to ensure that no
 * loads following entry into a critical section can access data (because of aggressive out-of-order and speculative
 * execution in the processor) before the lock is acquired.
 */
#pragma mc_func do_isync{"4c00012c"}
#pragma reg_killed_by do_isync

#define SHM_WRITE_MEMORY_BARRIER								\
{	/* Ensure that code does not rely on "loads" following lwsync in program sequence	\
	 * being ordered after "stores" preceding it. Use do_sync() if such ordering is		\
	 * required. Replication code (t_end.c, tp_end.c) does not rely on store-load order	\
	 * across the memory barrier. Note that grab/rel_lock() perform "sync" (via a call	\
	 * to _clear_lock()), and so we are guaranteed strict ordering of loads and stores	\
	 * in code that reads/writes the journal pool in transaction logic.			\
	 */											\
	do_lwsync();										\
}

#define SHM_READ_MEMORY_BARRIER	\
{				\
	do_isync();		\
}
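/* Illustrative sketch, not part of GT.M: the lwsync-based write barrier above does NOT order a store before the
 * barrier against a load after it. Hypothetical Dekker-style intent flags (me->intent, other->intent) show the hazard:
 *
 *	me->intent = TRUE;
 *	SHM_WRITE_MEMORY_BARRIER;		(lwsync: the load of other->intent below may still be
 *	if (!other->intent)			 performed before the store to me->intent is visible,
 *		enter_critical_section();	 so both processes could enter)
 *
 * Code needing store-load ordering would have to call do_sync() instead, as the comment inside
 * SHM_WRITE_MEMORY_BARRIER notes.
 */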
#elif defined(__hppa)
/* For _PA_RISC1_0 and _PA_RISC1_1, accesses to the address space (both to memory and I/O) through load, store and
 * semaphore instructions are strongly ordered. This means that accesses appear to software to be done in program order.
 * For _PA_RISC2_0, accesses could be "strongly ordered", "ordered", or "weakly ordered" (read PA-RISC 2.0 ARCHITECTURE
 * by Gerry Kane, appendix "Memory Ordering Model").
 *
 * For all PA-RISC architectures, cache flush operations are weakly ordered. Flushes may be delayed or held pending, and
 * a sequence of flush operations may be executed in any order.
 *
 * SYNC : Enforces program order of memory references.
 * Any load, store, semaphore, cache flush, or cache purge instructions that follow the SYNC instruction get executed
 * only after all such instructions prior to the SYNC instruction have completed executing. On implementations which
 * execute such instructions out of sequence, this instruction enforces program ordering. On systems in which all memory
 * references are performed in order, this instruction executes as a null instruction.
 *
 * IMPORTANT: the SYNC instruction enforces ordering of only those accesses caused by instructions executed on the
 * same processor which executes the SYNC instruction.
 *
 * [Vinaya] Research results: Accesses to (global) fields that are defined volatile are ordered (the compiler generates
 * LDW (or STW) instructions with the O (ordered) completer, i.e., the instructions generated are LDW,O and STW,O).
 * Depending on the requirements, it may be sufficient to define shared fields as volatile to enforce ordering. With
 * replication though, it is important that pending cache flushes are completed so that the source server sees the
 * transaction data in its entirety.
 */
#define SHM_WRITE_MEMORY_BARRIER	(void)_asm("SYNC")
#define SHM_READ_MEMORY_BARRIER		SHM_WRITE_MEMORY_BARRIER	/* same SYNC instruction for both read and write
									 * barriers. For read, we want all cache purges to be
									 * completed before we load shared fields.
									 */
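/* Illustrative sketch, not part of GT.M: per the [Vinaya] note above, on PA-RISC it may suffice to declare a shared
 * field volatile when only load/store ordering (and not completion of pending cache flushes) is needed; the compiler
 * then emits ordered accesses for that field. The structure below is hypothetical:
 *
 *	typedef struct
 *	{
 *		volatile int	data_ready;	<- accesses compiled as STW,O / LDW,O
 *		int		data;
 *	} shm_example_t;
 */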
#elif defined(__ia64)
#if defined(__hpux)
#include <machine/sys/inline.h>
#define SHM_WRITE_MEMORY_BARRIER	_MF()
#elif defined(__linux__) && defined(__INTEL_COMPILER)
# define SHM_WRITE_MEMORY_BARRIER	__mf()
#elif defined(__linux__)	/* gcc */
# define SHM_WRITE_MEMORY_BARRIER	__asm__ __volatile__ ("mf" ::: "memory")
#endif /* __hpux, __linux__ */
/* On IA64, cross-processor notifications of write barriers are automatic so no read barrier is necessary */
#define SHM_READ_MEMORY_BARRIER
#else /* SPARC, I386, S390 */
/* Although the SPARC architecture allows for out-of-order memory accesses, Solaris forces strong ordering on memory
 * accesses, so we do not need memory barrier primitives on Solaris/SPARC.
 */
/* Memory accesses in the Intel x86 and IBM S390 architectures are strongly ordered */
#define SHM_WRITE_MEMORY_BARRIER
#define SHM_READ_MEMORY_BARRIER
#endif

#if !defined(SECSHR_SHM_WRITE_MEMORY_BARRIER)
#define SECSHR_SHM_WRITE_MEMORY_BARRIER	SHM_WRITE_MEMORY_BARRIER	/* default definition */
#endif
#if !defined(SECSHR_SHM_READ_MEMORY_BARRIER)
#define SECSHR_SHM_READ_MEMORY_BARRIER	SHM_READ_MEMORY_BARRIER		/* default definition */
#endif

#endif /* MEMCOHERENCY_H_INCLUDED */
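/* Illustrative usage sketch, not part of GT.M (the fields shm->data and shm->data_ready are hypothetical):
 * whatever the platform-specific definitions above expand to, code publishing data in shared memory pairs the
 * two barriers as follows:
 *
 *	Writer:					Reader:
 *		shm->data = value;			while (!shm->data_ready)
 *		SHM_WRITE_MEMORY_BARRIER;			;
 *		shm->data_ready = TRUE;			SHM_READ_MEMORY_BARRIER;
 *							use(shm->data);
 *
 * The write barrier ensures the payload store becomes visible no later than the flag store; the read barrier
 * prevents the payload load from being satisfied (by speculation or a stale cache line) before the flag load.
 */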