/* fis-gtm/sr_port/memcoherency.h */
/****************************************************************
 *                                                              *
 *      Copyright 2003, 2010 Fidelity Information Services, Inc *
 *                                                              *
 *      This source code contains the intellectual property     *
 *      of its copyright holder(s), and is made available       *
 *      under a license.  If you do not know the terms of       *
 *      the license, please stop and do not read further.       *
 *                                                              *
 ****************************************************************/
#ifndef MEMCOHERENCY_H_INCLUDED
#define MEMCOHERENCY_H_INCLUDED
/* For uniprocessor systems, no "memory barrier" is needed since memory is always coherent.
 * But we almost always expect to be running on a multi-processor system, so we avoid the cost
 * of the if-check and do the memory barrier ALWAYS.
 */
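/* A minimal usage sketch of how these barriers pair up (illustrative only; shm, data, data_ready
 * and use() are hypothetical names, not GT.M structures): a writer fills in a payload in shared
 * memory and then sets a flag; a reader tests the flag and then consumes the payload. The write
 * barrier keeps the payload store from being reordered after the flag store; the read barrier
 * keeps the payload load from being satisfied before the flag load.
 */
#if 0	/* illustrative only, never compiled */
	/* writer */
	shm->data = payload;			/* 1: fill in the payload */
	SHM_WRITE_MEMORY_BARRIER;		/* 2: make the payload visible before the flag */
	shm->data_ready = 1;			/* 3: publish */

	/* reader */
	if (shm->data_ready)
	{
		SHM_READ_MEMORY_BARRIER;	/* payload loads must not predate the flag load */
		use(shm->data);
	}
#endif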
#ifdef __alpha
#include <c_asm.h>
/* Read the Alpha Architecture Reference Manual, edited by Richard L. Sites,
 * chapter "System Architecture and Programming Implications", for memory
 * coherency issues and the behavior of the "mb" (memory barrier) instruction.
 */
/* NOTES about Alpha (pp. 5-20, section 5.6.5 Implications for Hardware, Chapter 5 System Architecture and Programming
* Implications, Alpha Architecture Reference Manual, Edited by Richard L Sites
*
* MB and IMB force all preceding writes to at least reach their respective coherency points. This does not mean that
* main-memory writes have been done, just that the order of the eventual writes is committed. MB and IMB also force all
* queued cache invalidates to be delivered to the local caches before starting any subsequent reads (that may otherwise
 * cache hit on stale data) or writes (that may otherwise write the cache, only to have the write effectively overwritten
 * by a late-delivered invalidate).
*/
#define SHM_WRITE_MEMORY_BARRIER asm("mb")
#define SHM_READ_MEMORY_BARRIER SHM_WRITE_MEMORY_BARRIER /* same MB instruction for both read and write barriers */
#ifdef __vms
#define SECSHR_SHM_WRITE_MEMORY_BARRIER asm("mb")
#define SECSHR_SHM_READ_MEMORY_BARRIER SECSHR_SHM_WRITE_MEMORY_BARRIER
#endif /* __vms */
#elif defined(POWER) || defined(PWRPC) /* GT.M defines POWER and PWRPC if _AIX is defined, see sr_rs6000/mdefsp.h */
/* Refer to article "POWER4 and shared memory synchronization by R. William Hay and Gary R. Hook" available at
* http://www-106.ibm.com/developerworks/eserver/articles/power4_mem.html
*/
/* prototypes */
void do_sync(void);
void do_lwsync(void);
void do_eieio(void);
void do_isync(void);
/* The machine codes were fetched from http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html */
/* sync : Creates a memory barrier. On a given processor, any load or store instructions ahead of the sync instruction
* in the program sequence must complete their accesses to memory first, and then any load or store instructions after
* sync can begin
*/
#pragma mc_func do_sync{"7c0004ac"}
#pragma reg_killed_by do_sync
/* lwsync : Creates a memory barrier that provides the same ordering function as the sync instruction, except that a
* load caused by an instruction following the lwsync may be performed before a store caused by an instruction that
* precedes the lwsync, and the ordering does not apply to accesses to I/O memory (memory-mapped I/O).
* lwsync is a new variant of the sync instruction and is interpreted by older processors as a sync. The instruction,
 * as its name implies, has much less performance impact than sync, and is recommended for synchronization of most
* memory (but not I/O) references.
*/
#pragma mc_func do_lwsync{"7c2004ac"}
#pragma reg_killed_by do_lwsync
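/* A hedged sketch of the lwsync limitation noted above (flag1, flag2, r0 and r1 are hypothetical
 * names): lwsync does NOT order a store before it against a load after it, so a Dekker-style
 * handshake still needs the full sync.
 */
#if 0	/* illustrative only, never compiled */
	/* CPU 0 */				/* CPU 1 */
	flag1 = 1;				flag2 = 1;
	do_lwsync();				do_lwsync();
	r0 = flag2;				r1 = flag1;
	/* Both r0 and r1 may end up 0: each CPU's load can be satisfied before its store is
	 * visible to the other. Substituting do_sync() for do_lwsync() rules that outcome out.
	 */
#endif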
/* eieio : Creates a memory barrier that provides the same ordering function as the sync instruction except that
* ordering applies only to accesses to I/O memory
*/
#pragma mc_func do_eieio{"7c0006ac"}
#pragma reg_killed_by do_eieio
/* isync : Causes the processor to discard any prefetched (and possibly speculatively executed) instructions and
 * refetch the instructions that follow. It is used in locking code (e.g. _check_lock()) to ensure that no
* loads following entry into a critical section can access data (because of aggressive out-of-order and speculative
* execution in the processor) before the lock is acquired.
*/
#pragma mc_func do_isync{"4c00012c"}
#pragma reg_killed_by do_isync
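/* A minimal sketch of the locking idiom described above (try_acquire() and lockword are
 * hypothetical; GT.M's actual path goes through _check_lock()/_clear_lock()): once the lock is
 * observed held, isync discards speculatively executed loads so that no protected-data read can
 * have been satisfied before the lock was acquired.
 */
#if 0	/* illustrative only, never compiled */
	while (!try_acquire(&lockword))		/* hypothetical atomic test-and-set */
		;				/* spin until the lock is ours */
	do_isync();				/* discard speculative loads issued before the acquire */
	/* ... read/write the protected shared data ... */
	do_lwsync();				/* order critical-section stores before the release */
	lockword = 0;				/* release */
#endif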
#define SHM_WRITE_MEMORY_BARRIER \
{ /* Note: code must not rely on "loads" that follow lwsync in program sequence being ordered   */ \
  /* after "stores" that precede it. Use do_sync() if such store-load ordering is required.     */ \
  /* Replication code (t_end.c, tp_end.c) does not rely on store-load order across the memory   */ \
  /* barrier. Note that grab/rel_lock() perform "sync" (via a call to _clear_lock()), so we are */ \
  /* guaranteed strict ordering of loads and stores in code that reads/writes the journal pool  */ \
  /* in transaction logic.                                                                      */ \
do_lwsync(); \
}
#define SHM_READ_MEMORY_BARRIER \
{ \
do_isync(); \
}
#elif defined(__hppa)
/* For _PA_RISC1_0, _PA_RISC1_1, accesses to the address space (both to memory and I/O) through load, store and
 * semaphore instructions are strongly ordered. This means that accesses appear to software to be done in program order.
* For _PA_RISC2_0, accesses could be "strongly ordered", "ordered", or "weakly ordered" (read PA-RISC 2.0 ARCHITECTURE
* by Gerry Kane, appendix "Memory Ordering Model").
*
* For all PA-RISC architectures, cache flush operations are weakly ordered. Flushes may be delayed or held pending, and
* a sequence of flush operations may be executed in any order.
*
* SYNC : Enforce program order of memory references
* Any load, store, semaphore, cache flush, or cache purge instructions that follow the SYNC instruction get executed
* only after all such instructions prior to the SYNC instruction have completed executing. On implementations which
 * execute such instructions out of sequence, this instruction enforces program ordering. In systems in which all memory
* references are performed in order, this instruction executes as a null instruction.
*
* IMPORTANT: SYNC instruction enforces ordering of only those accesses caused by the instructions executed on the
* same processor which executes the SYNC instruction.
*
 * [Vinaya] Research results: accesses to global fields that are defined volatile are ordered (the compiler generates
 * LDW (or STW) instructions with the O (ordered) completer, i.e., the instructions generated are LDW,O (STW,O)). Depending
 * on the requirements, it may be sufficient to define shared fields as volatile to enforce ordering. With replication,
 * though, it is important that pending cache flushes are completed so that the source server sees the transaction data
 * in its entirety.
*/
#define SHM_WRITE_MEMORY_BARRIER (void)_asm("SYNC")
#define SHM_READ_MEMORY_BARRIER SHM_WRITE_MEMORY_BARRIER /* same SYNC instruction for both read and write barriers.
* For read, we want all cache purges to be completed before
* we load shared fields */
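/* A hedged illustration of the [Vinaya] note above (the struct and field names are hypothetical):
 * declaring a shared field volatile makes the HP compiler emit the ordered-completer forms LDW,O
 * and STW,O for accesses to that field, which may suffice for ordering when no pending cache
 * flush must be forced to completion.
 */
#if 0	/* illustrative only, never compiled */
	typedef struct shared_hdr_struct
	{
		volatile int	shared_flag;	/* accessed via LDW,O / STW,O: ordered */
		int		other_data;	/* plain LDW / STW: may be reordered */
	} shared_hdr;
#endif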
#elif defined(__ia64)
#if defined(__hpux)
#include <machine/sys/kern_inline.h>
# define SHM_WRITE_MEMORY_BARRIER _MF()
#elif defined(__linux__) && defined(__INTEL_COMPILER)
# define SHM_WRITE_MEMORY_BARRIER __mf()
#elif defined(__linux__) /* gcc */
# define SHM_WRITE_MEMORY_BARRIER __asm__ __volatile__ ("mf" ::: "memory")
#endif /* __linux__ */
/* On IA64, cross-processor notification of write barriers is automatic, so no read barrier is necessary */
#define SHM_READ_MEMORY_BARRIER
#else /* SPARC, I386, S390 */
/* Although the SPARC architecture allows for out-of-order memory accesses, Solaris forces strong ordering on memory
 * accesses, so we do not need memory barrier primitives on Solaris/SPARC.
 */
/* Memory accesses in the Intel x86 and IBM S390 architectures are strongly ordered */
#define SHM_WRITE_MEMORY_BARRIER
#define SHM_READ_MEMORY_BARRIER
#endif
#if !defined(SECSHR_SHM_WRITE_MEMORY_BARRIER)
#define SECSHR_SHM_WRITE_MEMORY_BARRIER SHM_WRITE_MEMORY_BARRIER /* default definition */
#endif
#if !defined(SECSHR_SHM_READ_MEMORY_BARRIER)
#define SECSHR_SHM_READ_MEMORY_BARRIER SHM_READ_MEMORY_BARRIER /* default definition */
#endif
#endif /* MEMCOHERENCY_H_INCLUDED */