2012-02-05 11:35:58 -05:00
|
|
|
/****************************************************************
|
|
|
|
* *
|
2012-03-24 14:06:46 -04:00
|
|
|
* Copyright 2001, 2012 Fidelity Information Services, Inc *
|
2012-02-05 11:35:58 -05:00
|
|
|
* *
|
|
|
|
* This source code contains the intellectual property *
|
|
|
|
* of its copyright holder(s), and is made available *
|
|
|
|
* under a license. If you do not know the terms of *
|
|
|
|
* the license, please stop and do not read further. *
|
|
|
|
* *
|
|
|
|
****************************************************************/
|
|
|
|
|
|
|
|
#ifndef GDSBT_H
|
|
|
|
#define GDSBT_H
|
|
|
|
/* this requires gdsroot.h */
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#ifdef MUTEX_MSEM_WAKE
|
|
|
|
#ifdef POSIX_MSEM
|
|
|
|
# include <semaphore.h>
|
|
|
|
#else
|
|
|
|
# include <sys/mman.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef VMS
|
|
|
|
# include <ssdef.h> /* for SS$_WASSET */
|
|
|
|
# include "ast.h" /* for ENABLE/DISABLE */
|
|
|
|
#elif defined(UNIX)
|
|
|
|
# include <gtm_limits.h> /* for _POSIX_HOST_NAME_MAX */
|
|
|
|
# if defined(__osf__)
|
|
|
|
# include <sys/param.h> /* for _POSIX_HOST_NAME_MAX */
|
|
|
|
# elif defined(SUNOS) && !defined(_POSIX_HOST_NAME_MAX)
|
|
|
|
# include <netdb.h> /* for MAXHOSTNAMELEN (Solaris 9) */
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "gvstats_rec.h"
|
|
|
|
|
|
|
|
#define CR_NOTVALID (-1L)
|
|
|
|
|
|
|
|
#define GTM64_WC_MAX_BUFFS (2*1024*1024)-1 /* to fit in an int4 */
|
|
|
|
#define WC_MAX_BUFFS 64*1024
|
|
|
|
|
|
|
|
#define WC_DEF_BUFFS 128
|
|
|
|
#define WC_MIN_BUFFS 64
|
|
|
|
|
|
|
|
#define MAX_LOCK_SPACE 65536 /* need to change these whenever global directory defaults change */
|
|
|
|
#define MIN_LOCK_SPACE 10
|
|
|
|
|
|
|
|
#define MAX_REL_NAME 36
|
|
|
|
#define MAX_MCNAMELEN 256 /* We do not support hostname truncation */
|
|
|
|
#if defined(UNIX)
|
|
|
|
# if defined(_POSIX_HOST_NAME_MAX)
|
|
|
|
# if MAX_MCNAMELEN <= _POSIX_HOST_NAME_MAX /* _POSIX_HOST_NAME_MAX excludes terminating NULL */
|
|
|
|
# error MAX_MCNAMELEN is not greater than _POSIX_HOST_NAME_MAX.
|
|
|
|
# endif
|
|
|
|
# elif defined(MAXHOSTNAMELEN)
|
|
|
|
# if MAX_MCNAMELEN < MAXHOSTNAMELEN /* MAXHOSTNAMELEN includes terminating NULL */
|
|
|
|
# error MAX_MCNAMELEN is less than MAXHOSTNAMELEN.
|
|
|
|
# endif
|
|
|
|
# else
|
|
|
|
# error _POSIX_HOST_NAME_MAX or MAXHOSTNAMELEN not defined.
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define GDS_LABEL_SZ 12
|
|
|
|
|
|
|
|
#define MAX_DB_WTSTARTS 2 /* Max number of "flush-timer driven" simultaneous writers in wcs_wtstart */
|
|
|
|
#define MAX_WTSTART_PID_SLOTS 4 * MAX_DB_WTSTARTS /* Max number of PIDs for wcs_wtstart to save */
|
|
|
|
#define MAX_KIP_PID_SLOTS 8
|
|
|
|
|
|
|
|
#define BT_FACTOR(X) (X)
|
|
|
|
#define FLUSH_FACTOR(X) ((X)-(X)/16)
|
|
|
|
#define BT_QUEHEAD (-2)
|
|
|
|
#define BT_NOTVALID (-1)
|
|
|
|
#define BT_MAXRETRY 3
|
|
|
|
#define BT_SIZE(X) ((((sgmnt_data_ptr_t)X)->bt_buckets + 1 + ((sgmnt_data_ptr_t)X)->n_bts) * SIZEOF(bt_rec))
|
|
|
|
/* parameter is *sgmnt_data*/
|
|
|
|
/* note that the + 1 above is for the th_queue head which falls between the hash table and the */
|
|
|
|
/* actual bts */
|
|
|
|
#define HEADER_UPDATE_COUNT 1024
|
|
|
|
#define LAST_WBOX_SEQ_NUM 1000
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
trans_num curr_tn;
|
|
|
|
trans_num early_tn;
|
|
|
|
trans_num last_mm_sync; /* Last tn where a full mm sync was done */
|
|
|
|
char filler_8byte[8]; /* previously header_open_tn but no longer used.
|
|
|
|
* cannot remove as this is part of database file header */
|
|
|
|
trans_num mm_tn; /* Used to see if CCP must update master map */
|
|
|
|
uint4 lock_sequence; /* Used to see if CCP must update lock section */
|
|
|
|
uint4 ccp_jnl_filesize; /* Passes size of journal file if extended */
|
|
|
|
volatile uint4 total_blks; /* Placed here so can be passed to other machines on cluster */
|
|
|
|
volatile uint4 free_blocks;
|
|
|
|
} th_index;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
sm_off_t fl;
|
|
|
|
sm_off_t bl; /* self-relative queue entry */
|
|
|
|
} blkque, tnque; /* for block number hash, lru queue */
|
|
|
|
trans_num tn; /* transaction # #*/
|
|
|
|
trans_num killtn; /* last transaction when this block was updated as part of an M-kill */
|
|
|
|
block_id blk; /* block #*/
|
|
|
|
int4 cache_index;
|
|
|
|
bool flushing; /* buffer is being flushed after a machine switch on a cluster */
|
|
|
|
char filler[3];
|
|
|
|
int4 filler_int4; /* maintain 8 byte alignment */
|
|
|
|
} bt_rec; /* block table record */
|
|
|
|
|
|
|
|
/* This structure is used to access the transaction queue. It points at all but the
|
|
|
|
first two longwords of a bt_rec. CAUTION: there is no such thing as a queue of
|
|
|
|
th_recs, they are always bt_recs, and the extra two longwords are always there */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
sm_off_t fl;
|
|
|
|
sm_off_t bl;
|
|
|
|
} tnque;
|
|
|
|
trans_num tn;
|
|
|
|
trans_num killtn; /* last transaction when this block was updated as part of an M-kill */
|
|
|
|
block_id blk;
|
|
|
|
int4 cache_index;
|
|
|
|
bool flushing;
|
|
|
|
char filler[3];
|
|
|
|
int4 filler_int4; /* maintain 8 byte alignment */
|
|
|
|
} th_rec;
|
|
|
|
|
|
|
|
/* This structure is used to maintain all cache records. The BT queue contains
|
|
|
|
a history of those blocks that have been updated. */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Definitions for GT.M Mutex Control
|
|
|
|
*
|
|
|
|
* See MUTEX.MAR for VMS functional implementation
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
sm_off_t fl,
|
|
|
|
bl;
|
|
|
|
} que;
|
|
|
|
int4 pid;
|
|
|
|
void *super_crit;
|
|
|
|
#if defined(UNIX)
|
|
|
|
/*
|
|
|
|
* If the following fields are to be made part of VMS too, change
|
|
|
|
* size of mutex_que_entry in mutex.mar (lines 118, 152).
|
|
|
|
* Make sure that the size of mutex_que_entry is a multiple of 8 bytes
|
|
|
|
* for quadword alignment requirements of remqhi and insqti.
|
|
|
|
*/
|
|
|
|
int4 mutex_wake_instance;
|
|
|
|
int4 filler1; /* for dword alignment */
|
|
|
|
#ifdef MUTEX_MSEM_WAKE
|
|
|
|
#ifdef POSIX_MSEM
|
|
|
|
sem_t mutex_wake_msem; /* Not two ints .. somewhat larger */
|
|
|
|
#else
|
|
|
|
msemaphore mutex_wake_msem; /* Two ints (incidentally two int4s) */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
} mutex_que_entry;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
sm_off_t fl,
|
|
|
|
bl;
|
|
|
|
} que;
|
|
|
|
global_latch_t latch;
|
|
|
|
} mutex_que_head;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
#if defined(UNIX)
|
|
|
|
global_latch_t semaphore;
|
|
|
|
CACHELINE_PAD(SIZEOF(global_latch_t), 1)
|
|
|
|
latch_t crashcnt;
|
|
|
|
int4 filler1; /* for alignment */
|
|
|
|
global_latch_t crashcnt_latch;
|
|
|
|
CACHELINE_PAD(SIZEOF(latch_t) + SIZEOF(latch_t) + SIZEOF(global_latch_t), 2)
|
|
|
|
latch_t queslots;
|
|
|
|
int4 filler2; /* for alignment */
|
|
|
|
CACHELINE_PAD(SIZEOF(latch_t) + SIZEOF(latch_t), 3)
|
|
|
|
mutex_que_head prochead;
|
|
|
|
CACHELINE_PAD(SIZEOF(mutex_que_head), 4)
|
|
|
|
mutex_que_head freehead;
|
|
|
|
#elif defined(VMS)
|
|
|
|
short semaphore;
|
|
|
|
unsigned short wrtpnd;
|
|
|
|
short crashcnt,
|
|
|
|
queslots;
|
|
|
|
#ifdef __alpha
|
|
|
|
/* use constant instead of defining CACHELINE_SIZE since we do not want to affect other structures
|
|
|
|
the 64 must match padding in mutex.mar and mutex_stoprel.mar */
|
|
|
|
char filler1[64 - SIZEOF(short)*4];
|
|
|
|
#endif
|
|
|
|
mutex_que_entry prochead;
|
|
|
|
#ifdef __alpha
|
|
|
|
char filler2[64 - SIZEOF(mutex_que_entry)];
|
|
|
|
#endif
|
|
|
|
mutex_que_entry freehead;
|
|
|
|
#else
|
|
|
|
#error UNSUPPORTED PLATFORM
|
|
|
|
#endif
|
|
|
|
} mutex_struct;
|
|
|
|
|
|
|
|
typedef struct { /* keep this structure and member offsets defined in sr_avms/mutex.mar in sync */
|
|
|
|
int4 mutex_hard_spin_count;
|
|
|
|
int4 mutex_sleep_spin_count;
|
|
|
|
int4 mutex_spin_sleep_mask;
|
|
|
|
int4 filler1;
|
|
|
|
} mutex_spin_parms_struct;
|
|
|
|
|
|
|
|
enum crit_ops
|
|
|
|
{ crit_ops_gw = 1, /* grab [write] crit */
|
|
|
|
crit_ops_rw, /* rel [write] crit */
|
|
|
|
crit_ops_nocrit /* did a rel_crit when now_crit flag was off */
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
caddr_t call_from;
|
|
|
|
enum crit_ops crit_act;
|
|
|
|
int4 epid;
|
2012-03-24 14:06:46 -04:00
|
|
|
trans_num curr_tn;
|
2012-02-05 11:35:58 -05:00
|
|
|
} crit_trace;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
sm_off_t cr_off;
|
|
|
|
trans_num cr_tn;
|
|
|
|
uint4 process_id;
|
|
|
|
block_id blk;
|
|
|
|
uint4 cycle;
|
|
|
|
} dskread_trace;
|
|
|
|
|
|
|
|
#define OP_LOCK_SIZE 4
|
|
|
|
|
|
|
|
/* Structure to hold lock history */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
sm_int_ptr_t lock_addr; /* Address of actual lock */
|
|
|
|
caddr_t lock_callr; /* Address of (un)locker */
|
|
|
|
int4 lock_pid; /* Process id of (un)locker */
|
|
|
|
int4 loop_cnt; /* iteration count of lock retry loop */
|
|
|
|
char lock_op[OP_LOCK_SIZE]; /* Operation performed (either OBTN or RLSE) */
|
|
|
|
} lockhist;
|
|
|
|
|
|
|
|
enum ftok_ops
|
|
|
|
{
|
|
|
|
ftok_ops_lock = 1,
|
|
|
|
ftok_ops_release
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
enum ftok_ops ftok_oper;
|
|
|
|
uint4 process_id;
|
|
|
|
trans_num cr_tn;
|
|
|
|
} ftokhist;
|
|
|
|
|
|
|
|
#define DSKREAD_OPS_ARRAY_SIZE 512
|
|
|
|
#define CRIT_OPS_ARRAY_SIZE 512
|
|
|
|
#define LOCKHIST_ARRAY_SIZE 512
|
|
|
|
#define SECSHR_OPS_ARRAY_SIZE 1023 /* 1 less than 1K to accommodate the variable secshr_ops_index */
|
|
|
|
#define FTOK_OPS_ARRAY_SIZE 512
|
|
|
|
|
|
|
|
/* SECSHR_ACCOUNTING macro assumes csa->nl is dereferencible and does accounting if variable "do_accounting" is set to TRUE */
|
|
|
|
#define SECSHR_ACCOUNTING(value) \
|
|
|
|
{ \
|
|
|
|
if (do_accounting) \
|
|
|
|
{ \
|
|
|
|
if (csa->nl->secshr_ops_index < SECSHR_OPS_ARRAY_SIZE) \
|
|
|
|
csa->nl->secshr_ops_array[csa->nl->secshr_ops_index] = (gtm_uint64_t)(value); \
|
|
|
|
csa->nl->secshr_ops_index++; \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Mapped space local to each node on the cluster */
|
|
|
|
typedef struct node_local_struct
|
|
|
|
{
|
|
|
|
unsigned char label[GDS_LABEL_SZ]; /* 12 signature for GDS shared memory */
|
|
|
|
unsigned char fname[MAX_FN_LEN + 1]; /* 256 filename of corresponding database */
|
|
|
|
char now_running[MAX_REL_NAME]; /* 36 current active GT.M version stamp */
|
|
|
|
char machine_name[MAX_MCNAMELEN]; /* 256 machine name for clustering */
|
|
|
|
sm_off_t bt_header_off; /* (QW alignment) offset to hash table */
|
|
|
|
sm_off_t bt_base_off; /* bt first entry */
|
|
|
|
sm_off_t th_base_off;
|
|
|
|
sm_off_t cache_off;
|
|
|
|
sm_off_t cur_lru_cache_rec_off; /* current LRU cache_rec pointer offset */
|
|
|
|
sm_off_t critical;
|
|
|
|
sm_off_t jnl_buff;
|
|
|
|
sm_off_t shmpool_buffer; /* Shared memory buffer pool area */
|
|
|
|
sm_off_t lock_addrs;
|
|
|
|
sm_off_t hdr; /* Offset to file-header (BG mode ONLY!) */
|
|
|
|
volatile int4 in_crit;
|
|
|
|
int4 in_reinit;
|
|
|
|
unsigned short ccp_cycle;
|
|
|
|
unsigned short filler; /* Align for ccp_cycle. Not changing to int
|
|
|
|
as that would perturb to many things at this point */
|
|
|
|
boolean_t ccp_crit_blocked;
|
|
|
|
int4 ccp_state;
|
|
|
|
boolean_t ccp_jnl_closed;
|
|
|
|
boolean_t glob_sec_init;
|
|
|
|
uint4 wtstart_pid[MAX_WTSTART_PID_SLOTS]; /* Maintain pids of wcs_wtstart processes */
|
|
|
|
int4 filler8_int; /* 8-byte alignment filler */
|
|
|
|
global_latch_t wc_var_lock; /* latch used for access to various wc_* ref counters */
|
|
|
|
CACHELINE_PAD(SIZEOF(global_latch_t), 1) /* Keep these two latches in separate cache lines */
|
|
|
|
global_latch_t db_latch; /* latch for interlocking on hppa and tandem */
|
|
|
|
CACHELINE_PAD(SIZEOF(global_latch_t), 2)
|
|
|
|
int4 cache_hits;
|
|
|
|
int4 wc_in_free; /* number of write cache records in free queue */
|
|
|
|
/* All the counters below (declared using CNTR4DCL are 4-byte counters. We would like to keep them in separate
|
|
|
|
* cachelines on load-lock/store-conditional platforms particularly and other platforms too just to be safe.
|
|
|
|
*/
|
|
|
|
volatile CNTR4DCL(wcs_timers, 1); /* number of write cache timers in use - 1 */
|
|
|
|
CACHELINE_PAD(4, 3)
|
|
|
|
volatile CNTR4DCL(wcs_active_lvl, 2); /* number of entries in active queue */
|
|
|
|
CACHELINE_PAD(4, 4)
|
|
|
|
volatile CNTR4DCL(wcs_staleness, 3);
|
|
|
|
CACHELINE_PAD(4, 5)
|
|
|
|
volatile CNTR4DCL(ref_cnt, 4); /* reference count. How many people are using the database */
|
|
|
|
CACHELINE_PAD(4, 6)
|
|
|
|
volatile CNTR4DCL(intent_wtstart, 5); /* Count of processes that INTEND to enter wcs_wtstart code */
|
|
|
|
CACHELINE_PAD(4, 7)
|
|
|
|
volatile CNTR4DCL(in_wtstart, 6); /* Count of processes that are INSIDE wcs_wtstart code */
|
|
|
|
CACHELINE_PAD(4, 8)
|
|
|
|
volatile CNTR4DCL(wcs_phase2_commit_pidcnt, 7); /* number of processes actively finishing phase2 commit */
|
|
|
|
CACHELINE_PAD(4, 9)
|
|
|
|
int4 mm_extender_pid; /* pid of the process executing gdsfilext in MM mode */
|
|
|
|
int4 highest_lbm_blk_changed; /* Records highest local bit map block that
|
|
|
|
changed so we know how much of master bit
|
|
|
|
map to write out. Modified only under crit */
|
|
|
|
int4 nbb; /* Next backup block -- for online backup */
|
|
|
|
int4 lockhist_idx; /* (DW alignment) "circular" index into lockhists array */
|
|
|
|
int4 crit_ops_index; /* "circular" index into crit_ops_array */
|
|
|
|
int4 dskread_ops_index; /* "circular" index into dskread_ops_array */
|
|
|
|
int4 ftok_ops_index; /* "circular" index into ftok_ops_array */
|
|
|
|
lockhist lockhists[LOCKHIST_ARRAY_SIZE]; /* Keep lock histories here */
|
|
|
|
crit_trace crit_ops_array[CRIT_OPS_ARRAY_SIZE]; /* space for CRIT_TRACE macro to record info */
|
|
|
|
dskread_trace dskread_ops_array[DSKREAD_OPS_ARRAY_SIZE]; /* space for DSKREAD_TRACE macro to record info */
|
|
|
|
unique_file_id unique_id;
|
|
|
|
uint4 owner_node;
|
|
|
|
volatile int4 wcsflu_pid; /* pid of the process executing wcs_flu in BG mode */
|
|
|
|
int4 creation_date_time4; /* Lower order 4-bytes of database's creation time to be
|
|
|
|
* compared at sm attach time */
|
|
|
|
int4 inhibit_kills; /* inhibit new KILLs while MUPIP BACKUP, INTEG or FREEZE are
|
|
|
|
* waiting for kill-in-progress to become zero
|
|
|
|
*/
|
|
|
|
boolean_t remove_shm; /* can this shm be removed by the last process to rundown */
|
|
|
|
union
|
|
|
|
{
|
|
|
|
gds_file_id jnl_file_id; /* needed on UNIX to hold space */
|
|
|
|
unix_file_id u; /* from gdsroot.h even for VMS */
|
|
|
|
} jnl_file; /* Note that in versions before V4.3-001B, "jnl_file" used to be a member of sgmnt_data.
|
|
|
|
* Now it is a filler there and rightly used here since it is non-zero only when shared memory is active.
|
|
|
|
*/
|
|
|
|
boolean_t donotflush_dbjnl; /* whether database and journal can be flushed to disk or not (TRUE for mupip recover) */
|
|
|
|
int4 n_pre_read;
|
|
|
|
char replinstfilename[MAX_FN_LEN + 1];/* 256 : Name of the replication instance file corresponding to this db.
|
|
|
|
* In VMS, this is the name of the corresponding global directory.
|
|
|
|
*/
|
|
|
|
int4 secshr_ops_index;
|
|
|
|
gtm_uint64_t secshr_ops_array[SECSHR_OPS_ARRAY_SIZE]; /* taking up 8K */
|
|
|
|
gvstats_rec_t gvstats_rec;
|
|
|
|
trans_num last_wcsflu_tn; /* curr_tn when last wcs_flu was done on this database */
|
|
|
|
sm_off_t encrypt_glo_buff_off; /* offset from unencrypted global buffer to its encrypted counterpart */
|
|
|
|
global_latch_t snapshot_crit_latch; /* To be acquired by any process that wants to clean up an orphaned snapshot or
|
|
|
|
* initiate a new snapshot
|
|
|
|
*/
|
|
|
|
long ss_shmid; /* Identifier of the shared memory for the snapshot that started
|
|
|
|
* recently.
|
|
|
|
*/
|
|
|
|
uint4 ss_shmcycle; /* incremented everytime a new snapshot creates a new shared memory identifier */
|
|
|
|
boolean_t snapshot_in_prog; /* Tells GT.M if any snapshots are in progress */
|
|
|
|
uint4 num_snapshots_in_effect; /* how many snapshots are currently in place for this region */
|
|
|
|
uint4 wbox_test_seq_num; /* used to coordinate with sequential testing steps */
|
2012-03-24 14:06:46 -04:00
|
|
|
NON_GTM64_ONLY(int4 filler_8byte_align;) /* To align the following member at an 8-byte boundry on 32-bit platforms */
|
2012-02-05 11:35:58 -05:00
|
|
|
uint4 kip_pid_array[MAX_KIP_PID_SLOTS]; /* Processes actively doing kill (0 denotes empty slots) */
|
|
|
|
|
|
|
|
gtm_uint64_t sec_size; /* Upon going to larger shared memory sizes, we realized that this does not */
|
|
|
|
/* need to be in the file header but the node local since it can be calculated */
|
|
|
|
/* from info in the file header. */
|
2012-03-24 14:06:46 -04:00
|
|
|
int4 jnlpool_shmid; /* copy of jnlpool.repl_inst_filehdr->jnlpool_shmid to prevent mixing of multiple
|
|
|
|
* journal pools within the same database.
|
|
|
|
*/
|
|
|
|
boolean_t lockspacefull_logged; /* Avoids flooding syslog with LOCKSPACEFULL messages.
|
|
|
|
* If TRUE: LOCKSPACEFULL is written to the syslog.
|
|
|
|
* If FALSE: Do not write LOCKSPACEFULL to syslog.
|
|
|
|
* Set this to FALSE if free space ratio is above
|
|
|
|
* LOCK_SPACE_FULL_SYSLOG_THRESHOLD (defined in mlk_unlock.h).
|
|
|
|
* We exclude mlk_shrsub, and only consider mlk_prcblk & mlk_shrblk.
|
|
|
|
*/
|
|
|
|
uint4 trunc_pid; /* Operating truncate. */
|
|
|
|
block_id highest_lbm_with_busy_blk; /* Furthest lmap block known to have had a busy block during truncate. */
|
2012-02-05 11:35:58 -05:00
|
|
|
# if defined(UNIX)
|
|
|
|
ftokhist ftok_ops_array[FTOK_OPS_ARRAY_SIZE];
|
2012-03-24 14:06:46 -04:00
|
|
|
volatile uint4 onln_rlbk_cycle; /* incremented everytime an online rollback ends */
|
|
|
|
volatile uint4 db_onln_rlbkd_cycle; /* incremented everytime an online rollback takes the database back in time */
|
|
|
|
volatile uint4 onln_rlbk_pid; /* process ID of currently running online rollback. */
|
|
|
|
uint4 dbrndwn_skip_cnt; /* # of processes that skipped gds_rundown due to a concurrent online rollback */
|
2012-02-05 11:35:58 -05:00
|
|
|
# endif
|
|
|
|
} node_local;
|
|
|
|
|
|
|
|
#define DSKREAD_TRACE(CSA, CR_OFF, CR_TN, PID, BLK, CYCLE) \
|
|
|
|
{ \
|
|
|
|
int4 doidx; \
|
|
|
|
node_local_ptr_t cnl; \
|
|
|
|
assert((NULL != CSA)&& (NULL != (CSA->nl))); \
|
|
|
|
cnl = CSA->nl; \
|
|
|
|
doidx = ++cnl->dskread_ops_index; \
|
|
|
|
if (DSKREAD_OPS_ARRAY_SIZE <= doidx) \
|
|
|
|
doidx = cnl->dskread_ops_index = 0; \
|
|
|
|
cnl->dskread_ops_array[doidx].cr_off = CR_OFF; \
|
|
|
|
cnl->dskread_ops_array[doidx].cr_tn = CR_TN; \
|
|
|
|
cnl->dskread_ops_array[doidx].process_id = PID; \
|
|
|
|
cnl->dskread_ops_array[doidx].blk = BLK; \
|
|
|
|
cnl->dskread_ops_array[doidx].cycle = CYCLE; \
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
/* The following macro does not use a separate semaphore to protect its maintenance of the shared memory
|
|
|
|
* value crit_ops_index (which would complicate precisely the situation it was created to examine) therefore,
|
|
|
|
* in order to to maximize the chances of gathering meaningful data, it seems better placed after grab_crit
|
|
|
|
* and before rel_crit. Also we will increment the index first and cache it so we can shorten our exposure window.
|
|
|
|
*/
|
|
|
|
#define CRIT_TRACE(X) \
|
|
|
|
{ \
|
|
|
|
int4 coidx; \
|
|
|
|
node_local_ptr_t cnl; \
|
|
|
|
boolean_t in_ast; \
|
|
|
|
unsigned int ast_status; \
|
2012-03-24 14:06:46 -04:00
|
|
|
\
|
2012-02-05 11:35:58 -05:00
|
|
|
assert((NULL != csa) && (NULL !=(csa->nl))); \
|
|
|
|
cnl = csa->nl; \
|
|
|
|
coidx = ++cnl->crit_ops_index; \
|
|
|
|
if (CRIT_OPS_ARRAY_SIZE <= coidx) \
|
|
|
|
coidx = cnl->crit_ops_index = 0; \
|
|
|
|
VMS_ONLY( \
|
|
|
|
in_ast = lib$ast_in_prog(); \
|
|
|
|
if (!in_ast) \
|
|
|
|
ast_status = sys$setast(DISABLE); \
|
|
|
|
) \
|
|
|
|
cnl->crit_ops_array[coidx].call_from = (caddr_t)caller_id(); \
|
|
|
|
VMS_ONLY( \
|
|
|
|
if ((!in_ast) && (SS$_WASSET == ast_status)) \
|
|
|
|
sys$setast(ENABLE); \
|
|
|
|
) \
|
|
|
|
cnl->crit_ops_array[coidx].epid = process_id; \
|
|
|
|
cnl->crit_ops_array[coidx].crit_act = (X); \
|
2012-03-24 14:06:46 -04:00
|
|
|
cnl->crit_ops_array[coidx].curr_tn = (NULL != csa->hdr) ? \
|
|
|
|
csa->ti->curr_tn : 0; \
|
2012-02-05 11:35:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* The following macro checks that curr_tn and early_tn are equal right before beginning a transaction commit.
|
|
|
|
* The only exception we know of is if a process in the midst of commit had been killed (kill -9 or STOP/ID)
|
|
|
|
* after having incremented early_tn but before it finished the commit (and therefore incremented curr_tn).
|
|
|
|
* In that case another process that did a rundown (and executed secshr_db_clnup) at around the same time
|
|
|
|
* could have cleaned up the CRIT lock (sensing that the crit holder pid is no longer alive) making the crit
|
|
|
|
* lock available for other processes. To check if that is the case, we need to go back the crit_ops_array and check that
|
|
|
|
* a) the most recent crit operation was a grab crit done by the current pid (crit_act == crit_ops_gw) AND
|
|
|
|
* b) the immediately previous crit operation should NOT be a release crit crit_ops_rw but instead should be a crit_ops_gw
|
|
|
|
* c) there are two exceptions to this and they are
|
|
|
|
* (i) that there could be one or more crit_ops_nocrit actions from processes that tried releasing crit
|
|
|
|
* even though they dont own it (cases we know of are in gds_rundown and in t_end/tp_tend if
|
|
|
|
* t_commit_cleanup completes the transaction after a mid-commit error).
|
|
|
|
* (ii) there could be one or more crit_ops_gw/crit_ops_rw pair of operations by a pid in between.
|
|
|
|
*/
|
|
|
|
#define ASSERT_CURR_TN_EQUALS_EARLY_TN(csa, currtn) \
|
|
|
|
{ \
|
|
|
|
GBLREF uint4 process_id; \
|
|
|
|
\
|
|
|
|
assert((currtn) == csa->ti->curr_tn); \
|
|
|
|
if (csa->ti->early_tn != (currtn)) \
|
|
|
|
{ \
|
|
|
|
int4 coidx, lcnt; \
|
|
|
|
node_local_ptr_t cnl; \
|
|
|
|
uint4 expect_gw_pid = 0; \
|
|
|
|
\
|
|
|
|
cnl = csa->nl; \
|
|
|
|
assert(NULL != (node_local_ptr_t)cnl); \
|
|
|
|
coidx = cnl->crit_ops_index; \
|
|
|
|
assert(CRIT_OPS_ARRAY_SIZE > coidx); \
|
|
|
|
assert(crit_ops_gw == cnl->crit_ops_array[coidx].crit_act); \
|
|
|
|
assert(process_id == cnl->crit_ops_array[coidx].epid); \
|
|
|
|
for (lcnt = 0; CRIT_OPS_ARRAY_SIZE > lcnt; lcnt++) \
|
|
|
|
{ \
|
|
|
|
if (coidx) \
|
|
|
|
coidx--; \
|
|
|
|
else \
|
|
|
|
coidx = CRIT_OPS_ARRAY_SIZE - 1; \
|
|
|
|
if (crit_ops_nocrit == cnl->crit_ops_array[coidx].crit_act) \
|
|
|
|
continue; \
|
|
|
|
if (crit_ops_rw == cnl->crit_ops_array[coidx].crit_act) \
|
|
|
|
{ \
|
|
|
|
assert(0 == expect_gw_pid); \
|
|
|
|
expect_gw_pid = cnl->crit_ops_array[coidx].epid; \
|
|
|
|
} else if (crit_ops_gw == cnl->crit_ops_array[coidx].crit_act) \
|
|
|
|
{ \
|
|
|
|
if (!expect_gw_pid) \
|
|
|
|
break; /* found lone grab-crit */ \
|
|
|
|
assert(expect_gw_pid == cnl->crit_ops_array[coidx].epid); \
|
|
|
|
expect_gw_pid = 0;/* found paired grab-crit. continue search */ \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
assert(CRIT_OPS_ARRAY_SIZE > lcnt); /* assert if did not find lone grab-crit */ \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following macro places lock history entries in an array for debugging.
|
|
|
|
* NOTE: Users of this macro, set either of the following prior to using this macro.
|
|
|
|
* (i) gv_cur_region to the region whose history we are storing.
|
|
|
|
* (ii) global variable "locknl" to correspond to the node-local of the region whose history we are storing.
|
|
|
|
* If "locknl" is non-NULL, it is used to store the lock history. If not only then is gv_cur_region used.
|
|
|
|
*/
|
|
|
|
#define LOCK_HIST(OP, LOC, ID, CNT) \
|
|
|
|
{ \
|
|
|
|
GBLREF node_local_ptr_t locknl; \
|
|
|
|
\
|
|
|
|
int lockidx; \
|
|
|
|
node_local_ptr_t lcknl; \
|
|
|
|
\
|
|
|
|
if (NULL == locknl) \
|
|
|
|
{ \
|
|
|
|
assert(NULL != gv_cur_region); \
|
|
|
|
lcknl = FILE_INFO(gv_cur_region)->s_addrs.nl; \
|
|
|
|
assert(NULL != lcknl); \
|
|
|
|
} else \
|
|
|
|
lcknl = locknl; \
|
|
|
|
lockidx = ++lcknl->lockhist_idx; \
|
|
|
|
if (LOCKHIST_ARRAY_SIZE <= lockidx) \
|
|
|
|
lockidx = lcknl->lockhist_idx = 0; \
|
|
|
|
GET_LONGP(&lcknl->lockhists[lockidx].lock_op[0], (OP)); \
|
|
|
|
lcknl->lockhists[lockidx].lock_addr = (sm_int_ptr_t)(LOC); \
|
|
|
|
lcknl->lockhists[lockidx].lock_callr = (caddr_t)caller_id(); \
|
|
|
|
lcknl->lockhists[lockidx].lock_pid = (int4)(ID); \
|
|
|
|
lcknl->lockhists[lockidx].loop_cnt = (int4)(CNT); \
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#define DUMP_LOCKHIST() dump_lockhist()
|
|
|
|
#else
|
|
|
|
#define CRIT_TRACE(X)
|
|
|
|
#define ASSERT_CURR_TN_EQUALS_EARLY_TN(csa, currtn)
|
|
|
|
#define LOCK_HIST(OP, LOC, ID, CNT)
|
|
|
|
#define DUMP_LOCKHIST()
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define FTOK_TRACE(CSA, CR_TN, FTOK_OPER, PID) \
|
|
|
|
{ \
|
|
|
|
node_local_ptr_t cnl; \
|
|
|
|
int4 foindx; \
|
|
|
|
assert(NULL != CSA); \
|
|
|
|
if (cnl = (CSA->nl)) \
|
|
|
|
{ \
|
|
|
|
foindx = ++cnl->ftok_ops_index; \
|
|
|
|
if (FTOK_OPS_ARRAY_SIZE <= cnl->ftok_ops_index) \
|
|
|
|
foindx = cnl->ftok_ops_index = 0; \
|
|
|
|
cnl->ftok_ops_array[foindx].process_id = PID; \
|
|
|
|
cnl->ftok_ops_array[foindx].cr_tn = CR_TN; \
|
|
|
|
cnl->ftok_ops_array[foindx].ftok_oper = FTOK_OPER; \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
|
|
|
|
#define BT_NOT_ALIGNED(bt, bt_base) (!IS_PTR_ALIGNED((bt), (bt_base), SIZEOF(bt_rec)))
|
|
|
|
#define BT_NOT_IN_RANGE(bt, bt_lo, bt_hi) (!IS_PTR_IN_RANGE((bt), (bt_lo), (bt_hi)))
|
|
|
|
|
|
|
|
#define NUM_CRIT_ENTRY 1024
|
|
|
|
#define CRIT_SPACE (NUM_CRIT_ENTRY * SIZEOF(mutex_que_entry) + SIZEOF(mutex_struct))
|
|
|
|
#define NODE_LOCAL_SIZE (ROUND_UP(SIZEOF(node_local), OS_PAGE_SIZE))
|
|
|
|
#define NODE_LOCAL_SPACE (ROUND_UP(CRIT_SPACE + NODE_LOCAL_SIZE, OS_PAGE_SIZE))
|
|
|
|
/* In order for gtmsecshr not to pull in OTS library, NODE_LOCAL_SIZE_DBS is used in secshr_db_clnup instead of NODE_LOCAL_SIZE */
|
|
|
|
#define NODE_LOCAL_SIZE_DBS (ROUND_UP(SIZEOF(node_local), DISK_BLOCK_SIZE))
|
|
|
|
|
|
|
|
/* Define pointer types for above structures that may be in shared memory and need 64
|
|
|
|
bit pointers. */
|
|
|
|
#ifdef DB64
|
|
|
|
# ifdef __osf__
|
|
|
|
# pragma pointer_size(save)
|
|
|
|
# pragma pointer_size(long)
|
|
|
|
# else
|
|
|
|
# error UNSUPPORTED PLATFORM
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
typedef bt_rec *bt_rec_ptr_t;
|
|
|
|
typedef th_rec *th_rec_ptr_t;
|
|
|
|
typedef th_index *th_index_ptr_t;
|
|
|
|
typedef mutex_struct *mutex_struct_ptr_t;
|
|
|
|
typedef mutex_spin_parms_struct *mutex_spin_parms_ptr_t;
|
|
|
|
typedef mutex_que_entry *mutex_que_entry_ptr_t;
|
|
|
|
typedef node_local *node_local_ptr_t;
|
|
|
|
|
|
|
|
#ifdef DB64
|
|
|
|
# ifdef __osf__
|
|
|
|
# pragma pointer_size(restore)
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "cdb_sc.h"
|
|
|
|
|
|
|
|
bt_rec_ptr_t bt_get(int4 block);
|
|
|
|
void dump_lockhist(void);
|
|
|
|
void wait_for_block_flush(bt_rec *bt, block_id block);
|
|
|
|
|
|
|
|
#endif
|