3199 lines
139 KiB
C
3199 lines
139 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2011 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
#ifndef GDSFHEAD_H_INCLUDED
|
|
#define GDSFHEAD_H_INCLUDED
|
|
|
|
/* gdsfhead.h */
|
|
/* this requires gdsroot.h gtm_facility.h fileinfo.h gdsbt.h */
|
|
|
|
#include <sys/types.h>
|
|
#include "gdsdbver.h"
|
|
#include "gtm_unistd.h"
|
|
#include "gtm_limits.h"
|
|
#include "gtm_stdlib.h"
|
|
#include "gtm_string.h"
|
|
#include "send_msg.h"
|
|
#include "iosp.h"
|
|
#ifdef VMS
|
|
#include "iosb_disk.h"
|
|
#endif
|
|
#ifdef GTM_CRYPT
|
|
#include "gtmcrypt.h" /* for gtmcrypt_key_t */
|
|
#endif
|
|
|
|
/* Size of one que_ent. Judging by the name this is the byte offset of state_que inside a cache record,
 * where a blkque link pair (fl, bl) is the first member -- NOTE(review): confirm against the que_ent definition.
 */
#define CACHE_STATE_OFF	SIZEOF(que_ent)

/* Message codes made visible to code that includes this header */
error_def(ERR_DBCRERR);
error_def(ERR_DBENDIAN);
error_def(ERR_GVIS);
error_def(ERR_GVSUBOFLOW);
error_def(ERR_REPLINSTMISMTCH);
error_def(ERR_SCNDDBNOUPD);
error_def(ERR_SSATTACHSHM);
error_def(ERR_SSFILOPERR);
error_def(ERR_STACKCRIT);
error_def(ERR_STACKOFLOW);
error_def(ERR_TNTOOLARGE);
error_def(ERR_TNWARN);
|
|
\
|
|
/* all this record's fields should exactly be the first members of the cache_rec in the same order */
|
|
typedef struct mmblk_rec_struct
|
|
{
|
|
struct
|
|
{
|
|
sm_off_t fl;
|
|
sm_off_t bl;
|
|
}
|
|
blkque, /* cache records whose block numbers hash to the same location */
|
|
state_que; /* cache records in same state (either wip or active) */
|
|
union
|
|
{
|
|
short semaphore;
|
|
volatile int4 latch; /* int required for atomic swap on Unix */
|
|
/* volatile required as this value is referenced outside of the lock in db_csh_getn() */
|
|
} interlock;
|
|
block_id blk;
|
|
uint4 refer;
|
|
enum db_ver ondsk_blkver; /* Actual block version from block header as it exists on disk
|
|
(prior to any dynamic conversion that may have occurred when read in).
|
|
*/
|
|
trans_num dirty;
|
|
} mmblk_rec;
|
|
|
|
/* all the fields of this record should exactly be the first members of the cache_state_rec in the same order */
|
|
typedef struct mmblk_state_rec_struct
|
|
{
|
|
struct
|
|
{
|
|
sm_off_t fl;
|
|
sm_off_t bl;
|
|
}
|
|
state_que; /* WARNING -- from this point onwards this should be identical to a mmblk_rec */
|
|
union
|
|
{
|
|
short semaphore;
|
|
volatile int4 latch; /* int required for atomic swap on Unix */
|
|
/* volatile required as this value is referenced outside of the lock in db_csh_getn() */
|
|
} interlock;
|
|
block_id blk;
|
|
uint4 refer;
|
|
enum db_ver ondsk_blkver; /* Actual block version from block header as it exists on disk
|
|
(prior to any dynamic conversion that may have occurred when read in).
|
|
*/
|
|
trans_num dirty;
|
|
} mmblk_state_rec;
|
|
|
|
typedef struct
|
|
{
|
|
mmblk_que_head mmblkq_wip, /* write-in-progress queue -- unused in Unix */
|
|
mmblkq_active; /* active queue */
|
|
mmblk_rec mmblk_array[1]; /* the first mmblk record */
|
|
} mmblk_que_heads;
|
|
|
|
|
|
/* need to keep quadword aligned */
|
|
|
|
/* Cache record -- NOTE: the head portion of this should exactly match with mmblk_rec */
|
|
typedef struct cache_rec_struct
|
|
{
|
|
struct
|
|
{
|
|
sm_off_t fl;
|
|
sm_off_t bl;
|
|
}
|
|
blkque, /* cache records whose block numbers hash to the same location */
|
|
state_que; /* cache records in same state (either wip or active) */
|
|
union
|
|
{
|
|
short semaphore;
|
|
volatile int4 latch; /* int required for atomic swap on Unix */
|
|
/* volatile required as this value is referenced outside of the lock in db_csh_getn() */
|
|
} interlock;
|
|
block_id blk;
|
|
uint4 refer; /* reference bit for the clock algorithm */
|
|
enum db_ver ondsk_blkver; /* Actual block version from block header as it exists on disk
|
|
(prior to any dynamic conversion that may have occurred when read in).
|
|
*/
|
|
/* Keep our 64 bit fields up front */
|
|
/* this point should be quad-word aligned */
|
|
trans_num dirty; /* block has been modified since last written to disk; used by bt_put, db_csh_getn
|
|
* mu_rndwn_file wcs_recover, secshr_db_clnup, wr_wrtfin_all and extensively by the ccp */
|
|
trans_num flushed_dirty_tn; /* value of dirty at the time of flushing */
|
|
trans_num tn;
|
|
sm_off_t bt_index; /* offset to bt_rec */
|
|
sm_off_t buffaddr; /* offset to buffer holding actual data*/
|
|
sm_off_t twin; /* (VMS) offset to cache_rec of another copy of the same block from bg_update & wcs_wt_all
|
|
* (Unix & VMS) offset to cache_rec holding before-image for wcs_recover to backup */
|
|
#ifdef VMS
|
|
sm_off_t shmpool_blk_off; /* Offset to shmpool block containing the reformat buffer for this CR */
|
|
int4 backup_cr_off; /* Offset to backup_cr (set/used by bg_update_phase1/2 routines) */
|
|
#endif
|
|
off_jnl_t jnl_addr; /* offset from bg_update to prevent wcs_wtstart from writing a block ahead of the journal */
|
|
global_latch_t rip_latch; /* for read_in_progress - note contains extra 16 bytes for HPPA. Usage note: this
|
|
latch is used on those platforms where read_in_progress is not directly updated
|
|
by atomic routines/instructions. As such there needs be no cache line padding between
|
|
this field and read_in_progress.
|
|
*/
|
|
/* and now the rest */
|
|
int4 image_count; /* maintained with r_epid in vms to ensure that the process has stayed in gt.m */
|
|
int4 epid; /* set by wcs_wtstart to id the write initiator; cleared by wcs_wtfini
|
|
* used by t_commit_cleanup, secshr_db_clnup and wcs_recover */
|
|
int4 cycle; /* relative stamp indicates changing versions of the block for concurrency checking */
|
|
int4 r_epid; /* set by db_csh_getn, cleared by t_qread, bg_update, wcs_recover or secshr_db_clnup
|
|
* used to check for process leaving without releasing the buffer
|
|
* must be word aligned on the VAX */
|
|
#ifdef VMS
|
|
io_status_block_disk iosb; /* used on VMS write */
|
|
#endif
|
|
CNTR4DCL(read_in_progress, 10); /* -1 for normal and 0 for rip used by t_qread and checked by others */
|
|
uint4 in_tend; /* non-zero pid from bg_update indicates secshr_db_clnup should finish update */
|
|
uint4 in_cw_set; /* non-zero pid from t_end, tp_tend or bg_update protects block from db_csh_getn;
|
|
* returned to 0 by t_end, tp_tend or t_commit_cleanup */
|
|
uint4 data_invalid; /* non-zero pid from bg_update indicates t_commit_cleanup/wcs_recover should invalidate */
|
|
boolean_t stopped; /* TRUE indicates to wcs_recover that secshr_db_clnup built the block */
|
|
boolean_t wip_stopped; /* TRUE indicates to wcs_recover, wcs_wtfini, wcs_get_blk and gds_rundown
|
|
* that secshr_db_clnup cancelled the qio */
|
|
} cache_rec;
|
|
|
|
/* A note about cache line separation of the latches contained in these blocks. Because this block is duplicated
|
|
many (potentially tens+ of) thousands of times in a running system, we have decided against providing cacheline
|
|
padding so as to force each cache record into a separate cacheline (due to it containing a latch and/or atomic
|
|
counter field) to prevent processes from causing interference with each other. We decided that the probability
|
|
of two processes working on adjacent cache records simultaneously was low enough that the interference was
|
|
minimal whereas increasing the cache record size to prevent that interference could cause storage problems
|
|
on some platforms where processes are already running near the edge.
|
|
*/
|
|
|
|
/* cache_state record -- NOTE: the first few fields of this should be identical to that of mmblk_state_rec */
|
|
typedef struct
|
|
{
|
|
struct
|
|
{
|
|
sm_off_t fl;
|
|
sm_off_t bl;
|
|
}
|
|
state_que; /* WARNING from this point, this structure must be identical to a cache_rec */
|
|
union
|
|
{
|
|
short semaphore;
|
|
volatile int4 latch; /* int required for atomic swap on Unix */
|
|
/* volatile required as this value is referenced outside of the lock in db_csh_getn() */
|
|
} interlock;
|
|
block_id blk;
|
|
uint4 refer; /* reference bit for the LRU algorithm */
|
|
enum db_ver ondsk_blkver; /* Actual block version from block header as it exists on disk
|
|
(prior to any dynamic conversion that may have occurred when read in).
|
|
*/
|
|
/* Keep our 64 bit fields up front */
|
|
/* this point should be quad-word aligned */
|
|
trans_num dirty; /* block has been modified since last written to disk; used by bt_put, db_csh_getn
|
|
* mu_rndwn_file wcs_recover, secshr_db_clnup, wr_wrtfin_all and extensively by the ccp */
|
|
trans_num flushed_dirty_tn; /* value of dirty at the time of flushing */
|
|
trans_num tn;
|
|
sm_off_t bt_index; /* offset to bt_rec */
|
|
sm_off_t buffaddr; /* offset to buffer holding actual data*/
|
|
sm_off_t twin; /* (VMS) offset to cache_rec of another copy of the same block from bg_update & wcs_wt_all
|
|
* (Unix & VMS) offset to cache_rec holding before-image for wcs_recover to backup */
|
|
#ifdef VMS
|
|
sm_off_t shmpool_blk_off; /* Offset to shmpool block containing the reformat buffer for this CR */
|
|
int4 backup_cr_off; /* Offset to backup_cr (set/used by bg_update_phase1/2 routines) */
|
|
#endif
|
|
off_jnl_t jnl_addr; /* offset from bg_update to prevent wcs_wtstart from writing a block ahead of the journal */
|
|
global_latch_t rip_latch; /* for read_in_progress - note contains extra 16 bytes for HPPA. Usage note: this
|
|
latch is used on those platforms where read_in_progress is not directly updated
|
|
by atomic routines/instructions. As such there needs be no cache line padding between
|
|
this field and read_in_progress.
|
|
*/
|
|
/* and now the rest */
|
|
int4 image_count; /* maintained with r_epid in vms to ensure that the process has stayed in gt.m */
|
|
int4 epid; /* set by wcs_start to id the write initiator; cleared by wcs_wtfini
|
|
* used by t_commit_cleanup, secshr_db_clnup and wcs_recover */
|
|
int4 cycle; /* relative stamp indicates changing versions of the block for concurrency checking */
|
|
int4 r_epid; /* set by db_csh_getn, cleared by t_qread, bg_update, wcs_recover or secshr_db_clnup
|
|
* used to check for process leaving without releasing the buffer
|
|
* must be word aligned on the VAX */
|
|
#ifdef VMS
|
|
io_status_block_disk iosb; /* used on VMS write */
|
|
#endif
|
|
CNTR4DCL(read_in_progress, 10); /* -1 for normal and 0 for rip used by t_qread and checked by others */
|
|
uint4 in_tend; /* non-zero pid from bg_update indicates secshr_db_clnup should finish update */
|
|
uint4 in_cw_set; /* non-zero pid from t_end, tp_tend or bg_update protects block from db_csh_getn;
|
|
* returned to 0 by t_end, tp_tend or t_commit_cleanup */
|
|
uint4 data_invalid; /* non-zero pid from bg_update indicates t_commit_cleanup/wcs_recover should invalidate */
|
|
boolean_t stopped; /* TRUE indicates to wcs_recover that secshr_db_clnup built the block */
|
|
boolean_t wip_stopped; /* TRUE indicates to wcs_recover, wcs_wtfini, wcs_get_blk and gds_rundown
|
|
* that secshr_db_clnup cancelled the qio */
|
|
} cache_state_rec;
|
|
|
|
/* Sentinel values. Each expansion is fully parenthesized so it stays a single operand in any expression. */
#define CR_BLKEMPTY		(-1)
#define MBR_BLKEMPTY		(-1)
#define FROZEN_BY_ROOT		((uint4)(0xFFFFFFFF))
#define BACKUP_NOT_IN_PROGRESS	0x7FFFFFFF
|
|
|
|
typedef struct
|
|
{
|
|
cache_que_head cacheq_wip, /* write-in-progress queue -- unused in Unix */
|
|
cacheq_active; /* active queue */
|
|
cache_rec cache_array[1]; /*the first cache record*/
|
|
} cache_que_heads;
|
|
|
|
/* Define pointer types to some previously defined structures */
|
|
|
|
#ifdef DB64
|
|
# ifdef __osf__
|
|
# pragma pointer_size(save)
|
|
# pragma pointer_size(long)
|
|
# else
|
|
# error UNSUPPORTED PLATFORM
|
|
# endif
|
|
#endif
|
|
|
|
typedef cache_que_head *cache_que_head_ptr_t;
|
|
typedef cache_rec *cache_rec_ptr_t;
|
|
typedef cache_rec **cache_rec_ptr_ptr_t;
|
|
typedef cache_state_rec *cache_state_rec_ptr_t;
|
|
typedef cache_que_heads *cache_que_heads_ptr_t;
|
|
|
|
typedef mmblk_que_head *mmblk_que_head_ptr_t;
|
|
typedef mmblk_rec *mmblk_rec_ptr_t;
|
|
typedef mmblk_rec **mmblk_rec_ptr_ptr_t;
|
|
typedef mmblk_state_rec *mmblk_state_rec_ptr_t;
|
|
typedef mmblk_que_heads *mmblk_que_heads_ptr_t;
|
|
|
|
void verify_queue_lock(que_head_ptr_t qhdr);
|
|
void verify_queue(que_head_ptr_t qhdr);
|
|
|
|
#ifdef DB64
|
|
# ifdef __osf__
|
|
# pragma pointer_size(restore)
|
|
# endif
|
|
#endif
|
|
|
|
/* Queue verification hooks: they call the checkers only when DEBUG_QUEUE is defined and otherwise
 * expand to nothing. verify_queue_lock() is prototyped in this header with a single que_head_ptr_t
 * parameter, so the latch argument is accepted (existing callers keep compiling) but not forwarded.
 */
#ifdef DEBUG_QUEUE
#define VERIFY_QUEUE(base)		verify_queue(base)
#define VERIFY_QUEUE_LOCK(base,latch)	verify_queue_lock(base)
#else
#define VERIFY_QUEUE(base)
#define VERIFY_QUEUE_LOCK(base,latch)
#endif
|
|
|
|
/* The following 3 macros were introduced while solving a problem with $view where a call to $view in */
|
|
/* mumps right after a change to $zgbldir gave the old global directory - not the new one. On VMS it */
|
|
/* caused a core dump. If one were to access a global variable via $data right after the change, however, */
|
|
/* the $view worked correctly. The solution was to make sure the gd_map information matched the current */
|
|
/* gd_header in op_fnview.c. The code used as a template for this change was in gvinit.c. The first */
|
|
/* macro gets the gd_header using an mval. The second macro establishes the gd_map from the gd_header. */
|
|
/* The third macro is an assert (when DEBUG_ONLY is defined) for those cases where the gd_header is already */
|
|
/* set to make sure the mapping is correct. The first 2 macros are executed when the gd_header is null, */
|
|
/* and the 3rd macro is associated with an else clause if it is not. Therefore, they should be maintained */
|
|
/* as a group. */
|
|
|
|
/* Make inmval an empty string mval and set gd_header from zgbldir() of it.
 * inmval is an mval object (not a pointer); it is overwritten.
 */
#define SET_GD_HEADER(inmval)				\
{							\
	(inmval).mvtype = MV_STR;			\
	(inmval).str.len = 0;				\
	gd_header = zgbldir(&(inmval));			\
}
|
|
|
|
/* Point gd_map/gd_map_top at the map array of the current gd_header and record gd_header in gd_targ_addr */
#define SET_GD_MAP					\
{							\
	GBLREF gd_binding	*gd_map, *gd_map_top;	\
							\
	gd_map = gd_header->maps;			\
	gd_map_top = gd_map + gd_header->n_maps;	\
	TREF(gd_targ_addr) = gd_header;			\
}
|
|
|
|
/* Assert that gd_map, gd_map_top and gd_targ_addr agree with the current gd_header,
 * i.e. that they hold what SET_GD_MAP would have set.
 */
#define GD_HEADER_ASSERT					\
{								\
	GBLREF gd_binding	*gd_map, *gd_map_top;		\
								\
	assert(gd_map == gd_header->maps);			\
	assert(gd_map_top == gd_map + gd_header->n_maps);	\
	assert(TREF(gd_targ_addr) == gd_header);		\
}
|
|
|
|
/* (Re)allocate the gv_key that GVKEY points to with room for KEYSIZE bytes of key data.
 * GVKEY must be an lvalue of type gv_key *; it is replaced by the new allocation.
 * If reallocating gv_currkey/gv_altkey, pre-existing values are preserved: the header and the
 * first "end" bytes of the old key are copied across and the old key is freed. A NULL GVKEY
 * yields an empty key (end and prev 0, base[0] a null byte). In either case "top" is set to KEYSIZE.
 * KEYSIZE is evaluated exactly once.
 */
#define GVKEY_INIT(GVKEY, KEYSIZE)								\
{												\
	gv_key	*new_KEY, *old_KEY;								\
	int4	keySZ;										\
												\
	old_KEY = (GVKEY);									\
	keySZ = (KEYSIZE);									\
	/* KEYSIZE should have been the output of a DBKEYSIZE command so			\
	 * should be a multiple of 4. Assert that.						\
	 */											\
	assert(ROUND_UP2(keySZ, 4) == keySZ);							\
	new_KEY = (gv_key *)malloc(SIZEOF(gv_key) - 1 + keySZ);					\
	if (NULL != old_KEY)									\
	{											\
		/* the copy below only fits because the key never shrinks and end < top */	\
		assert(keySZ >= old_KEY->top);							\
		assert(old_KEY->top > old_KEY->end);						\
		memcpy(new_KEY, old_KEY, SIZEOF(gv_key) + old_KEY->end);			\
		free(old_KEY);									\
	} else											\
	{											\
		new_KEY->base[0] = '\0';							\
		new_KEY->end = 0;								\
		new_KEY->prev = 0;								\
	}											\
	new_KEY->top = keySZ;									\
	(GVKEY) = new_KEY;									\
}
|
|
|
|
/* Free the key GVKEY points to (if any) and reset GVKEY to NULL. GVKEY must be a pointer lvalue. */
#define GVKEY_FREE_IF_NEEDED(GVKEY)	\
{					\
	if (NULL != (GVKEY))		\
	{				\
		free(GVKEY);		\
		(GVKEY) = NULL;		\
	}				\
}
|
|
|
|
/* Grow gv_currkey and gv_altkey (through GVKEY_INIT) when KEYSIZE exceeds the current gv_keysize,
 * and record the new size in gv_keysize. Otherwise nothing changes and, in debug builds, both keys
 * are asserted to be allocated with top equal to gv_keysize. KEYSIZE is evaluated once.
 */
#define GVKEYSIZE_INCREASE_IF_NEEDED(KEYSIZE)							\
{												\
	int		keySIZE;								\
												\
	GBLREF int4	gv_keysize;								\
	GBLREF gv_key	*gv_altkey;								\
	GBLREF gv_key	*gv_currkey;								\
												\
	keySIZE = (KEYSIZE);									\
	assert(keySIZE);									\
	if (keySIZE > gv_keysize)								\
	{											\
		gv_keysize = keySIZE;								\
		GVKEY_INIT(gv_currkey, keySIZE);						\
		GVKEY_INIT(gv_altkey, keySIZE);							\
	} else											\
		assert((NULL != gv_currkey) && (NULL != gv_altkey) && gv_keysize		\
			&& (gv_keysize == gv_currkey->top) && (gv_keysize == gv_altkey->top));	\
}
|
|
|
|
/* Allocate csa->dir_tree via targ_alloc(keysize, NULL, reg) if it is not yet set. If it is already
 * set, debug builds assert that it belongs to this csa and is rooted at DIR_ROOT.
 * csa is a pointer expression and is evaluated more than once.
 */
#define SET_CSA_DIR_TREE(csa, keysize, reg)								\
{													\
	if (NULL == (csa)->dir_tree)									\
	{												\
		(csa)->dir_tree = targ_alloc((keysize), NULL, (reg));					\
		GTMTRIG_ONLY(assert(NULL == (csa)->hasht_tree));					\
	} else												\
		assert(((csa)->dir_tree->gd_csa == (csa)) && (DIR_ROOT == (csa)->dir_tree->root));	\
}
|
|
|
|
/* Release the gv_targets hanging off csa: hasht_tree (trigger-enabled builds only, and only if set)
 * and then dir_tree, which must be set. Each target has its regcnt decremented before targ_free()
 * is called, and the csa field is reset to NULL. csa is evaluated exactly once.
 */
#define FREE_CSA_DIR_TREE(csa)							\
{										\
	sgmnt_addrs	*lcl_csa;						\
	gv_namehead	*dir_tree, *hasht_tree;					\
										\
	lcl_csa = (csa);							\
	GTMTRIG_ONLY(								\
		hasht_tree = lcl_csa->hasht_tree;				\
		if (NULL != hasht_tree)						\
		{								\
			assert(hasht_tree->gd_csa == lcl_csa);			\
			hasht_tree->regcnt--;	/* targ_free relies on this */	\
			targ_free(hasht_tree);					\
			lcl_csa->hasht_tree = NULL;				\
		}								\
	)									\
	dir_tree = lcl_csa->dir_tree;						\
	assert(NULL != dir_tree);						\
	dir_tree->regcnt--;	/* targ_free relies on this */			\
	targ_free(dir_tree);							\
	lcl_csa->dir_tree = NULL;						\
}
|
|
|
|
/* Invoke process_gvt_pending_list(GREG, CSA) only if GVT_PENDING_LIST is non-NULL */
#define PROCESS_GVT_PENDING_LIST(GREG, CSA, GVT_PENDING_LIST)						\
{													\
	if (NULL != (GVT_PENDING_LIST))									\
	{	/* Now that the region has been opened, check if there are any gv_targets that were	\
		 * allocated for this region BEFORE the open. If so, re-allocate them if necessary.	\
		 */											\
		process_gvt_pending_list((GREG), (CSA));						\
	}												\
}
|
|
|
|
#define T_COMMIT_CRIT_PHASE1	1	/* csa->t_commit_crit gets set to this during bg_update_phase1 */
#define T_COMMIT_CRIT_PHASE2	2	/* csa->t_commit_crit gets set to this during bg_update_phase2 */

/* macro to check if we hold crit or are committing (with or without crit) */
#define T_IN_CRIT_OR_COMMIT(CSA)	((CSA)->now_crit || (CSA)->t_commit_crit)

/* Macro to check if we hold crit or are committing (with or without crit) or are in wcs_wtstart for this region.
 * This is used in timer handling code to determine if it is ok to interrupt. We do not want to interrupt if holding
 * crit or in the midst of commit or in wcs_wtstart (in the last case, we could be causing another process HOLDING CRIT
 * on the region to wait in bg_update_phase1 if we hold the write interlock).
 */
#define T_IN_CRIT_OR_COMMIT_OR_WRITE(CSA)	(T_IN_CRIT_OR_COMMIT(CSA) || (CSA)->in_wtstart)

/* macro to check if a database commit is past the point where it can be successfully rolled back */
#define T_UPDATE_UNDERWAY(CSA)	((CSA)->t_commit_crit)
|
|
|
|
/* the file header has relative pointers to its data structures so each process will malloc
 * one of these and fill it in with absolute pointers upon file initialization.
 *
 * The macros below convert between such relative offsets and absolute addresses, using lock_addrs[0]
 * as the base: GDS_REL2ABS/GDS_ABS2REL use the current cs_addrs, while the GDS_ANY_* forms take the
 * sgmnt_addrs pointer (w) explicitly. Every parameter is parenthesized so that any pointer expression
 * (for example &area) can be passed.
 */
#define GDS_REL2ABS(x)		(((sm_uc_ptr_t)cs_addrs->lock_addrs[0] + (sm_off_t)(x)))
#define GDS_ABS2REL(x)		((sm_off_t)(((sm_uc_ptr_t)(x) - (sm_uc_ptr_t)cs_addrs->lock_addrs[0])))
#define GDS_ANY_REL2ABS(w,x)	(((sm_uc_ptr_t)((w)->lock_addrs[0]) + (sm_off_t)(x)))
#define GDS_ANY_ABS2REL(w,x)	((sm_off_t)(((sm_uc_ptr_t)(x) - (sm_uc_ptr_t)(w)->lock_addrs[0])))
#ifdef GTM_CRYPT
/* Address that lies x->nl->encrypt_glo_buff_off bytes past address w */
#define GDS_ANY_ENCRYPTGLOBUF(w,x)	((sm_uc_ptr_t)(w) + (sm_off_t)((x)->nl->encrypt_glo_buff_off))
#endif
/* Debug check: ptr is NULL, or lies at/after csa->db_addrs[0] and (when db_addrs[1] is non-zero) before db_addrs[1] */
#define ASSERT_IS_WITHIN_SHM_BOUNDS(ptr, csa)										\
	assert((NULL == (ptr)) || (((ptr) >= (csa)->db_addrs[0]) && ((0 == (csa)->db_addrs[1]) || ((ptr) < (csa)->db_addrs[1]))))
|
|
|
|
#ifdef DEBUG
|
|
/* Debug check that PTR is the start address of one of the CSD->n_bts global buffers: it must not
 * precede the buffer of the cache record found CSD->bt_buckets entries into cache_array, and must
 * be a whole number of CSD->blk_size blocks (fewer than n_bts) beyond it.
 * NOTE(review): presumably the first bt_buckets entries of cache_array are hash bucket heads -- confirm.
 */
#define DBG_ENSURE_PTR_IS_VALID_GLOBUFF(CSA, CSD, PTR)					\
{											\
	cache_rec_ptr_t	cache_start;							\
	long		bufindx;							\
	sm_uc_ptr_t	bufstart;							\
											\
	cache_start = &(CSA)->acc_meth.bg.cache_state->cache_array[0];			\
	cache_start += (CSD)->bt_buckets;						\
	bufstart = (sm_uc_ptr_t)GDS_ANY_REL2ABS((CSA), cache_start->buffaddr);		\
	assert((PTR) >= bufstart);							\
	bufindx = ((PTR) - bufstart) / (CSD)->blk_size;					\
	assert(bufindx < (CSD)->n_bts);							\
	assert((bufstart + (bufindx * (CSD)->blk_size)) == (PTR));			\
}
|
|
/* Same check as DBG_ENSURE_PTR_IS_VALID_GLOBUFF, but against a second array of n_bts buffers that
 * starts blk_size * n_bts bytes after the first global buffer.
 * NOTE(review): by the macro name this is the encryption twin buffer area -- confirm.
 */
#define DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(CSA, CSD, PTR)					\
{												\
	cache_rec_ptr_t	cache_start;								\
	long		bufindx;								\
	sm_uc_ptr_t	bufstart;								\
												\
	cache_start = &(CSA)->acc_meth.bg.cache_state->cache_array[0];				\
	cache_start += (CSD)->bt_buckets;							\
	bufstart = (sm_uc_ptr_t)GDS_ANY_REL2ABS((CSA), cache_start->buffaddr);			\
	bufstart += (gtm_uint64_t)(CSD)->blk_size * (CSD)->n_bts;				\
	assert((PTR) >= bufstart);								\
	bufindx = ((PTR) - bufstart) / (CSD)->blk_size;						\
	assert(bufindx < (CSD)->n_bts);								\
	assert((bufstart + (bufindx * (gtm_uint64_t)(CSD)->blk_size)) == (PTR));		\
}
|
|
|
|
/* Debug checks on cse->old_block (the before-image pointer of a cw-set element):
 *   - write-type cses (gds_t_write, gds_t_write_recycled, gds_t_writemap) must have an old_block;
 *   - for BG (!is_mm) a non-NULL old_block must fall in a global buffer whose cache record holds
 *     cse->blk and is pinned by this process (unless dse_running or write_after_image);
 *   - for MM it must equal the block's address computed from db_addrs[0].
 * The MM block offset is computed in 64 bits so that blk * blk_size cannot overflow narrower arithmetic.
 * Relies on process_id being visible at the point of use.
 */
#define DBG_ENSURE_OLD_BLOCK_IS_VALID(cse, is_mm, csa, csd)								\
{															\
	cache_rec_ptr_t		cache_start;										\
	long			bufindx;										\
	sm_uc_ptr_t		bufstart;										\
	GBLREF	boolean_t	dse_running, write_after_image;								\
															\
	assert(((gds_t_write != (cse)->mode) && (gds_t_write_recycled != (cse)->mode)					\
			&& (gds_t_writemap != (cse)->mode))								\
		|| (NULL != (cse)->old_block));	/* don't miss writing a PBLK */						\
	if (NULL != (cse)->old_block)											\
	{														\
		if (!(is_mm))												\
		{													\
			cache_start = &(csa)->acc_meth.bg.cache_state->cache_array[0];					\
			cache_start += (csd)->bt_buckets;								\
			bufstart = (sm_uc_ptr_t)GDS_ANY_REL2ABS((csa), cache_start->buffaddr);				\
			bufindx = ((cse)->old_block - bufstart) / (csd)->blk_size;					\
			assert(bufindx < (csd)->n_bts);									\
			assert((cse)->blk == cache_start[bufindx].blk);							\
			assert(dse_running || write_after_image || (process_id == cache_start[bufindx].in_cw_set));	\
		} else													\
		{													\
			assert((cse)->old_block == (csa)->db_addrs[0] + (gtm_uint64_t)(cse)->blk * (csd)->blk_size	\
							+ ((csd)->start_vbn - 1) * DISK_BLOCK_SIZE);			\
		}													\
	}														\
}
|
|
|
|
/* Check if a given address corresponds to a global buffer (BG) in database shared memory AND if
 * we are in phase2 of commit. If so check whether the corresponding cache-record is pinned.
 * Used by gvcst_blk_build to ensure the update array points to valid contents even though we dont hold crit.
 * seg supplies the address (addr) and length (len); a zero length skips the check.
 */
#define DBG_BG_PHASE2_CHECK_CR_IS_PINNED(csa, seg)								\
{														\
	cache_rec_ptr_t	cache_start;										\
	long		bufindx;										\
	sm_uc_ptr_t	bufstart, bufend, bufaddr;								\
														\
	GBLREF uint4	process_id;										\
														\
	if ((seg)->len && (T_COMMIT_CRIT_PHASE2 == (csa)->t_commit_crit) && (dba_bg == (csa)->hdr->acc_meth))	\
	{													\
		cache_start = &(csa)->acc_meth.bg.cache_state->cache_array[0];					\
		cache_start += (csa)->hdr->bt_buckets;								\
		bufstart = (sm_uc_ptr_t)GDS_ANY_REL2ABS((csa), cache_start->buffaddr);				\
		bufend = bufstart + ((gtm_uint64_t)(csa)->hdr->n_bts * (csa)->hdr->blk_size);			\
		bufaddr = (sm_uc_ptr_t)(seg)->addr;								\
		/* Check if given address is within database shared memory range */				\
		if ((bufaddr >= bufstart) && (bufaddr < bufend))						\
		{												\
			bufindx = (bufaddr - bufstart) / (csa)->hdr->blk_size;					\
			assert(bufindx < (csa)->hdr->n_bts);							\
			/* Assert that we have the cache-record pinned */					\
			assert(process_id == cache_start[bufindx].in_cw_set);					\
		}												\
	}													\
}
|
|
|
|
/* Macro to check that we have not pinned any more buffers than we are updating.
 * This check is done only for BG access method and in dbg mode.
 * This is invoked by t_end/tp_tend just before beginning phase2 of commit.
 * Relies on process_id being visible at the point of use.
 */
#define DBG_CHECK_PINNED_CR_ARRAY_CONTENTS(is_mm, crarray, crarrayindex, bplmap)				\
{														\
	GBLREF	boolean_t	write_after_image;								\
														\
	if (!(is_mm))												\
	{													\
		int4	crindex;										\
														\
		for (crindex = 0; crindex < (crarrayindex); crindex++)						\
		{												\
			if (process_id == (crarray)[crindex]->in_cw_set)					\
			{	/* We have pinned that cache-record implies we are planning on updating it	\
				 * (so should have set in_tend).						\
				 *										\
				 * Since bitmap blocks are done with phase2 inside of crit, they should not	\
				 * show up in the pinned array list at end of phase1 for GT.M. But DSE is an	\
				 * exception as it could operate on a bitmap block as if it is updating a	\
				 * non-bitmap block (i.e. without invoking gvcst_map_build). MUPIP JOURNAL	\
				 * RECOVER also could do the same thing while applying an AIMG record.		\
				 *										\
				 * In addition, VMS has an exception in case this is a twinned cache-record.	\
				 * In that case, for the older twin in_cw_set will be set to non-zero, but	\
				 * in_tend will be set to FALSE. Since we are outside of crit at this point,	\
				 * it is possible cr->twin field might be 0 (could have gotten cleared by	\
				 * wcs_wtfini concurrently) so we cannot assert on the twin field but		\
				 * cr->bt_index should still be 0 since we have not yet finished the		\
				 * update on the newer twin so we can check on that.				\
				 */										\
				assert((crarray)[crindex]->in_tend						\
					&& ((0 != (crarray)[crindex]->blk % (bplmap)) || write_after_image)	\
					VMS_ONLY(|| !(crarray)[crindex]->bt_index));				\
			}											\
		}												\
	}													\
}
|
|
|
|
#else
|
|
/* Non-debug builds: the DBG_* checks expand to nothing (their arguments are not evaluated) */
#define DBG_ENSURE_PTR_IS_VALID_GLOBUFF(CSA, CSD, PTR)
#define DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(CSA, CSD, PTR)
#define DBG_ENSURE_OLD_BLOCK_IS_VALID(cse, is_mm, csa, csd)
#define DBG_BG_PHASE2_CHECK_CR_IS_PINNED(csa, seg)
#define DBG_CHECK_PINNED_CR_ARRAY_CONTENTS(is_mm, crarray, crarrayindex, bplmap)
|
|
#endif
|
|
|
|
/* The TP_CHANGE_REG macro is a replica of the tp_change_reg() routine to be used for performance considerations.
 * The TP_CHANGE_REG_IF_NEEDED macro tries to optimize on processing if reg is same as gv_cur_region. But it can be
 * used only if the region passed is not NULL and if gv_cur_region, cs_addrs and cs_data are known to be in sync.
 * Note that timers can interrupt the syncing and hence any routines that are called by timers should be safe
 * and use the TP_CHANGE_REG macro only.
 *
 * TP_CHANGE_REG sets gv_cur_region to reg and then cs_addrs/cs_data to match: NULL for a NULL or unopened
 * region and for the dba_usr/dba_cm access methods, the region's s_addrs and its hdr for dba_mm/dba_bg.
 * Any other access method is a GTMASSERT. reg is evaluated more than once.
 */
#define TP_CHANGE_REG(reg)									\
{												\
	gv_cur_region = (reg);									\
	if (NULL == gv_cur_region || FALSE == gv_cur_region->open)				\
	{											\
		cs_addrs = (sgmnt_addrs *)0;							\
		cs_data = (sgmnt_data_ptr_t)0;							\
	} else											\
	{											\
		switch ((reg)->dyn.addr->acc_meth)						\
		{										\
			case dba_mm:								\
			case dba_bg:								\
				cs_addrs = &FILE_INFO(gv_cur_region)->s_addrs;			\
				cs_data = cs_addrs->hdr;					\
				break;								\
			case dba_usr:								\
			case dba_cm:								\
				cs_addrs = (sgmnt_addrs *)0;					\
				cs_data = (sgmnt_data_ptr_t)0;					\
				break;								\
			default:								\
				GTMASSERT;							\
				break;								\
		}										\
	}											\
}
|
|
|
|
/* Variant of TP_CHANGE_REG that does nothing when reg is already gv_cur_region. reg must be non-NULL,
 * and gv_cur_region, cs_addrs and cs_data must already be in sync on entry. Unlike TP_CHANGE_REG, a
 * dba_mm/dba_bg region is expected to be open (asserted). The closing assert verifies that cs_addrs
 * and cs_data match the region. reg is evaluated more than once.
 */
#define TP_CHANGE_REG_IF_NEEDED(reg)									\
{													\
	assert(reg);											\
	if ((reg) != gv_cur_region)									\
	{												\
		gv_cur_region = (reg);									\
		switch ((reg)->dyn.addr->acc_meth)							\
		{											\
			case dba_mm:									\
			case dba_bg:									\
				assert((reg)->open);							\
				cs_addrs = &FILE_INFO(gv_cur_region)->s_addrs;				\
				cs_data = cs_addrs->hdr;						\
				break;									\
			case dba_usr:									\
			case dba_cm:									\
				cs_addrs = (sgmnt_addrs *)0;						\
				cs_data = (sgmnt_data_ptr_t)0;						\
				break;									\
			default:									\
				GTMASSERT;								\
				break;									\
		}											\
	}												\
	assert(&FILE_INFO(gv_cur_region)->s_addrs == cs_addrs && cs_addrs->hdr == cs_data);		\
}
|
|
|
|
/* The TP_TEND_CHANGE_REG macro is a special macro used in tp_tend.c to optimize out the unnecessary checks in
 * the TP_CHANGE_REG_IF_NEEDED macro. Also it sets cs_addrs and cs_data to precomputed values instead of recomputing
 * them from the region by dereferencing through a multitude of pointers. It does not check if gv_cur_region is
 * different from the input region. It assumes it is different enough % of times that the cost of the if check
 * is not worth the additional unconditional sets.
 * si is a pointer expression whose gv_cur_region, tp_csa and tp_csd members supply the three values.
 */
#define TP_TEND_CHANGE_REG(si)				\
{							\
	gv_cur_region = (si)->gv_cur_region;		\
	cs_addrs = (si)->tp_csa;			\
	cs_data = (si)->tp_csd;				\
}
|
|
|
|
/* Make reghead the current GT.CM region head and switch gv_cur_region, cs_addrs and cs_data to its
 * region. Only the dba_bg and dba_mm access methods are accepted; anything else is a GTMASSERT.
 */
#define GTCM_CHANGE_REG(reghead)										\
{														\
	GBLREF cm_region_head	*curr_cm_reg_head;								\
	GBLREF gd_region	*gv_cur_region;									\
	GBLREF sgmnt_data	*cs_data;									\
	GBLREF sgmnt_addrs	*cs_addrs;									\
														\
	curr_cm_reg_head = (reghead);										\
	gv_cur_region = curr_cm_reg_head->reg;									\
	if ((dba_bg == gv_cur_region->dyn.addr->acc_meth) || (dba_mm == gv_cur_region->dyn.addr->acc_meth))	\
	{													\
		cs_addrs = &FILE_INFO(gv_cur_region)->s_addrs;							\
		cs_data = cs_addrs->hdr;									\
	} else													\
		GTMASSERT;											\
}
|
|
|
|
/* Macro to be used whenever cr->data_invalid needs to be set.
 * Stores process_id in cr->data_invalid. Debug builds first assert that in_tend is either ours or 0,
 * that data_invalid is 0 when in_tend is 0, and that data_invalid is 0 or ours when in_tend is ours.
 */
#define SET_DATA_INVALID(cr)										\
{													\
	uint4	in_tend, data_invalid;									\
													\
	DEBUG_ONLY(in_tend = (cr)->in_tend);								\
	DEBUG_ONLY(data_invalid = (cr)->data_invalid);							\
	assert((process_id == in_tend) || ((0 == in_tend) && (0 == data_invalid)));			\
	assert((0 == in_tend)										\
		|| ((process_id == in_tend) && ((0 == data_invalid) || (process_id == data_invalid))));	\
	(cr)->data_invalid = process_id;								\
}
|
|
|
|
/* Macro to be used whenever cr->data_invalid needs to be re-set.
 * Clears cr->data_invalid; debug builds assert that it currently holds our process_id.
 */
#define RESET_DATA_INVALID(cr)					\
{								\
	uint4	data_invalid;					\
								\
	DEBUG_ONLY(data_invalid = (cr)->data_invalid);		\
	assert(process_id == data_invalid);			\
	(cr)->data_invalid = 0;					\
}
|
|
|
|
/* Macro to be used whenever cr->in_cw_set needs to be set (PIN) inside a TP transaction.
 * Pins cr through PIN_CACHE_RECORD using si->cr_array / si->cr_array_index, after asserting
 * that the index is within [0, si->cr_array_size).
 */
#define TP_PIN_CACHE_RECORD(cr, si)						\
{										\
	assert(0 <= (si)->cr_array_index);					\
	assert((si)->cr_array_index < (si)->cr_array_size);			\
	PIN_CACHE_RECORD(cr, (si)->cr_array, (si)->cr_array_index);		\
}
|
|
|
|
GBLREF cache_rec_ptr_t pin_fail_cr; /* Pointer to the cache-record that we failed while pinning */
|
|
GBLREF cache_rec pin_fail_cr_contents; /* Contents of the cache-record that we failed while pinning */
|
|
GBLREF cache_rec_ptr_t pin_fail_twin_cr; /* Pointer to twin of the cache-record that we failed to pin */
|
|
GBLREF cache_rec pin_fail_twin_cr_contents; /* Contents of twin of the cache-record that we failed to pin */
|
|
GBLREF bt_rec_ptr_t pin_fail_bt; /* Pointer to bt of the cache-record that we failed to pin */
|
|
GBLREF bt_rec pin_fail_bt_contents; /* Contents of bt of the cache-record that we failed to pin */
|
|
GBLREF int4 pin_fail_in_crit; /* Holder of crit at the time we failed to pin */
|
|
GBLREF int4 pin_fail_wc_in_free; /* Number of write cache records in free queue when we failed to pin */
|
|
GBLREF int4 pin_fail_wcs_active_lvl; /* Number of entries in active queue when we failed to pin */
|
|
GBLREF int4 pin_fail_ref_cnt; /* Reference count when we failed to pin */
|
|
GBLREF int4 pin_fail_in_wtstart; /* Count of processes in wcs_wtstart when we failed to pin */
|
|
GBLREF int4 pin_fail_phase2_commit_pidcnt; /* Number of processes in phase2 commit when we failed to pin */
|
|
/* Macro to be used whenever cr->in_cw_set needs to be set (PIN) outside of a TP transaction.
 * cr must not already be pinned by anyone: if cr->in_cw_set is non-zero, the cache record, its bt and
 * twin (when present) and several cs_addrs->nl counters are copied into the pin_fail_* globals and the
 * process GTMASSERTs. Otherwise cr is appended to crarray at crarrayindex (which is incremented, so it
 * must be an lvalue) and only then is cr->in_cw_set set to process_id.
 * cr is evaluated more than once.
 */
#define PIN_CACHE_RECORD(cr, crarray, crarrayindex)							\
{													\
	uint4	in_tend, data_invalid, in_cw_set;							\
													\
	DEBUG_ONLY(in_tend = (cr)->in_tend);								\
	DEBUG_ONLY(data_invalid = (cr)->data_invalid);							\
	assert((process_id == in_tend) || (0 == in_tend));						\
	assert((process_id == data_invalid) || (0 == data_invalid));					\
	in_cw_set = (cr)->in_cw_set;									\
	if (0 != in_cw_set)										\
	{	/* already pinned: record the evidence before dying */					\
		pin_fail_cr = (cr);									\
		pin_fail_cr_contents = *(cr);								\
		if ((cr)->bt_index)									\
		{											\
			pin_fail_bt = (bt_rec_ptr_t)GDS_ANY_REL2ABS(cs_addrs, (cr)->bt_index);		\
			pin_fail_bt_contents = *pin_fail_bt;						\
		}											\
		if ((cr)->twin)										\
		{											\
			pin_fail_twin_cr = (cache_rec_ptr_t)GDS_ANY_REL2ABS(cs_addrs, (cr)->twin);	\
			pin_fail_twin_cr_contents = *pin_fail_twin_cr;					\
		}											\
		pin_fail_in_crit		= cs_addrs->nl->in_crit;				\
		pin_fail_wc_in_free		= cs_addrs->nl->wc_in_free;				\
		pin_fail_wcs_active_lvl		= cs_addrs->nl->wcs_active_lvl;				\
		pin_fail_ref_cnt		= cs_addrs->nl->ref_cnt;				\
		pin_fail_in_wtstart		= cs_addrs->nl->in_wtstart;				\
		pin_fail_phase2_commit_pidcnt	= cs_addrs->nl->wcs_phase2_commit_pidcnt;		\
		GTMASSERT;										\
	}												\
	/* In VMS we should never set in_cw_set on an OLDER twin. */					\
	VMS_ONLY(assert(!(cr)->twin || (cr)->bt_index));						\
	/* stuff it in the array before setting in_cw_set */						\
	(crarray)[(crarrayindex)] = (cr);								\
	(crarrayindex)++;										\
	(cr)->in_cw_set = process_id;									\
}
|
|
|
|
/* Macro to be used whenever cr->in_cw_set needs to be re-set (UNPIN) (in TP or non-TP).
 * Clears cr->in_cw_set only if it currently holds our process_id; a record pinned by another
 * process (or not pinned at all) is left untouched. cr is evaluated more than once.
 */
#define UNPIN_CACHE_RECORD(cr)								\
{											\
	uint4	in_tend, data_invalid, in_cw_set;					\
											\
	in_cw_set = (cr)->in_cw_set;							\
	if (process_id == (cr)->in_cw_set) /* reset in_cw_set only if we hold it */	\
	{										\
		DEBUG_ONLY(in_tend = (cr)->in_tend);					\
		DEBUG_ONLY(data_invalid = (cr)->data_invalid);				\
		assert((process_id == in_tend) || (0 == in_tend));			\
		assert((process_id == data_invalid) || (0 == data_invalid));		\
		(cr)->in_cw_set = 0;							\
	}										\
}
|
|
|
|
/* Macro to reset cr->in_cw_set for the entire cr_array in case of a retry (TP or non-TP).
 *	crarray      : array of pinned cache-record pointers.
 *	crarrayindex : lvalue holding the number of used slots in "crarray"; reset to 0 if it was non-zero.
 * The array is walked from the last used slot down to slot 0 and each entry is handed to UNPIN_CACHE_RECORD.
 * In debug builds every entry is asserted to be pinned by us and to have neither in_tend nor data_invalid set.
 */
#define UNPIN_CR_ARRAY_ON_RETRY(crarray, crarrayindex)						\
{												\
	int4			lcl_crarrayindex;						\
	cache_rec_ptr_ptr_t	cr_ptr;								\
	cache_rec_ptr_t		cr;								\
	uint4			in_tend, data_invalid, in_cw_set;				\
												\
	lcl_crarrayindex = (crarrayindex);							\
	if (lcl_crarrayindex)									\
	{											\
		cr_ptr = (cache_rec_ptr_ptr_t)&(crarray)[lcl_crarrayindex - 1];			\
		while (lcl_crarrayindex--)							\
		{										\
			cr = *cr_ptr;								\
			DEBUG_ONLY(in_tend = cr->in_tend);					\
			DEBUG_ONLY(data_invalid = cr->data_invalid);				\
			DEBUG_ONLY(in_cw_set = cr->in_cw_set);					\
			assert(!data_invalid);							\
			assert(!in_tend);							\
			assert(process_id == in_cw_set);					\
			UNPIN_CACHE_RECORD(cr);							\
			cr_ptr--;								\
		}										\
		(crarrayindex) = 0;								\
	}											\
}

/* Macro to reset cr->in_cw_set (UNPIN) for the entire cr_array in case of a commit (TP or non-TP).
 * Usually in_cw_set is set for all cache-records that we are planning on updating before we start phase1.
 * After updating each cse in phase2, we reset the corresponding cse->cr->in_cw_set.
 * Therefore on a successful commit, after completing all cses in phase2, we dont expect any pinned cr->in_cw_set at all.
 * This is true for Unix but in VMS where we could have twins, both the older and newer twins have the in_cw_set set in
 * phase1 while only the newer twin's in_cw_set gets reset in phase2 (since only this cr will be stored in cse->cr).
 * Therefore there could be a few cache-records which need to be unpinned even after all cses are done in phase2.
 * The following macro unpins those. It is structured in such a way that in Unix, it only checks that all have been reset
 * while it actually does the reset only in VMS.
 *
 *	crarray      : array of pinned cache-record pointers.
 *	crarrayindex : lvalue holding the number of used slots in "crarray"; left at 0 by every variant.
 * All three variants expand to a brace-enclosed block so they can be used interchangeably.
 */
#if defined(VMS)
#define UNPIN_CR_ARRAY_ON_COMMIT(crarray, crarrayindex)						\
{												\
	int4			lcl_crarrayindex;						\
	cache_rec_ptr_ptr_t	cr_ptr;								\
	cache_rec_ptr_t		cr;								\
												\
	lcl_crarrayindex = (crarrayindex);							\
	if (lcl_crarrayindex)									\
	{											\
		cr_ptr = (cache_rec_ptr_ptr_t)&(crarray)[lcl_crarrayindex - 1];			\
		while (lcl_crarrayindex--)							\
		{										\
			cr = *cr_ptr;								\
			UNPIN_CACHE_RECORD(cr);							\
			cr_ptr--;								\
		}										\
		(crarrayindex) = 0;								\
	}											\
}
#elif defined(UNIX)
# ifdef DEBUG
#  define UNPIN_CR_ARRAY_ON_COMMIT(crarray, crarrayindex)					\
{	/* debug : only verify that nothing in the array is still pinned by us */		\
	int4			lcl_crarrayindex;						\
	cache_rec_ptr_ptr_t	cr_ptr;								\
	cache_rec_ptr_t		cr;								\
												\
	lcl_crarrayindex = (crarrayindex);							\
	if (lcl_crarrayindex)									\
	{											\
		cr_ptr = (cache_rec_ptr_ptr_t)&(crarray)[lcl_crarrayindex - 1];			\
		while (lcl_crarrayindex--)							\
		{										\
			cr = *cr_ptr;								\
			assert(process_id != cr->in_cw_set);					\
			cr_ptr--;								\
		}										\
		(crarrayindex) = 0;								\
	}											\
}
# else
#  define UNPIN_CR_ARRAY_ON_COMMIT(crarray, crarrayindex)					\
{	/* pro : nothing to verify, just forget the array contents */				\
	(crarrayindex) = 0;									\
}
# endif
#endif

/* Attach to the journal pool (if not already done) and verify that it matches the database.
 *	CSA : pointer whose "replinst_matches_db" field caches a successful check (the check is skipped once TRUE).
 *	CSD : passed to REPL_ALLOWED to decide whether any of this is needed.
 *	CNL : pointer whose "replinstfilename" field is compared against the journal pool's instance file name.
 * Nothing is done unless REPL_ALLOWED(CSD) and is_replicator are both true.
 * Errors issued (in both cases after detaching from the journal pool) :
 *	ERR_SCNDDBNOUPD      if updates are disabled in the journal pool and we are not the update process.
 *	ERR_REPLINSTMISMTCH  if the instance file names differ.
 * The expansion also references "jnlpool" and "gv_cur_region" which must be visible at the point of use.
 */
#define JNLPOOL_INIT_IF_NEEDED(CSA, CSD, CNL)									\
{														\
	unsigned char		instfilename_copy[MAX_FN_LEN + 1];						\
	sm_uc_ptr_t		jnlpool_instfilename;								\
														\
	GBLREF	jnlpool_ctl_ptr_t	jnlpool_ctl;								\
	GBLREF	boolean_t		is_replicator;								\
	GBLREF	boolean_t		pool_init;								\
	GBLREF	boolean_t		is_updproc;								\
														\
	if (REPL_ALLOWED(CSD) && is_replicator)									\
	{													\
		if (FALSE == pool_init)										\
			jnlpool_init((jnlpool_user)GTMPROC, (boolean_t)FALSE, (boolean_t *)NULL);		\
		assert(pool_init);										\
		if (!(CSA)->replinst_matches_db)								\
		{												\
			if (jnlpool_ctl->upd_disabled && !is_updproc)						\
			{	/* Updates are disabled in this journal pool. Detach from journal pool and issue error. */	\
				assert(NULL != jnlpool.jnlpool_ctl);						\
				jnlpool_detach();								\
				assert(NULL == jnlpool.jnlpool_ctl);						\
				assert(FALSE == pool_init);							\
				rts_error(VARLSTCNT(1) ERR_SCNDDBNOUPD);					\
			}											\
			UNIX_ONLY(jnlpool_instfilename = (sm_uc_ptr_t)jnlpool_ctl->jnlpool_id.instfilename;)	\
			VMS_ONLY(jnlpool_instfilename = (sm_uc_ptr_t)jnlpool_ctl->jnlpool_id.gtmgbldir;)	\
			if (STRCMP((CNL)->replinstfilename, jnlpool_instfilename))				\
			{	/* Replication instance file mismatch. Issue error. But before that detach from journal pool.	\
				 * Copy replication instance file name in journal pool to temporary memory before detaching.	\
				 */										\
				UNIX_ONLY(assert(SIZEOF(instfilename_copy) == SIZEOF(jnlpool_ctl->jnlpool_id.instfilename)));	\
				VMS_ONLY(assert(SIZEOF(instfilename_copy) == SIZEOF(jnlpool_ctl->jnlpool_id.gtmgbldir)));	\
				memcpy(&instfilename_copy[0], jnlpool_instfilename, SIZEOF(instfilename_copy));	\
				assert(NULL != jnlpool.jnlpool_ctl);						\
				jnlpool_detach();								\
				assert(NULL == jnlpool.jnlpool_ctl);						\
				assert(FALSE == pool_init);							\
				rts_error(VARLSTCNT(8) ERR_REPLINSTMISMTCH, 6, LEN_AND_STR(instfilename_copy),	\
					DB_LEN_STR(gv_cur_region), LEN_AND_STR((CNL)->replinstfilename));	\
			}											\
			(CSA)->replinst_matches_db = TRUE;							\
		}												\
	}													\
}

/* Explanation for why we need the following macro.
 *
 * Normally a cdb_sc_blkmod check is done using the "bt". This is done in t_end and tp_tend.
 * But that is possible only if we hold crit. There are a few routines (TP only) that need
 * to do this check outside of crit (e.g. tp_hist, gvcst_search). For those, the following macro
 * is defined. This macro compares transaction numbers directly from the buffer instead of
 * going through the bt or blk queues. This is done to speed up processing. One consequence
 * is that we might encounter a situation where the buffer's contents hasn't been modified,
 * but the block might actually have been changed i.e. in VMS a twin buffer might have been
 * created or the "blk" field in the cache-record corresponding to this buffer might have
 * been made CR_BLKEMPTY etc. In these cases, we rely on the fact that the cycle for the
 * buffer would have been incremented thereby saving us in the cdb_sc_lostcr check which will
 * always FOLLOW (never PRECEDE) this check.
 *
 * Note that in case of BG, it is possible that the buffer could be in the process of being updated
 * (phase2 outside of crit). In this case we have to restart as otherwise we could incorrectly
 * validate an inconsistent state of the database as okay. For example, say our search path
 * contains a level-1 root-block and a level-0 data block. If both of these blocks were
 * concurrently being updated in phase2 (outside of crit) by another process, it is possible
 * (because of the order in which blocks are committed) that the data block contents get
 * modified first but the index block is still unchanged. If we traversed down the tree at
 * this instant, we are looking at a search path that contains a mix of pre-update and post-update
 * blocks and should never validate this traversal as okay. In this case, the cache record
 * corresponding to the index block would have its "in_tend" flag non-zero indicating update is pending.
 *
 * The order of the check should be cr->in_tend BEFORE the buffaddr->tn check. Doing it otherwise
 * would mean it is possible for the buffaddr->tn check to succeed and before the cr->in_tend
 * check is done the buffer gets rebuilt (from start to finish in phase2). This would result
 * in us falsely validating this transaction as okay when in fact we should have restarted.
 *
 * Because we rely on the fact that cr->in_tend is reset to 0 AFTER t1->buffaddr->tn is updated, and
 * since these could be updated concurrently, and since this macro is used outside of crit, we need to
 * ensure a read memory barrier is done. Currently, the only two places which use this macro are tp_hist.c
 * and gvcst_search.c. Out of this, the latter uses this only as a performance measure and not for correctness.
 * But the former uses this for correctness. In fact tp_tend.c relies on tp_hist.c doing a proper validation.
 * Therefore the read memory barrier is essential in tp_hist.c and not needed in gvcst_search.c. See tp_hist.c
 * for use of the read memory barrier and a comment describing why it is ok to do it only once per function
 * invocation (instead of using it once per block that gets validated).
 *
 * There are two variants of this macro.
 * TP_IS_CDB_SC_BLKMOD  : That calculates the blktn by doing t1->buffaddr->tn explicitly.
 * TP_IS_CDB_SC_BLKMOD3 : This is provided the blktn as input so can avoid the explicit calculation.
 *
 * Both evaluate to non-zero if "cr" is non-NULL with in_tend set OR if t1->tn is less than or equal to the block tn.
 * "blktn" is parenthesized so that any expression (e.g. a conditional expression) can be passed safely.
 */
#define TP_IS_CDB_SC_BLKMOD(cr, t1)		(((NULL != (cr)) && (cr)->in_tend) || ((t1)->tn <= ((blk_hdr_ptr_t)(t1)->buffaddr)->tn))
#define TP_IS_CDB_SC_BLKMOD3(cr, t1, blktn)	(((NULL != (cr)) && (cr)->in_tend) || ((t1)->tn <= (blktn)))

/* Address of the byte immediately following the file header structure that SGD points to */
#define MM_ADDR(SGD)		((sm_uc_ptr_t)(((sgmnt_data_ptr_t)(SGD)) + 1))
/* Master map sizes, expressed in units of DISK_BLOCK_SIZE blocks */
#ifdef VMS
#define MASTER_MAP_BLOCKS_DFLT		64				/* 64 gives 128M possible blocks */
#else
#define MASTER_MAP_BLOCKS_DFLT		112				/* 112 gives 224M possible blocks */
#endif
#define MASTER_MAP_BLOCKS_V4		32				/* 32 gives 64M possible blocks */
#define MASTER_MAP_BLOCKS_MAX		112				/* 112 gives 224M possible blocks */
#define MASTER_MAP_BLOCKS_V5_OLD	64				/* V5 database previous master map block size */
/* Master map sizes in bytes */
#define MASTER_MAP_SIZE_V5_OLD	(MASTER_MAP_BLOCKS_V5_OLD * DISK_BLOCK_SIZE)
#define MASTER_MAP_SIZE_V4	(MASTER_MAP_BLOCKS_V4 * DISK_BLOCK_SIZE)	/* MUST be a multiple of DISK_BLOCK_SIZE */
#define MASTER_MAP_SIZE_MAX	(MASTER_MAP_BLOCKS_MAX * DISK_BLOCK_SIZE)	/* MUST be a multiple of DISK_BLOCK_SIZE */
#define MASTER_MAP_SIZE_DFLT	(MASTER_MAP_BLOCKS_DFLT * DISK_BLOCK_SIZE)	/* MUST be a multiple of DISK_BLOCK_SIZE */
#define MASTER_MAP_SIZE(SGD)	(((sgmnt_data_ptr_t)(SGD))->master_map_len)
/* File header sizes : the sgmnt_data structure followed by the master map */
#define SGMNT_HDR_LEN		SIZEOF(sgmnt_data)
#define SIZEOF_FILE_HDR(SGD)	(SGMNT_HDR_LEN + MASTER_MAP_SIZE(SGD))
#define SIZEOF_FILE_HDR_DFLT	(SGMNT_HDR_LEN + MASTER_MAP_SIZE_DFLT)
#define SIZEOF_FILE_HDR_MIN	(SGMNT_HDR_LEN + MASTER_MAP_SIZE_V4)
#define SIZEOF_FILE_HDR_MAX	(SGMNT_HDR_LEN + MASTER_MAP_SIZE_MAX)
#define MM_BLOCK		(SGMNT_HDR_LEN / DISK_BLOCK_SIZE + 1)	/* gt.m numbers blocks from 1 */
#define TH_BLOCK		1

#define JNL_NAME_SIZE		256		/* possibly expanded when opened */
#define JNL_NAME_EXP_SIZE	1024		/* MAXPATHLEN, before jnl_buffer in shared memory */

/* Each master map byte covers 8 local bitmaps and each local bitmap covers BLKS_PER_LMAP blocks */
#define BLKS_PER_LMAP		512
#define MAXTOTALBLKS_V4		(MASTER_MAP_SIZE_V4 * 8 * BLKS_PER_LMAP)
#define MAXTOTALBLKS_V5		(MASTER_MAP_SIZE_MAX * 8 * BLKS_PER_LMAP)
#define MAXTOTALBLKS_MAX	(MASTER_MAP_SIZE_MAX * 8 * BLKS_PER_LMAP)
#define MAXTOTALBLKS(SGD)	(MASTER_MAP_SIZE(SGD) * 8 * BLKS_PER_LMAP)
#define IS_BITMAP_BLK(blk)	(ROUND_DOWN2((blk), BLKS_PER_LMAP) == (blk))	/* TRUE if blk is a bitmap */
/*
 * UNIX - 8K fileheader (= 16 blocks) + 56K mastermap (= 112 blocks) + 1
 * VMS  - 8K fileheader (= 16 blocks) + 32K mastermap (= 64 blocks) + 24K padding (= 48 blocks) + 1
 */
#define START_VBN_V5		129
#define	START_VBN_V4		49	/* 8K fileheader (= 16 blocks) + 16K mastermap (= 32 blocks) + 1 */
#define START_VBN_CURRENT	START_VBN_V5

/* Bounds and step used when adjusting flush_trigger */
#define	STEP_FACTOR			64		/* the factor by which flush_trigger is incremented/decremented */
#define	MIN_FLUSH_TRIGGER(n_bts)	((n_bts)/4)	/* the minimum flush_trigger as a function of n_bts */
#define	MAX_FLUSH_TRIGGER(n_bts)	((n_bts)*15/16)	/* the maximum flush_trigger as a function of n_bts */

/* Lower and upper limits for a fill factor */
#define MIN_FILLFACTOR 30
#define MAX_FILLFACTOR 100

/* DEBUG_DYNGRD_ONLY(X) expands to X only when DEBUG_DYNGRD is defined and to nothing otherwise */
#ifdef DEBUG_DYNGRD
#  define DEBUG_DYNGRD_ONLY(X) X
#else
#  define DEBUG_DYNGRD_ONLY(X)
#endif

/* Invoke wcs_wtstart for a region.
 * VMS  : wcs_wtstart is queued through sys$dclast; if that fails it is called directly with ASTs disabled
 *        (and ASTs are re-enabled afterwards only if they were enabled to begin with).
 * UNIX : wcs_wtstart(reg, num_bufs) is called and its return value is stored in RET.
 *        The expansion already ends in a semicolon.
 */
#ifdef VMS
/* RET is a dummy that is not really used on VMS */
#define	DCLAST_WCS_WTSTART(reg, num_bufs, RET)					\
{										\
	unsigned int	status;							\
										\
	if (SS$_NORMAL != (status = sys$dclast(wcs_wtstart, reg, 0)))		\
	{									\
		assert(FALSE);							\
		status = DISABLE_AST;						\
		wcs_wtstart(reg);						\
		if (SS$_WASSET == status)					\
			ENABLE_AST;						\
	}									\
}
#elif defined(UNIX)
#define	DCLAST_WCS_WTSTART(reg, num_bufs, RET)	RET = wcs_wtstart(reg, num_bufs);
#else
#error UNSUPPORTED PLATFORM
#endif

/* Record "pid" in the first free (zero) slot of cnl->wtstart_pid[].
 *	cnl   : pointer to the structure holding the wtstart_pid[] array.
 *	pid   : value to store.
 *	index : lvalue that receives the slot used, or MAX_WTSTART_PID_SLOTS if no slot was free
 *		(in which case nothing is stored).
 */
#define SAVE_WTSTART_PID(cnl, pid, index)					\
{										\
	for ((index) = 0; (index) < MAX_WTSTART_PID_SLOTS; (index)++)		\
		if (0 == (cnl)->wtstart_pid[(index)])				\
			break;							\
	if (MAX_WTSTART_PID_SLOTS > (index))					\
		(cnl)->wtstart_pid[(index)] = (pid);				\
}

/* Zero the cnl->wtstart_pid[] slot identified by "index".
 * An index of MAX_WTSTART_PID_SLOTS or more (i.e. no slot was obtained) is silently ignored.
 */
#define CLEAR_WTSTART_PID(cnl, index)				\
{								\
	if (MAX_WTSTART_PID_SLOTS > (index))			\
		(cnl)->wtstart_pid[(index)] = 0;		\
}

/* Non-zero if either of cnl->intent_wtstart or cnl->in_wtstart is positive */
#define WRITERS_ACTIVE(cnl)	((0 < (cnl)->intent_wtstart) || (0 < (cnl)->in_wtstart))

/* Set csd->wc_blocked to TRUE (through SET_TRACEABLE_VAR) and follow it with a write memory barrier */
#define SIGNAL_WRITERS_TO_STOP(csd)								\
{												\
	SET_TRACEABLE_VAR((csd)->wc_blocked, TRUE);	/* to stop all active writers */	\
	/* memory barrier needed to broadcast this information to other processors */		\
	SHM_WRITE_MEMORY_BARRIER;								\
}

/* Sleep (wcs_sleep) until WRITERS_ACTIVE(cnl) becomes false or "maxiters" iterations have been done.
 *	cnl      : pointer examined by WRITERS_ACTIVE.
 *	lcnt     : lvalue loop counter; starts at 1 and is greater than "maxiters" on exit only if we gave up waiting.
 *	maxiters : maximum number of sleep iterations.
 */
#define WAIT_FOR_WRITERS_TO_STOP(cnl, lcnt, maxiters)						\
{	/* We need to ensure that an uptodate value of cnl->intent_wtstart is read in the	\
	 * WRITERS_ACTIVE macro every iteration of the loop hence the read memory barrier.	\
	 */											\
	SHM_READ_MEMORY_BARRIER;								\
	for ((lcnt) = 1; WRITERS_ACTIVE(cnl) && ((lcnt) <= (maxiters)); (lcnt)++)		\
	{	/* wait for any processes INSIDE or at ENTRY of wcs_wtstart to finish */	\
		wcs_sleep(lcnt);								\
		SHM_READ_MEMORY_BARRIER;							\
	}											\
}

/* Set csd->wc_blocked to FALSE (through SET_TRACEABLE_VAR) and follow it with a write memory barrier */
#define SIGNAL_WRITERS_TO_RESUME(csd)								\
{												\
	SET_TRACEABLE_VAR((csd)->wc_blocked, FALSE);	/* to let active writers resume */	\
	/* memory barrier needed to broadcast this information to other processors */		\
	SHM_WRITE_MEMORY_BARRIER;								\
}

/* Increment cnl->intent_wtstart (using INCR_CNT with cnl->wc_var_lock) to signal intent to enter wcs_wtstart.
 * If the counter is still not positive after the increment, it is incremented a second time.
 */
#define INCR_INTENT_WTSTART(cnl)									\
{													\
	INCR_CNT(&(cnl)->intent_wtstart, &(cnl)->wc_var_lock); /* signal intent to enter wcs_wtstart */	\
	if (0 >= (cnl)->intent_wtstart)									\
	{	/* possible if wcs_verify had reset this flag */					\
		INCR_CNT(&(cnl)->intent_wtstart, &(cnl)->wc_var_lock);					\
		/* wcs_verify cannot possibly have reset this flag again because it does this only	\
		 * after wcs_recover waits for a maximum of 1 minute (for this flag to become zero)	\
		 * before giving up. Therefore for that to happen, we should have been context		\
		 * switched out for 1 minute after the second INCR_CNT but before the below assert)	\
		 * We believe that is an extremely unlikely condition so dont do anything about it.	\
		 * In the worst case this will get reset to 0 by the next wcs_verify or INCR_CNT	\
		 * (may need multiple INCR_CNTs depending on how negative a value this is) whichever	\
		 * happens sooner.									\
		 */											\
		assert(0 < (cnl)->intent_wtstart);							\
	}												\
}

/* Decrement cnl->intent_wtstart (using DECR_CNT with cnl->wc_var_lock) but only if it is currently positive */
#define DECR_INTENT_WTSTART(cnl)						\
{										\
	if (0 < (cnl)->intent_wtstart)						\
		DECR_CNT(&(cnl)->intent_wtstart, &(cnl)->wc_var_lock);		\
	/* else possible if wcs_verify had reset this flag */			\
}

/* Make sure the journal file is open if journaling is enabled for the region.
 *	csa : must be the same as the global cs_addrs (asserted).
 *	reg : must be the same as the global gv_cur_region (asserted) and must not be read-only (asserted).
 * If JNL_ENABLED, crit is grabbed (unless already held), JNL_ENABLED is rechecked inside crit and jnl_ensure_open
 * is called, after which crit is released (only if it was grabbed here). A non-zero status from jnl_ensure_open is
 * issued as an rts_error with jpc->status appended as an extra argument when that is not SS_NORMAL.
 * Note: the expansion declares locals named was_crit, jpc, csd and jnl_status.
 */
#define ENSURE_JNL_OPEN(csa, reg)										\
{														\
	boolean_t		was_crit;									\
	jnl_private_control	*jpc;										\
	sgmnt_data_ptr_t	csd;										\
	uint4			jnl_status;									\
														\
	assert(cs_addrs == (csa));										\
	assert(gv_cur_region == (reg));										\
	assert(FALSE == (reg)->read_only);									\
	csd = (csa)->hdr;											\
	if (JNL_ENABLED(csd))											\
	{													\
		was_crit = (csa)->now_crit;									\
		if (!was_crit)											\
			grab_crit(reg);										\
		/* recheck inside crit before opening */							\
		jnl_status = JNL_ENABLED(csd) ? jnl_ensure_open() : 0;						\
		if (!was_crit)											\
			rel_crit(reg);										\
		if (0 != jnl_status)										\
		{												\
			jpc = (csa)->jnl;									\
			assert(NULL != jpc);									\
			if (SS_NORMAL != jpc->status)								\
				rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region),	\
						jpc->status);							\
			else											\
				rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));	\
		}												\
	}													\
}

/* Runs ENSURE_JNL_OPEN(csa, reg) followed by DCLAST_WCS_WTSTART(reg, num_bufs, RET).
 * the RET is meaningful only on UNIX
 */
#define JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, num_bufs, RET)	\
{								\
	ENSURE_JNL_OPEN(csa, reg);				\
	DCLAST_WCS_WTSTART(reg, num_bufs, RET);			\
}

/* Macros to effect changes in the blks_to_upgrd field of the file-header.
 * We should hold crit on the region in all cases except for one when we are in MUPIP CREATE (but we are still standalone here).
 * Therefore we need not use any interlocks to update this field. This is asserted below.
 * Although we can derive "csd" from "csa", we pass them as two separate arguments for performance reasons.
 * Use local variables to record shared memory information for debugging purposes in case of an assert failure.
 *
 *	csa   : pointer whose "hdr" field must equal "csd" (asserted).
 *	csd   : file-header pointer whose blks_to_upgrd counter is adjusted.
 *	delta : non-zero (asserted) signed adjustment; evaluated exactly once.
 * Outcome depending on the new counter value :
 *	> 0 : fully_upgraded is set to FALSE.
 *	= 0 : tn_upgrd_blks_0 is set to the current transaction number.
 *	< 0 : the counter is set to 0 and blks_to_upgrd_subzero_error is increased by the magnitude of the shortfall.
 */
#define INCR_BLKS_TO_UPGRD(csa, csd, delta)						\
{											\
	int4	new_blks_to_upgrd;							\
	int4	cur_blks_to_upgrd;							\
	int4	cur_delta;								\
											\
	assert((csd)->createinprogress || (csa)->now_crit);				\
	cur_delta = (delta);								\
	assert((csa)->hdr == (csd));							\
	assert(0 != cur_delta);								\
	cur_blks_to_upgrd = (csd)->blks_to_upgrd;					\
	assert(0 <= cur_blks_to_upgrd);							\
	new_blks_to_upgrd = cur_delta + cur_blks_to_upgrd;				\
	assert(0 <= new_blks_to_upgrd);							\
	(csd)->blks_to_upgrd = new_blks_to_upgrd;					\
	if (0 >= new_blks_to_upgrd)							\
	{										\
		if (0 == new_blks_to_upgrd)						\
			(csd)->tn_upgrd_blks_0 = (csd)->trans_hist.curr_tn;		\
		else									\
		{	/* blks_to_upgrd counter in the fileheader should never hold a	\
			 * negative value. Note down the negative value in a separate	\
			 * field for debugging and set the counter to 0.		\
			 */								\
			(csd)->blks_to_upgrd = 0;					\
			(csd)->blks_to_upgrd_subzero_error -= (new_blks_to_upgrd);	\
		}									\
	} else										\
		(csd)->fully_upgraded = FALSE;						\
}
#define DECR_BLKS_TO_UPGRD(csa, csd, delta)	INCR_BLKS_TO_UPGRD((csa), (csd), -(delta))

/* Interlocked queue instruction constants ... */

#define	QI_STARVATION		3
#define EMPTY_QUEUE		0L
#define QUEUE_WAS_EMPTY		1
#define INTERLOCK_FAIL		(-1L)	/* parenthesized since it is a negative constant */
#define QUEUE_INSERT_SUCCESS	1

typedef trans_num bg_trc_rec_tn;
|
|
typedef int4 bg_trc_rec_cntr;
|
|
|
|
typedef struct
|
|
{
|
|
int4 curr_count; /* count for this invocation of shared memory */
|
|
int4 cumul_count; /* count from the creation of database (not including this invocation) */
|
|
} db_csh_acct_rec;
|
|
|
|
#define TAB_DB_CSH_ACCT_REC(A,B,C) A,
|
|
enum db_csh_acct_rec_type
|
|
{
|
|
#include "tab_db_csh_acct_rec.h"
|
|
n_db_csh_acct_rec_types
|
|
};
|
|
#undef TAB_DB_CSH_ACCT_REC
|
|
|
|
#include "gvstats_rec.h"
|
|
|
|
/* Store "value" in the "counter" field of csa->gvstats_rec */
#define	GVSTATS_SET_CSA_STATISTIC(csa, counter, value)		\
{								\
	(csa)->gvstats_rec.counter = (value);			\
}

/* Add "increment" to the "counter" field of both csa->gvstats_rec and cnl->gvstats_rec.
 * Note: "increment" is evaluated twice so it should not have side effects.
 */
#define	INCR_GVSTATS_COUNTER(csa, cnl, counter, increment)	\
{								\
	(csa)->gvstats_rec.counter += (increment);		\
	(cnl)->gvstats_rec.counter += (increment);		\
}

/* Add "increment" to csa->hdr->counter.curr_count if csa->read_write is set or the access method is dba_bg.
 * Compiled in only if DEBUG or DEBUG_DB_CSH_COUNTER is defined; expands to nothing otherwise.
 * The active variant is a brace-enclosed block so that its "if" can never capture an "else" at the call site.
 */
#if defined(DEBUG) || defined(DEBUG_DB_CSH_COUNTER)
#	define	INCR_DB_CSH_COUNTER(csa, counter, increment)				\
{											\
	if ((csa)->read_write || (dba_bg == (csa)->hdr->acc_meth))			\
		(csa)->hdr->counter.curr_count += (increment);				\
}
#else
#	define	INCR_DB_CSH_COUNTER(csa, counter, increment)
#endif

enum tp_blkmod_type		/* used for accounting in cs_data->tp_cdb_sc_blkmod[] */
{
	tp_blkmod_nomod = 0,
	tp_blkmod_gvcst_srch,
	tp_blkmod_t_qread,
	tp_blkmod_tp_tend,
	tp_blkmod_tp_hist,
	n_tp_blkmod_types	/* number of types above; keep this last */
};

/* Below is a list of macro bitmasks used to set the global variable "donot_commit". This variable should normally be 0.
 * But in rare cases, we could end up in situations where we know it is a restartable situation but decide not to
 * restart right away (because of interface issues that the function where this is detected cannot signal a restart
 * or because we dont want to take a performance hit to check this restartable situation in highly frequented code if
 * the restart will anyway be detected before commit). In these cases, this variable will take on non-zero values.
 * The commit logic will assert that this variable is indeed zero after validation but before proceeding with commit.
 */
#define	DONOTCOMMIT_TPHIST_BLKTARGET_MISMATCH		(1 << 0) /* Restartable situation encountered in tp_hist */
#define	DONOTCOMMIT_GVCST_DELETE_BLK_CSE_TLEVEL		(1 << 1) /* Restartable situation encountered in gvcst_delete_blk */
#define	DONOTCOMMIT_JNLGETCHECKSUM_NULL_CR		(1 << 2) /* Restartable situation encountered in jnl_get_checksum.h */
#define	DONOTCOMMIT_GVCST_KILL_ZERO_TRIGGERS		(1 << 3) /* Restartable situation encountered in gvcst_kill */
#define	DONOTCOMMIT_GVCST_BLK_BUILD_TPCHAIN		(1 << 4) /* Restartable situation encountered in gvcst_blk_build */

/* Build the enum from the second field of every TAB_BG_TRC_REC entry in tab_bg_trc_rec.h;
 * n_bg_trc_rec_types, coming last, ends up holding the number of entries.
 */
#define TAB_BG_TRC_REC(A,B)	B,
enum bg_trc_rec_type
{
#include "tab_bg_trc_rec.h"
n_bg_trc_rec_types
};
#undef TAB_BG_TRC_REC

#define UPGRD_WARN_INTERVAL (60 * 60 * 24)	/* Once every 24 hrs (value is in seconds) */
typedef struct compswap_time_field_struct
|
|
{ /* This structure is used where we want to do a compare-n-swap (CAS) on a time value. The CAS interfaces
|
|
need an instance of global_latch_t to operate on. We will utilize the "latch_pid" field to hold the
|
|
time and the latch_word is unused except on VMS where it will hold 0. Since this structure must be of
|
|
a constant size (size of global_latch_t varies), pad the latch with sufficient space to match the
|
|
size of global_latch_t's largest size (on HPUX).
|
|
*/
|
|
global_latch_t time_latch;
|
|
#ifndef __hppa
|
|
int4 hp_latch_space[4]; /* padding only on non-hpux systems */
|
|
#endif
|
|
} compswap_time_field;
|
|
/* takes value of time() but needs to be 4 byte so can use compswap on it. Not
|
|
using time_t as that is an indeterminate size on various platforms.
|
|
Value is time (in seconds) in a compare/swap updated field so only one
|
|
process performs a given task in a given interval
|
|
*/
|
|
#define cas_time time_latch.u.parts.latch_pid
|
|
|
|
/* The following structure is used to determine
|
|
the endianess of a database header.
|
|
*/
|
|
|
|
typedef union
|
|
{
|
|
struct {
|
|
unsigned short little_endian;
|
|
unsigned short big_endian;
|
|
} shorts;
|
|
uint4 word32;
|
|
} endian32_struct;
|
|
|
|
#ifdef BIGENDIAN
|
|
# define ENDIANCHECKTHIS big_endian
|
|
#else
|
|
# define ENDIANCHECKTHIS little_endian
|
|
#endif
|
|
|
|
/* Issue ERR_DBENDIAN if the file header's minor_dbver does not look like it was written on this platform's endianness.
 *	CSD           : file-header pointer whose minor_dbver field is examined.
 *	FNLEN, FNNAME : length and name passed through to the error message.
 * minor_dbver is copied into a 32-bit word and the 16-bit half named by ENDIANCHECKTHIS is tested; if that half
 * is zero the error is issued.
 */
#define CHECK_DB_ENDIAN(CSD,FNLEN,FNNAME)								\
{													\
	endian32_struct	check_endian;									\
													\
	check_endian.word32 = (CSD)->minor_dbver;							\
	if (!check_endian.shorts.ENDIANCHECKTHIS)							\
		rts_error(VARLSTCNT(6) ERR_DBENDIAN, 4, FNLEN, FNNAME, ENDIANOTHER, ENDIANTHIS);	\
}

/* This is the structure describing a segment. It is used as a database file header (for MM or BG access methods).
|
|
* The overloaded fields for MM and BG are n_bts, bt_buckets. */
|
|
|
|
/* ***NOTE*** If the field minor_dbver is updated, please also update gdsdbver.h and db_auto_upgrade.c appropriately
|
|
(see db_auto_upgrade for reasons and description). SE 5/2006
|
|
*/
|
|
typedef struct sgmnt_data_struct
|
|
{
|
|
/************* MOSTLY STATIC DATABASE STATE FIELDS **************************/
|
|
unsigned char label[GDS_LABEL_SZ];
|
|
int4 blk_size; /* Block size for the file. Static data defined at db creation time */
|
|
int4 master_map_len; /* Length of master map */
|
|
int4 bplmap; /* Blocks per local map (bitmap). static data defined at db creation time */
|
|
int4 start_vbn; /* starting virtual block number. */
|
|
enum db_acc_method acc_meth; /* Access method (BG or MM) */
|
|
uint4 max_bts; /* Maximum number of bt records allowed in file */
|
|
int4 n_bts; /* number of cache record/blocks */
|
|
int4 bt_buckets; /* Number of buckets in bt table */
|
|
int4 reserved_bytes; /* Database blocks will always leave this many bytes unused */
|
|
int4 max_rec_size; /* maximum record size allowed for this file */
|
|
int4 max_key_size; /* maximum key size allowed for this file */
|
|
uint4 lock_space_size; /* Number of bytes to be used for locks (in database for bg) */
|
|
uint4 extension_size; /* Number of gds data blocks to extend by */
|
|
uint4 def_coll; /* Default collation type for new globals */
|
|
uint4 def_coll_ver; /* Default collation type version */
|
|
boolean_t std_null_coll; /* 0 -> GT.M null collation,i,e, null subs collate between numeric and string
|
|
* 1-> standard null collation i.e. null subs collate before numeric and string */
|
|
boolean_t null_subs;
|
|
uint4 free_space; /* Space in file header not being used */
|
|
mutex_spin_parms_struct mutex_spin_parms;
|
|
int4 max_update_array_size; /* maximum size of update array needed for one non-TP set/kill */
|
|
int4 max_non_bm_update_array_size;/* maximum size of update array excepting bitmaps */
|
|
boolean_t file_corrupt; /* If set, it shuts the file down. No process (except DSE) can
|
|
* successfully map this section after the flag is set to TRUE. Processes
|
|
* that already have it mapped should produce an error the next time that
|
|
* they use the file. The flag can only be reset by the DSE utility.
|
|
*/
|
|
enum mdb_ver minor_dbver; /* Minor DB version field that is incremented when minor changes to this
|
|
* file-header or API changes occur. See note at top of sgmnt_data.
|
|
*/
|
|
uint4 jnl_checksum;
|
|
uint4 wcs_phase2_commit_wait_spincnt; /* # of spin iterations before sleeping while waiting for phase2 commits */
|
|
enum mdb_ver last_mdb_ver; /* Minor DB version of the GT.M version that last accessed this database.
|
|
* Maintained only by GT.M versions V5.3-003 and greater.
|
|
*/
|
|
/* The structure is 128-bytes in size at this point */
|
|
/************* FIELDS SET AT CREATION TIME ********************************/
|
|
char filler_created[52]; /* Now unused .. was "file_info created" */
|
|
boolean_t createinprogress; /* TRUE only if MUPIP CREATE is in progress. FALSE otherwise */
|
|
int4 creation_time4; /* Lower order 4-bytes of time when the database file was created */
|
|
int4 creation_filler_8byte;
|
|
|
|
/************* FIELDS USED BY TN WARN PROCESSING *************************/
|
|
trans_num max_tn; /* Hardstop TN for this database */
|
|
trans_num max_tn_warn; /* TN for next TN_RESET warning for this database */
|
|
|
|
/************* FIELDS SET BY MUPIP BACKUP/REORG *************************/
|
|
trans_num last_inc_backup;
|
|
trans_num last_com_backup;
|
|
trans_num last_rec_backup;
|
|
block_id last_inc_bkup_last_blk; /* Last block in the database at time of last incremental backup */
|
|
block_id last_com_bkup_last_blk; /* Last block in the database at time of last comprehensive backup */
|
|
block_id last_rec_bkup_last_blk; /* Last block in the database at time of last record-ed backup */
|
|
block_id reorg_restart_block;
|
|
char filler_256[8];
|
|
/************* FIELDS SET WHEN DB IS OPEN ********************************/
|
|
char now_running[MAX_REL_NAME];/* for active version stamp */
|
|
uint4 owner_node; /* Node on cluster that "owns" the file */
|
|
uint4 image_count; /* for db freezing. Set to "process_id" on Unix and "image_count" on VMS */
|
|
uint4 freeze; /* for db freezing. Set to "getuid" on Unix and "process_id" on VMS */
|
|
int4 kill_in_prog; /* counter for multi-crit kills that are not done yet */
|
|
int4 abandoned_kills;
|
|
char filler_320[8];
|
|
/************* FIELDS USED IN V4 <==> V5 COMPATIBILITY MODE ****************/
|
|
trans_num tn_upgrd_blks_0; /* TN when blks_to_upgrd becomes 0.
|
|
* Never set = 0 => we have not achieved this yet,
|
|
* Always set = 1 => database was created as V5 (or current version)
|
|
*/
|
|
trans_num desired_db_format_tn; /* Database tn when last db format change occurred */
|
|
trans_num reorg_db_fmt_start_tn; /* Copy of desired_db_format_tn when MUPIP REORG UPGRADE/DOWNGRADE started */
|
|
block_id reorg_upgrd_dwngrd_restart_block; /* Block numbers lesser than this were last upgraded/downgraded by
|
|
* MUPIP REORG UPGRADE|DOWNGRADE before being interrupted */
|
|
int4 blks_to_upgrd; /* Blocks not at current block version level */
|
|
int4 blks_to_upgrd_subzero_error; /* number of times "blks_to_upgrd" potentially became negative */
|
|
enum db_ver desired_db_format; /* Output version for database blocks (normally current version) */
|
|
boolean_t fully_upgraded; /* Set to TRUE by MUPIP REORG UPGRADE when ALL blocks (including RECYCLED blocks)
|
|
* have been examined and upgraded (if necessary) and blks_to_upgrd is set to 0;
|
|
* If set to TRUE, this guarantees all blocks in the database are upgraded.
|
|
* "blks_to_upgrd" being 0 does not necessarily guarantee the same since the
|
|
* counter might have become incorrect (due to presently unknown reasons).
|
|
* set to FALSE whenever desired_db_format changes or the database is
|
|
* updated with V4 format blocks (by MUPIP JOURNAL).
|
|
*/
|
|
boolean_t db_got_to_v5_once; /* Set to TRUE by the FIRST MUPIP REORG UPGRADE (since MUPIP UPGRADE was run
|
|
* to upgrade the file header to V5 format) when it completes successfully.
|
|
* The FIRST reorg upgrade marks all RECYCLED blocks as FREE. Successive reorg
|
|
* upgrades keep RECYCLED blocks as they are while still trying to upgrade them.
|
|
* This is because ONLY the FIRST reorg upgrade could see RECYCLED blocks in V4
|
|
* format that are too full (lack the additional space needed by the V5 block
|
|
* header) to be upgraded to V5 format. Once these are marked FREE, all future
|
|
* block updates happen in V5 format in the database buffers so even if they
|
|
* are written in V4 format to disk, they are guaranteed to be upgradeable.
|
|
* This field marks that transition in the db and is never updated thereafter.
|
|
*/
|
|
boolean_t opened_by_gtmv53; /* Set to TRUE the first time this database is opened by GT.M V5.3-000 and higher */
|
|
char filler_384[12];
|
|
/************* FIELDS RELATED TO DB TRANSACTION HISTORY *****************************/
|
|
th_index trans_hist; /* transaction history - if moved from 1st filehdr block, change TH_BLOCK */
|
|
char filler_trans_hist[8];
|
|
/************* FIELDS RELATED TO WRITE CACHE FLUSHING *******************************/
|
|
int4 flush_time[2];
|
|
int4 flush_trigger;
|
|
int4 n_wrt_per_flu; /* Number of writes per flush call. Overloaded for BG and MM */
|
|
int4 wait_disk_space; /* seconds to wait for diskspace before giving up on a db block write */
|
|
int4 defer_time; /* defer write
|
|
* 0 => immediate,
|
|
* -1 => infinite defer,
|
|
* >0 => defer_time * flush_time[0] is actual defer time
|
|
* default value = 1 => a write-timer every csd->flush_time[0] seconds
|
|
*/
|
|
volatile boolean_t wc_blocked; /* Set to TRUE by process that knows it is leaving the cache in a possibly
|
|
* inconsistent state. Next process grabbing crit will do cache recovery.
|
|
* This setting also stops all concurrent writers from working on the cache.
|
|
* In MM mode, it is used to call wcs_recover during a file extension
|
|
*/
|
|
char filler_512[20];
|
|
/************* FIELDS Used for update process performance improvement. Some may go away in later releases ********/
|
|
uint4 reserved_for_upd; /* Percentage (%) of blocks reserved for update process disk read */
|
|
uint4 avg_blks_per_100gbl; /* Number of blocks read on average for 100 global key read */
|
|
uint4 pre_read_trigger_factor;/* Percentage (%) of blocks reserved for prereader disk read */
|
|
uint4 writer_trigger_factor; /* For update process writers flush trigger */
|
|
/************* FIELDS USED ONLY BY UNIX ********************************/
|
|
int4 semid; /* Since int may not be of fixed size, int4 is used */
|
|
int4 shmid; /* Since int may not be of fixed size, int4 is used */
|
|
gtm_time8 gt_sem_ctime; /* time of creation of semaphore */
|
|
gtm_time8 gt_shm_ctime; /* time of creation of shared memory */
|
|
char filler_unixonly[40]; /* to ensure this section has 64-byte multiple size */
|
|
/************* ACCOUNTING INFORMATION ********************************/
|
|
int4 filler_n_retries[CDB_MAX_TRIES];/* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_puts; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_kills; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_queries; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_gets; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_order; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_zprevs; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_data; /* Now moved to TAB_GVSTATS_REC section */
|
|
uint4 filler_n_puts_duplicate; /* Now moved to TAB_GVSTATS_REC section */
|
|
uint4 filler_n_tp_updates; /* Now moved to TAB_GVSTATS_REC section */
|
|
uint4 filler_n_tp_updates_duplicate; /* Now moved to TAB_GVSTATS_REC section */
|
|
char filler_accounting_64_align[4]; /* to ensure this section has 64-byte multiple size */
|
|
/************* CCP/RC RELATED FIELDS (CCP STUFF IS NOT USED CURRENTLY BY GT.M) *************/
|
|
int4 staleness[2]; /* timer value */
|
|
int4 ccp_tick_interval[2]; /* quantum to release write mode if no write occurs and others are queued
|
|
* These three values are all set at creation by mupip_create
|
|
*/
|
|
int4 ccp_quantum_interval[2];/* delta timer for ccp quantum */
|
|
int4 ccp_response_interval[2];/* delta timer for ccp mailbox response */
|
|
boolean_t ccp_jnl_before; /* used for clustered to pass if jnl file has before images */
|
|
boolean_t clustered; /* FALSE (clustering is currently unsupported) */
|
|
boolean_t unbacked_cache; /* FALSE for clustering. TRUE otherwise */
|
|
|
|
int4 rc_srv_cnt; /* Count of RC servers accessing database */
|
|
int4 dsid; /* DSID value, non-zero when being accessed by RC */
|
|
int4 rc_node;
|
|
char filler_ccp_rc[8]; /* to ensure this section has 64-byte multiple size */
|
|
/************* REPLICATION RELATED FIELDS ****************/
|
|
/* VMS does not yet have multi-site replication functionality. Hence the two sets of fields in this section. */
|
|
#ifdef VMS
|
|
seq_num reg_seqno; /* the jnl seqno of the last update to this region -- 8-byte aligned */
|
|
seq_num resync_seqno; /* the resync-seqno to be sent to the secondary */
|
|
trans_num resync_tn; /* db tn corresponding to resync_seqno - used in losttrans handling */
|
|
seq_num old_resync_seqno; /* to find out if transactions were sent from primary to secondary */
|
|
int4 repl_state; /* state of replication whether open/closed/was_open */
|
|
char filler_repl[28]; /* to ensure this section has 64-byte multiple size */
|
|
#else
|
|
seq_num reg_seqno; /* the jnl seqno of the last update to this region -- 8-byte aligned */
|
|
seq_num pre_multisite_resync_seqno; /* previous resync-seqno field now moved to the replication instance file */
|
|
trans_num zqgblmod_tn; /* db tn corresponding to zqgblmod_seqno - used in losttrans handling */
|
|
seq_num zqgblmod_seqno; /* minimum resync seqno of ALL -fetchresync rollbacks that happened on a secondary
|
|
* (that was formerly a root primary) AFTER the most recent
|
|
* MUPIP REPLIC -LOSTTNCOMPLETE command */
|
|
int4 repl_state; /* state of replication whether open/closed/was_open */
|
|
boolean_t multi_site_open; /* Set to TRUE the first time a process opens the database using
|
|
* a GT.M version that supports multi-site replication. FALSE until then */
|
|
seq_num dualsite_resync_seqno; /* Last known seqno communicated with the other side of the replication pipe.
|
|
* This field is maintained as long as the other side is still running a
|
|
* dual-site GT.M version. Once all replication instances in a configuration
|
|
* are upgraded to the multi-site GT.M version, this field is no longer used.
|
|
*/
|
|
char filler_repl[16]; /* to ensure this section has 64-byte multiple size */
|
|
#endif
|
|
/************* TP RELATED FIELDS ********************/
|
|
int4 filler_n_tp_retries[12]; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 filler_n_tp_retries_conflicts[12]; /* Now moved to TAB_GVSTATS_REC section */
|
|
int4 tp_cdb_sc_blkmod[8]; /* Notes down the number of times each place got a cdb_sc_blkmod in tp.
|
|
* Only first 4 array entries are updated now, but space is allocated
|
|
* for 4 more if needed in the future. */
|
|
/************* JOURNALLING RELATED FIELDS ****************/
|
|
uint4 jnl_alq;
|
|
uint4 jnl_deq;
|
|
int4 jnl_buffer_size; /* in 512-byte pages */
|
|
boolean_t jnl_before_image;
|
|
int4 jnl_state; /* journaling state: same as enum jnl_state_codes in jnl.h */
|
|
uint4 jnl_file_len; /* journal file name length */
|
|
uint4 autoswitchlimit; /* limit in disk blocks (max 4GB) when jnl should be auto switched */
|
|
int4 epoch_interval; /* Time between successive epochs in epoch-seconds */
|
|
uint4 alignsize; /* alignment size for JRT_ALIGN */
|
|
int4 jnl_sync_io; /* drives sync I/O ('direct' if applicable) for journals, if set (UNIX) */
|
|
/* writers open NOCACHING to bypass XFC cache, if set (VMS) */
|
|
int4 yield_lmt; /* maximum number of times a process yields to get optimal jnl writes */
|
|
boolean_t turn_around_point;
|
|
trans_num jnl_eovtn; /* last tn for a closed jnl; otherwise epoch tn from the epoch before last */
|
|
char filler_jnl[8]; /* to ensure this section has 64-byte multiple size */
|
|
/************* INTERRUPTED RECOVERY RELATED FIELDS ****************/
|
|
seq_num intrpt_recov_resync_seqno;/* resync/fetchresync jnl_seqno of interrupted rollback */
|
|
jnl_tm_t intrpt_recov_tp_resolve_time;/* since-time for the interrupted recover */
|
|
boolean_t recov_interrupted; /* whether a MUPIP JOURNAL RECOVER/ROLLBACK on this db got interrupted */
|
|
int4 intrpt_recov_jnl_state; /* journaling state at start of interrupted recover/rollback */
|
|
int4 intrpt_recov_repl_state;/* replication state at start of interrupted recover/rollback */
|
|
char filler_1k[40];
|
|
/************* HUGE CHARACTER ARRAYS **************/
|
|
unsigned char jnl_file_name[JNL_NAME_SIZE]; /* journal file name */
|
|
unsigned char reorg_restart_key[256]; /* 1st key of a leaf block where reorg was done last time */
|
|
char machine_name[MAX_MCNAMELEN];
|
|
char encryption_hash[GTMCRYPT_RESERVED_HASH_LEN];
|
|
/* char filler_2k[256] was here before adding the encryption_hash. Since the GTMCRYPT_RESERVED_HASH_LEN
|
|
* consumes 256 bytes, filler_2k has been removed. */
|
|
/************* BG_TRC_REC RELATED FIELDS ***********/
|
|
# define TAB_BG_TRC_REC(A,B) bg_trc_rec_tn B##_tn;
|
|
# include "tab_bg_trc_rec.h"
|
|
# undef TAB_BG_TRC_REC
|
|
char bg_trc_rec_tn_filler [1200 - (SIZEOF(bg_trc_rec_tn) * n_bg_trc_rec_types)];
|
|
|
|
# define TAB_BG_TRC_REC(A,B) bg_trc_rec_cntr B##_cntr;
|
|
# include "tab_bg_trc_rec.h"
|
|
# undef TAB_BG_TRC_REC
|
|
char bg_trc_rec_cntr_filler[ 600 - (SIZEOF(bg_trc_rec_cntr) * n_bg_trc_rec_types)];
|
|
|
|
/************* DB_CSH_ACCT_REC RELATED FIELDS ***********/
|
|
# define TAB_DB_CSH_ACCT_REC(A,B,C) db_csh_acct_rec A;
|
|
# include "tab_db_csh_acct_rec.h"
|
|
# undef TAB_DB_CSH_ACCT_REC
|
|
char db_csh_acct_rec_filler_4k[ 248 - (SIZEOF(db_csh_acct_rec) * n_db_csh_acct_rec_types)];
|
|
|
|
/************* GVSTATS_REC RELATED FIELDS ***********/
|
|
gvstats_rec_t gvstats_rec;
|
|
char gvstats_rec_filler_4k_plus_512[ 512 - SIZEOF(gvstats_rec_t)];
|
|
char filler_5k_minus_16[512 - 16];
|
|
/************* DB CREATION AND UPGRADE CERTIFICATION FIELDS ***********/
|
|
enum db_ver creation_db_ver; /* Major DB version at time of creation */
|
|
enum mdb_ver creation_mdb_ver; /* Minor DB version at time of creation */
|
|
enum db_ver certified_for_upgrade_to; /* Version the database is certified for upgrade to */
|
|
int filler_5k;
|
|
/************* SECSHR_DB_CLNUP RELATED FIELDS (now moved to node_local) ***********/
|
|
int4 secshr_ops_index_filler;
|
|
int4 secshr_ops_array_filler[255]; /* taking up 1k */
|
|
/********************************************************/
|
|
compswap_time_field next_upgrd_warn; /* Time when we can send the next upgrade warning to the operator log */
|
|
boolean_t is_encrypted;
|
|
uint4 db_trigger_cycle; /* incremented every MUPIP TRIGGER command that changes ^#t global contents */
|
|
char filler_7k[992];
|
|
char filler_8k[1024];
|
|
/********************************************************/
|
|
/* Master bitmap immediately follows. Tells whether the local bitmaps have any free blocks or not. */
|
|
} sgmnt_data;
|
|
|
|
#ifdef DB64
|
|
# ifdef __osf__
|
|
# pragma pointer_size(save)
|
|
# pragma pointer_size(long)
|
|
# else
|
|
# error UNSUPPORTED PLATFORM
|
|
# endif
|
|
#endif
|
|
|
|
typedef sgmnt_data *sgmnt_data_ptr_t;
|
|
|
|
#ifdef DB64
|
|
# ifdef __osf__
|
|
# pragma pointer_size(restore)
|
|
# endif
|
|
#endif
|
|
|
|
typedef struct
|
|
{
|
|
FILL8DCL(cache_que_heads_ptr_t, cache_state, 1); /* pointer to beginnings of state queues */
|
|
} sgbg_addrs;
|
|
|
|
typedef struct
|
|
{
|
|
FILL8DCL(sm_uc_ptr_t, base_addr, 1);
|
|
FILL8DCL(mmblk_que_heads_ptr_t, mmblk_state, 2); /* pointer to beginnings of state and blk queues */
|
|
} sgmm_addrs;
|
|
|
|
/* Length limits: MAX_NM_LEN sizes gd_binding.name, *_RN_LEN bound gd_region.rname and
 * *_SN_LEN bound gd_segment.sname.
 */
#define MAX_NM_LEN		MAX_MIDENT_LEN
#define MIN_RN_LEN		1
#define MAX_RN_LEN		MAX_MIDENT_LEN
#define V4_MAX_RN_LEN		31	/* required for dbcertify.h */
#define MIN_SN_LEN		1
#define MAX_SN_LEN		MAX_MIDENT_LEN

/* Byte constants related to subscripts and keys */
#define STR_SUB_PREFIX		0x0FF
#define SUBSCRIPT_STDCOL_NULL	0x01
#define STR_SUB_ESCAPE		0X01
#define STR_SUB_MAXVAL		0xFF
#define SUBSCRIPT_ZERO		0x080
#define SUBSCRIPT_BIAS		0x0BE
#define NEG_MNTSSA_END		0x0FF
#define KEY_DELIMITER		0X00

#define MIN_DB_BLOCKS		10	/* this should be maintained in conjunction with the minimum allocation in GDEINIT.M */

/* Values for NULL_SUBSCRIPTS (stored in gd_region.null_subs):
 *	NEVER		(previously NO)
 *	ALWAYS		(previously YES)
 *	ALLOWEXISTING	allows reads of null subscripts but prohibits sets
 */
#define NEVER		0
#define ALWAYS		1
#define ALLOWEXISTING	2

/* Byte distance from address y to address x.
 * Both arguments are parenthesized before the cast so that expression arguments such as
 * "ptr + n" are converted to byte pointers as a whole rather than having only their first
 * operand cast.
 */
#define OFFSET(x,y) ((uchar_ptr_t)(x) - (uchar_ptr_t)(y))

/* File-control operation codes (presumably the values stored in file_control.op -- confirm against callers) */
#define FC_READ		0
#define FC_WRITE	1
#define FC_OPEN		2
#define FC_CLOSE	3

typedef struct file_control_struct
|
|
{
|
|
sm_uc_ptr_t op_buff;
|
|
UNIX_ONLY(gtm_int64_t) VMS_ONLY(int4) op_pos;
|
|
int op_len;
|
|
void *file_info; /* Pointer for OS specific struct */
|
|
char file_type;
|
|
char op;
|
|
} file_control;
|
|
|
|
/* Small header record: a 12-byte label followed by a file size */
typedef struct header_struct_struct
{
	char		label[12];
	unsigned	filesize;	/* size of file excluding GDE info */

	/* removed unused file_log struct */
} header_struct;

typedef struct gdr_name_struct
|
|
{
|
|
mstr name;
|
|
mstr exp_name;
|
|
struct gdr_name_struct *link;
|
|
struct gd_addr_struct *gd_ptr;
|
|
} gdr_name;
|
|
|
|
typedef struct gd_addr_struct
|
|
{
|
|
struct gd_region_struct *local_locks;
|
|
int4 max_rec_size;
|
|
short n_maps;
|
|
short n_regions;
|
|
short n_segments;
|
|
short filler;
|
|
struct gd_binding_struct *maps;
|
|
struct gd_region_struct *regions;
|
|
struct gd_segment_struct *segments;
|
|
struct gd_addr_struct *link;
|
|
struct hash_table_mname_struct *tab_ptr;
|
|
gd_id *id;
|
|
UINTPTR_T end;
|
|
} gd_addr;
|
|
typedef gd_addr *(*gd_addr_fn_ptr)();
|
|
|
|
typedef struct gd_segment_struct
|
|
{
|
|
unsigned short sname_len;
|
|
unsigned char sname[MAX_SN_LEN + 1];
|
|
unsigned short fname_len;
|
|
unsigned char fname[MAX_FN_LEN + 1];
|
|
unsigned short blk_size;
|
|
unsigned short ext_blk_count;
|
|
uint4 allocation;
|
|
struct CLB *cm_blk;
|
|
unsigned char defext[4];
|
|
char defer_time; /* Was passed in cs_addrs */
|
|
unsigned char file_type;
|
|
unsigned char buckets; /* Was passed in FAB */
|
|
unsigned char windows; /* Was passed in FAB */
|
|
uint4 lock_space;
|
|
uint4 global_buffers; /* Was passed in FAB */
|
|
uint4 reserved_bytes; /* number of bytes to be left in every database block */
|
|
enum db_acc_method acc_meth;
|
|
file_control *file_cntl;
|
|
struct gd_region_struct *repl_list;
|
|
UNIX_ONLY(boolean_t is_encrypted;)
|
|
} gd_segment;
|
|
|
|
typedef union
|
|
{
|
|
int4 offset; /* relative offset to segment */
|
|
gd_segment *addr; /* absolute address of segment */
|
|
} gd_seg_addr;
|
|
|
|
typedef struct gd_region_struct
|
|
{
|
|
unsigned short rname_len;
|
|
unsigned char rname[MAX_RN_LEN + 1];
|
|
unsigned short max_key_size;
|
|
uint4 max_rec_size;
|
|
gd_seg_addr dyn;
|
|
gd_seg_addr stat;
|
|
bool open;
|
|
bool lock_write; /* Field is not currently used by GT.M */
|
|
char null_subs; /* 0 ->NEVER(previous NO), 1->ALWAYS(previous YES), 2->ALLOWEXISTING
|
|
* i.e. will allow read null subs but prohibit set */
|
|
unsigned char jnl_state;
|
|
|
|
/* deleted gbl_lk_root and lcl_lk_root, obsolete fields */
|
|
|
|
uint4 jnl_alq;
|
|
unsigned short jnl_deq;
|
|
short jnl_buffer_size;
|
|
bool jnl_before_image;
|
|
bool opening;
|
|
bool read_only;
|
|
bool was_open;
|
|
unsigned char cmx_regnum;
|
|
unsigned char def_coll;
|
|
bool std_null_coll; /* 0 -> GT.M null collation,i,e, null subs collate between numeric and string
|
|
* 1-> standard null collation i.e. null subs collate before numeric and string */
|
|
unsigned char jnl_file_len;
|
|
unsigned char jnl_file_name[JNL_NAME_SIZE];
|
|
|
|
/* VMS file id struct goes to OS specific struct */
|
|
/* VMS lock structure for reference goes to OS specific struct */
|
|
|
|
int4 node;
|
|
int4 sec_size;
|
|
} gd_region;
|
|
|
|
typedef struct sgmnt_addrs_struct
|
|
{
|
|
sgmnt_data_ptr_t hdr;
|
|
sm_uc_ptr_t bmm;
|
|
sm_uc_ptr_t wc;
|
|
bt_rec_ptr_t bt_header;
|
|
bt_rec_ptr_t bt_base;
|
|
th_rec_ptr_t th_base;
|
|
th_index_ptr_t ti;
|
|
node_local_ptr_t nl;
|
|
mutex_struct_ptr_t critical;
|
|
struct shmpool_buff_hdr_struct *shmpool_buffer; /* 1MB chunk of shared memory that we micro manage */
|
|
sm_uc_ptr_t db_addrs[2];
|
|
sm_uc_ptr_t lock_addrs[2];
|
|
struct gv_namehead_struct *dir_tree;
|
|
# ifdef GTM_TRIGGER
|
|
struct gv_namehead_struct *hasht_tree;
|
|
# endif
|
|
struct sgmnt_addrs_struct *next_fenced; /* NULL if db has journaling turned off (or disabled)
|
|
* Otherwise (db has journaling turned on), it is
|
|
* NULL if this db was not updated in this TP/ZTP
|
|
* non-NULL if this db was updated in this TP/ZTP
|
|
* The non-NULL value points to the next csa that
|
|
* has a non-NULL next_fenced value i.e. a linked list
|
|
* of csas. The end of the list is JNL_FENCE_LIST_END
|
|
* (cannot use NULL due to special meaning described
|
|
* above and hence using a macro which evaluates to -1).
|
|
*/
|
|
struct jnl_private_control_struct *jnl;
|
|
struct sgm_info_struct *sgm_info_ptr;
|
|
gd_region *region; /* the region corresponding to this csa */
|
|
struct hash_table_mname_struct *gvt_hashtab; /* NON-NULL only if regcnt > 1;
|
|
* Maintains all gv_targets mapped to this db file */
|
|
struct reg_ctl_list_struct *rctl; /* pointer to rctl for this region (used only if jgbl.forw_phase_recovery) */
|
|
struct sgmnt_addrs_struct *next_csa; /* points to csa of NEXT database that has been opened by this process */
|
|
# ifdef GTM_CRYPT
|
|
char *encrypted_blk_contents;
|
|
gtmcrypt_key_t encr_key_handle;
|
|
int4 encrypt_init_status;
|
|
# endif
|
|
# ifdef GTM_SNAPSHOT
|
|
struct snapshot_context_struct *ss_ctx;
|
|
# endif
|
|
union
|
|
{
|
|
sgmm_addrs mm;
|
|
sgbg_addrs bg;
|
|
/* May add new pointers here for other methods or change to void ptr */
|
|
} acc_meth;
|
|
gvstats_rec_t gvstats_rec;
|
|
/* 8-byte aligned at this point on all platforms (32-bit, 64-bit or Tru64 which is a mix of 32-bit and 64-bit pointers) */
|
|
sgmnt_data_ptr_t mm_core_hdr; /* Most OSs don't include memory mapped files in the core dump. For MM access
|
|
* mode, this is a pointer to a copy of the header that will be in the corefile.
|
|
* The pointer is only used for MM and that too only in Unix.
|
|
*/
|
|
size_t fullblockwrite_len; /* Length of a full block write */
|
|
uint4 total_blks; /* Last we knew, file was this big. Used to
|
|
* signal MM processing file was extended and
|
|
* needs to be remapped. */
|
|
uint4 prev_free_blks;
|
|
/* The following uint4's are treated as bools but must be 4 bytes to avoid interaction between
|
|
bools in interrupted routines and possibly lost data */
|
|
volatile uint4 timer; /* This process has a timer for this region */
|
|
volatile uint4 in_wtstart; /* flag we are busy writing */
|
|
volatile uint4 now_crit; /* This process has the critical write lock */
|
|
volatile uint4 wbuf_dqd; /* A write buffer has been dequeued - signals that
|
|
extra cleanup required if die while on */
|
|
uint4 stale_defer; /* Stale processing deferred this region */
|
|
boolean_t freeze;
|
|
volatile boolean_t dbsync_timer; /* whether a timer to sync the filehdr (and write epoch) is active */
|
|
block_id reorg_last_dest; /* last destinition block used for swap */
|
|
boolean_t jnl_before_image;
|
|
boolean_t read_write;
|
|
boolean_t extending;
|
|
boolean_t persistent_freeze; /* if true secshr_db_clnup() won't unfreeze this region */
|
|
/* The following 3 fields are in cs_addrs instead of in the file-header since they are a function
|
|
* of the journal-record sizes that can change with journal-version-numbers (for the same database).
|
|
*/
|
|
int4 pblk_align_jrecsize; /* maximum size of a PBLK record with corresponding ALIGN record */
|
|
int4 min_total_tpjnl_rec_size; /* minimum journal space requirement for a TP transaction */
|
|
int4 min_total_nontpjnl_rec_size; /* minimum journal space requirement for a non-TP transaction */
|
|
int4 jnl_state; /* journaling state: it can be 0, 1 or 2 (same as enum jnl_state_codes in jnl.h) */
|
|
int4 repl_state; /* state of replication whether open/closed/was_open */
|
|
uint4 crit_check_cycle; /* Used to mark which regions in a transaction legiticamtely have crit */
|
|
int4 backup_in_prog; /* true if online backup in progress for this region (used in op_tcommit/tp_tend) */
|
|
boolean_t snapshot_in_prog; /* true if snapshots are in progress for this region */
|
|
int4 ref_cnt; /* count of number of times csa->nl->ref_cnt was incremented by this process */
|
|
int4 fid_index; /* index for region ordering based on unique_id */
|
|
boolean_t do_fullblockwrites; /* This region enabled for full block writes */
|
|
int4 regnum; /* Region number (region open counter) used by journaling so all tokens
|
|
have a unique prefix per region (and all regions have same prefix)
|
|
*/
|
|
int4 n_pre_read_trigger; /* For update process to keep track of progress and when to trigger pre-read */
|
|
boolean_t replinst_matches_db; /* TRUE if replication instance file name stored in db shared memory matches the
|
|
* instance file name stored in the journal pool that this process has attached to.
|
|
* Updates are allowed to this replicated database only if this is TRUE.
|
|
*/
|
|
int4 regcnt; /* # of regions that have this as their csa */
|
|
boolean_t t_commit_crit; /* set to FALSE by default. set to TRUE if in the middle of database commit.
|
|
* if access method is BG, this assumes a multi-state value.
|
|
* FALSE -> T_COMMIT_CRIT_PHASE1 -> T_COMMIT_CRIT_PHASE2 -> FALSE
|
|
* (bg_update_phase1) (bg_update_phase2) (finish commit)
|
|
*/
|
|
boolean_t wcs_pidcnt_incremented; /* set to TRUE if we incremented cnl->wcs_phase2_commit_pidcnt.
|
|
* used by secshr_db_clnup to decrement the shared counter. */
|
|
boolean_t incr_db_trigger_cycle; /* set to FALSE by default. set to TRUE if trigger state change (in ^#t) occurs for
|
|
* any global in this database which means an increment to csa->db_trigger_cycle and
|
|
* csd->db_trigger_cycle. Currently used by MUPIP TRIGGER/$ZTRIGGER(), MUPIP RECOVER
|
|
* and UPDATE PROCESS
|
|
*/
|
|
uint4 db_trigger_cycle; /* mirror of csd->db_trigger_cycle; used to detect concurrent ^#t global changes */
|
|
uint4 db_dztrigger_cycle; /* incremented on every $ZTRIGGER() operation. Due to the presence of $ZTRIGGER()
|
|
* and ZTRIGGER command the 'd' prefix for ztrigger in db_dztrigger_cycle is used
|
|
* to denote the '$' in $ZTRIGGER() */
|
|
boolean_t hold_onto_crit; /* TRUE currently for dse if a CRIT -SEIZE has been done on this region.
|
|
* Set to FALSE by a DSE CRIT -RELEASE done on this region. Will also be TRUE in
|
|
* case of ONLINE ROLLBACK when that is implemented. Any code that can be invoked
|
|
* by both DSE and ROLLBACK should use csa->hold_onto_crit. Any code that can be
|
|
* invoked only by ROLLBACK can use the global variable "hold_onto_locks" as that
|
|
* is faster (avoids the csa-> dereference) and encompasses all database regions.
|
|
*/
|
|
} sgmnt_addrs;
|
|
|
|
typedef struct gd_binding_struct
|
|
{
|
|
unsigned char name[MAX_NM_LEN + 1];
|
|
union
|
|
{
|
|
gd_region *addr;
|
|
int4 offset;
|
|
} reg;
|
|
} gd_binding;
|
|
|
|
/* Offset/match pair; used for the prev_rec and curr_rec members of srch_blk_status */
typedef struct
{
	unsigned short	offset;
	unsigned short	match;
} srch_rec_status;

typedef struct srch_blk_status_struct
|
|
{
|
|
cache_rec_ptr_t cr;
|
|
sm_uc_ptr_t buffaddr;
|
|
block_id blk_num;
|
|
trans_num tn;
|
|
srch_rec_status prev_rec,
|
|
curr_rec;
|
|
int4 cycle;
|
|
int4 level;
|
|
struct cw_set_element_struct *cse;
|
|
struct srch_blk_status_struct *first_tp_srch_status;
|
|
struct gv_namehead_struct *blk_target;
|
|
} srch_blk_status;
|
|
|
|
/* Defines for "cycle" member in srch_blk_status.
 * For histories pointing to shared-memory buffers,
 *	"cycle" will be CYCLE_SHRD_COPY in MM and some positive number in BG.
 * For histories pointing to privately-built blocks,
 *	"cycle" will be CYCLE_PVT_COPY for both BG and MM.
 * The negative values are parenthesized so they stay a single operand wherever they are expanded.
 */
#define CYCLE_PVT_COPY		(-1)
#define CYCLE_SHRD_COPY		(-2)

typedef struct
|
|
{
|
|
int4 depth;
|
|
int4 filler;
|
|
srch_blk_status h[MAX_BT_DEPTH + 1];
|
|
} srch_hist;
|
|
|
|
/* Global variable key: three offsets followed by the key bytes. base[] is declared with one
 * element; the buffer actually available is described by "top".
 */
typedef struct gv_key_struct
{
	unsigned short	top;		/* Offset to top of buffer allocated for the key */
	unsigned short	end;		/* End of the current key. Offset to the second null */
	unsigned short	prev;		/* Offset to the start of the previous subscript.
					 * This is used for global nakeds.
					 */
	unsigned char	base[1];	/* Base of the key */
} gv_key;

/* The direction that the newly added record went after a block split at a given level.
 * No comma follows the last enumerator: a trailing comma is accepted only from C99 onwards.
 */
enum split_dir
{
	NEWREC_DIR_FORCED,	/* direction forced due to one of the sides being too-full i.e. no choice */
	NEWREC_DIR_LEFT,	/* new record went into the end of the left block after the split */
	NEWREC_DIR_RIGHT	/* new record went into the beginning of the right block after the split */
};

/* Any change to this structure should also have a corresponding [re]initialization in mupip_recover.c
|
|
* in the code where we play the records in the forward phase i.e. go through each of the jnl_files
|
|
* and within if (mur_options.update), initialize necessary fields of gv_target before proceeding with mur_forward().
|
|
*/
|
|
typedef struct gv_namehead_struct
|
|
{
|
|
gv_key *first_rec, *last_rec; /* Boundary recs of clue's data block */
|
|
struct gv_namehead_struct *next_gvnh; /* Used to chain gv_target's together */
|
|
struct gv_namehead_struct *prev_gvnh; /* Used to chain gv_target's together */
|
|
struct gv_namehead_struct *next_tp_gvnh; /* Used to chain gv_targets participating in THIS TP transaction */
|
|
sgmnt_addrs *gd_csa; /* Pointer to Segment corresponding to this key */
|
|
srch_hist *alt_hist; /* alternate history. initialized once per gv_target */
|
|
struct collseq_struct *collseq; /* pointer to a linked list of user supplied routine addresses
|
|
for internationalization */
|
|
trans_num read_local_tn; /* local_tn of last reference for this global */
|
|
GTMTRIG_ONLY(trans_num trig_local_tn;) /* local_tn of last trigger driven for this global */
|
|
boolean_t noisolation; /* whether isolation is turned on or off for this global */
|
|
block_id root; /* Root of global variable tree */
|
|
mname_entry gvname; /* the name of the global */
|
|
NON_GTM64_ONLY(uint4 filler_8byte_align0;) /* for 8-byte alignment of "hist" member */
|
|
srch_hist hist; /* block history array */
|
|
int4 regcnt; /* number of global directories whose hash-tables point to this gv_target.
|
|
* 1 by default. > 1 if the same name in TWO DIFFERENT global directories
|
|
* maps to the same physical file (i.e. two regions in different global
|
|
* directories have the same physical file).
|
|
*/
|
|
unsigned char nct; /* numerical collation type for internalization */
|
|
unsigned char act; /* alternative collation type for internalization */
|
|
unsigned char ver;
|
|
bool split_cleanup_needed;
|
|
char last_split_direction[MAX_BT_DEPTH - 1]; /* maintain last split direction for each level in the GVT */
|
|
char filler_8byte_align1[2];
|
|
block_id last_split_blk_num[MAX_BT_DEPTH - 1];
|
|
# ifdef GTM_TRIGGER
|
|
struct gvt_trigger_struct *gvt_trigger; /* pointer to trigger info for this global
|
|
* (is non-NULL only if db_trigger_cycle is non-zero) */
|
|
uint4 db_trigger_cycle; /* copy of csd->db_trigger_cycle when triggers for this global were
|
|
* last read/initialized from ^#t global (in gvtr_init) */
|
|
uint4 db_dztrigger_cycle; /* copy of csa->db_dztrigger_cycle when triggers for this global were
|
|
* last read/initialized from ^#t global (in gvtr_init) */
|
|
boolean_t trig_mismatch_test_done; /* whether update process has checked once if there is a mismatch
|
|
* in trigger definitions between originating and replicating instance */
|
|
GTM64_ONLY(uint4 filler_8byte_align2;) /* for 8-byte alignment of "clue" member. (targ_alloc relies on this) */
|
|
# endif
|
|
gv_key clue; /* Clue key, must be last in namehead struct because of hung buffer. */
|
|
} gv_namehead;
|
|
|
|
typedef struct gvnh_reg_struct
|
|
{
|
|
gv_namehead *gvt;
|
|
gd_region *gd_reg; /* Region of key */
|
|
} gvnh_reg_t;
|
|
|
|
/* Sentinel meaning "no valid gv_target" (reset_gv_target is set to this once it has been consumed).
 * The whole replacement is parenthesized so the cast cannot be split by surrounding operators
 * (e.g. "sizeof INVALID_GV_TARGET").
 */
#define INVALID_GV_TARGET ((gv_namehead *)-1L)

typedef struct gvsavtarg_struct
|
|
{
|
|
gd_addr *gd_targ_addr;
|
|
gd_binding *gd_map;
|
|
gd_region *gv_cur_region;
|
|
gv_namehead *gv_target;
|
|
bool gv_last_subsc_null;
|
|
bool gv_some_subsc_null;
|
|
short prev;
|
|
short end;
|
|
short filler_8byte_align;
|
|
} gvsavtarg_t;
|
|
|
|
/* Alignment boundary and fixed size associated with gvsavtarg_t */
#define GVSAVTARG_ALIGN_BNDRY	8
#define GVSAVTARG_FIXED_SIZE	(SIZEOF(gvsavtarg_t))

/* The following three macros define the mechanism to restore gv_target under normal and error conditions.
 * RESET_GV_TARGET should be used to restore gv_target from the global, reset_gv_target, only when we
 * are sure that this function is the first one in the call stack to have saved gv_target.
 * If the module that needs the restoration mechanism is not the first one to save gv_target in the call
 * stack, then one of the last two macros should be used.
 * RESET_GV_TARGET_LCL is used to restore gv_target from the local variable used to save gv_target.
 * RESET_GV_TARGET_LCL_AND_CLR_GBL is used at the end of the module, when there are no more gv_target
 * restorations needed. This resets gv_target and invalidates reset_gv_target.
 *
 * This mechanism ensures that, when there are multiple functions in a given call stack that save and
 * restore gv_target, only the bottom-most function gets to store its value in the global, reset_gv_target.
 * In case of rts errors, if the error is not SUCCESS or INFO, then gv_target gets restored to reset_gv_target
 * (in preemptive_ch()). For SUCCESS or INFO, no restoration is necessary because CONTINUE from the condition
 * handlers would take us through the normal path for gv_target restoration.
 */

/* Argument values for RESET_GV_TARGET: whether the debug-only in-sync checks should be run */
#define SKIP_GVT_GVKEY_CHECK	FALSE
#define DO_GVT_GVKEY_CHECK	TRUE

/* Restore gv_target from the global reset_gv_target and mark reset_gv_target as consumed
 * (INVALID_GV_TARGET). In debug builds, when GVT_GVKEY_CHECK is TRUE, additionally verify that
 * gv_target is in sync with cs_addrs and with gv_currkey.
 * Expands to a brace-enclosed block.
 */
#define RESET_GV_TARGET(GVT_GVKEY_CHECK)			\
{								\
	assert(INVALID_GV_TARGET != reset_gv_target);		\
	gv_target = reset_gv_target;				\
	reset_gv_target = INVALID_GV_TARGET;			\
	DEBUG_ONLY(						\
		if (GVT_GVKEY_CHECK)				\
		{						\
			DBG_CHECK_GVTARGET_CSADDRS_IN_SYNC;	\
			DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC;	\
		}						\
	)							\
}

/* Restore gv_target from a locally saved copy.
 * NOTE: the expansion already ends in ';' (kept so existing callers are unaffected), so an
 * invocation followed by ';' yields an extra empty statement -- take care in unbraced if/else.
 */
#define RESET_GV_TARGET_LCL(SAVE_TARG)	gv_target = (SAVE_TARG);

/* Restore gv_target from a locally saved copy and, if this caller was the one that set the global
 * (gbl_target_was_set is FALSE), invalidate reset_gv_target as well.
 * Expects "gbl_target_was_set" to be visible at the point of use. SAVE_TARG is parenthesized so
 * that an expression argument is compared as a whole in the assert.
 */
#define RESET_GV_TARGET_LCL_AND_CLR_GBL(SAVE_TARG)							\
{													\
	gv_target = (SAVE_TARG);									\
	if (!gbl_target_was_set)									\
	{												\
		assert(((SAVE_TARG) == reset_gv_target) || (INVALID_GV_TARGET == reset_gv_target));	\
		DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC;							\
		reset_gv_target = INVALID_GV_TARGET;							\
	}												\
}

/* No point doing the gvtarget-gvcurrkey in-sync check or the gvtarget-csaddrs in-sync check if we are anyways going to exit.
|
|
* There is no way op_gvname (which is where these design assumptions get actually used) is going to be called from now onwards.
|
|
*/
|
|
GBLREF int process_exiting;
|
|
GBLREF trans_num local_tn;
|
|
GBLREF gv_namehead *gvt_tp_list;
|
|
|
|
/* Record that GVT was referenced in the current TP transaction.
 * If GVT->read_local_tn differs from local_tn, it is set to local_tn and GVT is pushed onto the
 * head of gvt_tp_list; otherwise (debug builds only) GVT is checked to already be on that list.
 * GVT is evaluated more than once, so it must not have side effects; it is parenthesized so that
 * pointer expressions (e.g. "base + 1") are accepted.
 */
#define	ADD_TO_GVT_TP_LIST(GVT)											\
{														\
	if ((GVT)->read_local_tn != local_tn)									\
	{	/* Set read_local_tn to local_tn; also add GVT to list of gvtargets referenced in this TP transaction. */	\
		(GVT)->read_local_tn = local_tn;								\
		(GVT)->next_tp_gvnh = gvt_tp_list;								\
		gvt_tp_list = (GVT);										\
	} else													\
	{	/* Check that GVT is already part of the list of gvtargets referenced in this TP transaction */	\
		DBG_CHECK_IN_GVT_TP_LIST(GVT, TRUE);	/* TRUE => we check that GVT IS present in the gvt_tp_list */	\
	}													\
}

#ifdef DEBUG
|
|
/* Debug check: walk gvt_tp_list looking for "gvt".
 * If "present" is TRUE, assert that it was found. If "present" is FALSE, assert that it was not
 * found, except that this is tolerated when the process is exiting outside of TP.
 * Both arguments are parenthesized so expression arguments are evaluated as a whole.
 */
#define	DBG_CHECK_IN_GVT_TP_LIST(gvt, present)						\
{											\
	gv_namehead	*gvtarg;							\
											\
	GBLREF gv_namehead	*gvt_tp_list;						\
	GBLREF uint4		dollar_tlevel;						\
											\
	for (gvtarg = gvt_tp_list; NULL != gvtarg; gvtarg = gvtarg->next_tp_gvnh)	\
	{										\
		if (gvtarg == (gvt))							\
			break;								\
	}										\
	assert(!(present) || (NULL != gvtarg));						\
	assert((present) || (NULL == gvtarg) || (process_exiting && !dollar_tlevel));	\
}

/* Debug check: assert that "gvt" is on gv_target_list, unless the current region's access method
 * is dba_cm or dba_usr (for which the list is not maintained).
 */
#define	DBG_CHECK_GVT_IN_GVTARGETLIST(gvt)						\
{											\
	gv_namehead	*gvtarg;							\
											\
	GBLREF gd_region	*gv_cur_region;						\
	GBLREF gv_namehead	*gv_target_list;					\
											\
	for (gvtarg = gv_target_list; NULL != gvtarg; gvtarg = gvtarg->next_gvnh)	\
	{										\
		if (gvtarg == (gvt))							\
			break;								\
	}										\
	/* For dba_cm or dba_usr type of regions, gv_target_list is not maintained so	\
	 * if gv_target is not part of gv_target_list, assert region is not BG or MM.	\
	 */										\
	assert((NULL != gvtarg) || (dba_cm == gv_cur_region->dyn.addr->acc_meth)	\
		|| (dba_usr == gv_cur_region->dyn.addr->acc_meth));			\
}

/* Debug check that gv_target and gv_currkey describe the same global.
 * When the process is not exiting, gv_currkey holds a non-empty key and reset_gv_target is
 * INVALID_GV_TARGET, asserts that the key starts with gv_target's name, that the key is properly
 * terminated and that gv_target is on gv_target_list. Independently, runs the gv_target integrity
 * check whenever gv_target and its gd_csa are non-NULL.
 * keybase is derived from gv_currkey only when gv_currkey is non-NULL, so no address is formed
 * from a NULL pointer.
 */
#define	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC									\
{														\
	mname_entry		*gvent;										\
	mstr			*varname;									\
	int			varlen;										\
	unsigned short		keyend;										\
	unsigned char		*keybase;									\
														\
	GBLREF int4		gv_keysize;									\
														\
	GBLREF gv_key		*gv_currkey;									\
	GBLREF gv_namehead	*reset_gv_target;								\
														\
	assert((NULL != gv_currkey) || (NULL == gv_target));							\
	/* make sure gv_currkey->top always reflects the maximum keysize across all dbs that we opened until now */	\
	assert((NULL == gv_currkey) || (gv_currkey->top == gv_keysize));					\
	keybase = (NULL != gv_currkey) ? &gv_currkey->base[0] : NULL;						\
	if (!process_exiting && (NULL != keybase) && (0 != keybase[0]) && (INVALID_GV_TARGET == reset_gv_target))	\
	{													\
		assert(NULL != gv_target);									\
		gvent = &gv_target->gvname;									\
		varname = &gvent->var_name;									\
		varlen = varname->len;										\
		assert(varlen);											\
		assert((0 != keybase[varlen]) || !memcmp(keybase, varname->addr, varlen));			\
		keyend = gv_currkey->end;									\
		assert(!keyend || (KEY_DELIMITER == keybase[keyend]));						\
		assert(!keyend || (KEY_DELIMITER == keybase[keyend - 1]));					\
		/* Check that gv_target is part of the gv_target_list */					\
		DBG_CHECK_GVT_IN_GVTARGETLIST(gv_target);							\
	}													\
	/* Do gv_target sanity check as well; Do not do this if it is NULL or if it is GT.CM GNP client (gd_csa is NULL) */	\
	if ((NULL != gv_target) && (NULL != gv_target->gd_csa))							\
		DBG_CHECK_GVTARGET_INTEGRITY(gv_target);							\
}

/* Do checks on the integrity of various fields in gv_target. targ_alloc initializes these and they are supposed to
 * stay that way. The following code is very similar to that in targ_alloc so needs to be maintained in sync. This
 * macro expects that gv_target->gd_csa is non-NULL (could be NULL for GT.CM GNP client) so any callers of this macro
 * should ensure they do not invoke it in case of NULL gd_csa.
 * GVT is evaluated several times and is parenthesized at each use.
 */
#define	DBG_CHECK_GVTARGET_INTEGRITY(GVT)										\
{															\
	int			keysize, partial_size;									\
	GBLREF	boolean_t	dse_running;										\
	GBLREF	jnl_gbls_t	jgbl;											\
															\
	/* Forward recovery does targ_alloc of MAX_KEY_SZ (independent of db max_key_size) for csa->dir_tree.		\
	 * Take that into account while computing keysize.								\
	 */														\
	keysize = (!jgbl.forw_phase_recovery || ((GVT)->gd_csa->dir_tree != (GVT)))					\
			? (GVT)->gd_csa->hdr->max_key_size : MAX_KEY_SZ;						\
	keysize = DBKEYSIZE(keysize);											\
	partial_size = SIZEOF(gv_namehead) + 2 * SIZEOF(gv_key) + 3 * keysize;						\
	/* DSE could change the max_key_size dynamically so account for it in the below assert */			\
	if (!dse_running)												\
	{														\
		assert((GVT)->gvname.var_name.addr == (char *)(GVT) + partial_size);					\
		assert((char *)(GVT)->first_rec == ((char *)&(GVT)->clue + SIZEOF(gv_key) + keysize));			\
		assert((char *)(GVT)->last_rec  == ((char *)(GVT)->first_rec + SIZEOF(gv_key) + keysize));		\
		assert((GVT)->clue.top == keysize);									\
	}														\
	assert((GVT)->clue.top == (GVT)->first_rec->top);								\
	assert((GVT)->clue.top == (GVT)->last_rec->top);								\
}

#else
|
|
# define DBG_CHECK_IN_GVT_TP_LIST(gvt, present)
|
|
# define DBG_CHECK_GVT_IN_GVTARGETLIST(gvt)
|
|
# define DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC
|
|
# define DBG_CHECK_GVTARGET_INTEGRITY(GVT)
|
|
#endif
|
|
|
|
/* The below GBLREFs are for the following macro */
|
|
GBLREF gv_namehead *gv_target;
|
|
GBLREF sgmnt_addrs *cs_addrs;
|
|
#define DBG_CHECK_GVTARGET_CSADDRS_IN_SYNC assert(process_exiting || (NULL == gv_target) || (gv_target->gd_csa == cs_addrs))
|
|
|
|
/* Indicate incompleteness of (potentially subscripted) global name by adding a "*" (without closing ")") at the end.
 * BUFF must be the actual array that was formatted into (ARRAYTOP is applied to it); END is an lvalue pointing just
 * past the formatted text (or NULL) and is updated to point just past the appended '*'.
 */
#define GV_SET_LAST_SUBSCRIPT_INCOMPLETE(BUFF, END)	\
{	\
	if (NULL == (char *)(END))	\
	{	/* The buffer passed to format_targ_key was not enough	\
		 * for the transformation. We don't expect this. Handle	\
		 * it nevertheless by writing "*" into the last byte of	\
		 * the buffer.	\
		 */	\
		assert(FALSE);	\
		(END) = ((unsigned char *)ARRAYTOP(BUFF)) - 1;	\
		assert((char *)(END) > (char *)(BUFF));	\
		*(END)++ = '*';	\
	} else	\
	{	/* Overflow occurred while adding the global name OR	\
		 * after adding the last subscript OR in the middle of	\
		 * adding a subscript (not necessarily last). In all	\
		 * cases, add a '*' at end to indicate incompleteness.	\
		 */	\
		if (')' == (END)[-1])	\
			(END)--;	\
		/* ensure we have space to write 1 byte */	\
		assert((char *)(END) + 1 <= ((char *)ARRAYTOP(BUFF)));	\
		*(END)++ = '*';	\
	}	\
}
|
|
|
|
/* Format GVKEY into a local buffer with format_targ_key, mark the result as incomplete (trailing '*') and
 * issue ERR_GVSUBOFLOW with ERR_GVIS carrying the formatted name via rts_error.
 */
#define ISSUE_GVSUBOFLOW_ERROR(GVKEY)	\
{	\
	unsigned char	*endBuff, fmtBuff[MAX_ZWR_KEY_SZ];	\
	\
	/* Assert that input key to format_targ_key is double null terminated */	\
	assert(KEY_DELIMITER == (GVKEY)->base[(GVKEY)->end]);	\
	endBuff = format_targ_key(fmtBuff, ARRAYSIZE(fmtBuff), (GVKEY), TRUE);	\
	GV_SET_LAST_SUBSCRIPT_INCOMPLETE(fmtBuff, endBuff); /* Note: might update "endBuff" */	\
	rts_error(VARLSTCNT(6) ERR_GVSUBOFLOW, 0, ERR_GVIS, 2, endBuff - fmtBuff, fmtBuff);	\
}
|
|
|
|
/* Append the subscript held in "mvarg" to the key "gv_currkey" (a macro parameter here), issuing GVSUBOFLOW if the
 * key would reach "max_key". "was_null" accumulates (ORs in) the previous value of "is_null"; "is_null" is then set
 * to whether the subscript just added is the null subscript.
 * - MV_SUBLIT mvals already hold subscript bytes: they are copied as-is unless gv_target has collation (collseq or
 *   nct), in which case they are converted back to a string with gvsub2str and re-encoded with mval2subsc.
 * - Other mvals are forced defined and encoded with mval2subsc.
 * Uses "gv_target" and "gv_cur_region" from the enclosing scope.
 */
#define COPY_SUBS_TO_GVCURRKEY(mvarg, max_key, gv_currkey, was_null, is_null)	\
{	\
	GBLREF mv_stent		*mv_chain;	\
	GBLREF unsigned char	*msp, *stackwarn, *stacktop;	\
	mval			temp;	\
	unsigned char		buff[MAX_ZWR_KEY_SZ], *end;	\
	int			len;	\
	\
	was_null |= is_null;	\
	if ((mvarg)->mvtype & MV_SUBLIT)	\
	{	\
		is_null = ((STR_SUB_PREFIX == *(unsigned char *)(mvarg)->str.addr)	\
				&& (KEY_DELIMITER == *((mvarg)->str.addr + 1)));	\
		if (gv_target->collseq || gv_target->nct)	\
		{	\
			/* collation transformation should be done at the server's end for CM regions */	\
			assert(dba_cm != gv_cur_region->dyn.addr->acc_meth);	\
			TREF(transform) = FALSE;	\
			end = gvsub2str((uchar_ptr_t)(mvarg)->str.addr, buff, FALSE);	\
			TREF(transform) = TRUE;	\
			temp.mvtype = MV_STR;	\
			temp.str.addr = (char *)buff;	\
			temp.str.len = (mstr_len_t)(end - buff);	\
			mval2subsc(&temp, (gv_currkey));	\
		} else	\
		{	\
			len = (mvarg)->str.len;	\
			if ((gv_currkey)->end + len - 1 >= (max_key))	\
				ISSUE_GVSUBOFLOW_ERROR(gv_currkey);	\
			memcpy(((gv_currkey)->base + (gv_currkey)->end), (mvarg)->str.addr, len);	\
			if (is_null && 0 != gv_cur_region->std_null_coll)	\
				(gv_currkey)->base[(gv_currkey)->end] = SUBSCRIPT_STDCOL_NULL;	\
			(gv_currkey)->prev = (gv_currkey)->end;	\
			/* len includes the trailing delimiter of the copied subscript, hence the -1 */	\
			(gv_currkey)->end += len - 1;	\
		}	\
	} else	\
	{	\
		MV_FORCE_DEFINED(mvarg);	\
		mval2subsc((mvarg), (gv_currkey));	\
		if ((gv_currkey)->end >= (max_key))	\
			ISSUE_GVSUBOFLOW_ERROR(gv_currkey);	\
		is_null = (MV_IS_STRING(mvarg) && (0 == (mvarg)->str.len));	\
	}	\
}
|
|
|
|
/* Copy GVKEY to GVT->clue. Take care NOT to copy GVKEY->top to cluekey->top as they correspond
 * to the allocation sizes of two different memory locations and should stay untouched.
 * Only base (end + 1 bytes), end and prev are copied. GTMASSERTs if the key does not fit in the clue.
 */
#define COPY_CURRKEY_TO_GVTARGET_CLUE(GVT, GVKEY)	\
{	\
	gv_key	*cluekey;	\
	\
	if ((GVT)->clue.top <= (GVKEY)->end)	\
		GTMASSERT;	\
	assert(KEY_DELIMITER == (GVKEY)->base[(GVKEY)->end]);	\
	assert(KEY_DELIMITER == (GVKEY)->base[(GVKEY)->end - 1]);	\
	cluekey = &(GVT)->clue;	\
	memcpy(cluekey->base, (GVKEY)->base, (GVKEY)->end + 1);	\
	cluekey->end = (GVKEY)->end;	\
	cluekey->prev = (GVKEY)->prev;	\
	DBG_CHECK_GVTARGET_INTEGRITY(GVT);	\
}
|
|
|
|
/* Macro to denote special value of first_rec when it is no longer reliable */
#define	GVT_CLUE_FIRST_REC_UNRELIABLE	((short)0xffff)

/* Macro to denote special value of last_rec when it is the absolute maximum (in case of *-keys all the way down) */
#define	GVT_CLUE_LAST_REC_MAXKEY	((short)0xffff)

/* Macro to reset first_rec to a special value to indicate it is no longer reliable
 * (i.e. the keyrange [first_rec, clue] should not be used by gvcst_search).
 * Note that [clue, last_rec] is still a valid keyrange and can be used by gvcst_search.
 * The marker is stored in the first two bytes of first_rec->base.
 */
#define	GVT_CLUE_INVALIDATE_FIRST_REC(GVT)	\
{	\
	assert((GVT)->clue.end);	\
	*((short *)(GVT)->first_rec->base) = GVT_CLUE_FIRST_REC_UNRELIABLE;	\
}
|
|
|
|
#ifdef DEBUG
/* Macro to check that the clue is valid. Basically check that first_rec <= clue <= last_rec. Also check that
 * all of them start with the same global name in case of a GVT. A clue that does not satisfy these validity
 * checks implies the possibility of DBKEYORD errors (e.g. C9905-001119 in VMS).
 * The comparisons only cover the shorter of the two keys being compared.
 */
#define	DEBUG_GVT_CLUE_VALIDATE(GVT)	\
{	\
	mname_entry	*gvent;	\
	unsigned short	klen;	\
	gv_namehead	*gvt;	\
	\
	/* Verify that clue->first_rec <= clue.base <= clue->last_rec.	\
	 * The only exception is if first_rec has been reset to an unreliable value.	\
	 */	\
	gvt = GVT;	/* copy into local variable to avoid evaluating input multiple times */	\
	klen = MIN(gvt->clue.end, gvt->first_rec->end);	\
	assert(klen);	\
	assert((0 <= memcmp(gvt->clue.base, gvt->first_rec->base, klen))	\
		|| (GVT_CLUE_FIRST_REC_UNRELIABLE == *((short *)gvt->first_rec->base)));	\
	klen = MIN(gvt->clue.end, gvt->last_rec->end);	\
	assert(klen);	\
	assert(0 <= memcmp(gvt->last_rec->base, gvt->clue.base, klen));	\
	if (DIR_ROOT != gvt->root)	\
	{	/* Not a directory tree => a GVT tree, check that first_rec/last_rec have at least gvname in it */	\
		gvent = &gvt->gvname;	\
		if (GVT_CLUE_FIRST_REC_UNRELIABLE != *((short *)gvt->first_rec->base))	\
		{	\
			assert((0 == memcmp(gvent->var_name.addr, gvt->first_rec->base, gvent->var_name.len))	\
				&& (KEY_DELIMITER == gvt->first_rec->base[gvent->var_name.len]));	\
		}	\
		if (GVT_CLUE_LAST_REC_MAXKEY != *((short *)gvt->last_rec->base))	\
		{	\
			assert((0 == memcmp(gvent->var_name.addr, gvt->last_rec->base, gvent->var_name.len))	\
				&& (KEY_DELIMITER == gvt->last_rec->base[gvent->var_name.len]));	\
		}	\
	}	\
}
#else
#define	DEBUG_GVT_CLUE_VALIDATE(GVT)
#endif
|
|
|
|
/* Macro used by $ZPREVIOUS to replace a NULL subscript at the end with the maximum possible subscript
 * that could exist in the database for this global name.
 * The last subscript (starting at GVKEY->prev) is overwritten with STR_SUB_MAXVAL bytes up to the region's key
 * size, followed by the two KEY_DELIMITER terminators; GVKEY->end is updated accordingly. GTMASSERTs if the key
 * has no room for this. Uses "gv_cur_region" and "cs_addrs" from the enclosing scope.
 */
#define GVZPREVIOUS_APPEND_MAX_SUBS_KEY(GVKEY, GVT)	\
{	\
	int		lastsubslen, keysize;	\
	unsigned char	*ptr;	\
	\
	assert((GVT)->clue.top || (NULL == (GVT)->gd_csa));	\
	assert(!(GVT)->clue.top || ((NULL != (GVT)->gd_csa) && ((GVT)->gd_csa == cs_addrs)));	\
	/* keysize can be obtained from GVT->clue.top in case of GT.M.	\
	 * But for GT.CM client, clue will be uninitialized. So we would need to	\
	 * compute keysize from gv_cur_region->max_key_size. Since this is true for	\
	 * GT.M as well, we use the same approach for both to avoid an if check and a	\
	 * break in the pipeline.	\
	 */	\
	keysize = DBKEYSIZE(gv_cur_region->max_key_size);	\
	assert(!(GVT)->clue.top || (keysize == (GVT)->clue.top));	\
	lastsubslen = keysize - (GVKEY)->prev - 2;	\
	if ((0 < lastsubslen) && ((GVKEY)->top >= keysize) && ((GVKEY)->end > (GVKEY)->prev))	\
	{	\
		ptr = &(GVKEY)->base[(GVKEY)->prev];	\
		memset(ptr, STR_SUB_MAXVAL, lastsubslen);	\
		ptr += lastsubslen;	\
		*ptr++ = KEY_DELIMITER;	/* terminator for last subscript */	\
		*ptr = KEY_DELIMITER;	/* terminator for entire key */	\
		(GVKEY)->end = (GVKEY)->prev + lastsubslen + 1;	\
		assert((GVKEY)->end == (ptr - &(GVKEY)->base[0]));	\
	} else	\
		GTMASSERT;	\
	/* Check the target that was passed in (GVT), not whatever the global gv_target happens to be */	\
	if (NULL != (GVT)->gd_csa)	\
		DBG_CHECK_GVTARGET_INTEGRITY(GVT);	\
}
|
|
|
|
/* Bit masks for the update_trans & si->update_trans variables */
#define	UPDTRNS_DB_UPDATED_MASK		(1 << 0)	/* 1 if this region was updated by this non-TP/TP transaction */
#define	UPDTRNS_JNL_LOGICAL_MASK	(1 << 1)	/* 1 if logical jnl record was written in this region's
							 * journal file by this TP transaction. Maintained only for TP.
							 */
#define	UPDTRNS_JNL_REPLICATED_MASK	(1 << 2)	/* 1 if there is at least one logical jnl record written in this
							 * region's journal file by this TP transaction that needs to be
							 * replicated across. 0 if all updates done to this region was
							 * inside of a trigger. Maintained only for TP.
							 */
#define	UPDTRNS_TCOMMIT_STARTED_MASK	(1 << 3)	/* 1 if non-TP or TP transaction is beyond the point of rolling
							 * back by "t_commit_cleanup" and can only be rolled forward by
							 * "secshr_db_clnup".
							 */
#define	UPDTRNS_ZTRIGGER_MASK		(1 << 4)	/* 1 if ZTRIGGER command was done in this transaction. This allows
							 * the transaction to be committed even if it had no updates.
							 * Maintained only for TP.
							 */
/* Union of all the bits defined above */
#define	UPDTRNS_VALID_MASK		(UPDTRNS_DB_UPDATED_MASK | UPDTRNS_JNL_LOGICAL_MASK		\
						| UPDTRNS_JNL_REPLICATED_MASK | UPDTRNS_TCOMMIT_STARTED_MASK	\
						| UPDTRNS_ZTRIGGER_MASK)
|
|
|
|
/* The enum codes below correspond to code-paths that can increment the database curr_tn
 * without having a logical update. Journaling currently needs to know all such code-paths */
typedef enum
{
	inctn_invalid_op = 0,		/*  0 : */
	/* the following opcodes do NOT populate the global variable "inctn_detail" */
	inctn_gvcstput_extra_blk_split,	/*  1 : */
	inctn_mu_reorg,			/*  2 : */
	inctn_wcs_recover,		/*  3 : */
	/* the following opcodes populate "inctn_detail.blks2upgrd_struct" */
	inctn_gdsfilext_gtm,		/*  4 : */
	inctn_gdsfilext_mu_reorg,	/*  5 : */
	inctn_db_format_change,		/*  6 : written when cs_data->desired_db_format changes */
	/* the following opcodes populate "inctn_detail.blknum_struct" */
	inctn_bmp_mark_free_gtm,	/*  7 : */
	inctn_bmp_mark_free_mu_reorg,	/*  8 : */
	inctn_blkmarkfree,		/*  9 : a RECYCLED block being marked free by MUPIP REORG UPGRADE/DOWNGRADE */
	inctn_blkupgrd,			/* 10 : written whenever a GDS block is upgraded by MUPIP REORG UPGRADE if
					 *	a) SAFEJNL is specified OR
					 *	b) NOSAFEJNL is specified and the block is not undergoing a fmt change
					 */
	inctn_blkupgrd_fmtchng,		/* 11 : written whenever a GDS block is upgraded by MUPIP REORG UPGRADE -NOSAFEJNL
					 *	and if that block is undergoing a fmt change i.e. (V4 -> V5) OR (V5 -> V4).
					 *	This differentiation (inctn_blkupgrd vs inctn_blkupgrd_fmtchng) is necessary
					 *	because in the latter case we will not be writing a PBLK record and hence
					 *	have no record otherwise of a block fmt change if it occurs (note that a
					 *	PBLK journal record's "ondsk_blkver" field normally helps recovery
					 *	determine if a fmt change occurred or not).
					 */
	inctn_blkdwngrd,		/* 12 : similar to inctn_blkupgrd except that this is for DOWNGRADE */
	inctn_blkdwngrd_fmtchng,	/* 13 : similar to inctn_blkupgrd_fmtchng except that this is for DOWNGRADE */
	/* the following opcodes do NOT populate the global variable "inctn_detail" */
	inctn_opcode_total		/* 14 : MAX. All additions of inctn opcodes should be done BEFORE this line */
} inctn_opcode_t;
|
|
|
|
/* macros to check curr_tn */
/* Headroom kept below the largest representable transaction number: twice the maximum total block count */
#define TN_HEADROOM_V4	(2 * MAXTOTALBLKS_V4)
#define TN_HEADROOM_V5	(2 * MAXTOTALBLKS_V5)
/* Largest usable transaction number for each format: the unsigned maximum minus the headroom above */
#define MAX_TN_V4	((trans_num)(MAXUINT4 - TN_HEADROOM_V4))
#define MAX_TN_V5	(MAXUINT8 - TN_HEADROOM_V5)
/* Multiplier applied to the headroom by SET_TN_WARN when computing the warning threshold */
#define	HEADROOM_FACTOR	4
|
|
|
|
/* the following macro checks that curr_tn < max_tn_warn <= max_tn.
 * if not, it adjusts max_tn_warn accordingly to ensure the above.
 * if not possible, it issues TNTOOLARGE error.
 * When TN has reached max_tn_warn (but not max_tn) a TNWARN message is sent and max_tn_warn is moved
 * roughly halfway between TN and max_tn. Must be invoked while holding crit on CSA.
 */
#define CHECK_TN(CSA, CSD, TN)	\
{	\
	assert((CSA)->hdr == (CSD));	\
	assert((TN) <= (CSD)->max_tn_warn);	\
	assert((CSD)->max_tn_warn <= (CSD)->max_tn);	\
	assert((CSA)->now_crit);	/* Must be crit to mess with stuff */	\
	if ((TN) >= (CSD)->max_tn_warn)	\
	{	\
		trans_num	trans_left;	\
		\
		if ((CSA)->hdr->max_tn <= (TN))	\
		{	\
			rts_error(VARLSTCNT(5) ERR_TNTOOLARGE, 3, DB_LEN_STR((CSA)->region), &(CSA)->hdr->max_tn);	\
			assert(FALSE);	/* should not come here */	\
		}	\
		assert((CSD)->max_tn > (TN));	\
		trans_left = (CSD)->max_tn - (TN);	\
		send_msg(VARLSTCNT(6) ERR_TNWARN, 4, DB_LEN_STR((CSA)->region), &trans_left, &(CSD)->max_tn);	\
		/* next warning point: strictly above TN, at most max_tn */	\
		(CSD)->max_tn_warn = (TN) + 1 + ((trans_left - 1) >> 1);	\
		assert((TN) < (CSD)->max_tn_warn);	\
		assert((CSD)->max_tn_warn <= (CSD)->max_tn);	\
	}	\
}
|
|
|
|
/* Increment the current transaction number in the file header CSD. Asserts beforehand that curr_tn is below the
 * warning threshold, and afterwards that curr_tn has caught up with early_tn.
 */
#define	INCREMENT_CURR_TN(CSD)	\
{	\
	assert((CSD)->trans_hist.curr_tn < (CSD)->max_tn_warn);	\
	assert((CSD)->max_tn_warn <= (CSD)->max_tn);	\
	(CSD)->trans_hist.curr_tn++;	\
	assert((CSD)->trans_hist.curr_tn == (CSD)->trans_hist.early_tn);	\
}
|
|
|
|
/* Compute into "ret_warn_tn" the transaction number at which a TN warning should be issued for CSD:
 * max_tn minus HEADROOM_FACTOR times the format-specific headroom, but never below the current curr_tn.
 * CSD itself is not modified.
 */
#define SET_TN_WARN(CSD, ret_warn_tn)	\
{	\
	trans_num	headroom;	\
	\
	headroom = (gtm_uint64_t)(GDSV4 == (CSD)->desired_db_format ? TN_HEADROOM_V4 : TN_HEADROOM_V5);	\
	headroom *= HEADROOM_FACTOR;	\
	(ret_warn_tn) = (CSD)->trans_hist.curr_tn;	\
	/* only move the threshold if max_tn - headroom does not underflow and lies beyond curr_tn */	\
	if ((headroom < (CSD)->max_tn) && ((ret_warn_tn) < ((CSD)->max_tn - headroom)))	\
		(ret_warn_tn) = (CSD)->max_tn - headroom;	\
	assert((CSD)->trans_hist.curr_tn <= (ret_warn_tn));	\
	assert((ret_warn_tn) <= (CSD)->max_tn);	\
}
|
|
|
|
#define HIST_TERMINATOR		0
/* Size in bytes of a search history "h": two int4 fields plus (h.depth + 1) srch_blk_status entries */
#define HIST_SIZE(h)		( (SIZEOF(int4) * 2) + (SIZEOF(srch_blk_status) * ((h).depth + 1)) )
|
|
|
|
/* Start of lock space in a bg file, therefore also doubles as overhead size for header, bt and wc queues F = # of wc blocks */
#define LOCK_BLOCK(X)		(DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(X) + BT_SIZE(X), DISK_BLOCK_SIZE))
#define LOCK_BLOCK_SIZE(X)	(DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(X) + BT_SIZE(X), OS_PAGE_SIZE))
#define	LOCK_SPACE_SIZE(X)	(ROUND_UP2(((sgmnt_data_ptr_t)(X))->lock_space_size, OS_PAGE_SIZE))
/* In case of an encrypted database, we maintain both encrypted and decrypted versions of the block in shared memory
 * in parallel arrays of global buffers hence the doubling calculation below. Although this doubles the shared memory
 * size requirements for encrypted databases (when compared to the same unencrypted database), it helps in other ways.
 * By ensuring that this encrypted global buffer array contents are identical to the encrypted on-disk block contents
 * of database blocks at all times, we can avoid allocating process private memory to store encrypted before-images
 * (to write to a journal file). Instead processes can use the encrypted global buffer directly for this purpose.
 * In user environments where process-private memory is very costly compared to database shared memory (e.g. where
 * 1000s of GT.M processes run against the same database) the above approach is expected to use lesser total memory.
 */
#define CACHE_CONTROL_SIZE(X)												\
	(ROUND_UP((ROUND_UP(((X)->bt_buckets + (X)->n_bts) * SIZEOF(cache_rec) + SIZEOF(cache_que_heads), OS_PAGE_SIZE)	\
		+ ((gtm_uint64_t)(X)->n_bts * (X)->blk_size * ((X)->is_encrypted ? 2 : 1))), OS_PAGE_SIZE))
/* Page-rounded size of the mmblk_rec array (bt_buckets + n_bts entries) plus its queue heads */
#define MMBLK_CONTROL_SIZE(X)												\
	(ROUND_UP((((sgmnt_data_ptr_t)(X))->bt_buckets + ((sgmnt_data_ptr_t)(X))->n_bts) * SIZEOF(mmblk_rec)		\
		+ SIZEOF(mmblk_que_heads), OS_PAGE_SIZE))
|
|
|
|
OS_PAGE_SIZE_DECLARE
|
|
|
|
#ifdef VMS
|
|
#define MAX_NAME_LEN 31 /* Size of a repl resource name on vvms */
|
|
#endif
|
|
/* structure to identify a given system wide shared section to be ours (replic section) */
|
|
typedef struct
|
|
{
|
|
unsigned char label[GDS_LABEL_SZ];
|
|
char pool_type;
|
|
char now_running[MAX_REL_NAME];
|
|
#ifdef VMS
|
|
char repl_pool_key[MAX_NAME_LEN + 1]; /* resource name for the section */
|
|
char filler[7]; /* makes sure the size of the structure is a multiple of 8 */
|
|
char gtmgbldir[MAX_FN_LEN + 1]; /* Identify which instance of this shared pool corresponds to */
|
|
#else
|
|
int4 repl_pool_key_filler; /* makes sure the size of the structure is a multiple of 8 */
|
|
char instfilename[MAX_FN_LEN + 1]; /* Identify which instance file this shared pool corresponds to */
|
|
#endif
|
|
} replpool_identifier;
|
|
|
|
#if defined(__osf__) && defined(__alpha)
|
|
# pragma pointer_size(save)
|
|
# pragma pointer_size(long)
|
|
#endif
|
|
|
|
typedef replpool_identifier *replpool_id_ptr_t;
|
|
|
|
#if defined(__osf__) && defined(__alpha)
|
|
# pragma pointer_size(restore)
|
|
#endif
|
|
|
|
/* Macro to increment the count of processes that are doing two phase commit.
 * This is invoked just BEFORE starting phase1 of the commit.
 * Also records in csa->wcs_pidcnt_incremented that this process has contributed to the count.
 */
#define INCR_WCS_PHASE2_COMMIT_PIDCNT(csa, cnl)	\
{	\
	assert(!(csa)->wcs_pidcnt_incremented);	\
	INCR_CNT(&(cnl)->wcs_phase2_commit_pidcnt, &(cnl)->wc_var_lock);	\
	(csa)->wcs_pidcnt_incremented = TRUE;	\
}
|
|
|
|
/* Macro to decrement the count of processes that are doing two phase commit.
 * This is invoked just AFTER finishing phase2 of the commit.
 * The private flag csa->wcs_pidcnt_incremented is cleared before the shared count is decremented.
 */
#define DECR_WCS_PHASE2_COMMIT_PIDCNT(csa, cnl)	\
{	\
	assert((csa)->wcs_pidcnt_incremented);	\
	(csa)->wcs_pidcnt_incremented = FALSE;	\
	DECR_CNT(&(cnl)->wcs_phase2_commit_pidcnt, &(cnl)->wc_var_lock);	\
}
|
|
|
|
/* The CAREFUL_DECR_WCS_PHASE2_COMMIT_PIDCNT macro is the same as the DECR_WCS_PHASE2_COMMIT_PIDCNT macro
 * except that it uses CAREFUL_DECR_CNT instead of DECR_CNT. This does alignment checks and is needed by
 * secshr_db_clnup as it runs in kernel mode in VMS. The two macros should be maintained in parallel.
 */
#define CAREFUL_DECR_WCS_PHASE2_COMMIT_PIDCNT(csa, cnl)	\
{	\
	assert((csa)->wcs_pidcnt_incremented);	\
	(csa)->wcs_pidcnt_incremented = FALSE;	\
	CAREFUL_DECR_CNT(&(cnl)->wcs_phase2_commit_pidcnt, &(cnl)->wc_var_lock);	\
}
|
|
|
|
#ifdef UNIX
|
|
/* Insert the process_id into the list of process ids actively doing a kill.
 * The first slot of nl->kip_pid_array that is free (0) or already holds this process_id is used; if all
 * MAX_KIP_PID_SLOTS slots are taken by other pids nothing is recorded. Caller must hold crit.
 */
#define INSERT_KIP_PID(local_csa)	\
{	\
	int		idx;	\
	uint4		pid;	\
	uint4		*kip_pid_arr_ptr;	\
	GBLREF uint4	process_id;	\
	\
	kip_pid_arr_ptr = (local_csa)->nl->kip_pid_array;	\
	assert((local_csa)->now_crit);	\
	for (idx = 0; idx < MAX_KIP_PID_SLOTS; idx++)	\
	{	\
		pid = kip_pid_arr_ptr[idx];	\
		if ((0 == pid) || (process_id == pid))	\
		{	\
			kip_pid_arr_ptr[idx] = process_id;	\
			break;	\
		}	\
	}	\
}
|
|
/* Remove the process_id from the list of process ids actively doing a kill.
 * Only the first slot of nl->kip_pid_array holding this process_id is cleared.
 */
#define REMOVE_KIP_PID(local_csa)	\
{	\
	int		idx;	\
	uint4		*kip_pid_arr_ptr;	\
	GBLREF uint4	process_id;	\
	\
	kip_pid_arr_ptr = (local_csa)->nl->kip_pid_array;	\
	for (idx = 0; idx < MAX_KIP_PID_SLOTS; idx++)	\
	{	\
		if (process_id == kip_pid_arr_ptr[idx])	\
		{	\
			kip_pid_arr_ptr[idx] = 0;	\
			break;	\
		}	\
	}	\
}
|
|
#else
|
|
#define INSERT_KIP_PID(local_csa)
|
|
#define REMOVE_KIP_PID(local_csa)
|
|
#endif
|
|
|
|
/* Decrement the kill-in-progress counter of CSD, remove this process from the kip pid list of CSA and
 * reset KIP_CSA (an lvalue, asserted non-NULL on entry) to NULL.
 */
#define DECR_KIP(CSD, CSA, KIP_CSA)	\
{	\
	sgmnt_data_ptr_t	local_csd;	\
	sgmnt_addrs		*local_csa;	\
	\
	/* Instead of using CSA and CSD directly in DECR_CNT, assign it to	\
	 * local variables as the caller can potentially pass the global	\
	 * kip_csa as the second argument (which also happens to be the	\
	 * third argument which will be reset to NULL below) thereby	\
	 * leading to SEG faults in the calls to DECR_CNT. Similar	\
	 * modifications are in INCR_KIP and their CAREFUL counterparts */	\
	local_csd = CSD;	\
	local_csa = CSA;	\
	assert(NULL != KIP_CSA);	\
	KIP_CSA = NULL;	\
	DECR_CNT(&local_csd->kill_in_prog, &local_csa->nl->wc_var_lock);	\
	REMOVE_KIP_PID(local_csa);	\
}
|
|
/* Note that the INCR_KIP and CAREFUL_INCR_KIP macros should be maintained in parallel */
/* Increment the kill-in-progress counter of CSD, record this process in the kip pid list of CSA and
 * set KIP_CSA (an lvalue, asserted NULL on entry) to CSA.
 */
#define INCR_KIP(CSD, CSA, KIP_CSA)	\
{	\
	sgmnt_data_ptr_t	local_csd;	\
	sgmnt_addrs		*local_csa;	\
	\
	local_csd = CSD;	\
	local_csa = CSA;	\
	assert(NULL == KIP_CSA);	\
	INCR_CNT(&local_csd->kill_in_prog, &local_csa->nl->wc_var_lock);	\
	INSERT_KIP_PID(local_csa);	\
	KIP_CSA = CSA;	\
}
|
|
/* The CAREFUL_INCR_KIP macro is the same as the INCR_KIP macro except that it uses CAREFUL_INCR_CNT instead of INCR_CNT.
 * This does alignment checks and is needed by secshr_db_clnup as it runs in kernel mode in VMS.
 * The INCR_KIP and CAREFUL_INCR_KIP macros should be maintained in parallel.
 */
#define CAREFUL_INCR_KIP(CSD, CSA, KIP_CSA)	\
{	\
	sgmnt_data_ptr_t	local_csd;	\
	sgmnt_addrs		*local_csa;	\
	\
	local_csd = CSD;	\
	local_csa = CSA;	\
	assert(NULL == KIP_CSA);	\
	CAREFUL_INCR_CNT(&local_csd->kill_in_prog, &local_csa->nl->wc_var_lock);	\
	INSERT_KIP_PID(local_csa);	\
	KIP_CSA = CSA;	\
}
|
|
/* Same as DECR_KIP except that CAREFUL_DECR_CNT is used instead of DECR_CNT. CSD and CSA are copied to locals
 * before KIP_CSA is reset to NULL, so the caller may pass the same variable as both CSA and KIP_CSA.
 */
#define CAREFUL_DECR_KIP(CSD, CSA, KIP_CSA)	\
{	\
	sgmnt_data_ptr_t	local_csd;	\
	sgmnt_addrs		*local_csa;	\
	\
	local_csd = CSD;	\
	local_csa = CSA;	\
	assert(NULL != KIP_CSA);	\
	KIP_CSA = NULL;	\
	CAREFUL_DECR_CNT(&local_csd->kill_in_prog, &local_csa->nl->wc_var_lock);	\
	REMOVE_KIP_PID(local_csa);	\
}
|
|
/* Since abandoned_kills counter is only incremented in secshr_db_clnup it does not have its equivalent DECR_ABANDONED_KILLS */
#define CAREFUL_INCR_ABANDONED_KILLS(CSD, CSA)	\
{	\
	CAREFUL_INCR_CNT(&(CSD)->abandoned_kills, &(CSA)->nl->wc_var_lock);	\
}
|
|
|
|
/* Increment the shared inhibit_kills counter in CNL */
#define INCR_INHIBIT_KILLS(CNL)	\
{	\
	INCR_CNT(&(CNL)->inhibit_kills, &(CNL)->wc_var_lock);	\
}
|
|
|
|
/* Decrement the shared inhibit_kills counter in CNL, but only if it is currently positive
 * (WAIT_ON_INHIBIT_KILLS may have reset it to 0 in the meantime).
 */
#define DECR_INHIBIT_KILLS(CNL)	\
{	\
	if (0 < (CNL)->inhibit_kills)	\
		DECR_CNT(&(CNL)->inhibit_kills, &(CNL)->wc_var_lock);	\
}
|
|
|
|
/* Commands like MUPIP BACKUP, MUPIP INTEG -REG or MUPIP FREEZE wait for kills-in-prog flag to become zero.
 * While these process wait for ongoing block-freeing KILLs (or reorg actions that free up blocks) to complete,
 * new block-freeing KILLs (or reorg actions that free up blocks) are deferred using inhibit_kills counter.
 * New block-freeing KILLs/REORG will wait for a maximum period of 1 minute until inhibit_kills counter is 0.
 * In case of timeout, they will proceed after resetting the inhibit_kills to 0. The reset is done in case
 * the inhibit_kills was orphaned (i.e. the process that set it got killed before it got a chance to reset).
 * The wait is a loop of wcs_sleep(sleep_counter) calls that gives up once sleep_counter reaches MAXKILLINHIBITWAIT.
 */
#define WAIT_ON_INHIBIT_KILLS(CNL, MAXKILLINHIBITWAIT)	\
{	\
	int4			sleep_counter;	\
	\
	GBLREF boolean_t	need_kip_incr;	\
	GBLREF uint4		dollar_tlevel;	\
	\
	assert(dollar_tlevel || need_kip_incr);	\
	for (sleep_counter = 1; (0 < (CNL)->inhibit_kills); ++sleep_counter)	\
	{	\
		if ((MAXKILLINHIBITWAIT) <= sleep_counter)	\
		{	\
			(CNL)->inhibit_kills = 0;	\
			SHM_WRITE_MEMORY_BARRIER;	\
			break;	\
		}	\
		wcs_sleep(sleep_counter);	\
	}	\
}
|
|
|
|
/* Wait for a region freeze to be turned off. Note that we dont hold CRIT at this point. Ideally we would have
 * READ memory barriers between each iterations of sleep to try and get the latest value of the "freeze" field from
 * the concurrently updated database shared memory. But since region-freeze is a perceivably rare event, we choose
 * not to do the memory barriers. The consequence of this decision is that it might take more iterations for us to
 * see updates to the "freeze" field than it would have if we did the memory barrier each iteration. But since we
 * dont hold crit at this point AND since freeze is a rare event, we dont mind the extra wait.
 */
#define MAXHARDCRITS		31

/* Loop until CSD->freeze is clear. The first MAXHARDCRITS iterations just re-check the field; after that
 * wcs_backoff(iteration count) is called between checks.
 */
#define	WAIT_FOR_REGION_TO_UNFREEZE(CSA, CSD)	\
{	\
	int	lcnt1;	\
	\
	assert((CSA)->hdr == (CSD));	\
	assert(!(CSA)->now_crit);	\
	for (lcnt1 = 1; ; lcnt1++)	\
	{	\
		if (!(CSD)->freeze)	\
			break;	\
		if (MAXHARDCRITS < lcnt1)	\
			wcs_backoff(lcnt1);	\
	}	\
}
|
|
|
|
/* Entered holding crit on "reg"; leaves holding crit with the region not frozen. While csd->freeze is set,
 * crit is released, the freeze is waited out with WAIT_FOR_REGION_TO_UNFREEZE and crit is grabbed again,
 * re-checking the freeze each time since it can be turned back on while crit was not held.
 */
#define GRAB_UNFROZEN_CRIT(reg, csa, csd)	\
{	\
	int	lcnt;	\
	\
	assert(&FILE_INFO(reg)->s_addrs == (csa) && (csa)->hdr == (csd));	\
	assert((csa)->now_crit);	\
	for (lcnt = 0; ; lcnt++)	\
	{	\
		if (!(csd)->freeze)	\
			break;	\
		rel_crit(reg);	\
		WAIT_FOR_REGION_TO_UNFREEZE(csa, csd);	\
		grab_crit(reg);	\
	}	\
	assert(!(csd)->freeze && (csa)->now_crit);	\
}
|
|
/* remove "csa" from list of open regions (cs_addrs_list).
 * Walks the singly linked list (next_csa) looking for CSA and unlinks it if found; the list is left untouched
 * if CSA is not on it. CSA->nl is asserted to be NULL already.
 */
#define	REMOVE_CSA_FROM_CSADDRSLIST(CSA)	\
{	\
	GBLREF sgmnt_addrs	*cs_addrs_list;	\
	\
	sgmnt_addrs	*tmpcsa, *prevcsa;	\
	\
	assert(NULL != (CSA));	\
	assert(NULL == (CSA)->nl);	\
	prevcsa = NULL;	\
	for (tmpcsa = cs_addrs_list; NULL != tmpcsa; tmpcsa = tmpcsa->next_csa)	\
	{	\
		if ((CSA) == tmpcsa)	\
			break;	\
		prevcsa = tmpcsa;	\
	}	\
	/* tmpcsa could not be equal to CSA in case CSA was never added to this list	\
	 * (possible in case of errors during gvcst_init). In dbg, the only case we	\
	 * know of this is if an external signal causes exit processing before db_init	\
	 * completes. Assert accordingly.	\
	 */	\
	assert((tmpcsa == (CSA)) || process_exiting);	\
	if (tmpcsa == (CSA))	\
	{	\
		if (NULL != prevcsa)	\
			prevcsa->next_csa = (CSA)->next_csa;	\
		else	\
			cs_addrs_list = (CSA)->next_csa;	\
	}	\
}
|
|
|
|
/* Sentinel values denoting "no semaphore id" / "no shared memory id" */
#define INVALID_SEMID	(-1)
#define INVALID_SHMID	(-1L)
|
|
|
|
#if defined(UNIX)
|
|
/* fsync() the database file descriptor udi->fd, bumping the n_db_fsyncs trace counter (and n_db_fsyncs_in_crit
 * if csa holds crit). "db_fsync_in_prog" (an lvalue) is incremented for the duration of the fsync and is back
 * at its entry value afterwards. "save_errno" (an lvalue) is set to 0 on success and to errno if fsync fails.
 * "reg" is not used by this macro.
 */
#define DB_FSYNC(reg, udi, csa, db_fsync_in_prog, save_errno)	\
{	\
	BG_TRACE_PRO_ANY(csa, n_db_fsyncs);	\
	if ((csa)->now_crit)	\
		BG_TRACE_PRO_ANY(csa, n_db_fsyncs_in_crit);	\
	db_fsync_in_prog++;	\
	save_errno = 0;	\
	if (-1 == fsync((udi)->fd))	\
		save_errno = errno;	\
	/* Undo the increment exactly once, whether or not fsync succeeded. Decrementing in the failure	\
	 * branch as well would leave the counter one lower than at entry (and trip the assert below).	\
	 */	\
	db_fsync_in_prog--;	\
	assert(0 <= db_fsync_in_prog);	\
}
|
|
|
|
#define STANDALONE(x) mu_rndwn_file(x, TRUE)
|
|
#define DBFILOP_FAIL_MSG(status, msg) gtm_putmsg(VARLSTCNT(5) msg, 2, DB_LEN_STR(gv_cur_region), status);
|
|
#elif defined(VMS)
|
|
#define STANDALONE(x)			mu_rndwn_file(TRUE)	/* gv_cur_region needs to be equal to "x" */
/* Report a failed database file operation on gv_cur_region via gtm_putmsg, including the RMS fab$l_stv value.
 * Note: the expansion already ends in a semicolon.
 */
#define DBFILOP_FAIL_MSG(status, msg)	gtm_putmsg(VARLSTCNT(6) msg, 2, DB_LEN_STR(gv_cur_region), status,	\
						FILE_INFO(gv_cur_region)->fab->fab$l_stv);
|
|
#else
|
|
#error unsupported platform
|
|
#endif
|
|
|
|
/* Sanity predicates on cache-record (cr) and mmblk-record (mbr) pointers: "not aligned" is relative to the given
 * base in units of the record size; "not in range" is relative to the given lo/hi bounds as interpreted by
 * IS_PTR_IN_RANGE.
 */
#define CR_NOT_ALIGNED(cr, cr_base)		(!IS_PTR_ALIGNED((cr), (cr_base), SIZEOF(cache_rec)))
#define CR_NOT_IN_RANGE(cr, cr_lo, cr_hi)	(!IS_PTR_IN_RANGE((cr), (cr_lo), (cr_hi)))
#define MBR_NOT_ALIGNED(mbr, mbr_base)		(!IS_PTR_ALIGNED((mbr), (mbr_base), SIZEOF(mmblk_rec)))
#define MBR_NOT_IN_RANGE(mbr, mbr_lo, mbr_hi)	(!IS_PTR_IN_RANGE((mbr), (mbr_lo), (mbr_hi)))
|
|
|
|
/* Examine that cr->buffaddr is indeed what it should be. If not, this macro fixes its value by
 * recomputing from the cache_array.
 * NOTE: We rely on bt_buckets, n_bts and blk_size fields of file header being correct/not corrupt.
 * Computes the bounds of the cache-record array (skipping the bt_buckets hash-bucket entries) and
 * delegates to CR_BUFFER_CHECK1.
 */
#define CR_BUFFER_CHECK(reg, csa, csd, cr)	\
{	\
	cache_rec_ptr_t	cr_lo, cr_hi;	\
	\
	cr_lo = (cache_rec_ptr_t)(csa)->acc_meth.bg.cache_state->cache_array + (csd)->bt_buckets;	\
	cr_hi = cr_lo + (csd)->n_bts;	\
	CR_BUFFER_CHECK1(reg, csa, csd, cr, cr_lo, cr_hi);	\
}
|
|
|
|
/* A more efficient macro than CR_BUFFER_CHECK when we have cr_lo and cr_hi already available.
 * The expected buffer offset of "cr" is the page-rounded relative offset of cr_hi plus (cr - cr_lo) blocks of
 * csd->blk_size bytes. If cr->buffaddr differs, a DBCRERR message is sent and cr->buffaddr is corrected.
 */
#define CR_BUFFER_CHECK1(reg, csa, csd, cr, cr_lo, cr_hi)	\
{	\
	INTPTR_T bp, bp_lo, bp_top, cr_top;	\
	\
	cr_top = GDS_ANY_ABS2REL(csa, cr_hi);	\
	bp_lo = ROUND_UP(cr_top, OS_PAGE_SIZE);	\
	bp = bp_lo + ((cr) - (cr_lo)) * (csd)->blk_size;	\
	if (bp != (cr)->buffaddr)	\
	{	\
		send_msg(VARLSTCNT(13) ERR_DBCRERR, 11, DB_LEN_STR(reg), cr, (cr)->blk,	\
			RTS_ERROR_TEXT("cr->buffaddr"), (cr)->buffaddr, bp, CALLFROM);	\
		(cr)->buffaddr = bp;	\
	}	\
	/* bp_top is only needed by the assert below, hence computed in debug builds only */	\
	DEBUG_ONLY(bp_top = bp_lo + (gtm_uint64_t)(csd)->n_bts * (csd)->blk_size;)	\
	assert(IS_PTR_IN_RANGE(bp, bp_lo, bp_top) && IS_PTR_ALIGNED(bp, bp_lo, (csd)->blk_size));	\
}
|
|
|
|
#define DB_INFO UNIX_ONLY(unix_db_info)VMS_ONLY(vms_gds_info)
|
|
|
|
/* Make sure SEG->file_cntl and SEG->file_cntl->file_info are allocated: each is allocated with MALLOC_INIT
 * only if it is currently NULL; already-present structures are left untouched.
 */
#define FILE_CNTL_INIT_IF_NULL(SEG)	\
{	\
	file_control	*lcl_fc;	\
	\
	lcl_fc = (SEG)->file_cntl;	\
	if (NULL == lcl_fc)	\
	{	\
		MALLOC_INIT(lcl_fc, SIZEOF(file_control));	\
		(SEG)->file_cntl = lcl_fc;	\
	}	\
	/* lcl_fc and SEG->file_cntl are the same structure here, so filling lcl_fc->file_info is sufficient */	\
	if (NULL == lcl_fc->file_info)	\
	{	\
		MALLOC_INIT(lcl_fc->file_info, SIZEOF(DB_INFO));	\
	}	\
}
|
|
/* Unconditionally allocate a fresh file_control and its file_info (both via MALLOC_INIT) and hang it off
 * SEG->file_cntl; any previous value of SEG->file_cntl is overwritten without being released.
 */
#define FILE_CNTL_INIT(SEG)	\
{	\
	file_control	*lcl_fc;	\
	\
	MALLOC_INIT(lcl_fc, SIZEOF(file_control));	\
	MALLOC_INIT(lcl_fc->file_info, SIZEOF(DB_INFO));	\
	(SEG)->file_cntl = lcl_fc;	\
}
|
|
|
|
/* TRUE if "is_dollar_incr" is set and "t_err" is ERR_GVPUTFAIL (both taken from the enclosing scope) */
#define IS_DOLLAR_INCREMENT			((is_dollar_incr) && (ERR_GVPUTFAIL == t_err))

#define AVG_BLKS_PER_100_GBL		200
#define PRE_READ_TRIGGER_FACTOR		50
#define UPD_RESERVED_AREA		50
#define UPD_WRITER_TRIGGER_FACTOR	33

/* TRUE if a snapshot is in progress for CSA while neither a backup is in progress nor before-image journaling is on */
#define SNAPSHOT_ALONE_IN_PROG(CSA)	((CSA)->snapshot_in_prog && !(CSA)->backup_in_prog	\
						&& !(JNL_ENABLED(CSA) && (CSA)->jnl_before_image))
|
|
|
|
#ifdef GTM_SNAPSHOT
|
|
# define SNAPSHOTS_IN_PROG(X) ((X)->snapshot_in_prog)
|
|
/* Creates a new snapshot context. Called by GT.M (or utilities like update process, MUPIP LOAD which use the
 * GT.M runtime). Sets CSA->snapshot_in_prog to TRUE up front and resets it to FALSE if context creation fails
 * now or is recorded (in the context's cur_state) as having failed earlier for the same snapshot cycle.
 * The context is (re)created only when the process-private ss_shmcycle differs from the one in CNL.
 */
# define SS_INIT_IF_NEEDED(CSA, CNL)	\
{	\
	int			ss_shmcycle;	\
	boolean_t		status;	\
	snapshot_context_ptr_t	lcl_ss_ctx;	\
	\
	lcl_ss_ctx = SS_CTX_CAST((CSA)->ss_ctx);	\
	assert(NULL != lcl_ss_ctx);	\
	ss_shmcycle = (CNL)->ss_shmcycle;	\
	(CSA)->snapshot_in_prog = TRUE;	\
	assert(lcl_ss_ctx->ss_shmcycle <= ss_shmcycle);	\
	if (lcl_ss_ctx->ss_shmcycle != ss_shmcycle)	\
	{	/* Process' view of snapshot is stale. Create/Update snapshot context */	\
		status = ss_create_context(lcl_ss_ctx, ss_shmcycle);	\
		if (!status)	\
		{	/* snapshot context creation failed. Reset private copy of snapshot_in_prog so that we don't	\
			 * read the before images in t_end or op_tcommit */	\
			(CSA)->snapshot_in_prog = FALSE;	\
		}	\
		assert(!status || (SNAPSHOT_INIT_DONE == lcl_ss_ctx->cur_state));	\
		assert(status || (SHADOW_FIL_OPEN_FAIL == lcl_ss_ctx->cur_state)	\
			|| (SNAPSHOT_SHM_ATTACH_FAIL == lcl_ss_ctx->cur_state)	\
			|| (SNAPSHOT_NOT_INITED == lcl_ss_ctx->cur_state));	\
	} else if ((SHADOW_FIL_OPEN_FAIL == lcl_ss_ctx->cur_state)	\
			|| (SNAPSHOT_SHM_ATTACH_FAIL == lcl_ss_ctx->cur_state))	\
	{	/* Previous attempt at snapshot context creation failed (say, snapshot file open failed) and the error	\
		 * has been reported in the shared memory. However, the snapshot is not yet complete. So, set	\
		 * snapshot_in_prog to FALSE since the ongoing snapshot is not valid (as indicated by us in the prior	\
		 * transaction/retry inside crit)	\
		 * Note that we will be doing this 'if' check unconditionally until MUPIP INTEG detects the error in	\
		 * shared memory which can be avoided by making GT.M itself set CNL->snapshot_in_prog to FALSE when it	\
		 * detects inside crit that snapshot initialization failed for this process and hence the ongoing	\
		 * snapshot is no longer valid. This way we don't wait for MUPIP INTEG to detect and terminate the	\
		 * snapshots	\
		 */	\
		(CSA)->snapshot_in_prog = FALSE;	\
	}	\
}
|
|
|
|
#ifdef DEBUG
/* Debug-only consistency check of a fully initialised private snapshot context against CNL.
 *	LCL_SS_CTX : snapshot context pointer to validate.
 *	CNL        : pointer whose snapshot-in-progress indication and ss_shmid are compared against the context.
 * Every check is an assert; nothing is modified. Without DEBUG the macro expands to nothing.
 */
# define DBG_ENSURE_SNAPSHOT_GOOD_TO_GO(LCL_SS_CTX, CNL)	\
{	\
	shm_snapshot_ptr_t	ss_shm_ptr;	\
	\
	assert(SNAPSHOTS_IN_PROG(CNL));	\
	assert(NULL != (LCL_SS_CTX));	\
	ss_shm_ptr = (LCL_SS_CTX)->ss_shm_ptr;	\
	assert(NULL != ss_shm_ptr);	\
	assert(SNAPSHOT_INIT_DONE == (LCL_SS_CTX)->cur_state);	\
	assert(0 == (LCL_SS_CTX)->failure_errno);	\
	assert((-1 != (CNL)->ss_shmid) &&	\
		((LCL_SS_CTX)->attach_shmid == (CNL)->ss_shmid));	\
	assert(NULL != (LCL_SS_CTX)->start_shmaddr);	\
	assert(0 == strcmp((LCL_SS_CTX)->shadow_file, ss_shm_ptr->ss_info.shadow_file));	\
	assert(-1 != (LCL_SS_CTX)->shdw_fd);	\
}
#else
# define DBG_ENSURE_SNAPSHOT_GOOD_TO_GO(LCL_SS_CTX, CNL)
#endif
|
|
/* Destroy an existing snapshot context. Called by GT.M (or utilities like update process, MUPIP LOAD which use
 * the GT.M runtime). Assumes that csa->snapshot_in_prog is TRUE and as a side effect sets csa->snapshot_in_prog
 * to FALSE if the context is destroyed.
 *	CSA : pointer whose ss_ctx member holds the private snapshot context; its snapshot_in_prog member is written.
 *	CNL : pointer supplying the shared snapshot-in-progress indication and the current ss_shmcycle.
 * The context is destroyed when CNL no longer shows a snapshot in progress or when its ss_shmcycle differs from
 * the one recorded in the private context.
 * Expands to a brace-enclosed block (a statement, not an expression). CSA and CNL are evaluated more than once.
 */
# define SS_RELEASE_IF_NEEDED(CSA, CNL)	\
{	\
	int			ss_shmcycle;	\
	snapshot_context_ptr_t	lcl_ss_ctx;	\
	\
	lcl_ss_ctx = SS_CTX_CAST((CSA)->ss_ctx);	\
	assert(SNAPSHOTS_IN_PROG(CSA) && (NULL != lcl_ss_ctx));	\
	ss_shmcycle = (CNL)->ss_shmcycle;	\
	/* Test the CNL argument itself; do not rely on the caller having a variable that happens to be named cnl */	\
	if (!SNAPSHOTS_IN_PROG(CNL) || (lcl_ss_ctx->ss_shmcycle != ss_shmcycle))	\
	{	\
		ss_destroy_context(lcl_ss_ctx);	\
		(CSA)->snapshot_in_prog = FALSE;	\
	}	\
}
|
|
|
|
/* No need to write before-image in case the block is FREE. In case the database had never been fully upgraded from V4 to V5 format
 * (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered FREE (i.e. no need to write before-images since
 * backward journal recovery will never be expected to take the database to a point BEFORE the mupip upgrade).
 * The logic to check whether the before image of a given block has to be read is slightly more complicated if snapshots are
 * present. For snapshots, we might want to read the before images of FREE blocks. Also, if the block that we are reading
 * is already before imaged by some other GT.M process then we do not need to read the before image of such a block. But, such
 * a condition is applicable ONLY if snapshots alone are in progress as we might want the same block for BACKUP if it is in
 * progress.
 * Note: The below condition, to before image FREE blocks, is needed only if INTEG is the snapshot initiator. When we add
 * bitmasks or some alternate mechanism to optionalize the before image'ing of FREE blocks, this condition must be tweaked
 * accordingly. For now, INTEG is the only snapshot initiator.
 *
 *	read_before_image : caller's flag; the result is FALSE whenever this is FALSE.
 *	CS                : pointer whose was_free member is consulted.
 *	csa               : pointer handed to SNAPSHOTS_IN_PROG, SNAPSHOT_ALONE_IN_PROG and ss_chk_shdw_bitmap.
 *	csd               : pointer whose db_got_to_v5_once member is consulted.
 *	blk_no            : block number handed to ss_chk_shdw_bitmap.
 *	retval            : lvalue that receives the answer.
 * Expands to a brace-enclosed block (a statement, not an expression). retval is assigned twice and read once.
 */
# define BEFORE_IMAGE_NEEDED(read_before_image, CS, csa, csd, blk_no, retval)	\
{	\
	(retval) = ((read_before_image) && (csd)->db_got_to_v5_once && (!(CS)->was_free || SNAPSHOTS_IN_PROG(csa)));	\
	(retval) = (retval) && (!SNAPSHOT_ALONE_IN_PROG(csa)	\
				|| !ss_chk_shdw_bitmap(csa, SS_CTX_CAST((csa)->ss_ctx), blk_no));	\
}
|
|
|
|
/* Re-validate, while holding crit (asserted through CSA->now_crit), the snapshot state this process noted earlier.
 *	CSA                : pointer carrying now_crit, snapshot_in_prog and ss_ctx.
 *	CNL                : pointer carrying the shared snapshot-in-progress indication, ss_shmcycle and ss_shmid.
 *	SS_NEED_TO_RESTART : lvalue. It is assigned only when CNL shows a valid snapshot in progress: FALSE or TRUE
 *			     when the cycles match, TRUE when a newer cycle is seen. On every other path it is left
 *			     untouched, so the caller has to initialise it.
 * Nothing happens unless CSA or CNL shows a snapshot in progress. A context-creation failure noted earlier is
 * published in the snapshot shared memory (failure_errno, failed_pid) and logged with send_msg only if the shm id
 * or shadow file name used at that time still matches the shared values; otherwise a restart is requested.
 * Expands to a brace-enclosed block (a statement, not an expression). CSA and CNL are evaluated more than once.
 */
# define CHK_AND_UPDATE_SNAPSHOT_STATE_IF_NEEDED(CSA, CNL, SS_NEED_TO_RESTART)	\
{	\
	GBLREF uint4		process_id;	\
	\
	uint4			lcl_failure_errno;	\
	ss_proc_status		cur_state;	\
	shm_snapshot_ptr_t	ss_shm_ptr;	\
	snapshot_context_ptr_t	lcl_ss_ctx;	\
	boolean_t		csa_snapshot_in_prog, cnl_snapshot_in_prog;	\
	\
	assert((CSA)->now_crit);	\
	csa_snapshot_in_prog = SNAPSHOTS_IN_PROG(CSA);	\
	cnl_snapshot_in_prog = SNAPSHOTS_IN_PROG(CNL);	\
	if (csa_snapshot_in_prog || cnl_snapshot_in_prog)	\
	{	\
		lcl_ss_ctx = SS_CTX_CAST((CSA)->ss_ctx);	\
		ss_shm_ptr = (shm_snapshot_ptr_t)(SS_GETSTARTPTR(CSA));	\
		assert(lcl_ss_ctx->ss_shmcycle <= (CNL)->ss_shmcycle);	\
		if (!cnl_snapshot_in_prog || ss_shm_ptr->failure_errno)	\
		{	/* No on going snapshots or on going snapshot is invalid. Even if we encountered error during snapshot	\
			 * context creation outside crit, we ignore it as the snapshot is no more active/valid.	\
			 */	\
			(CSA)->snapshot_in_prog = FALSE;	\
		} else if (lcl_ss_ctx->ss_shmcycle == (CNL)->ss_shmcycle)	\
		{	/* Neither new snapshots started nor existing ones completed. However, it's possible that we might have	\
			 * encountered error during snapshot context creation outside crit. If the values noted outside crit	\
			 * matches with the global values, then the error is genuine. If not, then we might have done operations	\
			 * (shm attach and file open) when things in the shared memory were in flux in which case we need to	\
			 * restart	\
			 */	\
			lcl_failure_errno = lcl_ss_ctx->failure_errno;	\
			assert(!ss_shm_ptr->failure_errno);	\
			(SS_NEED_TO_RESTART) = FALSE;	\
			cur_state = lcl_ss_ctx->cur_state;	\
			switch(cur_state)	\
			{	\
				case SNAPSHOT_INIT_DONE:	\
					/* Most common case. Ensure the local values of snapshot context matches with the	\
					 * values stored in shared memory */	\
					assert(csa_snapshot_in_prog);	\
					DBG_ENSURE_SNAPSHOT_GOOD_TO_GO(lcl_ss_ctx, CNL);	\
					break;	\
				case SNAPSHOT_SHM_ATTACH_FAIL:	\
					assert(0 != lcl_failure_errno);	\
					assert(FALSE == (CSA)->snapshot_in_prog);	\
					if (lcl_ss_ctx->nl_shmid == (CNL)->ss_shmid)	\
					{	/* Error encountered outside crit is genuine. Indicate MUPIP INTEG that the	\
						 * snapshot is no more valid	\
						 */	\
						send_msg(VARLSTCNT(4) ERR_SSATTACHSHM, 1, lcl_ss_ctx->nl_shmid,	\
							lcl_failure_errno);	\
						ss_shm_ptr->failure_errno = lcl_failure_errno;	\
						ss_shm_ptr->failed_pid = process_id;	\
					} else /* snapshot context creation done while things were in flux */	\
						(SS_NEED_TO_RESTART) = TRUE;	\
					break;	\
				case SHADOW_FIL_OPEN_FAIL:	\
					assert(0 != lcl_failure_errno);	\
					assert(FALSE == (CSA)->snapshot_in_prog);	\
					if (0 == STRCMP(lcl_ss_ctx->shadow_file, ss_shm_ptr->ss_info.shadow_file))	\
					{	/* Error encountered outside crit is genuine. Indicate MUPIP INTEG that the	\
						 * snapshot is no more valid	\
						 */	\
						send_msg(VARLSTCNT(7) ERR_SSFILOPERR, 4, LEN_AND_LIT("open"),	\
							LEN_AND_STR(lcl_ss_ctx->shadow_file), lcl_failure_errno);	\
						ss_shm_ptr->failure_errno = lcl_failure_errno;	\
						ss_shm_ptr->failed_pid = process_id;	\
					} else /* snapshot context creation done while things were in flux */	\
						(SS_NEED_TO_RESTART) = TRUE;	\
					break;	\
				default:	\
					assert(FALSE);	\
			}	\
		} else /* A new snapshot has started after we grabbed crit in t_end. Need to restart */	\
			(SS_NEED_TO_RESTART) = TRUE;	\
	}	\
}
|
|
|
|
/* Copy block blkid into the snapshot shadow file unless the shadow bitmap already marks it as before-imaged.
 *	csa, cr, mm_blk_ptr : passed through unchanged to ss_chk_shdw_bitmap / ss_write_block.
 *	blkid               : block number being checked and written.
 *	lcl_ss_ctx          : snapshot context; asserted non-NULL.
 * A failing ss_write_block() only trips an assert; no status is handed back to the caller.
 * Expands to a brace-enclosed block (a statement, not an expression).
 */
# define WRITE_SNAPSHOT_BLOCK(csa, cr, mm_blk_ptr, blkid, lcl_ss_ctx)	\
{	\
	assert(NULL != (lcl_ss_ctx));	\
	/* write this block to the snapshot shadow file only if this was not already	\
	 * before imaged. If error happens while writing to the snapshot file, then	\
	 * ss_write_block will mark the appropriate error in the shared memory	\
	 * which INTEG will later query and report accordingly. So, just continue	\
	 * as if nothing happened.	\
	 */	\
	if (!ss_chk_shdw_bitmap(csa, lcl_ss_ctx, blkid))	\
	{	\
		if (!ss_write_block(csa, blkid, cr, mm_blk_ptr, lcl_ss_ctx))	\
			assert(FALSE);	\
	}	\
}
|
|
#else
|
|
/* Stand-ins used on the #else side of the enclosing conditional: SNAPSHOTS_IN_PROG is constant FALSE and the
 * snapshot statement macros expand to nothing, so their arguments are never evaluated.
 */
# define SNAPSHOTS_IN_PROG(X)	(FALSE)
# define WRITE_SNAPSHOT_BLOCK(csa, cr, mm_blk_ptr, blkid, lcl_ss_ctx)
# define SS_INIT_IF_NEEDED(CSA, CNL)
# define SS_RELEASE_IF_NEEDED(CSA, CNL)
|
|
/* No need to write before-image in case the block is FREE. In case the database had never been fully upgraded from V4 to V5 format
 * (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered FREE (i.e. no need to write before-images since
 * backward journal recovery will never be expected to take the database to a point BEFORE the mupip upgrade).
 *
 *	read_before_image : caller's flag; the result is FALSE whenever this is FALSE.
 *	CS                : pointer whose was_free member is consulted.
 *	csd               : pointer whose db_got_to_v5_once member is consulted.
 *	retval            : lvalue that receives the answer.
 *	csa, blk_no       : accepted for signature parity with the snapshot-aware variant; unused here.
 * Expands to a brace-enclosed block, the same shape as the snapshot-aware variant of this macro.
 */
# define BEFORE_IMAGE_NEEDED(read_before_image, CS, csa, csd, blk_no, retval)	\
{	\
	(retval) = ((read_before_image) && (csd)->db_got_to_v5_once && !(CS)->was_free);	\
}
|
|
#endif
|
|
|
|
/* Determine if the state of 'backup in progress' has changed since we grabbed crit in t_end.c/tp_tend.c
 *	CNL              : pointer whose nbb member is compared against BACKUP_NOT_IN_PROGRESS.
 *	CSA              : pointer whose backup_in_prog member is the private copy; toggled if it is out of date.
 *	NEW_BKUP_STARTED : lvalue set to TRUE only when the private copy goes from FALSE to TRUE. It is never
 *			   cleared here, so the caller has to initialise it.
 * Expands to a brace-enclosed block (a statement, not an expression). CSA is evaluated more than once.
 */
#define CHK_AND_UPDATE_BKUP_STATE_IF_NEEDED(CNL, CSA, NEW_BKUP_STARTED)	\
{	\
	if ((CSA)->backup_in_prog != (BACKUP_NOT_IN_PROGRESS != (CNL)->nbb))	\
	{	\
		if (!(CSA)->backup_in_prog)	\
			(NEW_BKUP_STARTED) = TRUE;	\
		(CSA)->backup_in_prog = !(CSA)->backup_in_prog;	\
	}	\
}
|
|
|
|
/* Non-zero when both the bsiz and tn members of the header pointed to by bp are zero */
#define BLK_HDR_EMPTY(bp)	(!((bp)->bsiz || (bp)->tn))
|
|
|
|
/* Return type of region_freeze() (see its prototype in this header).
 * NOTE(review): the meanings below are read off the enumerator names; confirm against region_freeze().
 */
typedef enum
{
	REG_FREEZE_SUCCESS,	/* presumably: the freeze/unfreeze request was carried out */
	REG_ALREADY_FROZEN,	/* presumably: the region was already frozen */
	REG_HAS_KIP		/* presumably: a kill was in progress (cf. the wait_for_kip argument) */
} freeze_status;
|
|
|
|
/* Thin wrapper: forwards both arguments to gv_bind_name() and does nothing else */
#define GV_BIND_NAME_ONLY(ADDR, TARG)	gv_bind_name((ADDR), (TARG))
|
|
|
|
/* Bind the name via GV_BIND_NAME_ONLY and then, when the access method of gv_cur_region is dba_bg or dba_mm,
 * call gvcst_root_search() if gv_target->root is 0 or DIR_ROOT.
 *	ADDR, TARG : forwarded unchanged to GV_BIND_NAME_ONLY.
 * Reads the globals gv_cur_region and gv_target after the bind.
 * Expands to a brace-enclosed block (a statement, not an expression).
 */
#define GV_BIND_NAME_AND_ROOT_SEARCH(ADDR, TARG)	\
{	\
	enum db_acc_method	acc_meth;	\
	GBLREF gd_region	*gv_cur_region;	\
	GBLREF gv_namehead	*gv_target;	\
	\
	GV_BIND_NAME_ONLY(ADDR, TARG);	\
	acc_meth = gv_cur_region->dyn.addr->acc_meth;	\
	if ((dba_bg == acc_meth) || (dba_mm == acc_meth))	\
	{	\
		if ((0 == gv_target->root) || (DIR_ROOT == gv_target->root))	\
			gvcst_root_search();	\
	}	\
}
|
|
|
|
void assert_jrec_member_offsets(void);
|
|
bt_rec_ptr_t bt_put(gd_region *r, int4 block);
|
|
void bt_que_refresh(gd_region *greg);
|
|
void bt_init(sgmnt_addrs *cs);
|
|
void bt_malloc(sgmnt_addrs *csa);
|
|
void bt_refresh(sgmnt_addrs *csa);
|
|
void db_common_init(gd_region *reg, sgmnt_addrs *csa, sgmnt_data_ptr_t csd);
|
|
void grab_crit(gd_region *reg);
|
|
void grab_lock(gd_region *reg);
|
|
void gv_init_reg(gd_region *reg);
|
|
void gvcst_init(gd_region *greg);
|
|
enum cdb_sc gvincr_compute_post_incr(srch_blk_status *bh);
|
|
enum cdb_sc gvincr_recompute_upd_array(srch_blk_status *bh, struct cw_set_element_struct *cse, cache_rec_ptr_t cr);
|
|
boolean_t mupfndfil(gd_region *reg, mstr *mstr_addr);
|
|
boolean_t region_init(bool cm_regions);
|
|
freeze_status region_freeze(gd_region *region, boolean_t freeze, boolean_t override, boolean_t wait_for_kip);
|
|
void rel_crit(gd_region *reg);
|
|
void rel_lock(gd_region *reg);
|
|
boolean_t wcs_verify(gd_region *reg, boolean_t expect_damage, boolean_t caller_is_wcs_recover);
|
|
bool wcs_wtfini(gd_region *reg);
|
|
|
|
/* wcs_wtstart takes an extra 'writes' argument on UNIX; any other platform is rejected at compile time */
#ifdef VMS
int4	wcs_wtstart(gd_region *region);
#elif defined(UNIX)
int4	wcs_wtstart(gd_region *region, int4 writes);
#else
#error Undefined Platform
#endif
|
|
|
|
void bmm_init(void);
|
|
int4 bmm_find_free(uint4 hint, uchar_ptr_t base_addr, uint4 total_bits);
|
|
|
|
bool reg_cmcheck(gd_region *reg);
|
|
|
|
void gv_bind_name(gd_addr *addr, mstr *targ);
|
|
|
|
void db_csh_ini(sgmnt_addrs *cs);
|
|
void db_csh_ref(sgmnt_addrs *cs_addrs);
|
|
cache_rec_ptr_t db_csh_get(block_id block);
|
|
cache_rec_ptr_t db_csh_getn(block_id block);
|
|
|
|
enum cdb_sc tp_hist(srch_hist *hist1);
|
|
|
|
sm_uc_ptr_t get_lmap(block_id blk, unsigned char *bits, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr);
|
|
|
|
bool ccp_userwait(struct gd_region_struct *reg, uint4 state, int4 *timadr, unsigned short cycle);
|
|
void ccp_closejnl_ast(struct gd_region_struct *reg);
|
|
bt_rec *ccp_bt_get(sgmnt_addrs *cs_addrs, int4 block);
|
|
unsigned char *mval2subsc(mval *in_val, gv_key *out_key);
|
|
|
|
int4 dsk_read(block_id blk, sm_uc_ptr_t buff, enum db_ver *ondisk_blkver, boolean_t blk_free);
|
|
|
|
uint4 jnl_flush(gd_region *reg);
|
|
void jnl_fsync(gd_region *reg, uint4 fsync_addr);
|
|
void jnl_mm_timer(sgmnt_addrs *csa, gd_region *reg);
|
|
void jnl_oper_user_ast(gd_region *reg);
|
|
void jnl_wait(gd_region *reg);
|
|
void view_jnlfile(mval *dst, gd_region *reg);
|
|
void jnl_put_jrt_pfin(sgmnt_addrs *csa);
|
|
void jnl_put_jrt_pini(sgmnt_addrs *csa);
|
|
void jnl_write_epoch_rec(sgmnt_addrs *csa);
|
|
void jnl_write_inctn_rec(sgmnt_addrs *csa);
|
|
void fileheader_sync(gd_region *reg);
|
|
|
|
gd_addr *create_dummy_gbldir(void);
|
|
|
|
/* These prototypes should ideally be included in gvstats_rec.h but they require "sgmnt_addrs" type
|
|
* to be defined which is done in this header file, hence the prototyping is done here instead.
|
|
*/
|
|
void gvstats_rec_csd2cnl(sgmnt_addrs *csa);
|
|
void gvstats_rec_cnl2csd(sgmnt_addrs *csa);
|
|
void gvstats_rec_upgrade(sgmnt_addrs *csa);
|
|
|
|
#include "gdsfheadsp.h"
|
|
|
|
/* End of gdsfhead.h */
|
|
|
|
#endif
|