1226 lines
53 KiB
C
1226 lines
53 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2012 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include <sys/mman.h>
|
|
#ifndef __MVS__
|
|
#include <sys/param.h>
|
|
#endif
|
|
#include <errno.h>
|
|
#include <sys/un.h>
|
|
#include <sys/sem.h>
|
|
#include <sys/shm.h>
|
|
#include <sys/time.h>
|
|
#include "gtm_ipc.h"
|
|
#include "gtm_socket.h"
|
|
#include "gtm_fcntl.h"
|
|
#include "gtm_unistd.h"
|
|
#include "gtm_stdio.h"
|
|
#include "gtm_string.h"
|
|
#include "gtm_sem.h"
|
|
#include "gtm_statvfs.h"
|
|
|
|
#include "gt_timer.h"
|
|
#include "gdsroot.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "gdsblk.h"
|
|
#include "gdscc.h"
|
|
#include "min_max.h"
|
|
#include "gdsblkops.h"
|
|
#include "filestruct.h"
|
|
#include "parse_file.h"
|
|
#include "jnl.h"
|
|
#include "interlock.h"
|
|
#include "io.h"
|
|
#include "iosp.h"
|
|
#include "error.h"
|
|
#include "mutex.h"
|
|
#include "gtmio.h"
|
|
#include "mupipbckup.h"
|
|
#include "gtmimagename.h"
|
|
#include "mmseg.h"
|
|
#include "gtmsecshr.h"
|
|
#include "secshr_client.h"
|
|
#include "ftok_sems.h"
|
|
#include "repl_msg.h"
|
|
#include "gtmsource.h"
|
|
#include "anticipatory_freeze.h"
|
|
|
|
/* Include prototypes */
|
|
#include "mlk_shr_init.h"
|
|
#include "gtm_c_stack_trace.h"
|
|
#include "eintr_wrappers.h"
|
|
#include "eintr_wrapper_semop.h"
|
|
#include "is_file_identical.h"
|
|
#include "repl_instance.h"
|
|
|
|
#include "heartbeat_timer.h"
|
|
#include "util.h"
|
|
#include "dbfilop.h"
|
|
#include "gvcst_protos.h"
|
|
#include "is_raw_dev.h"
|
|
#include "gv_match.h"
|
|
#include "do_semop.h"
|
|
#include "gvcmy_open.h"
|
|
#include "wcs_sleep.h"
|
|
#include "do_shmat.h"
|
|
#include "send_msg.h"
|
|
#include "gtmmsg.h"
|
|
#include "shmpool.h"
|
|
#include "gtm_permissions.h"
|
|
#include "wbox_test_init.h"
|
|
#include "wcs_clean_dbsync.h" /* for setting wcs_clean_dbsync pointer */
|
|
#ifdef GTM_CRYPT
|
|
#include "gtmcrypt.h"
|
|
#endif
|
|
#include "have_crit.h"
|
|
#ifdef __MVS__
|
|
#include "gtm_zos_io.h"
|
|
#endif
|
|
#include "db_snapshot.h"
|
|
#include "lockconst.h" /* for LOCK_AVAILABLE */
|
|
#ifdef GTM_TRUNCATE
|
|
#include "recover_truncate.h"
|
|
#endif
|
|
|
|
#ifndef GTM_SNAPSHOT
|
|
# error "Snapshot facility not supported in this platform"
|
|
#endif
|
|
|
|
#define REQRUNDOWN_TEXT "semid is invalid but shmid is valid or at least one of sem_ctime or shm_ctime are non-zero"
|
|
#define MAX_ACCESS_SEM_RETRIES 2
|
|
|
|
/* Reset the snapshot bookkeeping reachable through (CSA)->nl: ss_shmid is set to INVALID_SHMID, ss_shmcycle and
 * num_snapshots_in_effect are zeroed, CLEAR_SNAPSHOTS_IN_PROG is applied, the snapshot_crit_latch is set to
 * LOCK_AVAILABLE and SS_DEFAULT_INIT_POOL is invoked on the pointer returned by SS_GETSTARTPTR(CSA).
 * CSA is parenthesized where it is dereferenced so that an expression argument (e.g. "base + 1") expands correctly.
 */
#define SS_INFO_INIT(CSA)									\
{												\
	shm_snapshot_ptr_t	ss_shm_ptr;							\
	node_local_ptr_t	lcl_cnl;							\
												\
	lcl_cnl = (CSA)->nl;									\
	lcl_cnl->ss_shmid = INVALID_SHMID;							\
	lcl_cnl->ss_shmcycle = 0;								\
	CLEAR_SNAPSHOTS_IN_PROG(lcl_cnl);							\
	lcl_cnl->num_snapshots_in_effect = 0;							\
	SET_LATCH_GLOBAL(&lcl_cnl->snapshot_crit_latch, LOCK_AVAILABLE);			\
	assert(1 == MAX_SNAPSHOTS); /* To ensure that we revisit this whenever multiple snapshots is implemented */ \
	ss_shm_ptr = (shm_snapshot_ptr_t)(SS_GETSTARTPTR(CSA));					\
	SS_DEFAULT_INIT_POOL(ss_shm_ptr);							\
}
|
|
|
|
/* Raise ERR_DBFILERR (with an explanatory ERR_TEXT and the current errno) when "status_l" is -1.
 * Relies on "status_l" and "reg" being visible in the scope where the macro is expanded.
 */
#define GTM_ATTACH_CHECK_ERROR										\
{													\
	if (-1 == status_l)										\
		rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2,			\
			LEN_AND_LIT("Error attaching to database shared memory"), errno);		\
}
|
|
|
|
/* Attach to the shared memory identified by udi->shmid via do_shmat() and record the address in
 * csa->db_addrs[0]; the same value (as an sm_long_t) is left in "status_l" so GTM_ATTACH_CHECK_ERROR can
 * detect a -1 result. On success csa->nl is pointed at the start of the attached segment.
 * Relies on "csa", "udi", "reg" and "status_l" being visible in the expanding scope.
 */
#define GTM_ATTACH_SHM											\
{													\
	csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND);				\
	status_l = (sm_long_t)csa->db_addrs[0];								\
	GTM_ATTACH_CHECK_ERROR;										\
	csa->nl = (node_local_ptr_t)csa->db_addrs[0];							\
}
|
|
|
|
/* Attach to shared memory (GTM_ATTACH_SHM) and classify its contents through the two output arguments:
 *	SHM_SETUP_OK	: TRUE only if csa->nl->label matches GDS_LABEL_GENERIC and csa->nl->now_running matches
 *			  gtm_release_name.
 *	VERMISMATCH	: TRUE only if the label matches but now_running differs from gtm_release_name.
 * Both are FALSE when the label does not match.
 *
 * The checks for GDS_LABEL_GENERIC and gtm_release_name ensure that the shared memory under consideration is
 * valid. If shared memory is already initialized, the VERMISMATCH check is done BEFORE referencing any other
 * fields in shared memory.
 *
 * On a mismatch, csa->nl->now_running is copied into the local "now_running" before it is later passed to
 * rts_error() due to the following issue:
 * In VMS, a call to rts_error() copies only the error message and its arguments (as pointers) and transfers
 * control to the topmost condition handler which is dbinit_ch() in this case. dbinit_ch() does a PRN_ERROR only
 * for SUCCESS/INFO (VERMISMATCH is neither of them) and in addition nullifies csa->nl as part of its condition
 * handling. It then transfers control to the next level condition handler which does a PRN_ERROR but at that
 * point in time, the parameter csa->nl->now_running is no longer accessible and hence no parameter substitution
 * occurs (i.e. the error message gets displayed with plain !ADs).
 * In UNIX, this is not an issue since the first call to rts_error() does the error message construction before
 * handing control to the topmost condition handler. But it does not hurt to do the copy.
 */
#define GTM_ATTACH_SHM_AND_CHECK_VERS(VERMISMATCH, SHM_SETUP_OK)					\
{													\
	GTM_ATTACH_SHM;											\
	VERMISMATCH = FALSE;										\
	SHM_SETUP_OK = FALSE;										\
	if (!MEMCMP_LIT(csa->nl->label, GDS_LABEL_GENERIC))						\
	{												\
		if (0 == memcmp(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1))	\
			SHM_SETUP_OK = TRUE;								\
		else											\
		{											\
			assert(strlen(csa->nl->now_running) < SIZEOF(now_running));			\
			memcpy(now_running, csa->nl->now_running, SIZEOF(now_running));			\
			now_running[SIZEOF(now_running) - 1] = '\0'; /* protection against bad csa->nl->now_running values */ \
			VERMISMATCH = TRUE;								\
		}											\
	}												\
}
|
|
|
|
/* Report ERR_VERMISMATCH at most once per expansion scope (guarded by "vermismatch_already_printed").
 * For DSE, change VERMISMATCH to be INFO (instead of the more appropriate WARNING) as we want the condition
 * handler (dbinit_ch) to do a CONTINUE (which it does only for severity levels SUCCESS or INFO) and resume
 * processing in gvcst_init.c instead of detaching from shared memory. All other images get severity ERROR.
 * Relies on "reg", "now_running" and "vermismatch_already_printed" being visible in the expanding scope.
 */
#define GTM_VERMISMATCH_ERROR										\
{													\
	if (!vermismatch_already_printed)								\
	{												\
		vermismatch_already_printed = TRUE;							\
		rts_error(VARLSTCNT(8) MAKE_MSG_TYPE(ERR_VERMISMATCH, ((IS_DSE_IMAGE) ? INFO : ERROR)), 6, \
			DB_LEN_STR(reg), gtm_release_name_len, gtm_release_name,			\
			LEN_AND_STR(now_running));							\
	}												\
}
|
|
|
|
#ifdef GTM_CRYPT
/* When DO_CRYPT_INIT is true, do database specific encryption initialization. For all utilities other than GT.M,
 * defer the error until encryption invocation is actually necessary. This way, MUPIP/DSE can continue as long as
 * the operation does not involve encryption (for instance MUPIP JOURNAL -EXTRACT -SHOW=HEADER). For GT.M, issue
 * the error right away.
 *	INIT_STATUS	: lvalue holding the status of the preceding process-level initialization; the
 *			  database-level initialization is attempted only if it is 0, and the final value is stored
 *			  in (CSA)->encrypt_init_status.
 * REG and CSA are parenthesized where they are dereferenced so expression arguments expand correctly.
 */
#define INIT_DB_ENCRYPTION_IF_NEEDED(DO_CRYPT_INIT, INIT_STATUS, REG, CSA, TSD)			\
{												\
	if (DO_CRYPT_INIT)									\
	{											\
		if (0 == INIT_STATUS)								\
			INIT_DB_ENCRYPTION((REG)->dyn.addr->fname, CSA, TSD, INIT_STATUS);	\
		if ((0 != INIT_STATUS) && IS_GTM_IMAGE)						\
			GC_RTS_ERROR(INIT_STATUS, (REG)->dyn.addr->fname);			\
		(CSA)->encrypt_init_status = INIT_STATUS; /* defer error reporting */		\
	}											\
}
/* When DO_CRYPT_INIT is true, run the process-level encryption initialization, leaving its status in INIT_STATUS. */
#define INIT_PROC_ENCRYPTION_IF_NEEDED(DO_CRYPT_INIT, INIT_STATUS)				\
{												\
	if (DO_CRYPT_INIT)									\
		INIT_PROC_ENCRYPTION(INIT_STATUS);						\
}
#else
/* Encryption is not compiled in: both macros expand to nothing (same parameter lists as the GTM_CRYPT variants). */
#define INIT_DB_ENCRYPTION_IF_NEEDED(DO_CRYPT_INIT, INIT_STATUS, REG, CSA, TSD)
#define INIT_PROC_ENCRYPTION_IF_NEEDED(DO_CRYPT_INIT, INIT_STATUS)
#endif
|
|
|
|
/* Read the database file header of region REG into the buffer TSD by filling in the region's file_control
 * structure (FC_READ, op_pos 1, op_len SIZEOF(sgmnt_data)) and invoking dbfilop().
 *	REG	: region whose (REG)->dyn.addr->file_cntl describes the already-opened file.
 *	TSD	: destination buffer; must have room for SIZEOF(sgmnt_data) bytes.
 * Both parameters are parenthesized so that expression arguments expand correctly; in particular the cast to
 * sm_uc_ptr_t now applies to the whole TSD expression rather than to its first operand only.
 */
#define READ_DB_FILE_HEADER(REG, TSD)				\
{								\
	file_control	*fc;					\
								\
	fc = (REG)->dyn.addr->file_cntl;			\
	fc->file_type = (REG)->dyn.addr->acc_meth;		\
	fc->op = FC_READ;					\
	fc->op_buff = (sm_uc_ptr_t)(TSD);			\
	fc->op_pos = 1;						\
	fc->op_len = SIZEOF(sgmnt_data);			\
	dbfilop(fc);						\
}
|
|
|
|
/* Read the master map of region REG into MM_ADDR(CSD) by filling in the region's file_control structure
 * (FC_READ of MASTER_MAP_SIZE(CSD) bytes at position MM_BLOCK) and invoking dbfilop().
 * Asserts that the access method recorded in CSD is dba_bg.
 * REG and CSD are parenthesized where they are dereferenced so that expression arguments expand correctly.
 */
#define READ_DB_FILE_MASTERMAP(REG, CSD)			\
{								\
	file_control	*fc;					\
								\
	assert(dba_bg == (CSD)->acc_meth);			\
	fc = (REG)->dyn.addr->file_cntl;			\
	fc->file_type = dba_bg;					\
	fc->op = FC_READ;					\
	fc->op_buff = MM_ADDR(CSD);				\
	fc->op_len = MASTER_MAP_SIZE(CSD);			\
	fc->op_pos = MM_BLOCK;					\
	dbfilop(fc);						\
}
|
|
|
|
/* Report that the database needs attention after a probable crash. The error issued depends on the journaling
 * and replication state recorded in "tsd":
 *	journaling not enabled				-> ERR_REQRUNDOWN
 *	journaling + replication + before-images	-> ERR_REQROLLBACK
 *	journaling enabled otherwise			-> ERR_REQRECOV
 * CNT is the number of arguments supplied in addition to the base count of 10 (callers in this file pass 1 when
 * they append errno after the ERR_TEXT triple, 0 otherwise); ARG supplies the machine_name field; the variadic
 * arguments are appended to the rts_error() call as the secondary message.
 * Relies on "tsd" and "reg" being visible in the expanding scope.
 */
#define PRINT_CRASH_MESSAGE(CNT, ARG, ...)								\
{													\
	if (!(JNL_ENABLED(tsd)))									\
		rts_error(VARLSTCNT(10 + CNT) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg),			\
			LEN_AND_STR((ARG)->machine_name), __VA_ARGS__);					\
	else if (REPL_ENABLED(tsd) && tsd->jnl_before_image)						\
		rts_error(VARLSTCNT(10 + CNT) ERR_REQROLLBACK, 4, DB_LEN_STR(reg),			\
			LEN_AND_STR((ARG)->machine_name), __VA_ARGS__);					\
	else												\
		rts_error(VARLSTCNT(10 + CNT) ERR_REQRECOV, 4, DB_LEN_STR(reg),				\
			LEN_AND_STR((ARG)->machine_name), __VA_ARGS__);					\
}
|
|
|
|
GBLREF boolean_t gtm_fullblockwrites; /* Do full (not partial) database block writes T/F */
|
|
GBLREF boolean_t is_src_server;
|
|
GBLREF boolean_t mupip_jnl_recover;
|
|
GBLREF gd_region *gv_cur_region;
|
|
GBLREF ipcs_mesg db_ipcs;
|
|
GBLREF jnlpool_addrs jnlpool;
|
|
GBLREF node_local_ptr_t locknl;
|
|
GBLREF uint4 heartbeat_counter;
|
|
GBLREF uint4 mutex_per_process_init_pid;
|
|
GBLREF uint4 process_id;
|
|
GBLREF void (*wcs_clean_dbsync_fptr)();
|
|
GBLREF jnl_gbls_t jgbl;
|
|
GTMCRYPT_ONLY(
|
|
GBLREF gtmcrypt_key_t mu_int_encrypt_key_handle;
|
|
)
|
|
#ifndef MUTEX_MSEM_WAKE
|
|
GBLREF int mutex_sock_fd;
|
|
#endif
|
|
|
|
LITREF char gtm_release_name[];
|
|
LITREF int4 gtm_release_name_len;
|
|
|
|
OS_PAGE_SIZE_DECLARE
|
|
|
|
error_def(ERR_BADDBVER);
|
|
ZOS_ONLY(error_def(ERR_BADTAG);)
|
|
error_def(ERR_CLSTCONFLICT);
|
|
error_def(ERR_CRITSEMFAIL);
|
|
error_def(ERR_DBCREINCOMP);
|
|
error_def(ERR_DBFILERR);
|
|
error_def(ERR_DBFLCORRP);
|
|
error_def(ERR_DBIDMISMATCH);
|
|
error_def(ERR_DBNAMEMISMATCH);
|
|
error_def(ERR_DBNOTGDS);
|
|
error_def(ERR_DBSHMNAMEDIFF);
|
|
error_def(ERR_JNLBUFFREGUPD);
|
|
error_def(ERR_NLMISMATCHCALC);
|
|
error_def(ERR_MMNODYNUPGRD);
|
|
error_def(ERR_PERMGENFAIL);
|
|
error_def(ERR_REQROLLBACK);
|
|
error_def(ERR_REQRECOV);
|
|
error_def(ERR_REQRUNDOWN);
|
|
error_def(ERR_SYSCALL);
|
|
error_def(ERR_TEXT);
|
|
error_def(ERR_VERMISMATCH);
|
|
|
|
/* Open the database file that backs region "reg".
 *
 * The segment's file name is expanded with parse_file() (default extension DEF_NODBEXT for raw devices,
 * DEF_DBEXT otherwise) and written back into seg->fname. The file is then opened read-write; if that fails it is
 * opened read-only and the region is marked read-only. Finally the file is stat'ed to record its file id, and
 * gv_match() is consulted to see whether the region duplicates one that is already known.
 *
 * Returns:
 *	(gd_region *)-1L	if the parsed name contains a node specification (gvcmy_open() has been called).
 *	the gv_match() result	if it is non-NULL; the descriptor opened here is closed and seg->file_cntl is freed.
 *	reg			otherwise, with udi->fd open on the database file.
 *
 * Errors: ERR_DBFILERR is raised through rts_error() if the name cannot be parsed, the file cannot be opened
 * in either mode, or the stat fails. The stat-failure path closes the descriptor opened here before raising the
 * error, so that a later retry (which re-opens into udi->fd) does not leak it.
 */
gd_region *dbfilopn(gd_region *reg)
{
	unix_db_info	*udi;
	parse_blk	pblk;
	mstr		file;
	char		*fnptr, fbuff[MAX_FBUFF + 1];
	struct stat	buf;
	gd_region	*prev_reg;
	gd_segment	*seg;
	int		status;
	bool		raw;
	int		stat_res, rc, save_errno;
	ZOS_ONLY(int	realfiletag;)

	seg = reg->dyn.addr;
	assert(seg->acc_meth == dba_bg || seg->acc_meth == dba_mm);
	FILE_CNTL_INIT_IF_NULL(seg);
	file.addr = (char *)seg->fname;
	file.len = seg->fname_len;
	memset(&pblk, 0, SIZEOF(pblk));
	pblk.buffer = fbuff;
	pblk.buff_size = MAX_FBUFF;
	pblk.fop = (F_SYNTAXO | F_PARNODE);
	/* is_raw_dev() needs a null-terminated copy of the (length-counted) file name */
	memcpy(fbuff, file.addr, file.len);
	*(fbuff + file.len) = '\0';
	if (is_raw_dev(fbuff))
	{
		raw = TRUE;
		pblk.def1_buf = DEF_NODBEXT;
		pblk.def1_size = SIZEOF(DEF_NODBEXT) - 1;
	} else
	{
		raw = FALSE;
		pblk.def1_buf = DEF_DBEXT;
		pblk.def1_size = SIZEOF(DEF_DBEXT) - 1;
	}
	status = parse_file(&file, &pblk);
	if (!(status & 1))
	{	/* parse failed (low bit of status clear) */
		if (!IS_GTCM_GNP_SERVER_IMAGE)
		{
			free(seg->file_cntl->file_info);
			free(seg->file_cntl);
			seg->file_cntl = NULL;
		}
		rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status);
	}
	/* Replace the segment's file name with the expanded name, keeping it null-terminated */
	assert(((int)pblk.b_esl + 1) <= SIZEOF(seg->fname));
	memcpy(seg->fname, pblk.buffer, pblk.b_esl);
	pblk.buffer[pblk.b_esl] = 0;
	seg->fname[pblk.b_esl] = 0;
	seg->fname_len = pblk.b_esl;
	if (pblk.fnb & F_HAS_NODE)
	{	/* Remote node specification given */
		assert(pblk.b_node && pblk.l_node[pblk.b_node - 1] == ':');
		gvcmy_open(reg, &pblk);
		return (gd_region *)-1L;
	}
	fnptr = (char *)seg->fname + pblk.b_node;
	udi = FILE_INFO(reg);
	udi->raw = raw;
	udi->fn = (char *)fnptr;
	OPENFILE(fnptr, O_RDWR, udi->fd);
	if (!udi->grabbed_access_sem)
	{	/* If the process already has standalone access, these fields are initialized in mu_rndwn_file */
		udi->ftok_semid = INVALID_SEMID;
		udi->semid = INVALID_SEMID;
		udi->shmid = INVALID_SHMID;
		udi->gt_sem_ctime = 0;
		udi->gt_shm_ctime = 0;
	}
	reg->read_only = FALSE;		/* maintain csa->read_write simultaneously */
	udi->s_addrs.read_write = TRUE;	/* maintain reg->read_only simultaneously */
	if (FD_INVALID == udi->fd)
	{	/* Read-write open failed; fall back to a read-only open */
		OPENFILE(fnptr, O_RDONLY, udi->fd);
		if (FD_INVALID == udi->fd)
		{
			save_errno = errno;	/* capture before free() can disturb errno */
			if (!IS_GTCM_GNP_SERVER_IMAGE)
			{
				free(seg->file_cntl->file_info);
				free(seg->file_cntl);
				seg->file_cntl = NULL;
			}
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
		}
		reg->read_only = TRUE;			/* maintain csa->read_write simultaneously */
		udi->s_addrs.read_write = FALSE;	/* maintain reg->read_only simultaneously */
	}
#	ifdef __MVS__
	if (-1 == gtm_zos_tag_to_policy(udi->fd, TAG_BINARY, &realfiletag))
		TAG_POLICY_SEND_MSG(fnptr, errno, realfiletag, TAG_BINARY);
#	endif
	STAT_FILE(fnptr, &buf, stat_res);
	if (-1 == stat_res)
	{
		save_errno = errno;		/* capture before the close below can disturb errno */
		CLOSEFILE_RESET(udi->fd, rc);	/* do not leak the descriptor opened above; resets "udi->fd" to FD_INVALID */
		rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
	}
	set_gdid_from_stat(&udi->fileid, &buf);
	if (NULL != (prev_reg = gv_match(reg)))
	{	/* gv_match() returned an existing region for this file; discard what was set up here and return it */
		CLOSEFILE_RESET(udi->fd, rc);	/* resets "udi->fd" to FD_INVALID */
		free(seg->file_cntl->file_info);
		free(seg->file_cntl);
		seg->file_cntl = NULL;
		return prev_reg;
	}
	return reg;
}
|
|
|
|
/* Compute the size of the database shared memory segment for region "reg" and store it in *sec_size.
 * The size is the sum of the sections used by the region's access method (dba_mm or dba_bg), rounded up to a
 * multiple of OS_PAGE_SIZE. Any other access method triggers GTMASSERT.
 *	reg		: region; only reg->dyn.addr->acc_meth is consulted.
 *	csd		: file header from which the variable section sizes are derived.
 *	sec_size	: output; receives the computed size.
 */
void dbsecspc(gd_region *reg, sgmnt_data_ptr_t csd, gtm_uint64_t *sec_size)
{
	/* Every section placed in shared memory is expected to be a multiple of OS_PAGE_SIZE */
	assert(0 == NODE_LOCAL_SPACE % OS_PAGE_SIZE);
	assert(0 == LOCK_SPACE_SIZE(csd) % OS_PAGE_SIZE);
	assert(0 == JNL_SHARE_SIZE(csd) % OS_PAGE_SIZE);
	assert(0 == SHMPOOL_SECTION_SIZE % OS_PAGE_SIZE);
	if (dba_mm == reg->dyn.addr->acc_meth)
	{
		assert(0 == MMBLK_CONTROL_SIZE(csd) % OS_PAGE_SIZE);
		*sec_size = ROUND_UP(NODE_LOCAL_SPACE + LOCK_SPACE_SIZE(csd) + MMBLK_CONTROL_SIZE(csd)
				+ JNL_SHARE_SIZE(csd) + SHMPOOL_SECTION_SIZE, OS_PAGE_SIZE);
	} else if (dba_bg == reg->dyn.addr->acc_meth)
	{
		assert(0 == CACHE_CONTROL_SIZE(csd) % OS_PAGE_SIZE);
		*sec_size = ROUND_UP(NODE_LOCAL_SPACE + (LOCK_BLOCK(csd) * DISK_BLOCK_SIZE) + LOCK_SPACE_SIZE(csd)
				+ CACHE_CONTROL_SIZE(csd) + JNL_SHARE_SIZE(csd) + SHMPOOL_SECTION_SIZE, OS_PAGE_SIZE);
	} else
		GTMASSERT;
}
|
|
|
|
void db_init(gd_region *reg)
|
|
{
|
|
boolean_t is_bg, read_only, sem_created = FALSE, need_stacktrace, have_standalone_access;
|
|
boolean_t shm_setup_ok = FALSE, vermismatch = FALSE, vermismatch_already_printed = FALSE;
|
|
boolean_t new_shm_ipc, do_crypt_init = FALSE, replinst_mismatch;
|
|
char machine_name[MAX_MCNAMELEN];
|
|
int gethostname_res, stat_res, mm_prot, group_id, perm, save_udi_semid;
|
|
int4 status, semval, dblksize, fbwsize, save_errno, wait_time, loopcnt, sem_pid;
|
|
sm_long_t status_l;
|
|
sgmnt_addrs *csa;
|
|
sgmnt_data tsdbuff;
|
|
sgmnt_data_ptr_t csd, tsd;
|
|
struct sembuf sop[3];
|
|
struct stat stat_buf;
|
|
union semun semarg;
|
|
struct semid_ds semstat;
|
|
struct shmid_ds shmstat;
|
|
struct statvfs dbvfs;
|
|
uint4 sopcnt, start_hrtbt_cntr;
|
|
unix_db_info *udi;
|
|
char now_running[MAX_REL_NAME];
|
|
int init_status;
|
|
gtm_uint64_t sec_size;
|
|
semwait_status_t retstat;
|
|
struct perm_diag_data pdd;
|
|
boolean_t bypassed_ftok = FALSE, bypassed_access = FALSE;
|
|
int jnl_buffer_size;
|
|
char s[JNLBUFFUPDAPNDX_SIZE]; /* JNLBUFFUPDAPNDX_SIZE is defined in jnl.h */
|
|
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
ESTABLISH(dbinit_ch);
|
|
assert(INTRPT_IN_GVCST_INIT == intrpt_ok_state); /* we better be called from gvcst_init */
|
|
wcs_clean_dbsync_fptr = &wcs_clean_dbsync;
|
|
tsd = &tsdbuff;
|
|
read_only = reg->read_only;
|
|
TREF(new_dbinit_ipc) = 0; /* we did not create a new ipc resource */
|
|
udi = FILE_INFO(reg);
|
|
memset(machine_name, 0, SIZEOF(machine_name));
|
|
if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
|
|
rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
|
|
assert(strlen(machine_name) < MAX_MCNAMELEN);
|
|
csa = &udi->s_addrs;
|
|
csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL; /* to help in dbinit_ch and gds_rundown */
|
|
reg->opening = TRUE;
|
|
assert(0 <= udi->fd); /* database file must have been already opened by dbfilopn() done from gvcst_init() */
|
|
FSTAT_FILE(udi->fd, &stat_buf, stat_res); /* get the stats for the database file */
|
|
if (-1 == stat_res)
|
|
rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
|
|
/* Setup new group and permissions if indicated by the security rules. */
|
|
if (gtm_set_group_and_perm(&stat_buf, &group_id, &perm, PERM_IPC, &pdd) < 0)
|
|
{
|
|
send_msg(VARLSTCNT(6 + PERMGENDIAG_ARG_COUNT)
|
|
ERR_PERMGENFAIL, 4, RTS_ERROR_STRING("ipc resources"), RTS_ERROR_STRING(udi->fn),
|
|
PERMGENDIAG_ARGS(pdd));
|
|
rts_error(VARLSTCNT(6 + PERMGENDIAG_ARG_COUNT)
|
|
ERR_PERMGENFAIL, 4, RTS_ERROR_STRING("ipc resources"), RTS_ERROR_STRING(udi->fn),
|
|
PERMGENDIAG_ARGS(pdd));
|
|
}
|
|
/* if the process has standalone access, it will have udi->grabbed_access_sem set to TRUE at this point. Note that down
|
|
* in a local variable as the udi->grabbed_access_sem will be set to TRUE even for non-standalone access below and hence
|
|
* we can't rely on that later to determine if the process had standalone access or not when it entered this function.
|
|
*/
|
|
have_standalone_access = udi->grabbed_access_sem;
|
|
if (!have_standalone_access)
|
|
{
|
|
do_crypt_init = (reg->dyn.addr->is_encrypted && !IS_LKE_IMAGE);
|
|
INIT_PROC_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status); /* heavy-weight so needs to be done before ftok */
|
|
start_hrtbt_cntr = heartbeat_counter;
|
|
if (!ftok_sem_get2(reg, start_hrtbt_cntr, &retstat, &bypassed_ftok))
|
|
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "ftok");
|
|
if (bypassed_ftok)
|
|
send_msg(VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("FTOK bypassed at database initialization"));
|
|
/* At this point we have ftok_semid semaphore based on ftok key. Any ftok conflicted region will block at this
|
|
* point. For example, if a.dat and b.dat both have same ftok and process A tries to open or close a.dat and
|
|
* process B tries to open or close b.dat, even though the database accesses don't conflict, the first one to
|
|
* control the ftok semaphore blocks (makes wait) the other(s).
|
|
*/
|
|
READ_DB_FILE_HEADER(reg, tsd); /* file already opened by dbfilopn() done from gvcst_init() */
|
|
DO_BADDBVER_CHK(reg, tsd); /* need to do BADDBVER check before de-referencing shmid and semid from file header
|
|
* as they could be at different offsets if the database is V4-format */
|
|
if (reg->dyn.addr->is_encrypted != tsd->is_encrypted)
|
|
{ /* Encryption setting different between global directory and database file header */
|
|
reg->dyn.addr->is_encrypted = tsd->is_encrypted; /* override with the value in file header */
|
|
do_crypt_init = (tsd->is_encrypted && !IS_LKE_IMAGE);
|
|
if (do_crypt_init)
|
|
{ /* Encryption is turned on in the file header. Need to do encryption initialization. Release ftok
|
|
* as initialization is heavy-weight.
|
|
*/
|
|
if (!ftok_sem_release(reg, TRUE, FALSE)) /* decrement counter so later increment is correct */
|
|
rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
|
|
INIT_PROC_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status); /* redo initialization */
|
|
start_hrtbt_cntr = heartbeat_counter; /* update to reflect time lost in encryption initialization */
|
|
if (!ftok_sem_get2(reg, start_hrtbt_cntr, &retstat, &bypassed_ftok))
|
|
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "ftok");
|
|
if (bypassed_ftok)
|
|
send_msg(VARLSTCNT(4) ERR_TEXT, 2,
|
|
LEN_AND_LIT("bypassed at database encryption initialization"));
|
|
} /* else encryption is turned off in the file header. Continue as-is. Any encryption initialization done
|
|
* before is discarded
|
|
*/
|
|
}
|
|
INIT_DB_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status, reg, csa, tsd);
|
|
# ifdef DEBUG
|
|
if (gtm_white_box_test_case_enabled && (WBTEST_HOLD_ONTO_FTOKSEM_IN_DBINIT == gtm_white_box_test_case_number))
|
|
{
|
|
DBGFPF((stderr, "Holding the ftok semaphore.. Sleeping for 30 seconds\n"));
|
|
LONG_SLEEP(30);
|
|
DBGFPF((stderr, "30 second sleep exhausted.. continuing with rest of db_init..\n"));
|
|
}
|
|
# endif
|
|
for (loopcnt = 0; MAX_ACCESS_SEM_RETRIES > loopcnt; loopcnt++)
|
|
{
|
|
CSD2UDI(tsd, udi); /* sets udi->semid/shmid/sem_ctime/shm_ctime from file header */
|
|
TREF(new_dbinit_ipc) = 0;
|
|
sem_created = FALSE;
|
|
if (INVALID_SEMID == udi->semid)
|
|
{ /* access control semaphore does not exist. Create one */
|
|
if (0 != udi->gt_sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->gt_shm_ctime)
|
|
{ /* We must have somthing wrong in protocol or, code, if this happens. */
|
|
assert(FALSE);
|
|
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2, LEN_AND_STR(REQRUNDOWN_TEXT));
|
|
}
|
|
/* Create new semaphore using IPC_PRIVATE. System guarantees a unique id. */
|
|
if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
|
|
{
|
|
udi->semid = INVALID_SEMID;
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
|
|
}
|
|
udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */
|
|
TREF(new_dbinit_ipc) |= (NEW_DBINIT_SEM_IPC_MASK | NEW_DBINIT_SHM_IPC_MASK);
|
|
sem_created = TRUE;
|
|
/* change group and permissions */
|
|
semarg.buf = &semstat;
|
|
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT1"), errno);
|
|
if ((-1 != group_id) && (group_id != semstat.sem_perm.gid))
|
|
semstat.sem_perm.gid = group_id;
|
|
semstat.sem_perm.mode = perm;
|
|
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_SET, semarg))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_SET"), errno);
|
|
SET_GTM_ID_SEM(udi->semid, status);
|
|
if (-1 == status)
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
|
|
/* WARNING: Because SETVAL changes sem_ctime, we must NOT do any SETVAL after this one; code here
|
|
* and elsewhere uses IPC_STAT to get sem_ctime and relies on sem_ctime as the creation time of the
|
|
* semaphore.
|
|
*/
|
|
semarg.buf = &semstat;
|
|
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT2"), errno);
|
|
tsd->gt_sem_ctime.ctime = udi->gt_sem_ctime = semarg.buf->sem_ctime;
|
|
} else
|
|
{ /* "semid" already exists. Need to lock it. Before that do sanity check on "semid" and "shmid" */
|
|
if (INVALID_SHMID != udi->shmid)
|
|
{
|
|
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
|
|
{
|
|
PRINT_CRASH_MESSAGE(1, tsd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("Error with database control shmctl"), errno);
|
|
} else if (shmstat.shm_ctime != tsd->gt_shm_ctime.ctime)
|
|
{
|
|
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
|
|
if (vermismatch)
|
|
{
|
|
GTM_VERMISMATCH_ERROR;
|
|
} else
|
|
{
|
|
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("IPC creation time indicates a probable prior crash"));
|
|
}
|
|
}
|
|
semarg.buf = &semstat;
|
|
if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg))
|
|
{ /* file header has valid semid but semaphore does not exist */
|
|
PRINT_CRASH_MESSAGE(1, tsd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("Error with database control semaphore (IPC_STAT)"), errno);
|
|
} else if (semarg.buf->sem_ctime != tsd->gt_sem_ctime.ctime)
|
|
{
|
|
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
|
|
if (vermismatch)
|
|
{
|
|
GTM_VERMISMATCH_ERROR;
|
|
} else
|
|
{
|
|
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("IPC creation time indicates a probable prior crash"));
|
|
}
|
|
}
|
|
} else
|
|
{ /* else "shmid" is NOT valid. This is possible if -
|
|
* (a) Another process is holding the access control semaphore for a longer duration of time
|
|
* but does NOT have the shared memory setup (MUPIP INTEG -FILE or MUPIP RESTORE).
|
|
*
|
|
* (b) If a process (like in (a)) were kill -15ed or -9ed and hence did not get a chance to
|
|
* do db_ipcs_reset which resets "semid"/"shmid" field in the file header to INVALID.
|
|
*
|
|
* In either case, try grabbing the semaphore. If not, wait (depending on the user specified
|
|
* wait time). Eventually, we will either get hold of the semaphore OR will error out.
|
|
*/
|
|
TREF(new_dbinit_ipc) |= NEW_DBINIT_SHM_IPC_MASK; /* Need to create shared memory */
|
|
}
|
|
}
|
|
/* We already have ftok semaphore of this region, so all we need is the access control semaphore */
|
|
SET_GTM_SOP_ARRAY(sop, sopcnt, !read_only, (SEM_UNDO | IPC_NOWAIT));
|
|
SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT);
|
|
if (-1 != status)
|
|
break;
|
|
else
|
|
{
|
|
assert(!sem_created); /* if we created the semaphore, we should be able to do the semop */
|
|
save_errno = errno;
|
|
if (EAGAIN == save_errno)
|
|
{
|
|
if (NO_SEMWAIT_ON_EAGAIN == TREF(dbinit_max_hrtbt_delta))
|
|
{
|
|
sem_pid = semctl(udi->semid, 0, GETPID);
|
|
if (-1 != sem_pid)
|
|
{
|
|
rts_error(VARLSTCNT(13) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_SEMWT2LONG, 7, process_id, 0, LEN_AND_LIT("access control"),
|
|
DB_LEN_STR(reg), sem_pid);
|
|
} else
|
|
{
|
|
save_errno = errno;
|
|
if (!SEM_REMOVED(save_errno))
|
|
{
|
|
rts_error(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
|
|
ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM,
|
|
save_errno);
|
|
} /* else semaphore was removed. Fall-through */
|
|
}
|
|
} else if (!do_blocking_semop(udi->semid, gtm_access_sem, start_hrtbt_cntr,
|
|
&retstat, reg, &bypassed_access))
|
|
{
|
|
if (!SEM_REMOVED(retstat.save_errno))
|
|
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "access control");
|
|
save_errno = retstat.save_errno;
|
|
} else
|
|
{
|
|
if (bypassed_access)
|
|
send_msg(VARLSTCNT(4) ERR_TEXT, 2,
|
|
LEN_AND_LIT("Access control bypassed at init"));
|
|
save_errno = status = SS_NORMAL;
|
|
break;
|
|
}
|
|
} else if (!SEM_REMOVED(save_errno))
|
|
{
|
|
rts_error(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
|
|
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
|
|
}
|
|
/* this is possible if a concurrent gds_rundown removed the access control semaphore (if
|
|
* it was the last writer). Another possibility is if the user did an ipcrm which removed
|
|
* the access control semaphore from the system. Instead of issuing an error right-away,
|
|
* retry by reading the file header again. Note, it is not possible for another gds_rundown
|
|
* removing the access control semaphore because any other process has to first get the
|
|
* ftok lock at startup and since we hold it, they will wait for us to release the ftok.
|
|
*/
|
|
assert(SEM_REMOVED(save_errno));
|
|
if (1 == loopcnt)
|
|
{
|
|
rts_error(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
|
|
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
|
|
}
|
|
READ_DB_FILE_HEADER(reg, tsd);
|
|
}
|
|
}
|
|
assert(-1 != status || bypassed_access);
|
|
if (!bypassed_access)
|
|
udi->grabbed_access_sem = TRUE;
|
|
/* Now that we have the access control semaphore, re-read the file header so we have the uptodate information
|
|
* in case some of the fields (like access method) were modified concurrently by MUPIP SET -FILE
|
|
*/
|
|
READ_DB_FILE_HEADER(reg, tsd);
|
|
UDI2CSD(udi, tsd); /* Since we read the file header again, tsd->semid/shmid and corresponding ctime fields
|
|
* will not be uptodate. Refresh it with the udi copies as they are the ones used above */
|
|
} else
|
|
{ /* for have_standalone_access we were already in "mu_rndwn_file" and got "semid" semaphore. Since mu_rndwn_file
|
|
* would have gotten "ftok" semaphore before acquiring the access control semaphore, no need to get the "ftok"
|
|
* semaphore as well.
|
|
*/
|
|
READ_DB_FILE_HEADER(reg, tsd); /* file already opened by dbfilopn() done from gvcst_init() */
|
|
do_crypt_init = (tsd->is_encrypted && !IS_LKE_IMAGE);
|
|
INIT_PROC_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status);
|
|
INIT_DB_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status, reg, csa, tsd);
|
|
CSD2UDI(tsd, udi);
|
|
/* Make sure "mu_rndwn_file" has created semaphore for standalone access */
|
|
if (INVALID_SEMID == udi->semid || 0 == udi->gt_sem_ctime)
|
|
GTMASSERT;
|
|
/* Make sure "mu_rndwn_file" has reset shared memory. In pro, just clear it and proceed. */
|
|
assert((INVALID_SHMID == udi->shmid) && (0 == udi->gt_shm_ctime));
|
|
/* In pro, just clear it and proceed */
|
|
udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */
|
|
TREF(new_dbinit_ipc) |= (NEW_DBINIT_SEM_IPC_MASK | NEW_DBINIT_SHM_IPC_MASK);
|
|
}
|
|
assert(udi->grabbed_access_sem || bypassed_access);
|
|
DO_DB_HDR_CHECK(reg, tsd); /* Basic sanity check on the file header fields */
|
|
# ifdef DEBUG
|
|
if (gtm_white_box_test_case_enabled && (WBTEST_HOLD_ONTO_ACCSEM_IN_DBINIT == gtm_white_box_test_case_number))
|
|
{
|
|
DBGFPF((stderr, "Holding the access control semaphore.. Sleeping for 30 seconds\n"));
|
|
LONG_SLEEP(30);
|
|
DBGFPF((stderr, "30 second sleep exhausted.. continuing with rest of db_init..\n"));
|
|
}
|
|
# endif
|
|
/* Now that the access control lock is obtained and file header passed all sanity checks, update the acc_meth of the
|
|
* region from the one in the file header (in case they are different). This way, any later code that relies on the
|
|
* acc_meth dereferenced from the region will work correctly. Instead of checking if they are different, do the assignment
|
|
* unconditionally
|
|
*/
|
|
reg->dyn.addr->acc_meth = tsd->acc_meth;
|
|
new_shm_ipc = (TREF(new_dbinit_ipc) & NEW_DBINIT_SHM_IPC_MASK);
|
|
if (new_shm_ipc)
|
|
{ /* Bypassers are not allowed to create shared memory so we don't end up with conflicting shared memories */
|
|
if (bypassed_ftok || bypassed_access)
|
|
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("DSE/LKE database initialization attempt tried a startup short cut that "
|
|
"failed due to a conflict with a database shutdown - please retry"));
|
|
/* Since we are about to allocate new shared memory, if necessary, adjust the journal buffer size right now.
|
|
* Note that if the process setting up shared memory is a read-only process, then we might not flush updated
|
|
* jnl_buffer_size to the file header, which is fine because the value in shared memory is what all processes
|
|
* are looking at. If necessary, the next process to initialize shared memory will repeat the process of
|
|
* adjusting the jnl_buffer_size value.
|
|
*/
|
|
jnl_buffer_size = tsd->jnl_buffer_size;
|
|
if ((0 != jnl_buffer_size) && (jnl_buffer_size < JNL_BUFFER_MIN))
|
|
{
|
|
ROUND_UP_MIN_JNL_BUFF_SIZE(tsd->jnl_buffer_size, tsd);
|
|
SNPRINTF(s, JNLBUFFUPDAPNDX_SIZE, JNLBUFFUPDAPNDX, JNL_BUFF_PORT_MIN(tsd), JNL_BUFFER_MAX);
|
|
send_msg(VARLSTCNT(10) ERR_JNLBUFFREGUPD, 4, REG_LEN_STR(reg),
|
|
jnl_buffer_size, tsd->jnl_buffer_size, ERR_TEXT, 2, LEN_AND_STR(s));
|
|
}
|
|
dbsecspc(reg, tsd, &sec_size); /* Find db segment size */
|
|
/* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */
|
|
GTM_WHITE_BOX_TEST(WBTEST_FAIL_ON_SHMGET, sec_size, GTM_UINT64_MAX);
|
|
if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, sec_size, RWDALL | IPC_CREAT)))
|
|
{
|
|
udi->shmid = (int)INVALID_SHMID;
|
|
status_l = INVALID_SHMID;
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
|
|
}
|
|
tsd->shmid = udi->shmid;
|
|
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_STAT1"), errno);
|
|
/* change group and permissions */
|
|
if ((-1 != group_id) && (group_id != shmstat.shm_perm.gid))
|
|
shmstat.shm_perm.gid = group_id;
|
|
shmstat.shm_perm.mode = perm;
|
|
if (-1 == shmctl(udi->shmid, IPC_SET, &shmstat))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_SET"), errno);
|
|
/* Warning: We must read the shm_ctime using IPC_STAT after IPC_SET, which changes it.
|
|
* We must NOT do any more IPC_SET or SETVAL after this. Our design is to use
|
|
* shm_ctime as creation time of shared memory and store it in file header.
|
|
*/
|
|
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_STAT2"), errno);
|
|
tsd->gt_shm_ctime.ctime = udi->gt_shm_ctime = shmstat.shm_ctime;
|
|
GTM_ATTACH_SHM;
|
|
shm_setup_ok = TRUE;
|
|
} else
|
|
{
|
|
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
|
|
if (vermismatch)
|
|
{
|
|
GTM_VERMISMATCH_ERROR;
|
|
} else if (!shm_setup_ok)
|
|
{
|
|
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2, LEN_AND_LIT("shared memory is invalid"));
|
|
}
|
|
}
|
|
csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
|
|
assert(((INTPTR_T)csa->critical & 0xf) == 0); /* critical should be 16-byte aligned */
|
|
# ifdef CACHELINE_SIZE
|
|
assert(0 == ((INTPTR_T)csa->critical & (CACHELINE_SIZE - 1)));
|
|
# endif
|
|
/* Note: Here we check jnl_state from database file; its value cannot change without stand-alone access.
|
|
* The jnl_buff should be initialized irrespective of read/write process
|
|
*/
|
|
JNL_INIT(csa, reg, tsd);
|
|
csa->shmpool_buffer = (shmpool_buff_hdr_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd));
|
|
/* Initialize memory for snapshot context */ \
|
|
csa->ss_ctx = malloc(SIZEOF(snapshot_context_t));
|
|
DEFAULT_INIT_SS_CTX((SS_CTX_CAST(csa->ss_ctx)));
|
|
csa->lock_addrs[0] = (sm_uc_ptr_t)csa->shmpool_buffer + SHMPOOL_SECTION_SIZE;
|
|
csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
|
|
csa->total_blks = tsd->trans_hist.total_blks; /* For test to see if file has extended */
|
|
if (new_shm_ipc)
|
|
{
|
|
memset(csa->nl, 0, SIZEOF(*csa->nl)); /* We allocated shared storage -- we have to init it */
|
|
csa->nl->sec_size = sec_size; /* Set the shared memory size */
|
|
if (JNL_ALLOWED(csa))
|
|
{ /* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not
|
|
* necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch
|
|
* check in order to determine whether to call jnl_file_open() or not. this is in preparation for that.
|
|
*/
|
|
csa->jnl->jnl_buff->cycle = 1;
|
|
}
|
|
}
|
|
is_bg = (dba_bg == tsd->acc_meth);
|
|
if (is_bg)
|
|
csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
|
|
else
|
|
{
|
|
csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1);
|
|
FSTAT_FILE(udi->fd, &stat_buf, stat_res);
|
|
if (-1 == stat_res)
|
|
rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
|
|
mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
|
|
if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL,
|
|
(size_t)stat_buf.st_size,
|
|
mm_prot,
|
|
GTM_MM_FLAGS, udi->fd, (off_t)0)))
|
|
rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
|
|
csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1;
|
|
csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0];
|
|
}
|
|
/* At this point, shm_setup_ok is TRUE so we are guaranteed that vermismatch is FALSE. Therefore, we can safely
|
|
* dereference csa->nl->glob_sec_init without worrying about whether or not it could be at a different offset than
|
|
* the current version. The only exception is DSE which can continue even after the VERMISMATCH error and hence
|
|
* can have shm_setup_ok set to FALSE at this point.
|
|
*/
|
|
if (shm_setup_ok && !csa->nl->glob_sec_init && !(bypassed_ftok || bypassed_access))
|
|
{
|
|
assert(new_shm_ipc);
|
|
assert(!vermismatch);
|
|
csa->dbinit_shm_created = TRUE;
|
|
if (is_bg)
|
|
{
|
|
memcpy(csd, tsd, SIZEOF(sgmnt_data));
|
|
READ_DB_FILE_MASTERMAP(reg, csd);
|
|
}
|
|
if (csd->machine_name[0]) /* crash occurred */
|
|
{
|
|
if (0 != STRNCMP_STR(csd->machine_name, machine_name, MAX_MCNAMELEN)) /* crashed on some other node */
|
|
rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
|
|
else
|
|
{
|
|
PRINT_CRASH_MESSAGE(0, csd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("machine name in file header is non-null implying possible crash"));
|
|
}
|
|
}
|
|
if (is_bg)
|
|
{
|
|
bt_malloc(csa);
|
|
csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd);
|
|
db_csh_ini(csa);
|
|
}
|
|
db_csh_ref(csa, TRUE);
|
|
shmpool_buff_init(reg);
|
|
SS_INFO_INIT(csa);
|
|
STRNCPY_STR(csa->nl->machine_name, machine_name, MAX_MCNAMELEN); /* machine name */
|
|
assert(MAX_REL_NAME > gtm_release_name_len);
|
|
memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1); /* GT.M release name */
|
|
memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1); /* GDS label */
|
|
memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len); /* database filename */
|
|
csa->nl->creation_date_time4 = csd->creation_time4;
|
|
csa->nl->highest_lbm_blk_changed = -1;
|
|
csa->nl->wcs_timers = -1;
|
|
csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
|
|
csa->nl->unique_id.uid = FILE_INFO(reg)->fileid; /* save what file we initialized this storage for */
|
|
/* save pointers in csa to access shared memory */
|
|
csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
|
|
if (JNL_ALLOWED(csa))
|
|
csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
|
|
csa->nl->shmpool_buffer = (sm_off_t)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl);
|
|
if (is_bg)
|
|
/* Field is sm_off_t (4 bytes) so only in BG mode is this assured to be 4 byte capable */
|
|
csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
|
|
csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
|
|
if (!read_only || is_bg)
|
|
{
|
|
csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
|
|
csd->max_update_array_size = csd->max_non_bm_update_array_size
|
|
= (int4)(ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE));
|
|
csd->max_update_array_size += (int4)(ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE));
|
|
/* add current db_csh counters into the cumulative counters and reset the current counters */
|
|
# define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2) \
|
|
csd->COUNTER.cumul_count += csd->COUNTER.curr_count; \
|
|
csd->COUNTER.curr_count = 0;
|
|
# include "tab_db_csh_acct_rec.h"
|
|
# undef TAB_DB_CSH_ACCT_REC
|
|
}
|
|
csa->nl->wc_blocked = FALSE; /* Since we are creating shared memory, reset wc_blocked to FALSE */
|
|
gvstats_rec_csd2cnl(csa); /* should be called before "db_auto_upgrade" */
|
|
reg->dyn.addr->ext_blk_count = csd->extension_size;
|
|
mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
|
|
DEBUG_ONLY(locknl = csa->nl;) /* for DEBUG_ONLY LOCK_HIST macro */
|
|
gtm_mutex_init(reg, NUM_CRIT_ENTRY, FALSE);
|
|
DEBUG_ONLY(locknl = NULL;) /* restore "locknl" to default value */
|
|
if (read_only)
|
|
csa->nl->remove_shm = TRUE; /* gds_rundown can remove shmem if first process has read-only access */
|
|
db_auto_upgrade(reg);
|
|
if (FALSE == csd->multi_site_open)
|
|
{ /* first time database is opened after upgrading to a GTM version that supports multi-site replication */
|
|
csd->zqgblmod_seqno = 0;
|
|
csd->zqgblmod_tn = 0;
|
|
if (csd->pre_multisite_resync_seqno > csd->reg_seqno)
|
|
csd->pre_multisite_resync_seqno = csd->reg_seqno;
|
|
csd->multi_site_open = TRUE;
|
|
}
|
|
csa->nl->glob_sec_init = TRUE;
|
|
STAT_FILE((char *)csa->nl->fname, &stat_buf, stat_res);
|
|
if (-1 == stat_res)
|
|
{
|
|
save_errno = errno;
|
|
rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
|
|
}
|
|
set_gdid_from_stat(&csa->nl->unique_id.uid, &stat_buf);
|
|
# ifdef RELEASE_LATCH_GLOBAL
|
|
/* On HP-UX, it is possible that mucregini/cs_data is not aligned at the same address
|
|
* boundary as csd would be in shared memory. This may lead to the initialization and
|
|
* usage of different elements of hp_latch_space. This may lead to the latch being
|
|
* "in-use" permanently. To resolve this, shm-initializer re-initializes the global latch
|
|
* to the "available" state.
|
|
* Although Solaris doesn't have the same issue of alignment, we'll cover the case of
|
|
* a corrupt latch (say in case of abnormal process termination).
|
|
*/
|
|
RELEASE_LATCH_GLOBAL(&csd->next_upgrd_warn.time_latch);
|
|
# endif
|
|
GTM_TRUNCATE_ONLY(recover_truncate(csa, csd, reg);)
|
|
csa->nl->jnlpool_shmid = INVALID_SHMID;
|
|
} else
|
|
{
|
|
if (STRNCMP_STR(csa->nl->machine_name, machine_name, MAX_MCNAMELEN)) /* machine names do not match */
|
|
{
|
|
if (csa->nl->machine_name[0])
|
|
rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name));
|
|
else
|
|
{
|
|
PRINT_CRASH_MESSAGE(0, csd, ERR_TEXT, 2,
|
|
LEN_AND_LIT("machine name in shared memory is non-null implying possible crash"));
|
|
}
|
|
}
|
|
/* Since nl is memset to 0 initially and then fname is copied over from gv_cur_region and since "fname" is
|
|
* guaranteed to not exceed MAX_FN_LEN, we should have a terminating '\0' at least at csa->nl->fname[MAX_FN_LEN]
|
|
*/
|
|
assert(csa->nl->fname[MAX_FN_LEN] == '\0'); /* Note: the first '\0' in csa->nl->fname can be much earlier */
|
|
/* Check whether csa->nl->fname exists. If not, then it is a serious condition. Error out. */
|
|
STAT_FILE((char *)csa->nl->fname, &stat_buf, stat_res);
|
|
if (-1 == stat_res)
|
|
{
|
|
save_errno = errno;
|
|
send_msg(VARLSTCNT(13) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
|
|
ERR_DBNAMEMISMATCH, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname, save_errno);
|
|
PRINT_CRASH_MESSAGE(3, csa->nl, ERR_DBNAMEMISMATCH, 4,
|
|
DB_LEN_STR(reg), udi->shmid, csa->nl->fname, save_errno);
|
|
}
|
|
/* Check whether csa->nl->fname and csa->nl->unique_id.uid are in sync. If not error out. */
|
|
if (FALSE == is_gdid_stat_identical(&csa->nl->unique_id.uid, &stat_buf))
|
|
{
|
|
send_msg(VARLSTCNT(12) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
|
|
ERR_DBIDMISMATCH, 4, csa->nl->fname, DB_LEN_STR(reg), udi->shmid);
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_DBIDMISMATCH, 4, csa->nl->fname, DB_LEN_STR(reg), udi->shmid);
|
|
}
|
|
/* Previously, we used to check for csa->nl->creation_date_time4 vs csd->creation_time4 and treat it as
|
|
* an id mismatch situation as well. But later it was determined that as long as the filename and the fileid
|
|
* match between the database file header and the copy in shared memory, there is no more matching that needs
|
|
* to be done. It is not possible for the user to create a situation where the filename/fileid matches but
|
|
* the creation time does not. The only way for this to happen is shared memory corruption in which case we
|
|
* have a much bigger problem to deal with -- 2011/03/30 --- nars.
|
|
*/
|
|
if (FALSE == is_gdid_gdid_identical(&FILE_INFO(reg)->fileid, &csa->nl->unique_id.uid))
|
|
{
|
|
send_msg(VARLSTCNT(12) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
|
|
ERR_DBSHMNAMEDIFF, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname);
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_DBSHMNAMEDIFF, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname);
|
|
}
|
|
/* If a regular Recover/Rollback created the shared memory and died (because of a user error or runtime error),
|
|
* any process that comes up after that should NOT touch the shared memory or database. The user should reissue
|
|
* Rollback/Recover command that will fix the state of the shared memory and bring the database back to a consistent
|
|
* state. Note that the reissue of a regular Rollback/Recover command will NOT hit this condition because it invokes
|
|
* mu_rndwn_file (STANDALONE) that removes the shared memory. The only case in which mu_rndwn_file does NOT remove
|
|
* shared memory is if it was invoked by an Online Rollback in which case the below check should be bypassed
|
|
*/
|
|
if (csa->nl->donotflush_dbjnl && !jgbl.onlnrlbk)
|
|
{
|
|
assert(FALSE);
|
|
PRINT_CRASH_MESSAGE(0, csa->nl, ERR_TEXT, 2,
|
|
LEN_AND_LIT("mupip recover/rollback created shared memory. Needs MUPIP RUNDOWN"));
|
|
}
|
|
/* verify pointers from our calculation vs. the copy in shared memory */
|
|
if (csa->nl->critical != (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl))
|
|
{
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("critical"),
|
|
(uint4)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->critical);
|
|
}
|
|
if ((JNL_ALLOWED(csa)) &&
|
|
(csa->nl->jnl_buff != (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl)))
|
|
{
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("journal buffer"),
|
|
(uint4)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->jnl_buff);
|
|
}
|
|
if (csa->nl->shmpool_buffer != (sm_off_t)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl))
|
|
{
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("backup buffer"),
|
|
(uint4)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->shmpool_buffer);
|
|
}
|
|
if ((is_bg) && (csa->nl->hdr != (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl)))
|
|
{
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("file header"),
|
|
(uint4)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->hdr);
|
|
}
|
|
if (csa->nl->lock_addrs != (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl))
|
|
{
|
|
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("lock address"),
|
|
(uint4)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->lock_addrs);
|
|
}
|
|
csa->dbinit_shm_created = FALSE;
|
|
}
|
|
if (REPL_ALLOWED(csd) && is_src_server)
|
|
{ /* Bind this database to the journal pool shmid & instance file name that the source server started with.
|
|
* Assert that jnlpool_init has already been done by the source server before it does db_init.
|
|
*/
|
|
assert(NULL != jnlpool.repl_inst_filehdr);
|
|
/* Note: csa->nl->replinstfilename is changed under control of the init/rundown semaphore only. */
|
|
assert('\0' != jnlpool.jnlpool_ctl->jnlpool_id.instfilename[0]);
|
|
replinst_mismatch = FALSE;
|
|
if ('\0' == csa->nl->replinstfilename[0])
|
|
STRCPY(csa->nl->replinstfilename, jnlpool.jnlpool_ctl->jnlpool_id.instfilename);
|
|
else if (STRCMP(csa->nl->replinstfilename, jnlpool.jnlpool_ctl->jnlpool_id.instfilename))
|
|
replinst_mismatch = TRUE;
|
|
/* Note: csa->nl->jnlpool_shmid is changed under control of the init/rundown semaphore only. */
|
|
assert(INVALID_SHMID != jnlpool.repl_inst_filehdr->jnlpool_shmid);
|
|
if (INVALID_SHMID == csa->nl->jnlpool_shmid)
|
|
csa->nl->jnlpool_shmid = jnlpool.repl_inst_filehdr->jnlpool_shmid;
|
|
else if (csa->nl->jnlpool_shmid != jnlpool.repl_inst_filehdr->jnlpool_shmid)
|
|
{ /* shmid mismatch. Check if the shmid noted down in db filehdr is out-of-date.
|
|
* Possible if the jnlpool has since been deleted. If so, note the new one down.
|
|
* If not, then issue an error.
|
|
*/
|
|
if (-1 == shmctl(csa->nl->jnlpool_shmid, IPC_STAT, &shmstat))
|
|
{
|
|
save_errno = errno;
|
|
if ((EINVAL == save_errno) || (EIDRM == save_errno)) /* EIDRM is only on Linux */
|
|
{
|
|
replinst_mismatch = FALSE;
|
|
csa->nl->jnlpool_shmid = jnlpool.repl_inst_filehdr->jnlpool_shmid;
|
|
} else
|
|
replinst_mismatch = TRUE;
|
|
} else
|
|
replinst_mismatch = TRUE;
|
|
}
|
|
/* Replication instance file or jnlpool id mismatch. Issue error. */
|
|
if (replinst_mismatch)
|
|
rts_error(VARLSTCNT(10) ERR_REPLINSTMISMTCH, 8,
|
|
LEN_AND_STR(jnlpool.jnlpool_ctl->jnlpool_id.instfilename), jnlpool.repl_inst_filehdr->jnlpool_shmid,
|
|
DB_LEN_STR(reg), LEN_AND_STR(csa->nl->replinstfilename), csa->nl->jnlpool_shmid);
|
|
}
|
|
csa->root_search_cycle = csa->nl->root_search_cycle;
|
|
csa->onln_rlbk_cycle = csa->nl->onln_rlbk_cycle; /* take local copy of the current Online Rollback cycle */
|
|
csa->db_onln_rlbkd_cycle = csa->nl->db_onln_rlbkd_cycle; /* take local copy of the current Online Rollback mod cycle */
|
|
/* Record ftok information as soon as shared memory set up is done */
|
|
if (!have_standalone_access && !bypassed_ftok)
|
|
FTOK_TRACE(csa, csd->trans_hist.curr_tn, ftok_ops_lock, process_id);
|
|
if (-1 == (semval = semctl(udi->semid, 1, GETVAL))) /* semval = number of process attached */
|
|
{
|
|
save_errno = errno;
|
|
rts_error(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
|
|
RTS_ERROR_LITERAL("semctl()"), CALLFROM, save_errno);
|
|
}
|
|
if (!read_only && (1 == semval) && !bypassed_ftok && !bypassed_access)
|
|
{ /* For read-write process flush file header to write machine_name,
|
|
* semaphore, shared memory id and semaphore creation time to disk.
|
|
*/
|
|
csa->nl->remove_shm = FALSE;
|
|
STRNCPY_STR(csd->machine_name, machine_name, MAX_MCNAMELEN);
|
|
if (!is_bg)
|
|
{
|
|
csd->shmid = tsd->shmid;
|
|
csd->semid = tsd->semid;
|
|
csd->gt_sem_ctime = tsd->gt_sem_ctime;
|
|
csd->gt_shm_ctime = tsd->gt_shm_ctime;
|
|
}
|
|
DB_LSEEKWRITE(csa, udi->fn, udi->fd, (off_t)0, (sm_uc_ptr_t)csd, SIZEOF(sgmnt_data), save_errno);
|
|
if (0 != save_errno)
|
|
{
|
|
rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("Error with database header flush"), save_errno);
|
|
}
|
|
} else if (read_only && new_shm_ipc)
|
|
{ /* For read-only process if shared memory and semaphore created for first time,
|
|
* semaphore and shared memory id, and semaphore creation time are written to disk.
|
|
*/
|
|
db_ipcs.semid = tsd->semid; /* use tsd instead of csd in order for MM to work too */
|
|
db_ipcs.shmid = tsd->shmid;
|
|
db_ipcs.gt_sem_ctime = tsd->gt_sem_ctime.ctime;
|
|
db_ipcs.gt_shm_ctime = tsd->gt_shm_ctime.ctime;
|
|
db_ipcs.fn_len = reg->dyn.addr->fname_len;
|
|
memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len);
|
|
db_ipcs.fn[reg->dyn.addr->fname_len] = 0;
|
|
WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);
|
|
if (0 != send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0))
|
|
rts_error(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
|
|
ERR_TEXT, 2, LEN_AND_LIT("gtmsecshr failed to update database file header"));
|
|
|
|
}
|
|
if (gtm_fullblockwrites)
|
|
{ /* We have been asked to do FULL BLOCK WRITES for this database. On *NIX, attempt to get the filesystem
|
|
* blocksize from statvfs. This allows a full write of a block without the OS having to fetch the old
|
|
* block for a read/update operation. We will round the IOs to the next filesystem blocksize if the
|
|
* following criteria are met:
|
|
*
|
|
* 1) Database blocksize must be a whole multiple of the filesystem blocksize for the above
|
|
* mentioned reason.
|
|
*
|
|
* 2) Filesystem blocksize must be a factor of the location of the first data block
|
|
* given by the start_vbn.
|
|
*
|
|
* The saved length (if the feature is enabled) will be the filesystem blocksize and will be the
|
|
* length that a database IO is rounded up to prior to initiation of the IO.
|
|
*/
|
|
FSTATVFS_FILE(udi->fd, &dbvfs, status);
|
|
if (-1 != status)
|
|
{
|
|
dblksize = csd->blk_size;
|
|
fbwsize = (int4)dbvfs.f_bsize;
|
|
if (0 != fbwsize && (0 == dblksize % fbwsize) && (0 == ((csd->start_vbn - 1) * DISK_BLOCK_SIZE) % fbwsize))
|
|
csa->do_fullblockwrites = TRUE; /* This region is fullblockwrite enabled */
|
|
/* Report this length in DSE even if not enabled */
|
|
csa->fullblockwrite_len = fbwsize; /* Length for rounding fullblockwrite */
|
|
} else
|
|
{
|
|
save_errno = errno;
|
|
send_msg(VARLSTCNT(8) ERR_SYSCALL, 5, LEN_AND_LIT("fstatvfs"), CALLFROM, save_errno);
|
|
}
|
|
}
|
|
++csa->nl->ref_cnt; /* This value is changed under control of the init/rundown semaphore only */
|
|
assert(!csa->ref_cnt); /* Increment shared ref_cnt before private ref_cnt increment. */
|
|
csa->ref_cnt++; /* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */
|
|
# ifdef DEBUG
|
|
if (!IS_GTM_IMAGE && gtm_white_box_test_case_enabled && (WBTEST_HOLD_SEM_BYPASS == gtm_white_box_test_case_number))
|
|
{
|
|
if (0 == csa->nl->wbox_test_seq_num)
|
|
{
|
|
csa->nl->wbox_test_seq_num = 1;
|
|
DBGFPF((stderr, "Holding semaphores...\n"));
|
|
while (1 == csa->nl->wbox_test_seq_num)
|
|
LONG_SLEEP(1);
|
|
}
|
|
}
|
|
# endif
|
|
if (!have_standalone_access && !jgbl.onlnrlbk && !bypassed_access)
|
|
{
|
|
/* Release control lockout now that it is init'd */
|
|
if (0 != (save_errno = do_semop(udi->semid, 0, -1, SEM_UNDO)))
|
|
{
|
|
save_errno = errno;
|
|
rts_error(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
|
|
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
|
|
}
|
|
udi->grabbed_access_sem = FALSE;
|
|
}
|
|
# ifdef DEBUG
|
|
if (gtm_white_box_test_case_enabled && (WBTEST_SEMTOOLONG_STACK_TRACE == gtm_white_box_test_case_number) \
|
|
&& (1 == csa->nl->wbox_test_seq_num))
|
|
{
|
|
csa->nl->wbox_test_seq_num = 2;
|
|
/* Wait till the other process has got some stack traces */
|
|
while (csa->nl->wbox_test_seq_num != 3)
|
|
LONG_SLEEP(10);
|
|
}
|
|
# endif
|
|
if (!have_standalone_access && !bypassed_ftok)
|
|
{ /* Release ftok semaphore lock so that any other ftok conflicted database can continue now */
|
|
if (!ftok_sem_release(reg, FALSE, FALSE))
|
|
rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
|
|
FTOK_TRACE(csa, csd->trans_hist.curr_tn, ftok_ops_release, process_id);
|
|
}
|
|
/* Do the per process initialization of mutex stuff */
|
|
assert(!mutex_per_process_init_pid || mutex_per_process_init_pid == process_id);
|
|
if (!mutex_per_process_init_pid)
|
|
mutex_per_process_init();
|
|
REVERT;
|
|
return;
|
|
}
|