fis-gtm/sr_unix/gvcst_init_sysops.c

1271 lines
54 KiB
C

/****************************************************************
* *
* Copyright 2001, 2013 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include <sys/mman.h>
#ifndef __MVS__
#include <sys/param.h>
#endif
#include <errno.h>
#include <sys/un.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <sys/time.h>
#include "gtm_ipc.h"
#include "gtm_socket.h"
#include "gtm_fcntl.h"
#include "gtm_unistd.h"
#include "gtm_stdio.h"
#include "gtm_string.h"
#include "gtm_sem.h"
#include "gtm_statvfs.h"
#ifdef __linux__
#include "hugetlbfs_overrides.h"
#endif
#include "gt_timer.h"
#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "gdsblk.h"
#include "gdscc.h"
#include "min_max.h"
#include "gdsblkops.h"
#include "filestruct.h"
#include "parse_file.h"
#include "jnl.h"
#include "interlock.h"
#include "io.h"
#include "iosp.h"
#include "error.h"
#include "mutex.h"
#include "gtmio.h"
#include "mupipbckup.h"
#include "gtmimagename.h"
#include "mmseg.h"
#include "gtmsecshr.h"
#include "secshr_client.h"
#include "ftok_sems.h"
#include "repl_msg.h"
#include "gtmsource.h"
#include "anticipatory_freeze.h"
/* Include prototypes */
#include "mlk_shr_init.h"
#include "gtm_c_stack_trace.h"
#include "eintr_wrappers.h"
#include "eintr_wrapper_semop.h"
#include "is_file_identical.h"
#include "repl_instance.h"
#include "heartbeat_timer.h"
#include "util.h"
#include "dbfilop.h"
#include "gvcst_protos.h"
#include "is_raw_dev.h"
#include "gv_match.h"
#include "do_semop.h"
#include "gvcmy_open.h"
#include "wcs_sleep.h"
#include "do_shmat.h"
#include "send_msg.h"
#include "gtmmsg.h"
#include "shmpool.h"
#include "gtm_permissions.h"
#include "wbox_test_init.h"
#include "wcs_clean_dbsync.h" /* for setting wcs_clean_dbsync pointer */
#ifdef GTM_CRYPT
#include "gtmcrypt.h"
#endif
#include "have_crit.h"
#ifdef __MVS__
#include "gtm_zos_io.h"
#endif
#include "db_snapshot.h"
#include "lockconst.h" /* for LOCK_AVAILABLE */
#ifdef GTM_TRUNCATE
#include "recover_truncate.h"
#endif
#ifndef GTM_SNAPSHOT
# error "Snapshot facility not supported in this platform"
#endif
#define REQRUNDOWN_TEXT "semid is invalid but shmid is valid or at least one of sem_ctime or shm_ctime are non-zero"
#define MAX_ACCESS_SEM_RETRIES 2
#define RTS_ERROR(...) rts_error_csa(CSA_ARG(csa) __VA_ARGS__)
#define SEND_MSG(...) send_msg_csa(CSA_ARG(csa) __VA_ARGS__)
#define SS_INFO_INIT(CSA) \
{ \
shm_snapshot_ptr_t ss_shm_ptr; \
node_local_ptr_t lcl_cnl; \
\
lcl_cnl = CSA->nl; \
lcl_cnl->ss_shmid = INVALID_SHMID; \
lcl_cnl->ss_shmcycle = 0; \
CLEAR_SNAPSHOTS_IN_PROG(lcl_cnl); \
lcl_cnl->num_snapshots_in_effect = 0; \
SET_LATCH_GLOBAL(&lcl_cnl->snapshot_crit_latch, LOCK_AVAILABLE); \
assert(1 == MAX_SNAPSHOTS); /* To ensure that we revisit this whenever multiple snapshots is implemented */ \
ss_shm_ptr = (shm_snapshot_ptr_t)(SS_GETSTARTPTR(CSA)); \
SS_DEFAULT_INIT_POOL(ss_shm_ptr); \
}
#define GTM_ATTACH_CHECK_ERROR \
{ \
if (-1 == status_l) \
{ \
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), \
ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno); \
} \
}
#define GTM_ATTACH_SHM \
{ \
status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND)); \
GTM_ATTACH_CHECK_ERROR; \
csa->nl = (node_local_ptr_t)csa->db_addrs[0]; \
}
#define GTM_ATTACH_SHM_AND_CHECK_VERS(VERMISMATCH, SHM_SETUP_OK) \
{ \
GTM_ATTACH_SHM; \
/* The following checks for GDS_LABEL_GENERIC and gtm_release_name ensure that the shared memory under consideration \
* is valid. If shared memory is already initialized, do VERMISMATCH check BEFORE referencing any other fields in \
* shared memory. \
*/ \
VERMISMATCH = FALSE; \
SHM_SETUP_OK = FALSE; \
if (!MEMCMP_LIT(csa->nl->label, GDS_LABEL_GENERIC)) \
{ \
if (memcmp(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1)) \
{ /* Copy csa->nl->now_running into a local variable before passing to rts_error due to the following \
* issue: \
* In VMS, a call to rts_error copies only the error message and its arguments (as pointers) and \
* transfers control to the topmost condition handler which is dbinit_ch() in this case. dbinit_ch() \
* does a PRN_ERROR only for SUCCESS/INFO (VERMISMATCH is neither of them) and in addition \
* nullifies csa->nl as part of its condition handling. It then transfers control to the next level \
* condition handler which does a PRN_ERROR but at that point in time, the parameter \
* csa->nl->now_running is no longer accessible and hence no \parameter substitution occurs (i.e. the \
* error message gets displayed with plain !ADs). \
* In UNIX, this is not an issue since the first call to rts_error does the error message \
* construction before handing control to the topmost condition handler. But it does not hurt to do \
* the copy. \
*/ \
assert(strlen(csa->nl->now_running) < SIZEOF(now_running)); \
memcpy(now_running, csa->nl->now_running, SIZEOF(now_running)); \
now_running[SIZEOF(now_running) - 1] = '\0'; /* protection against bad csa->nl->now_running values */ \
VERMISMATCH = TRUE; \
} else \
SHM_SETUP_OK = TRUE; \
} \
}
#define GTM_VERMISMATCH_ERROR \
{ \
if (!vermismatch_already_printed) \
{ \
vermismatch_already_printed = TRUE; \
RTS_ERROR(VARLSTCNT(8) ERR_VERMISMATCH, 6, DB_LEN_STR(reg), gtm_release_name_len, gtm_release_name, \
LEN_AND_STR(now_running)); \
} \
}
#ifdef GTM_CRYPT
#define INIT_DB_ENCRYPTION_IF_NEEDED(DO_CRYPT_INIT, INIT_STATUS, REG, CSA, TSD) \
{ \
int fn_len = 0; \
char *fn; \
\
if (DO_CRYPT_INIT) \
{ \
if (0 == INIT_STATUS) \
INIT_DB_ENCRYPTION(CSA, TSD, INIT_STATUS); \
if (0 != INIT_STATUS) \
{ \
fn = (char *)(REG->dyn.addr->fname); \
fn_len = REG->dyn.addr->fname_len; \
if (IS_GTM_IMAGE) \
{ \
GTMCRYPT_REPORT_ERROR(INIT_STATUS, rts_error, fn_len, fn); \
} else \
GTMCRYPT_REPORT_ERROR(MAKE_MSG_WARNING(INIT_STATUS), gtm_putmsg, fn_len, fn); \
CSA->encr_key_handle = GTMCRYPT_INVALID_KEY_HANDLE; \
} \
} \
}
#define INIT_PROC_ENCRYPTION_IF_NEEDED(CSA, DO_CRYPT_INIT, INIT_STATUS) \
{ \
if (DO_CRYPT_INIT) \
INIT_PROC_ENCRYPTION(CSA, INIT_STATUS); \
}
#else
#define INIT_DB_ENCRYPTION_IF_NEEDED(IS_ENCRYPTED, INIT_STATUS, REG, CSA, TSD)
#define INIT_PROC_ENCRYPTION_IF_NEEDED(CSA, IS_ENCRYPTED, INIT_STATUS)
#endif
#define READ_DB_FILE_HEADER(REG, TSD) \
{ \
file_control *fc; \
\
fc = REG->dyn.addr->file_cntl; \
fc->file_type = REG->dyn.addr->acc_meth; \
fc->op = FC_READ; \
fc->op_buff = (sm_uc_ptr_t)TSD; \
fc->op_pos = 1; \
fc->op_len = SIZEOF(sgmnt_data); \
dbfilop(fc); \
}
#define READ_DB_FILE_MASTERMAP(REG, CSD) \
{ \
file_control *fc; \
\
fc = REG->dyn.addr->file_cntl; \
fc->file_type = dba_bg; \
fc->op = FC_READ; \
fc->op_buff = MM_ADDR(CSD); \
fc->op_len = MASTER_MAP_SIZE(CSD); \
fc->op_pos = MM_BLOCK; \
dbfilop(fc); \
}
/* Depending on whether journaling and/or replication was enabled at the time of the crash,
* print REQRUNDOWN, REQRECOV, or REQROLLBACK error message.
*/
#define PRINT_CRASH_MESSAGE(CNT, ARG, ...) \
{ \
if (JNL_ENABLED(tsd)) \
{ \
if (REPL_ENABLED(tsd) && tsd->jnl_before_image) \
RTS_ERROR(VARLSTCNT(10 + CNT) ERR_REQROLLBACK, 4, DB_LEN_STR(reg), \
LEN_AND_STR((ARG)->machine_name), __VA_ARGS__); \
else \
RTS_ERROR(VARLSTCNT(10 + CNT) ERR_REQRECOV, 4, DB_LEN_STR(reg), \
LEN_AND_STR((ARG)->machine_name), __VA_ARGS__); \
} else \
RTS_ERROR(VARLSTCNT(10 + CNT) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), \
LEN_AND_STR((ARG)->machine_name), __VA_ARGS__); \
}
GBLREF boolean_t gtm_fullblockwrites; /* Do full (not partial) database block writes T/F */
GBLREF boolean_t is_src_server;
GBLREF boolean_t mupip_jnl_recover;
GBLREF gd_region *gv_cur_region;
GBLREF ipcs_mesg db_ipcs;
GBLREF jnlpool_addrs jnlpool;
GBLREF node_local_ptr_t locknl;
GBLREF uint4 heartbeat_counter;
GBLREF uint4 mutex_per_process_init_pid;
GBLREF uint4 process_id;
GBLREF void (*wcs_clean_dbsync_fptr)();
GBLREF jnl_gbls_t jgbl;
GTMCRYPT_ONLY(
GBLREF gtmcrypt_key_t mu_int_encrypt_key_handle;
)
#ifndef MUTEX_MSEM_WAKE
GBLREF int mutex_sock_fd;
#endif
LITREF char gtm_release_name[];
LITREF int4 gtm_release_name_len;
OS_PAGE_SIZE_DECLARE
error_def(ERR_BADDBVER);
ZOS_ONLY(error_def(ERR_BADTAG);)
error_def(ERR_HOSTCONFLICT);
error_def(ERR_CRITSEMFAIL);
error_def(ERR_DBCREINCOMP);
error_def(ERR_DBFILERR);
error_def(ERR_DBFLCORRP);
error_def(ERR_DBIDMISMATCH);
error_def(ERR_DBNAMEMISMATCH);
error_def(ERR_DBNOTGDS);
error_def(ERR_DBSHMNAMEDIFF);
error_def(ERR_JNLBUFFREGUPD);
error_def(ERR_NLMISMATCHCALC);
error_def(ERR_MMNODYNUPGRD);
error_def(ERR_PERMGENFAIL);
error_def(ERR_REQROLLBACK);
error_def(ERR_REQRECOV);
error_def(ERR_REQRUNDOWN);
error_def(ERR_REGOPENRETRY);
error_def(ERR_SYSCALL);
error_def(ERR_TEXT);
error_def(ERR_VERMISMATCH);
gd_region *dbfilopn (gd_region *reg)
{
unix_db_info *udi;
parse_blk pblk;
mstr file;
char *fnptr, fbuff[MAX_FBUFF + 1];
struct stat buf;
gd_region *prev_reg;
gd_segment *seg;
int status;
bool raw;
int stat_res, rc, save_errno;
sgmnt_addrs *csa;
ZOS_ONLY(int realfiletag;)
seg = reg->dyn.addr;
assert(seg->acc_meth == dba_bg || seg->acc_meth == dba_mm);
FILE_CNTL_INIT_IF_NULL(seg);
udi = FILE_INFO(reg);
csa = &udi->s_addrs;
file.addr = (char *)seg->fname;
file.len = seg->fname_len;
memset(&pblk, 0, SIZEOF(pblk));
pblk.buffer = fbuff;
pblk.buff_size = MAX_FBUFF;
pblk.fop = (F_SYNTAXO | F_PARNODE);
memcpy(fbuff,file.addr,file.len);
*(fbuff + file.len) = '\0';
if (is_raw_dev(fbuff))
{
raw = TRUE;
pblk.def1_buf = DEF_NODBEXT;
pblk.def1_size = SIZEOF(DEF_NODBEXT) - 1;
} else
{
raw = FALSE;
pblk.def1_buf = DEF_DBEXT;
pblk.def1_size = SIZEOF(DEF_DBEXT) - 1;
}
status = parse_file(&file, &pblk);
if (!(status & 1))
{
if (!IS_GTCM_GNP_SERVER_IMAGE)
{
free(seg->file_cntl->file_info);
free(seg->file_cntl);
seg->file_cntl = 0;
}
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status);
}
assert(((int)pblk.b_esl + 1) <= SIZEOF(seg->fname));
memcpy(seg->fname, pblk.buffer, pblk.b_esl);
pblk.buffer[pblk.b_esl] = 0;
seg->fname[pblk.b_esl] = 0;
seg->fname_len = pblk.b_esl;
if (pblk.fnb & F_HAS_NODE)
{ /* Remote node specification given */
assert(pblk.b_node && pblk.l_node[pblk.b_node - 1] == ':');
gvcmy_open(reg, &pblk);
return (gd_region *)-1L;
}
fnptr = (char *)seg->fname + pblk.b_node;
udi->raw = raw;
udi->fn = (char *)fnptr;
OPENFILE(fnptr, O_RDWR, udi->fd);
if (!udi->grabbed_access_sem)
{ /* If the process already has standalone access, these fields are initialized in mu_rndwn_file */
udi->ftok_semid = INVALID_SEMID;
udi->semid = INVALID_SEMID;
udi->shmid = INVALID_SHMID;
udi->gt_sem_ctime = 0;
udi->gt_shm_ctime = 0;
}
reg->read_only = FALSE; /* maintain csa->read_write simultaneously */
csa->read_write = TRUE; /* maintain reg->read_only simultaneously */
if (FD_INVALID == udi->fd)
{
OPENFILE(fnptr, O_RDONLY, udi->fd);
if (FD_INVALID == udi->fd)
{
save_errno = errno;
if (!IS_GTCM_GNP_SERVER_IMAGE)
{
free(seg->file_cntl->file_info);
free(seg->file_cntl);
seg->file_cntl = 0;
}
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
}
reg->read_only = TRUE; /* maintain csa->read_write simultaneously */
csa->read_write = FALSE; /* maintain reg->read_only simultaneously */
}
# ifdef __MVS__
if (-1 == gtm_zos_tag_to_policy(udi->fd, TAG_BINARY, &realfiletag))
TAG_POLICY_SEND_MSG(fnptr, errno, realfiletag, TAG_BINARY);
# endif
STAT_FILE(fnptr, &buf, stat_res);
if (-1 == stat_res)
{
save_errno = errno;
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
}
set_gdid_from_stat(&udi->fileid, &buf);
if (prev_reg = gv_match(reg))
{
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
free(seg->file_cntl->file_info);
free(seg->file_cntl);
seg->file_cntl = 0;
return prev_reg;
}
return reg;
}
void dbsecspc(gd_region *reg, sgmnt_data_ptr_t csd, gtm_uint64_t *sec_size)
{
gtm_uint64_t tmp_sec_size;
/* Ensure that all the various sections that the shared memory contains are actually
* aligned at the OS_PAGE_SIZE boundary
*/
INIT_NUM_CRIT_ENTRY_IF_NEEDED(csd);
assert(MIN_NODE_LOCAL_SPACE <= NODE_LOCAL_SPACE(csd));
assert(0 == NODE_LOCAL_SPACE(csd) % OS_PAGE_SIZE);
assert(0 == LOCK_SPACE_SIZE(csd) % OS_PAGE_SIZE);
assert(0 == JNL_SHARE_SIZE(csd) % OS_PAGE_SIZE);
assert(0 == SHMPOOL_SECTION_SIZE % OS_PAGE_SIZE);
assert(0 == CACHE_CONTROL_SIZE(csd) % OS_PAGE_SIZE);
/* First compute the size based on sections common to both MM and BG */
tmp_sec_size = NODE_LOCAL_SPACE(csd) + JNL_SHARE_SIZE(csd) + SHMPOOL_SECTION_SIZE + LOCK_SPACE_SIZE(csd);
/* Now, add sections specific to MM and BG */
if (dba_mm == reg->dyn.addr->acc_meth)
tmp_sec_size += SIZEOF_FILE_HDR(csd);
else
{
assertpro(dba_bg == reg->dyn.addr->acc_meth);
tmp_sec_size += CACHE_CONTROL_SIZE(csd) + (LOCK_BLOCK(csd) * DISK_BLOCK_SIZE);
}
# ifdef HUGETLB_SUPPORTED
*sec_size = ROUND_UP(tmp_sec_size, OS_HUGEPAGE_SIZE);
# else
*sec_size = ROUND_UP(tmp_sec_size, OS_PAGE_SIZE);
# endif
return;
}
int db_init(gd_region *reg)
{
boolean_t is_bg, read_only, sem_created = FALSE, need_stacktrace, have_standalone_access;
boolean_t shm_setup_ok = FALSE, vermismatch = FALSE, vermismatch_already_printed = FALSE;
boolean_t new_shm_ipc, do_crypt_init = FALSE, replinst_mismatch;
char machine_name[MAX_MCNAMELEN];
int gethostname_res, stat_res, group_id, perm, save_udi_semid;
int4 status, semval, dblksize, fbwsize, save_errno, wait_time, loopcnt, sem_pid;
sm_long_t status_l;
sgmnt_addrs *csa;
sgmnt_data tsdbuff;
sgmnt_data_ptr_t csd, tsd;
struct sembuf sop[3];
struct stat stat_buf;
union semun semarg;
struct semid_ds semstat;
struct shmid_ds shmstat;
struct statvfs dbvfs;
uint4 sopcnt, start_hrtbt_cntr;
unix_db_info *udi;
char now_running[MAX_REL_NAME];
int init_status;
gtm_uint64_t sec_size, mmap_sz;
semwait_status_t retstat;
struct perm_diag_data pdd;
boolean_t bypassed_ftok = FALSE, bypassed_access = FALSE;
int jnl_buffer_size;
char s[JNLBUFFUPDAPNDX_SIZE]; /* JNLBUFFUPDAPNDX_SIZE is defined in jnl.h */
DCL_THREADGBL_ACCESS;
SETUP_THREADGBL_ACCESS;
ESTABLISH_NOUNWIND(dbinit_ch);
assert(INTRPT_IN_GVCST_INIT == intrpt_ok_state); /* we better be called from gvcst_init */
wcs_clean_dbsync_fptr = &wcs_clean_dbsync;
tsd = &tsdbuff;
read_only = reg->read_only;
udi = FILE_INFO(reg);
memset(machine_name, 0, SIZEOF(machine_name));
csa = &udi->s_addrs;
assert(!mutex_per_process_init_pid || mutex_per_process_init_pid == process_id);
if (!mutex_per_process_init_pid)
mutex_per_process_init();
if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
RTS_ERROR(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
if (WBTEST_ENABLED(WBTEST_TAMPER_HOSTNAME))
STRCPY(machine_name, "s_i_l_l_y");
assert(strlen(machine_name) < MAX_MCNAMELEN);
assert(NULL == csa->hdr); /* dbinit_ch relies on this to unmap the db (if mm) */
assert((NULL == csa->db_addrs[0]) && (NULL == csa->db_addrs[1]));
assert((NULL == csa->lock_addrs[0]) && (NULL == csa->lock_addrs[1]));
reg->opening = TRUE;
assert(0 <= udi->fd); /* database file must have been already opened by dbfilopn() done from gvcst_init() */
FSTAT_FILE(udi->fd, &stat_buf, stat_res); /* get the stats for the database file */
if (-1 == stat_res)
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
/* Setup new group and permissions if indicated by the security rules. */
if (gtm_set_group_and_perm(&stat_buf, &group_id, &perm, PERM_IPC, &pdd) < 0)
{
SEND_MSG(VARLSTCNT(6 + PERMGENDIAG_ARG_COUNT)
ERR_PERMGENFAIL, 4, RTS_ERROR_STRING("ipc resources"), RTS_ERROR_STRING(udi->fn),
PERMGENDIAG_ARGS(pdd));
RTS_ERROR(VARLSTCNT(6 + PERMGENDIAG_ARG_COUNT)
ERR_PERMGENFAIL, 4, RTS_ERROR_STRING("ipc resources"), RTS_ERROR_STRING(udi->fn),
PERMGENDIAG_ARGS(pdd));
}
/* if the process has standalone access, it will have udi->grabbed_access_sem set to TRUE at
* this point. Note that down in a local variable as the udi->grabbed_access_sem will be set
* to TRUE even for non-standalone access below and hence we can't rely on that later to determine if the process had
* standalone access or not when it entered this function.
*/
have_standalone_access = udi->grabbed_access_sem;
if (!have_standalone_access)
{
do_crypt_init = (reg->dyn.addr->is_encrypted && !IS_LKE_IMAGE);
INIT_PROC_ENCRYPTION_IF_NEEDED(csa, do_crypt_init, init_status); /* heavy-weight so needs to be done before ftok */
start_hrtbt_cntr = heartbeat_counter;
if (!ftok_sem_get2(reg, start_hrtbt_cntr, &retstat, &bypassed_ftok))
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "ftok");
if (bypassed_ftok)
SEND_MSG(VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("FTOK bypassed at database initialization"));
/* At this point we have ftok_semid semaphore based on ftok key. Any ftok conflicted region will block at this
* point. For example, if a.dat and b.dat both have same ftok and process A tries to open or close a.dat and
* process B tries to open or close b.dat, even though the database accesses don't conflict, the first one to
* control the ftok semaphore blocks (makes wait) the other(s).
*/
READ_DB_FILE_HEADER(reg, tsd); /* file already opened by dbfilopn() done from gvcst_init() */
DO_BADDBVER_CHK(reg, tsd); /* need to do BADDBVER check before de-referencing shmid and semid from file header
* as they could be at different offsets if the database is V4-format */
if (reg->dyn.addr->is_encrypted != tsd->is_encrypted)
{ /* Encryption setting different between global directory and database file header */
reg->dyn.addr->is_encrypted = tsd->is_encrypted; /* override with the value in file header */
do_crypt_init = (tsd->is_encrypted && !IS_LKE_IMAGE);
if (do_crypt_init)
{ /* Encryption is turned on in the file header. Need to do encryption initialization. Release ftok
* as initialization is heavy-weight.
*/
if (!ftok_sem_release(reg, TRUE, FALSE)) /* decrement counter so later increment is correct */
RTS_ERROR(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
INIT_PROC_ENCRYPTION_IF_NEEDED(csa, do_crypt_init, init_status); /* redo initialization */
start_hrtbt_cntr = heartbeat_counter; /* update to reflect time lost in encryption initialization */
if (!ftok_sem_get2(reg, start_hrtbt_cntr, &retstat, &bypassed_ftok))
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "ftok");
if (bypassed_ftok)
SEND_MSG(VARLSTCNT(4) ERR_TEXT, 2,
LEN_AND_LIT("bypassed at database encryption initialization"));
} /* else encryption is turned off in the file header. Continue as-is. Any encryption initialization done
* before is discarded
*/
}
INIT_DB_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status, reg, csa, tsd);
if (WBTEST_ENABLED(WBTEST_HOLD_ONTO_FTOKSEM_IN_DBINIT))
{
DBGFPF((stderr, "Holding the ftok semaphore.. Sleeping for 30 seconds\n"));
LONG_SLEEP(30);
DBGFPF((stderr, "30 second sleep exhausted.. continuing with rest of db_init..\n"));
}
for (loopcnt = 0; MAX_ACCESS_SEM_RETRIES > loopcnt; loopcnt++)
{
CSD2UDI(tsd, udi); /* sets udi->semid/shmid/sem_ctime/shm_ctime from file header */
/* we did not create a new ipc resource */
udi->new_sem = udi->new_shm = FALSE;
sem_created = FALSE;
if (INVALID_SEMID == udi->semid)
{ /* access control semaphore does not exist. Create one */
if (0 != udi->gt_sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->gt_shm_ctime)
{ /* We must have somthing wrong in protocol or, code, if this happens. */
assert(FALSE);
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2, LEN_AND_STR(REQRUNDOWN_TEXT));
}
/* Create new semaphore using IPC_PRIVATE. System guarantees a unique id. */
if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
{
udi->semid = INVALID_SEMID;
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
}
udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */
udi->new_sem = udi->new_shm = TRUE;
sem_created = TRUE;
/* change group and permissions */
semarg.buf = &semstat;
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT1"), errno);
if ((-1 != group_id) && (group_id != semstat.sem_perm.gid))
semstat.sem_perm.gid = group_id;
semstat.sem_perm.mode = perm;
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_SET, semarg))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_SET"), errno);
SET_GTM_ID_SEM(udi->semid, status);
if (-1 == status)
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
/* WARNING: Because SETVAL changes sem_ctime, we must NOT do any SETVAL after this one; code here
* and elsewhere uses IPC_STAT to get sem_ctime and relies on sem_ctime as the creation time of the
* semaphore.
*/
semarg.buf = &semstat;
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT2"), errno);
tsd->gt_sem_ctime.ctime = udi->gt_sem_ctime = semarg.buf->sem_ctime;
} else
{ /* "semid" already exists. Need to lock it. Before that do sanity check on "semid" and "shmid" */
if (INVALID_SHMID != udi->shmid)
{
if (WBTEST_ENABLED(WBTEST_HOLD_FTOK_UNTIL_BYPASS))
{
if (4 == semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))
{ /* We are bypasser */
DBGFPF((stderr, "Waiting for all processes to quit.\n"));
while (1 < semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))
LONG_SLEEP(1);
}
}
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
{
if (bypassed_ftok)
{
gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_REGOPENRETRY, 4,
REG_LEN_STR(reg), DB_LEN_STR(reg));
REVERT;
return -1; /* Retry calling db_init. Cleanup in gvcst_init() */
}
PRINT_CRASH_MESSAGE(1, tsd, ERR_TEXT, 2,
LEN_AND_LIT("Error with database control shmctl"), errno);
} else if (shmstat.shm_ctime != tsd->gt_shm_ctime.ctime)
{
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
if (vermismatch)
{
GTM_VERMISMATCH_ERROR;
} else
{
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2,
LEN_AND_LIT("IPC creation time indicates a probable prior crash"));
}
}
semarg.buf = &semstat;
if (-1 == semctl(udi->semid, DB_CONTROL_SEM, IPC_STAT, semarg))
{ /* file header has valid semid but semaphore does not exist */
PRINT_CRASH_MESSAGE(1, tsd, ERR_TEXT, 2,
LEN_AND_LIT("Error with database control semaphore (IPC_STAT)"), errno);
} else if (semarg.buf->sem_ctime != tsd->gt_sem_ctime.ctime)
{
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
if (vermismatch)
{
GTM_VERMISMATCH_ERROR;
} else
{
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2,
LEN_AND_LIT("IPC creation time indicates a probable prior crash"));
}
}
} else
{ /* else "shmid" is NOT valid. This is possible if -
* (a) Another process is holding the access control semaphore for a longer duration of time
* but does NOT have the shared memory setup (MUPIP INTEG -FILE or MUPIP RESTORE).
*
* (b) If a process (like in (a)) were kill -15ed or -9ed and hence did not get a chance to
* do db_ipcs_reset which resets "semid"/"shmid" field in the file header to INVALID.
*
* In either case, try grabbing the semaphore. If not, wait (depending on the user specified
* wait time). Eventually, we will either get hold of the semaphore OR will error out.
*/
udi->new_shm = TRUE; /* Need to create shared memory */
}
}
/* We already have ftok semaphore of this region, so all we need is the access control semaphore */
SET_GTM_SOP_ARRAY(sop, sopcnt, !read_only, (SEM_UNDO | IPC_NOWAIT));
SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT);
if (-1 != status)
break;
else
{
assert(!sem_created); /* if we created the semaphore, we should be able to do the semop */
save_errno = errno;
if (EAGAIN == save_errno)
{
if (NO_SEMWAIT_ON_EAGAIN == TREF(dbinit_max_hrtbt_delta))
{
sem_pid = semctl(udi->semid, DB_CONTROL_SEM, GETPID);
if (-1 != sem_pid)
{
RTS_ERROR(VARLSTCNT(13) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_SEMWT2LONG, 7, process_id, 0, LEN_AND_LIT("access control"),
DB_LEN_STR(reg), sem_pid);
} else
{
save_errno = errno;
if (!SEM_REMOVED(save_errno))
{
RTS_ERROR(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM,
save_errno);
} /* else semaphore was removed. Fall-through */
}
} else if (!do_blocking_semop(udi->semid, gtm_access_sem, start_hrtbt_cntr,
&retstat, reg, &bypassed_access))
{
if (!SEM_REMOVED(retstat.save_errno))
ISSUE_SEMWAIT_ERROR((&retstat), reg, udi, "access control");
save_errno = retstat.save_errno;
} else
{
if (bypassed_access)
SEND_MSG(VARLSTCNT(4) ERR_TEXT, 2,
LEN_AND_LIT("Access control bypassed at init"));
save_errno = status = SS_NORMAL;
break;
}
} else if (!SEM_REMOVED(save_errno))
{
RTS_ERROR(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
}
/* this is possible if a concurrent gds_rundown removed the access control semaphore (if
* it was the last writer). Another possibility is if the user did an ipcrm which removed
* the access control semaphore from the system. Instead of issuing an error right-away,
* retry by reading the file header again. Note, it is not possible for another gds_rundown
* removing the access control semaphore because any other process has to first get the
* ftok lock at startup and since we hold it, they will wait for us to release the ftok.
*/
assert(SEM_REMOVED(save_errno));
if (1 == loopcnt)
{
RTS_ERROR(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
}
READ_DB_FILE_HEADER(reg, tsd);
}
}
assert(-1 != status || bypassed_access);
if (!bypassed_access)
udi->grabbed_access_sem = TRUE;
if(!read_only)
udi->counter_acc_incremented = TRUE;
/* Now that we have the access control semaphore, re-read the file header so we have the uptodate information
* in case some of the fields (like access method) were modified concurrently by MUPIP SET -FILE
*/
READ_DB_FILE_HEADER(reg, tsd);
UDI2CSD(udi, tsd); /* Since we read the file header again, tsd->semid/shmid and corresponding ctime fields
* will not be uptodate. Refresh it with the udi copies as they are the ones used above */
} else
{ /* for have_standalone_access we were already in "mu_rndwn_file" and got "semid" semaphore. Since mu_rndwn_file
* would have gotten "ftok" semaphore before acquiring the access control semaphore, no need to get the "ftok"
* semaphore as well.
*/
READ_DB_FILE_HEADER(reg, tsd); /* file already opened by dbfilopn() done from gvcst_init() */
do_crypt_init = (tsd->is_encrypted && !IS_LKE_IMAGE);
INIT_PROC_ENCRYPTION_IF_NEEDED(csa, do_crypt_init, init_status);
INIT_DB_ENCRYPTION_IF_NEEDED(do_crypt_init, init_status, reg, csa, tsd);
CSD2UDI(tsd, udi);
/* Make sure "mu_rndwn_file" has created semaphore for standalone access */
if (INVALID_SEMID == udi->semid || 0 == udi->gt_sem_ctime)
GTMASSERT;
/* Make sure "mu_rndwn_file" has reset shared memory. In pro, just clear it and proceed. */
assert((INVALID_SHMID == udi->shmid) && (0 == udi->gt_shm_ctime));
/* In pro, just clear it and proceed */
udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */
udi->new_shm = udi->new_sem = TRUE;
}
assert(udi->grabbed_access_sem || bypassed_access);
DO_DB_HDR_CHECK(reg, tsd); /* Basic sanity check on the file header fields */
if (WBTEST_ENABLED(WBTEST_HOLD_ONTO_ACCSEM_IN_DBINIT))
{
DBGFPF((stderr, "Holding the access control semaphore.. Sleeping for 30 seconds\n"));
LONG_SLEEP(30);
DBGFPF((stderr, "30 second sleep exhausted.. continuing with rest of db_init..\n"));
}
if (WBTEST_ENABLED(WBTEST_HOLD_FTOK_UNTIL_BYPASS))
{
if (3 == semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))
{ /* We are ftok semaphore holder */
DBGFPF((stderr, "Holding the ftok semaphore until a new process comes along.\n"));
while (3 == semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))
LONG_SLEEP(1);
}
}
/* Now that the access control lock is obtained and file header passed all sanity checks, update the acc_meth of the
* region from the one in the file header (in case they are different). This way, any later code that relies on the
* acc_meth dereferenced from the region will work correctly. Instead of checking if they are different, do the assignment
* unconditionally
*/
reg->dyn.addr->acc_meth = tsd->acc_meth;
new_shm_ipc = udi->new_shm;
if (new_shm_ipc)
{ /* Bypassers are not allowed to create shared memory so we don't end up with conflicting shared memories */
if (bypassed_ftok || bypassed_access)
{
gtm_putmsg_csa(CSA_ARG(csa) ERR_REGOPENRETRY, 2, REG_LEN_STR(reg), DB_LEN_STR(reg));
REVERT;
return -1; /* Retry calling db_init. Cleanup in gvcst_init() */
}
/* Since we are about to allocate new shared memory, if necessary, adjust the journal buffer size right now.
* Note that if the process setting up shared memory is a read-only process, then we might not flush updated
* jnl_buffer_size to the file header, which is fine because the value in shared memory is what all processes
* are looking at. If necessary, the next process to initialize shared memory will repeat the process of
* adjusting the jnl_buffer_size value.
*/
jnl_buffer_size = tsd->jnl_buffer_size;
if ((0 != jnl_buffer_size) && (jnl_buffer_size < JNL_BUFFER_MIN))
{
ROUND_UP_MIN_JNL_BUFF_SIZE(tsd->jnl_buffer_size, tsd);
SNPRINTF(s, JNLBUFFUPDAPNDX_SIZE, JNLBUFFUPDAPNDX, JNL_BUFF_PORT_MIN(tsd), JNL_BUFFER_MAX);
SEND_MSG(VARLSTCNT(10) ERR_JNLBUFFREGUPD, 4, REG_LEN_STR(reg),
jnl_buffer_size, tsd->jnl_buffer_size, ERR_TEXT, 2, LEN_AND_STR(s));
}
dbsecspc(reg, tsd, &sec_size); /* Find db segment size */
/* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */
GTM_WHITE_BOX_TEST(WBTEST_FAIL_ON_SHMGET, sec_size, GTM_UINT64_MAX);
if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, sec_size, RWDALL | IPC_CREAT)))
{
udi->shmid = (int)INVALID_SHMID;
status_l = INVALID_SHMID;
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
}
tsd->shmid = udi->shmid;
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_STAT1"), errno);
/* change group and permissions */
if ((-1 != group_id) && (group_id != shmstat.shm_perm.gid))
shmstat.shm_perm.gid = group_id;
shmstat.shm_perm.mode = perm;
if (-1 == shmctl(udi->shmid, IPC_SET, &shmstat))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_SET"), errno);
/* Warning: We must read the shm_ctime using IPC_STAT after IPC_SET, which changes it.
* We must NOT do any more IPC_SET or SETVAL after this. Our design is to use
* shm_ctime as creation time of shared memory and store it in file header.
*/
if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl IPC_STAT2"), errno);
tsd->gt_shm_ctime.ctime = udi->gt_shm_ctime = shmstat.shm_ctime;
GTM_ATTACH_SHM;
shm_setup_ok = TRUE;
} else
{
GTM_ATTACH_SHM_AND_CHECK_VERS(vermismatch, shm_setup_ok);
if (vermismatch)
{
GTM_VERMISMATCH_ERROR;
} else if (!shm_setup_ok)
{
PRINT_CRASH_MESSAGE(0, tsd, ERR_TEXT, 2, LEN_AND_LIT("shared memory is invalid"));
}
}
csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
assert(((INTPTR_T)csa->critical & 0xf) == 0); /* critical should be 16-byte aligned */
# ifdef CACHELINE_SIZE
assert(0 == ((INTPTR_T)csa->critical & (CACHELINE_SIZE - 1)));
# endif
/* Note: Here we check jnl_state from database file; its value cannot change without stand-alone access.
* The jnl_buff should be initialized irrespective of read/write process
*/
JNL_INIT(csa, reg, tsd);
csa->shmpool_buffer = (shmpool_buff_hdr_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE(tsd) + JNL_SHARE_SIZE(tsd));
/* Initialize memory for snapshot context */ \
csa->ss_ctx = malloc(SIZEOF(snapshot_context_t));
DEFAULT_INIT_SS_CTX((SS_CTX_CAST(csa->ss_ctx)));
csa->lock_addrs[0] = (sm_uc_ptr_t)csa->shmpool_buffer + SHMPOOL_SECTION_SIZE;
csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
csa->total_blks = tsd->trans_hist.total_blks; /* For test to see if file has extended */
if (new_shm_ipc)
{
memset(csa->nl, 0, SIZEOF(*csa->nl)); /* We allocated shared storage -- we have to init it */
csa->nl->sec_size = sec_size; /* Set the shared memory size */
if (JNL_ALLOWED(csa))
{ /* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not
* necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch
* check in order to determine whether to call jnl_file_open() or not. this is in preparation for that.
*/
csa->jnl->jnl_buff->cycle = 1;
}
}
is_bg = (dba_bg == tsd->acc_meth);
if (is_bg)
csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
else
{
FSTAT_FILE(udi->fd, &stat_buf, stat_res);
if (-1 == stat_res)
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
mmap_sz = stat_buf.st_size - BLK_ZERO_OFF(tsd);
assert(0 < mmap_sz);
CHECK_LARGEFILE_MMAP(reg, mmap_sz); /* can issue rts_error MMFILETOOLARGE */
if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)MMAP_FD(udi->fd, mmap_sz, BLK_ZERO_OFF(tsd), read_only)))
{
RTS_ERROR(VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_SYSCALL, 5, LEN_AND_LIT("mmap()"), CALLFROM, errno);
}
csa->db_addrs[1] = csa->db_addrs[0] + mmap_sz - 1; /* '- 1' due to 0-based indexing */
assert(csa->db_addrs[1] > csa->db_addrs[0]);
csd = csa->hdr = (sgmnt_data_ptr_t)((sm_uc_ptr_t)csa->lock_addrs[1] + 1);
}
/* At this point, shm_setup_ok is TRUE so we are guaranteed that vermismatch is FALSE. Therefore, we can safely
* dereference csa->nl->glob_sec_init without worrying about whether or not it could be at a different offset than
* the current version. The only exception is DSE which can continue even after the VERMISMATCH error and hence
* can have shm_setup_ok set to FALSE at this point.
*/
if (shm_setup_ok && !csa->nl->glob_sec_init && !(bypassed_ftok || bypassed_access))
{
assert(new_shm_ipc);
assert(!vermismatch);
csa->dbinit_shm_created = TRUE;
memcpy(csd, tsd, SIZEOF(sgmnt_data));
READ_DB_FILE_MASTERMAP(reg, csd);
if (csd->machine_name[0]) /* crash occurred */
{
if (0 != STRNCMP_STR(csd->machine_name, machine_name, MAX_MCNAMELEN)) /* crashed on some other node */
RTS_ERROR(VARLSTCNT(8) ERR_HOSTCONFLICT, 6, LEN_AND_STR(machine_name), DB_LEN_STR(reg),
LEN_AND_STR(csd->machine_name));
else
{
PRINT_CRASH_MESSAGE(0, csd, ERR_TEXT, 2,
LEN_AND_LIT("machine name in file header is non-null implying possible crash"));
}
}
if (is_bg)
{
csa->nl->cache_off = -CACHE_CONTROL_SIZE(csd);
db_csh_ini(csa);
bt_malloc(csa);
}
db_csh_ref(csa, TRUE);
shmpool_buff_init(reg);
SS_INFO_INIT(csa);
STRNCPY_STR(csa->nl->machine_name, machine_name, MAX_MCNAMELEN); /* machine name */
assert(MAX_REL_NAME > gtm_release_name_len);
memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1); /* GT.M release name */
memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1); /* GDS label */
memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len); /* database filename */
csa->nl->creation_date_time4 = csd->creation_time4;
csa->nl->highest_lbm_blk_changed = -1;
csa->nl->wcs_timers = -1;
csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
csa->nl->unique_id.uid = FILE_INFO(reg)->fileid; /* save what file we initialized this storage for */
/* save pointers in csa to access shared memory */
csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
if (JNL_ALLOWED(csa))
csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
csa->nl->shmpool_buffer = (sm_off_t)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl);
if (is_bg)
/* Field is sm_off_t (4 bytes) so only in BG mode is this assurred to be 4 byte capable */
csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
if (!read_only || is_bg)
{
csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
csd->max_update_array_size = csd->max_non_bm_update_array_size
= (int4)(ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE));
csd->max_update_array_size += (int4)(ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE));
/* add current db_csh counters into the cumulative counters and reset the current counters */
# define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2) \
csd->COUNTER.cumul_count += csd->COUNTER.curr_count; \
csd->COUNTER.curr_count = 0;
# include "tab_db_csh_acct_rec.h"
# undef TAB_DB_CSH_ACCT_REC
}
csa->nl->wc_blocked = FALSE; /* Since we are creating shared memory, reset wc_blocked to FALSE */
gvstats_rec_csd2cnl(csa); /* should be called before "db_auto_upgrade" */
reg->dyn.addr->ext_blk_count = csd->extension_size;
mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
db_auto_upgrade(reg); /* should be called before "gtm_mutex_init" to ensure NUM_CRIT_ENTRY is nonzero */
DEBUG_ONLY(locknl = csa->nl;) /* for DEBUG_ONLY LOCK_HIST macro */
gtm_mutex_init(reg, NUM_CRIT_ENTRY(csd), FALSE);
DEBUG_ONLY(locknl = NULL;) /* restore "locknl" to default value */
if (read_only)
csa->nl->remove_shm = TRUE; /* gds_rundown can remove shmem if first process has read-only access */
if (FALSE == csd->multi_site_open)
{ /* first time database is opened after upgrading to a GTM version that supports multi-site
* replication
*/
csd->zqgblmod_seqno = 0;
csd->zqgblmod_tn = 0;
if (csd->pre_multisite_resync_seqno > csd->reg_seqno)
csd->pre_multisite_resync_seqno = csd->reg_seqno;
csd->multi_site_open = TRUE;
}
csa->nl->glob_sec_init = TRUE;
STAT_FILE((char *)csa->nl->fname, &stat_buf, stat_res);
if (-1 == stat_res)
{
save_errno = errno;
RTS_ERROR(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), save_errno);
}
set_gdid_from_stat(&csa->nl->unique_id.uid, &stat_buf);
# ifdef RELEASE_LATCH_GLOBAL
/* On HP-UX, it is possible that mucregini/cs_data is not aligned at the same address
* boundary as csd would be in shared memory. This may lead to the initialization and
* usage of different elements of hp_latch_space. This may lead to the latch being
* "in-use" permanently. To resolve this, shm-initialer re-initializes the global latch
* to the "available" state.
* Although Solaris doesn't have the same issue of alignment, we'll cover the case of
* a corrupt latch (say in case of abnormal process termination).
*/
RELEASE_LATCH_GLOBAL(&csd->next_upgrd_warn.time_latch);
# endif
GTM_TRUNCATE_ONLY(recover_truncate(csa, csd, reg);)
csa->nl->jnlpool_shmid = INVALID_SHMID;
} else
{
if (STRNCMP_STR(csa->nl->machine_name, machine_name, MAX_MCNAMELEN)) /* machine names do not match */
{
if (csa->nl->machine_name[0])
RTS_ERROR(VARLSTCNT(8) ERR_HOSTCONFLICT, 6, LEN_AND_STR(machine_name), DB_LEN_STR(reg),
LEN_AND_STR(csa->nl->machine_name));
else
{
PRINT_CRASH_MESSAGE(0, csd, ERR_TEXT, 2,
LEN_AND_LIT("machine name in shared memory is non-null implying possible crash"));
}
}
/* Since nl is memset to 0 initially and then fname is copied over from gv_cur_region and since "fname" is
* guaranteed to not exceed MAX_FN_LEN, we should have a terminating '\0' atleast at csa->nl->fname[MAX_FN_LEN]
*/
assert(csa->nl->fname[MAX_FN_LEN] == '\0'); /* Note: the first '\0' in csa->nl->fname can be much earlier */
/* Check whether csa->nl->fname exists. If not, then it is a serious condition. Error out. */
STAT_FILE((char *)csa->nl->fname, &stat_buf, stat_res);
if (-1 == stat_res)
{
save_errno = errno;
SEND_MSG(VARLSTCNT(13) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
ERR_DBNAMEMISMATCH, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname, save_errno);
PRINT_CRASH_MESSAGE(3, csa->nl, ERR_DBNAMEMISMATCH, 4,
DB_LEN_STR(reg), udi->shmid, csa->nl->fname, save_errno);
}
/* Check whether csa->nl->fname and csa->nl->unique_id.uid are in sync. If not error out. */
if (FALSE == is_gdid_stat_identical(&csa->nl->unique_id.uid, &stat_buf))
{
SEND_MSG(VARLSTCNT(12) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
ERR_DBIDMISMATCH, 4, csa->nl->fname, DB_LEN_STR(reg), udi->shmid);
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_DBIDMISMATCH, 4, csa->nl->fname, DB_LEN_STR(reg), udi->shmid);
}
/* Previously, we used to check for csa->nl->creation_date_time4 vs csd->creation_time4 and treat it as
* an id mismatch situation as well. But later it was determined that as long as the filename and the fileid
* match between the database file header and the copy in shared memory, there is no more matching that needs
* to be done. It is not possible for the user to create a situation where the filename/fileid matches but
* the creation time does not. The only way for this to happen is shared memory corruption in which case we
* have a much bigger problem to deal with -- 2011/03/30 --- nars.
*/
if (FALSE == is_gdid_gdid_identical(&FILE_INFO(reg)->fileid, &csa->nl->unique_id.uid))
{
SEND_MSG(VARLSTCNT(12) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csa->nl->machine_name),
ERR_DBSHMNAMEDIFF, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname);
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_DBSHMNAMEDIFF, 4, DB_LEN_STR(reg), udi->shmid, csa->nl->fname);
}
/* If a regular Recover/Rollback created the shared memory and died (because of a user error or runtime error),
* any process that comes up after that should NOT touch the shared memory or database. The user should reissue
* Rollback/Recover command that will fix the state of the shared memory and bring the database back to a consistent
* state. Note that the reissue of a regular Rollback/Recover command will NOT hit this condition because it invokes
* mu_rndwn_file (STANDALONE) that removes the shared memory. The only case in which mu_rndwn_file does NOT remove
* shared memory is if it was invoked by an Online Rollback in which case the below check should be bypassed
*/
if (csa->nl->donotflush_dbjnl && !jgbl.onlnrlbk)
{
assert(FALSE);
PRINT_CRASH_MESSAGE(0, csa->nl, ERR_TEXT, 2,
LEN_AND_LIT("mupip recover/rollback created shared memory. Needs MUPIP RUNDOWN"));
}
/* verify pointers from our calculation vs. the copy in shared memory */
if (csa->nl->critical != (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl))
{
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("critical"),
(uint4)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->critical);
}
if ((JNL_ALLOWED(csa)) &&
(csa->nl->jnl_buff != (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl)))
{
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("journal buffer"),
(uint4)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->jnl_buff);
}
if (csa->nl->shmpool_buffer != (sm_off_t)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl))
{
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("backup buffer"),
(uint4)((sm_uc_ptr_t)csa->shmpool_buffer - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->shmpool_buffer);
}
if ((is_bg) && (csa->nl->hdr != (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl)))
{
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("file header"),
(uint4)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->hdr);
}
if (csa->nl->lock_addrs != (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl))
{
PRINT_CRASH_MESSAGE(2, csa->nl, ERR_NLMISMATCHCALC, 4, LEN_AND_LIT("lock address"),
(uint4)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl), (uint4)csa->nl->lock_addrs);
}
csa->dbinit_shm_created = FALSE;
if (is_bg)
db_csh_ini(csa);
}
if (REPL_ALLOWED(csd) && is_src_server)
{ /* Bind this database to the journal pool shmid & instance file name that the source server started with.
* Assert that jnlpool_init has already been done by the source server before it does db_init.
*/
assert(NULL != jnlpool.repl_inst_filehdr);
/* Note: csa->nl->replinstfilename is changed under control of the init/rundown semaphore only. */
assert('\0' != jnlpool.jnlpool_ctl->jnlpool_id.instfilename[0]);
replinst_mismatch = FALSE;
if ('\0' == csa->nl->replinstfilename[0])
STRCPY(csa->nl->replinstfilename, jnlpool.jnlpool_ctl->jnlpool_id.instfilename);
else if (STRCMP(csa->nl->replinstfilename, jnlpool.jnlpool_ctl->jnlpool_id.instfilename))
replinst_mismatch = TRUE;
/* Note: csa->nl->jnlpool_shmid is changed under control of the init/rundown semaphore only. */
assert(INVALID_SHMID != jnlpool.repl_inst_filehdr->jnlpool_shmid);
if (INVALID_SHMID == csa->nl->jnlpool_shmid)
csa->nl->jnlpool_shmid = jnlpool.repl_inst_filehdr->jnlpool_shmid;
else if (csa->nl->jnlpool_shmid != jnlpool.repl_inst_filehdr->jnlpool_shmid)
{ /* shmid mismatch. Check if the shmid noted down in db filehdr is out-of-date.
* Possible if the jnlpool has since been deleted. If so, note the new one down.
* If not, then issue an error.
*/
if (-1 == shmctl(csa->nl->jnlpool_shmid, IPC_STAT, &shmstat))
{
save_errno = errno;
if ((EINVAL == save_errno) || (EIDRM == save_errno)) /* EIDRM is only on Linux */
{
replinst_mismatch = FALSE;
csa->nl->jnlpool_shmid = jnlpool.repl_inst_filehdr->jnlpool_shmid;
} else
replinst_mismatch = TRUE;
} else
replinst_mismatch = TRUE;
}
/* Replication instance file or jnlpool id mismatch. Issue error. */
if (replinst_mismatch)
RTS_ERROR(VARLSTCNT(10) ERR_REPLINSTMISMTCH, 8,
LEN_AND_STR(jnlpool.jnlpool_ctl->jnlpool_id.instfilename), jnlpool.repl_inst_filehdr->jnlpool_shmid,
DB_LEN_STR(reg), LEN_AND_STR(csa->nl->replinstfilename), csa->nl->jnlpool_shmid);
}
csa->root_search_cycle = csa->nl->root_search_cycle;
csa->onln_rlbk_cycle = csa->nl->onln_rlbk_cycle; /* take local copy of the current Online Rollback cycle */
csa->db_onln_rlbkd_cycle = csa->nl->db_onln_rlbkd_cycle; /* take local copy of the current Online Rollback mod cycle */
/* Record ftok information as soon as shared memory set up is done */
if (!have_standalone_access && !bypassed_ftok)
FTOK_TRACE(csa, csd->trans_hist.curr_tn, ftok_ops_lock, process_id);
if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL))) /* semval = number of process attached */
{
save_errno = errno;
RTS_ERROR(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
RTS_ERROR_LITERAL("semctl()"), CALLFROM, save_errno);
}
if (!read_only && (1 == semval) && !bypassed_ftok && !bypassed_access)
{ /* For read-write process flush file header to write machine_name,
* semaphore, shared memory id and semaphore creation time to disk.
*/
csa->nl->remove_shm = FALSE;
STRNCPY_STR(csd->machine_name, machine_name, MAX_MCNAMELEN);
if (!is_bg)
{
csd->shmid = tsd->shmid;
csd->semid = tsd->semid;
csd->gt_sem_ctime = tsd->gt_sem_ctime;
csd->gt_shm_ctime = tsd->gt_shm_ctime;
}
DB_LSEEKWRITE(csa, udi->fn, udi->fd, (off_t)0, (sm_uc_ptr_t)csd, SIZEOF(sgmnt_data), save_errno);
if (0 != save_errno)
{
RTS_ERROR(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("Error with database header flush"), save_errno);
}
} else if (read_only && new_shm_ipc)
{ /* For read-only process if shared memory and semaphore created for first time,
* semaphore and shared memory id, and semaphore creation time are written to disk.
*/
db_ipcs.semid = tsd->semid; /* use tsd instead of csd in order for MM to work too */
db_ipcs.shmid = tsd->shmid;
db_ipcs.gt_sem_ctime = tsd->gt_sem_ctime.ctime;
db_ipcs.gt_shm_ctime = tsd->gt_shm_ctime.ctime;
db_ipcs.fn_len = reg->dyn.addr->fname_len;
memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len);
db_ipcs.fn[reg->dyn.addr->fname_len] = 0;
WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);
if (0 != send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0))
RTS_ERROR(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, LEN_AND_LIT("gtmsecshr failed to update database file header"));
}
if (gtm_fullblockwrites)
{ /* We have been asked to do FULL BLOCK WRITES for this database. On *NIX, attempt to get the filesystem
* blocksize from statvfs. This allows a full write of a blockwithout the OS having to fetch the old
* block for a read/update operation. We will round the IOs to the next filesystem blocksize if the
* following criteria are met:
*
* 1) Database blocksize must be a whole multiple of the filesystem blocksize for the above
* mentioned reason.
*
* 2) Filesystem blocksize must be a factor of the location of the first data block
* given by the start_vbn.
*
* The saved length (if the feature is enabled) will be the filesystem blocksize and will be the
* length that a database IO is rounded up to prior to initiation of the IO.
*/
FSTATVFS_FILE(udi->fd, &dbvfs, status);
if (-1 != status)
{
dblksize = csd->blk_size;
fbwsize = (int4)dbvfs.f_bsize;
if (0 != fbwsize && (0 == dblksize % fbwsize) && (0 == ((csd->start_vbn - 1) * DISK_BLOCK_SIZE) % fbwsize))
csa->do_fullblockwrites = TRUE; /* This region is fullblockwrite enabled */
/* Report this length in DSE even if not enabled */
csa->fullblockwrite_len = fbwsize; /* Length for rounding fullblockwrite */
} else
{
save_errno = errno;
SEND_MSG(VARLSTCNT(8) ERR_SYSCALL, 5, LEN_AND_LIT("fstatvfs"), CALLFROM, save_errno);
}
}
++csa->nl->ref_cnt; /* This value is changed under control of the init/rundown semaphore only */
assert(!csa->ref_cnt); /* Increment shared ref_cnt before private ref_cnt increment. */
csa->ref_cnt++; /* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */
if (WBTEST_ENABLED(WBTEST_HOLD_SEM_BYPASS) && !IS_GTM_IMAGE)
{
if (0 == csa->nl->wbox_test_seq_num)
{
csa->nl->wbox_test_seq_num = 1;
DBGFPF((stderr, "Holding semaphores...\n"));
while (1 == csa->nl->wbox_test_seq_num)
LONG_SLEEP(1);
}
}
if (!have_standalone_access && !jgbl.onlnrlbk && !bypassed_access)
{
/* Release control lockout now that it is init'd */
if (0 != (save_errno = do_semop(udi->semid, DB_CONTROL_SEM, -1, SEM_UNDO)))
{
save_errno = errno;
RTS_ERROR(VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, \
RTS_ERROR_LITERAL("semop()"), CALLFROM, save_errno);
}
udi->grabbed_access_sem = FALSE;
}
if (WBTEST_ENABLED(WBTEST_SEMTOOLONG_STACK_TRACE) && (1 == csa->nl->wbox_test_seq_num))
{
csa->nl->wbox_test_seq_num = 2;
/* Wait till the other process has got some stack traces */
while (csa->nl->wbox_test_seq_num != 3)
LONG_SLEEP(10);
}
if (!have_standalone_access && !bypassed_ftok)
{ /* Release ftok semaphore lock so that any other ftok conflicted database can continue now */
if (!ftok_sem_release(reg, FALSE, FALSE))
RTS_ERROR(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
FTOK_TRACE(csa, csd->trans_hist.curr_tn, ftok_ops_release, process_id);
udi->grabbed_ftok_sem = FALSE;
}
REVERT;
return 0;
}