fis-gtm/sr_unix/mu_rndwn_file.c

901 lines
34 KiB
C
Raw Normal View History

/****************************************************************
* *
* Copyright 2001, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_ipc.h"
#include "gtm_string.h"
#include "gtm_time.h"
#include "gtm_unistd.h"
#include "gtm_stdio.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <errno.h>
#include "gtm_sem.h"
#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gtmio.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "iosp.h"
#include "jnl.h"
#include "mutex.h"
#include "lockconst.h"
#include "interlock.h"
#include "aswp.h"
#include "gtm_c_stack_trace.h"
#include "eintr_wrappers.h"
#include "eintr_wrapper_semop.h"
#include "mu_rndwn_file.h"
#include "performcaslatchcheck.h"
#include "util.h"
#include "send_msg.h"
#include "tp_change_reg.h"
#include "dbfilop.h"
#include "gvcst_protos.h" /* for gvcst_init_sysops prototype */
#include "do_semop.h"
#include "ipcrmid.h"
#include "rc_cpt_ops.h"
#include "gtmmsg.h"
#include "wcs_flu.h"
#include "do_shmat.h"
#include "is_file_identical.h"
#include "io.h"
#include "gtmsecshr.h"
#include "ftok_sems.h"
#include "mu_rndwn_all.h"
#include "error.h"
#ifdef GTM_CRYPT
#include "gtmcrypt.h"
#endif
#include "db_snapshot.h"
#include "shmpool.h" /* Needed for the shmpool structures */
#include "is_proc_alive.h"
#include "ss_lock_facility.h"
#ifndef GTM_SNAPSHOT
# error "Snapshot facility not available in this platform"
#endif
GBLREF sgmnt_addrs *cs_addrs;
GBLREF gd_region *gv_cur_region;
GBLREF sgmnt_data_ptr_t cs_data;
GBLREF uint4 process_id;
GBLREF boolean_t mupip_jnl_recover;
GBLREF ipcs_mesg db_ipcs;
GBLREF gd_region *standalone_reg;
GBLREF jnl_gbls_t jgbl;
GBLREF boolean_t mu_rndwn_file_dbjnl_flush;
GBLREF gd_region *ftok_sem_reg;
static gd_region *rundown_reg = NULL;
static gd_region *temp_region;
static sgmnt_data_ptr_t temp_cs_data;
static sgmnt_addrs *temp_cs_addrs;
static boolean_t restore_rndwn_gbl;
LITREF char gtm_release_name[];
LITREF int4 gtm_release_name_len;
error_def(ERR_BADDBVER);
error_def(ERR_DBFILERR);
error_def(ERR_DBNOTGDS);
error_def(ERR_DBRDONLY);
error_def(ERR_DBNAMEMISMATCH);
error_def(ERR_DBIDMISMATCH);
error_def(ERR_DBSHMNAMEDIFF);
error_def(ERR_TEXT);
error_def(ERR_VERMISMATCH);
error_def(ERR_SYSCALL);
error_def(ERR_SHMREMOVED);
error_def(ERR_SEMREMOVED);
#define RESET_GV_CUR_REGION \
{ \
gv_cur_region = temp_region; \
cs_addrs = temp_cs_addrs; \
cs_data = temp_cs_data; \
}
#define CLNUP_AND_RETURN(REG, UDI, TSD, SEM_CREATED, SEM_INCREMENTED) \
{ \
int rc; \
\
if (FD_INVALID != UDI->fd) \
{ \
CLOSEFILE_RESET(UDI->fd, rc); \
assert(FD_INVALID == UDI->fd); \
} \
if (NULL != TSD) \
{ \
free(TSD); \
TSD = NULL; \
} \
if (SEM_CREATED) \
{ \
if (-1 == semctl(UDI->semid, 0, IPC_RMID)) \
{ \
RNDWN_ERR("!AD -> Error removing semaphore.", REG); \
} else \
{ \
send_msg(VARLSTCNT(3) ERR_SEMREMOVED, 1, UDI->semid); \
SEM_CREATED = FALSE; \
} \
} else if (SEM_INCREMENTED) \
{ \
do_semop(udi->semid, 0, -1, IPC_NOWAIT | SEM_UNDO); \
SEM_INCREMENTED = FALSE; \
} \
REVERT; \
assert((NULL == ftok_sem_reg) || (REG == ftok_sem_reg)); \
if (REG == ftok_sem_reg) \
ftok_sem_release(REG, TRUE, TRUE); \
if (restore_rndwn_gbl) \
{ \
RESET_GV_CUR_REGION; \
restore_rndwn_gbl = FALSE; \
} \
return FALSE; \
}
#define SEG_SHMATTACH(addr, reg, udi, tsd, sem_created, sem_incremented) \
{ \
if (-1 == (sm_long_t)(cs_addrs->db_addrs[0] = (sm_uc_ptr_t) \
do_shmat(udi->shmid, addr, SHM_RND))) \
{ \
if (EINVAL != errno) \
RNDWN_ERR("!AD -> Error attaching to shared memory", (reg)); \
/* shared memory segment no longer exists */ \
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented); \
} \
}
#define SEG_MEMMAP(addr, reg, udi, tsd, sem_created, sem_incremented) \
{ \
if (-1 == (sm_long_t)(cs_addrs->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)addr, \
(size_t)stat_buf.st_size, PROT_READ | PROT_WRITE, GTM_MM_FLAGS, udi->fd, (off_t)0))) \
{ \
RNDWN_ERR("!AD -> Error mapping memory", (reg)); \
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented); \
} \
}
#define REMOVE_SEMID_IF_ORPHANED(REG, UDI, TSD, SEM_CREATED, SEM_INCREMENTED) \
{ \
assert(!standalone); \
if (is_orphaned_gtm_semaphore(UDI->semid)) \
{ \
if (0 != sem_rmid(UDI->semid)) \
{ \
RNDWN_ERR("!AD -> Error removing semaphore.", reg); \
CLNUP_AND_RETURN(REG, UDI, TSD, SEM_CREATED, SEM_INCREMENTED); \
} \
send_msg(VARLSTCNT(3) ERR_SEMREMOVED, 1, UDI->semid); \
UDI->semid = INVALID_SEMID; \
} \
}
/*
* Description:
* This routine is used for two reasons
* 1) get standalone access:
* First uses/creates ftok semaphore.
* Then create a new semaphore for that region.
* Releases ftok semaphore (does not remove).
* 2) rundown shared memory
* Uses/creates ftok semaphore.
* Parameters:
* standalone = TRUE => create semaphore to get standalone access
* standalone = FALSE => rundown shared memory
* Return Value:
* TRUE for success
* FALSE for failure
*/
bool mu_rndwn_file(gd_region *reg, bool standalone)
{
int status, save_errno, sopcnt, tsd_size, csd_size;
char now_running[MAX_REL_NAME];
boolean_t rc_cpt_removed = FALSE, sem_created = FALSE, sem_incremented = FALSE, is_gtm_shm;
boolean_t glob_sec_init, db_shm_in_sync, remove_shmid;
sgmnt_data_ptr_t csd, tsd = NULL;
jnl_private_control *jpc;
struct sembuf sop[4];
struct shmid_ds shm_buf;
file_control *fc;
unix_db_info *udi;
enum db_acc_method acc_meth;
struct stat stat_buf;
struct semid_ds semstat;
union semun semarg;
int semop_res, stat_res;
uint4 status_msg, ss_pid;
int rc;
GTMCRYPT_ONLY(
int init_status;
sgmnt_addrs *csa;
)
shm_snapshot_t *ss_shm_ptr;
gtm_uint64_t sec_size;
restore_rndwn_gbl = FALSE;
assert(!mupip_jnl_recover || standalone);
temp_region = gv_cur_region; /* save gv_cur_region wherever there is scope for it to be changed */
rundown_reg = gv_cur_region = reg;
# ifdef GTCM_RC
rc_cpt_removed = mupip_rundown_cpt();
# endif
fc = reg->dyn.addr->file_cntl;
fc->op = FC_OPEN;
status = dbfilop(fc);
gv_cur_region = temp_region;
udi = FILE_INFO(reg);
if (SS_NORMAL != status)
{
gtm_putmsg(VARLSTCNT(5) status, 2, DB_LEN_STR(reg), errno);
if (FD_INVALID != udi->fd) /* Since dbfilop failed, close udi->fd only if it was opened */
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
return FALSE;
}
/*
* read_only process cannot rundown database.
* read only process can succeed to get standalone access of the database,
* if the db is clean with no orphaned shared memory.
* Note: we use gtmsecshr for updating file header for semaphores id.
*/
if (reg->read_only && !standalone)
{
gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
return FALSE;
}
ESTABLISH_RET(mu_rndwn_file_ch, FALSE);
if (!ftok_sem_get(reg, TRUE, GTM_ID, !standalone))
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
/*
* Now we have standalone access of the database using ftok semaphore.
* Any other ftok conflicted database will suspend there operation at this point.
* At the end of this routine, we release ftok semaphore lock.
*/
/* We only need to read SIZEOF(sgmnt_data) here */
tsd_size = ROUND_UP(SIZEOF(sgmnt_data), DISK_BLOCK_SIZE);
tsd = (sgmnt_data_ptr_t)malloc(tsd_size);
LSEEKREAD(udi->fd, 0, tsd, tsd_size, status);
if (0 != status)
{
RNDWN_ERR("!AD -> Error reading from file.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
# ifdef GTM_CRYPT
/* During rundown gvcst_init is not called and hence we need to do the encryption setup here. */
/* We do some basic encryption initializations here.
* 1. Call hash check to figure out if the database file's hash matches with that of the db_key_file.
* 2. if the dat file is encrypted, then make a call to the encryption plugin with GTMCRYPT_GETKEY to obtain the
* symmetric key which can be used at later stages for encryption and decryption.
* 3. malloc csa->encrypted_blk_contents to be used in jnl_write_aimg_rec and dsk_write_nocache
*/
if (tsd->is_encrypted)
{
csa = &(udi->s_addrs);
INIT_PROC_ENCRYPTION(init_status);
if (0 == init_status)
INIT_DB_ENCRYPTION(reg->dyn.addr->fname, csa, tsd, init_status);
if (0 != init_status)
{
GC_GTM_PUTMSG(init_status, (reg->dyn.addr->fname));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
}
# endif
udi->shmid = tsd->shmid;
udi->semid = tsd->semid;
udi->gt_sem_ctime = tsd->gt_sem_ctime.ctime;
udi->gt_shm_ctime = tsd->gt_shm_ctime.ctime;
if (standalone)
{
semarg.buf = &semstat;
if (INVALID_SEMID == udi->semid || -1 == semctl(udi->semid, 0, IPC_STAT, semarg) ||
tsd->gt_sem_ctime.ctime != semarg.buf->sem_ctime)
{
/*
* if no database access control semaphore id is found, or,
* if semaphore id is found but ctime does not match, create new semaphore.
*/
if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
{
udi->semid = INVALID_SEMID;
RNDWN_ERR("!AD -> Error with semget with IPC_CREAT.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
sem_created = TRUE;
tsd->semid = udi->semid;
/*
* Set value of semaphore number 2 ( = FTOK_SEM_PER_ID - 1) as GTM_ID.
* In case we have orphaned semaphore for some reason, mupip rundown will be
* able to identify GTM semaphores from the value and can remove.
*/
semarg.val = GTM_ID;
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg))
{
RNDWN_ERR("!AD -> Error with semctl with SETVAL.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/*
* Warning: We must read the sem_ctime after SETVAL, which changes it.
* We must not do any more SETVAL after this.
* We consider sem_ctime as creation time of semaphore.
*/
semarg.buf = &semstat;
if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
{
RNDWN_ERR("!AD -> Error with semctl with IPC_STAT.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
udi->gt_sem_ctime = tsd->gt_sem_ctime.ctime = semarg.buf->sem_ctime;
}
/* Now lock the database using access control semaphore and increment counter */
sop[0].sem_num = 0; sop[0].sem_op = 0;
sop[1].sem_num = 0; sop[1].sem_op = 1;
sop[2].sem_num = 1; sop[2].sem_op = 0;
sop[3].sem_num = 1; sop[3].sem_op = 1;
sopcnt = 4;
sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = sop[3].sem_flg = SEM_UNDO | IPC_NOWAIT;
SEMOP(udi->semid, sop, sopcnt, semop_res, NO_WAIT);
if (-1 == semop_res)
{
RNDWN_ERR("!AD -> File already open by another process.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/* At this point, we have the the database access control lock.
* We also incremented the counter semaphore.
* We also have the ftok semaphore lock acquired.
*/
sem_incremented = TRUE;
}
/* Now rundown database if shared memory segment exists.
* We try this for both values of "standalone"
*/
if (INVALID_SHMID == udi->shmid || -1 == shmctl(udi->shmid, IPC_STAT, &shm_buf) ||
tsd->gt_shm_ctime.ctime != shm_buf.shm_ctime) /* if no shared memory exists */
{
if (rc_cpt_removed)
{ /* reset RC values if we've rundown the RC CPT */
/* attempt to force-write header */
tsd->rc_srv_cnt = tsd->dsid = tsd->rc_node = 0;
tsd->owner_node = 0;
assert(FALSE); /* not sure what to do here. handle it if/when it happens */
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
} else
{ /* Note that if creation time does not match, we ignore that shared memory segment.
* It might result in orphaned shared memory segment.
*/
tsd->shmid = udi->shmid = INVALID_SHMID;
tsd->gt_shm_ctime.ctime = udi->gt_shm_ctime = 0;
if (standalone)
{
/* Reset ipc fields in file header */
if (!reg->read_only)
{
if (mupip_jnl_recover)
{
memset(tsd->machine_name, 0, MAX_MCNAMELEN);
tsd->freeze = 0;
tsd->owner_node = 0;
}
LSEEKWRITE(udi->fd, (off_t)0, tsd, tsd_size, status);
if (0 != status)
{
RNDWN_ERR("!AD -> Unable to write header to disk.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
} else
{
db_ipcs.semid = tsd->semid;
db_ipcs.gt_sem_ctime = tsd->gt_sem_ctime.ctime;
db_ipcs.shmid = tsd->shmid;
db_ipcs.gt_shm_ctime = tsd->gt_shm_ctime.ctime;
if (!get_full_path((char *)DB_STR_LEN(reg), db_ipcs.fn, &db_ipcs.fn_len,
MAX_TRANS_NAME_LEN, &status_msg))
{
gtm_putmsg(VARLSTCNT(1) status_msg);
RNDWN_ERR("!AD -> get_full_path failed.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
db_ipcs.fn[db_ipcs.fn_len] = 0;
if (0 != send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0))
{
RNDWN_ERR("!AD -> gtmsecshr was unable to write header to disk.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
}
if (!ftok_sem_release(reg, FALSE, FALSE))
{
RNDWN_ERR("!AD -> Error from ftok_sem_release.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
REVERT;
free(tsd);
standalone_reg = reg;
return TRUE; /* For "standalone" and "no shared memory existing", we exit here */
} else
{ /* We are here for not standalone (basically the "mupip rundown" command).
* We have not created any new semaphore and no shared memory was existing.
* So just reset the file header ipc fields and exit.
*/
assert(!sem_created);
/* Eventhough no shared memory exists at this point, it is possible that we have an
* orphaned semaphore. If so, remove it.
*/
REMOVE_SEMID_IF_ORPHANED(reg, udi, tsd, sem_created, sem_incremented);
memset(tsd->machine_name, 0, MAX_MCNAMELEN);
tsd->freeze = 0;
tsd->owner_node = 0;
tsd->shmid = INVALID_SHMID;
tsd->semid = INVALID_SEMID;
tsd->gt_sem_ctime.ctime = 0;
tsd->gt_shm_ctime.ctime = 0;
}
}
assert(!standalone);
LSEEKWRITE(udi->fd, (off_t)0, tsd, tsd_size, status);
if (0 != status)
{
RNDWN_ERR("!AD -> Unable to write header to disk.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
free(tsd);
REVERT;
/* For mupip rundown (standalone = FALSE), we release/remove ftok semaphore here. */
if (!ftok_sem_release(reg, TRUE, TRUE))
{
RNDWN_ERR("!AD -> Error from ftok_sem_release.", reg);
assert(!sem_created);
return FALSE;
}
return TRUE; /* For "!standalone" and "no shared memory existing", we exit here */
}
/*
* shared memory already exists, so now find the number of users attached to it
* if it is zero, it needs to be flushed to disk, otherwise, we cannot help.
*/
if (reg->read_only) /* read only process can't succeed beyond this point */
{
gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/* Now we have a pre-existing shared memory section with no other processes attached. Do some setup */
if (memcmp(tsd->label, GDS_LABEL, GDS_LABEL_SZ - 1))
{
if (memcmp(tsd->label, GDS_LABEL, GDS_LABEL_SZ - 3))
gtm_putmsg(VARLSTCNT(4) ERR_DBNOTGDS, 2, DB_LEN_STR(reg));
else
gtm_putmsg(VARLSTCNT(4) ERR_BADDBVER, 2, DB_LEN_STR(reg));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
reg->dyn.addr->acc_meth = acc_meth = tsd->acc_meth;
dbsecspc(reg, tsd, &sec_size);
# ifdef __MVS__
/* match gvcst_init_sysops.c shmget with __IPC_MEGA or _LP64 */
if (ROUND_UP(sec_size, MEGA_BOUND) != shm_buf.shm_segsz)
# else
if (sec_size != shm_buf.shm_segsz)
# endif
{
util_out_print("Expected shared memory size is !SL, but found !SL",
TRUE, sec_size, shm_buf.shm_segsz);
util_out_print("!AD -> Existing shared memory size do not match.", TRUE, DB_LEN_STR(reg));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/*
* save gv_cur_region, cs_data, cs_addrs, and restore them on return
*/
temp_region = gv_cur_region;
temp_cs_data = cs_data;
temp_cs_addrs = cs_addrs;
restore_rndwn_gbl = TRUE;
gv_cur_region = reg;
tp_change_reg();
SEG_SHMATTACH(0, reg, udi, tsd, sem_created, sem_incremented);
cs_addrs->nl = (node_local_ptr_t)cs_addrs->db_addrs[0];
/* The following checks for GDS_LABEL_GENERIC, gtm_release_name, and cs_addrs->nl->glob_sec_init ensure that the
* shared memory under consideration is valid. First, since cs_addrs->nl->label is in the same place for every
* version, a failing check means it is most likely NOT a GT.M created shared memory, so no attempt will be
* made to delete it. Next a successful match of the currently running release will guarantee that all fields in
* the structure are where they're expected to be -- without this check the glob_sec_init check should not be done
* as this field is at different offsets in different versions. Finally, we can check the glob_sec_init flag to
* verify that shared memory has been completely initialized. If not, we should delete it right away. [C9D07-002355]
*/
if (!MEMCMP_LIT(cs_addrs->nl->label, GDS_LABEL_GENERIC))
{
is_gtm_shm = TRUE;
remove_shmid = TRUE;
memcpy(now_running, cs_addrs->nl->now_running, MAX_REL_NAME);
if (memcmp(now_running, gtm_release_name, gtm_release_name_len + 1))
{
gtm_putmsg(VARLSTCNT(8) ERR_VERMISMATCH, 6, DB_LEN_STR(reg), gtm_release_name_len,
gtm_release_name, LEN_AND_STR(now_running));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
if (cs_addrs->nl->glob_sec_init)
{
glob_sec_init = TRUE;
if (memcmp(cs_addrs->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1))
{
if (memcmp(cs_addrs->nl->label, GDS_LABEL, GDS_LABEL_SZ - 3))
gtm_putmsg(VARLSTCNT(8) ERR_DBNOTGDS, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, RTS_ERROR_LITERAL("(from shared segment - nl)"));
else
gtm_putmsg(VARLSTCNT(8) ERR_BADDBVER, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, RTS_ERROR_LITERAL("(from shared segment - nl)"));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/* Since nl is memset to 0 initially and then fname is copied over from gv_cur_region and since "fname" is
* guaranteed to not exceed MAX_FN_LEN, we should have a terminating '\0' at least at
* cs_addrs->nl->fname[MAX_FN_LEN]
*/
assert(cs_addrs->nl->fname[MAX_FN_LEN] == '\0'); /* First '\0' in cs_addrs->nl->fname can be earlier */
db_shm_in_sync = TRUE; /* assume this unless proven otherwise */
/* Check whether cs_addrs->nl->fname exists. If not, then db & shm are not in sync. */
STAT_FILE((char *)cs_addrs->nl->fname, &stat_buf, stat_res);
if (-1 == stat_res)
{
save_errno = errno;
db_shm_in_sync = FALSE;
if( ENOENT == save_errno)
{
send_msg(VARLSTCNT(7) MAKE_MSG_INFO(ERR_DBNAMEMISMATCH), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname, save_errno);
gtm_putmsg(VARLSTCNT(6) MAKE_MSG_INFO(ERR_DBNAMEMISMATCH), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname);
/* In this case, the shared memory no longer points to a valid db file in the filesystem.
* So it is best that we remove this shmid. But remove_shmid is already TRUE. Assert that.*/
assert(remove_shmid);
}
else /*Could be permission issue*/
{
send_msg(VARLSTCNT(7) MAKE_MSG_INFO(ERR_DBNAMEMISMATCH), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname, save_errno);
gtm_putmsg(VARLSTCNT(7) MAKE_MSG_INFO(ERR_DBNAMEMISMATCH), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname, save_errno);
remove_shmid = FALSE; /* Shared memory might be pointing to valid database */
}
}
if (db_shm_in_sync)
{ /* Check if csa->nl->fname and csa->nl->dbfid are in sync. If not, then db & shm are not in sync */
if (FALSE == is_gdid_stat_identical(&cs_addrs->nl->unique_id.uid, &stat_buf))
{
send_msg(VARLSTCNT(10) MAKE_MSG_INFO(ERR_DBIDMISMATCH), 4, cs_addrs->nl->fname,
DB_LEN_STR(reg), udi->shmid, ERR_TEXT, 2,
LEN_AND_LIT("[MUPIP] Database filename and fileid in shared memory are not in sync"));
gtm_putmsg(VARLSTCNT(10) MAKE_MSG_INFO(ERR_DBIDMISMATCH), 4, cs_addrs->nl->fname,
DB_LEN_STR(reg), udi->shmid, ERR_TEXT, 2,
LEN_AND_LIT("[MUPIP] Database filename and fileid in shared memory are not in sync"));
db_shm_in_sync = FALSE;
/* In this case, the shared memory points to a file that exists in the filesystem but
* the fileid of the currently present file does not match the file id recorded in
* shared memory at shm creation time. Therefore the database file has since been
* overwritten. In this case, we have no other option but to remove this shmid.
* remove_shmid is already TRUE. Assert that.
*/
assert(remove_shmid);
}
}
/* Check if reg->dyn.addr->fname and csa->nl->fname are identical filenames.
* If not, then db that we want to rundown points to a shared memory which in turn points
* to a DIFFERENT db file. In this case, do NOT remove the shared memory as it is possible
* some other database file on the system points to it and passes the filename identicality check.
*/
if (db_shm_in_sync && !is_file_identical((char *)cs_addrs->nl->fname, (char *)reg->dyn.addr->fname))
{
send_msg(VARLSTCNT(6) MAKE_MSG_INFO(ERR_DBSHMNAMEDIFF), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname);
gtm_putmsg(VARLSTCNT(6) MAKE_MSG_INFO(ERR_DBSHMNAMEDIFF), 4, DB_LEN_STR(reg),
udi->shmid, cs_addrs->nl->fname);
db_shm_in_sync = FALSE;
remove_shmid = FALSE;
}
/* If db & shm are not in sync at this point, skip the part of flushing shm to db file on disk.
* We still need to reset the fields (shmid, semid etc.) in db file header.
* About deleting the shmid, it depends on the type of out-of-sync between db & shm. This is handled
* by setting the variable "remove_shmid" appropriately in each case.
*/
if (db_shm_in_sync)
{
/* If db & shm are in sync AND we aren't alone in using it, we can do nothing */
if (0 != shm_buf.shm_nattch)
{
util_out_print("!AD [!UL]-> File is in use by another process.",
TRUE, DB_LEN_STR(reg), udi->shmid);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
/* The shared section is valid and up-to-date with respect to the database file header;
* ignore the temporary storage and use the shared section from here on
*/
cs_addrs->critical = (mutex_struct_ptr_t)(cs_addrs->db_addrs[0] + NODE_LOCAL_SIZE);
assert(((INTPTR_T)cs_addrs->critical & 0xf) == 0); /* critical should be 16-byte aligned */
# ifdef CACHELINE_SIZE
assert(0 == ((INTPTR_T)cs_addrs->critical & (CACHELINE_SIZE - 1)));
# endif
JNL_INIT(cs_addrs, reg, tsd);
cs_addrs->shmpool_buffer = (shmpool_buff_hdr_ptr_t)(cs_addrs->db_addrs[0] + NODE_LOCAL_SPACE +
JNL_SHARE_SIZE(tsd));
cs_addrs->lock_addrs[0] = (sm_uc_ptr_t)cs_addrs->shmpool_buffer + SHMPOOL_SECTION_SIZE;
cs_addrs->lock_addrs[1] = cs_addrs->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
if (dba_bg == acc_meth)
cs_data = csd = cs_addrs->hdr = (sgmnt_data_ptr_t)(cs_addrs->lock_addrs[1] + 1 +
CACHE_CONTROL_SIZE(tsd));
else
{
cs_addrs->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(cs_addrs->lock_addrs[1] + 1);
FSTAT_FILE(udi->fd, &stat_buf, stat_res);
if (-1 == stat_res)
{
RNDWN_ERR("!AD -> Error with fstat.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
SEG_MEMMAP(NULL, reg, udi, tsd, sem_created, sem_incremented);
cs_data = csd = cs_addrs->hdr = (sgmnt_data_ptr_t)cs_addrs->db_addrs[0];
cs_addrs->db_addrs[1] = cs_addrs->db_addrs[0] + stat_buf.st_size - 1;
}
if (sem_created)
{
csd->semid = tsd->semid;
csd->gt_sem_ctime.ctime = tsd->gt_sem_ctime.ctime;
}
assert(JNL_ALLOWED(csd) == JNL_ALLOWED(tsd));
free(tsd);
tsd = NULL;
/* Check to see that the fileheader in the shared segment is valid, so we wont end up
* flushing garbage to the db file.
* Check the label in the header - keep adding any further appropriate checks in future here.
*/
if (memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1))
{
if (memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 3))
{
status_msg = ERR_DBNOTGDS;
if (0 != shm_rmid(udi->shmid))
gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, RTS_ERROR_TEXT("Error removing shared memory"));
else
send_msg(VARLSTCNT(3) ERR_SHMREMOVED, 1, udi->shmid);
} else
status_msg = ERR_BADDBVER;
gtm_putmsg(VARLSTCNT(8) status_msg, 2, DB_LEN_STR(reg),
ERR_TEXT, 2, RTS_ERROR_LITERAL("(File header in the shared segment seems corrupt)"));
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
if (dba_bg == acc_meth)
db_csh_ini(cs_addrs);
else
cs_addrs->acc_meth.mm.base_addr = (sm_uc_ptr_t)((sm_ulong_t)csd + (int)(csd->start_vbn - 1)
* DISK_BLOCK_SIZE);
db_common_init(reg, cs_addrs, csd); /* do initialization common to "db_init" and "mu_rndwn_file" */
/* cleanup mutex stuff */
cs_addrs->hdr->image_count = 0;
gtm_mutex_init(reg, NUM_CRIT_ENTRY, FALSE); /* it is ensured, this is the only process running */
assert(!cs_addrs->hold_onto_crit); /* so it is safe to do unconditional grab_crit/rel_crit below */
cs_addrs->nl->in_crit = 0;
cs_addrs->now_crit = FALSE;
reg->open = TRUE;
if (rc_cpt_removed) /* reset RC values if we've rundown the RC CPT */
csd->rc_srv_cnt = csd->dsid = csd->rc_node = 0;
mu_rndwn_file_dbjnl_flush = TRUE; /* indicate to wcs_recover() no need to write inctn or increment
db curr_tn */
/* If an orphaned snapshot is lying around, then clean it up */
/* SS_MULTI: If multiple snapshots are supported, then we must run-through each of the
* "possible" snapshots in progress and clean them up
*/
assert(1 == MAX_SNAPSHOTS);
ss_get_lock(gv_cur_region); /* Snapshot initiator will wait until this cleanup is done */
ss_shm_ptr = (shm_snapshot_ptr_t)SS_GETSTARTPTR(cs_addrs);
ss_pid = ss_shm_ptr->ss_info.ss_pid;
if (ss_pid && !is_proc_alive(ss_pid, 0))
ss_release(NULL);
ss_release_lock(gv_cur_region);
/* If csa->nl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and
* therefore we should not flush shared memory contents to disk as they might be in an inconsistent
* state. In this case, we will go ahead and remove shared memory (without flushing the contents)
* in this routine. A reissue of the recover/rollback command will restore the database to a
* consistent state.
*/
if (!cs_addrs->nl->donotflush_dbjnl)
{
if (cs_addrs->nl->glob_sec_init)
{ /* WCSFLU_NONE only is done here, as we aren't sure of the state, so no EPOCHs are
* written. If we write an EPOCH record, recover may get confused. Note that for
* journaling we do not call jnl_file_close() with TRUE for second parameter.
* As a result journal file might not have an EOF record.
* So, a new process will switch the journal file and cut the journal file link,
* though it might be a good journal without an EOF
*/
wcs_flu(WCSFLU_NONE);
csd = cs_addrs->hdr;
}
jpc = cs_addrs->jnl;
if (NULL != jpc)
{
grab_crit(gv_cur_region);
/* If we own it or owner died, clear the fsync lock */
if (process_id == jpc->jnl_buff->fsync_in_prog_latch.u.parts.latch_pid)
{
RELEASE_SWAPLOCK(&jpc->jnl_buff->fsync_in_prog_latch);
} else
performCASLatchCheck(&jpc->jnl_buff->fsync_in_prog_latch, FALSE);
if (NOJNL != jpc->channel)
jnl_file_close(gv_cur_region, FALSE, FALSE);
free(jpc);
cs_addrs->jnl = NULL;
rel_crit(gv_cur_region);
}
}
/* Write master-map (in addition to file header) as wcs_flu done above would not have updated it */
csd_size = SIZEOF_FILE_HDR(csd); /* SIZEOF(sgmnt_data) + master-map */
} else
{ /* In this case, we just want to clear a few fields in the file header but we do NOT want to
* write the master map which we dont have access to at this point so set csd_size accordingly.
*/
csd = tsd;
csd_size = tsd_size; /* SIZEOF(sgmnt_data) */
}
mu_rndwn_file_dbjnl_flush = FALSE;
reg->open = FALSE;
/* Note: At this point we have write permission */
memset(csd->machine_name, 0, MAX_MCNAMELEN);
csd->owner_node = 0;
csd->freeze = 0;
csd->shmid = INVALID_SHMID;
csd->gt_shm_ctime.ctime = 0;
if (!standalone)
{
csd->semid = INVALID_SEMID;
csd->gt_sem_ctime.ctime = 0;
}
if (!db_shm_in_sync || (dba_bg == acc_meth))
{
LSEEKWRITE(udi->fd, (off_t)0, csd, csd_size, status);
if (0 != status)
{
RNDWN_ERR("!AD -> Error writing header to disk.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
if (NULL != tsd)
{
assert(!db_shm_in_sync);
free(tsd);
tsd = NULL;
}
} else
{
if (-1 == msync((caddr_t)cs_addrs->db_addrs[0], (size_t)stat_buf.st_size, MS_SYNC))
{
RNDWN_ERR("!AD -> Error synchronizing mapped memory.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
if (-1 == munmap((caddr_t)cs_addrs->db_addrs[0], (size_t)stat_buf.st_size))
{
RNDWN_ERR("!AD -> Error unmapping mapped memory.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
}
} else
glob_sec_init = FALSE;
} else
{
is_gtm_shm = FALSE;
assert(FALSE);
}
/* Detach from shared memory whether it is a GT.M shared memory or not */
if (-1 == shmdt((caddr_t)cs_addrs->nl))
{
assert(FALSE);
RNDWN_ERR("!AD -> Error detaching from shared memory.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
cs_addrs->nl = NULL;
/* Remove the shared memory only if it is a GT.M created one. */
if (is_gtm_shm)
{
if (remove_shmid) /* Note: remove_shmid is defined only if is_gtm_shm is TRUE */
{
if (0 != shm_rmid(udi->shmid))
{
assert(FALSE);
RNDWN_ERR("!AD -> Error removing shared memory.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
else
send_msg(VARLSTCNT(3) ERR_SHMREMOVED, 1, udi->shmid);
}
udi->shmid = INVALID_SHMID;
udi->gt_shm_ctime = 0;
}
/* Check if it is a GT.M created orphaned semaphore and if so remove it (only if standalone access is not requested) */
if (!standalone)
REMOVE_SEMID_IF_ORPHANED(reg, udi, tsd, sem_created, sem_incremented);
/* If the shared memory was found to a valid GT.M created segment and if it was properly initialized, we would
* have cleared shmid in the database file header to INVALID_SHMID. If not, do the reset now as we nevertheless
* dont want the database to connect to that non-GT.M-shmid or uninitialized-GT.M-shmid anymore.
*/
if (!is_gtm_shm || !glob_sec_init)
{
assert(NULL != tsd); /* should not have been freed */
assert(INVALID_SEMID != udi->semid);
tsd->shmid = udi->shmid = INVALID_SHMID;
tsd->gt_shm_ctime.ctime = udi->gt_shm_ctime = 0;
if (mupip_jnl_recover)
{
memset(tsd->machine_name, 0, MAX_MCNAMELEN);
tsd->freeze = 0;
tsd->owner_node = 0;
}
LSEEKWRITE(udi->fd, (off_t)0, tsd, tsd_size, status);
if (0 != status)
{
RNDWN_ERR("!AD -> Unable to write header to disk.", reg);
CLNUP_AND_RETURN(reg, udi, tsd, sem_created, sem_incremented);
}
free(tsd);
tsd = NULL;
}
assert(INVALID_SHMID == udi->shmid);
assert(0 == udi->gt_shm_ctime);
REVERT;
RESET_GV_CUR_REGION;
restore_rndwn_gbl = FALSE;
/* For mupip rundown, standalone == TRUE and we want to release/remove ftok semaphore.
* Otherwise, just release ftok semaphore lock. counter will be one more for this process.
* Exit handlers must take care of removing if necessary.
*/
if (!ftok_sem_release(reg, !standalone, !standalone))
return FALSE;
if (standalone)
standalone_reg = reg;
CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */
return TRUE;
}
CONDITION_HANDLER(mu_rndwn_file_ch)
{
unix_db_info *udi;
START_CH;
/* The mu_rndwn_file_ch was introduced in revision 1.54 to account for the ERR_DBIDMISMATCH rts_error that no longer
* exists. So, it is not expected that mu_rndwn_file_ch will ever be called. Add an assert(FALSE). If the assert does
* not trip after an year or two, we can remove this condition handler altogether. 2010/07
*/
assert(FALSE);
mu_rndwn_file_dbjnl_flush = FALSE;
assert(NULL != rundown_reg);
if (NULL != rundown_reg)
{
udi = FILE_INFO(rundown_reg);
if (udi->grabbed_ftok_sem)
ftok_sem_release(rundown_reg, FALSE, TRUE);
}
standalone_reg = NULL;
if (restore_rndwn_gbl)
{
RESET_GV_CUR_REGION;
}
NEXTCH;
}