/**************************************************************** * * * Copyright 2001, 2013 Fidelity Information Services, Inc * * * * This source code contains the intellectual property * * of its copyright holder(s), and is made available * * under a license. If you do not know the terms of * * the license, please stop and do not read further. * * * ****************************************************************/ #include "mdef.h" #include "gtm_stdlib.h" #include "gtm_stdio.h" #include "gtm_string.h" #include "gtm_unistd.h" #include "gtm_fcntl.h" #include "gtm_stat.h" #include "gtm_inet.h" #include "gtm_time.h" #include #include #include #include "eintr_wrappers.h" #include "gdsroot.h" #include "gdsblk.h" #include "gtm_facility.h" #include "fileinfo.h" #include "gdsbt.h" #include "gdsfhead.h" #include "filestruct.h" #include "jnl.h" #include "repl_msg.h" #include "gtmsource.h" #include "gtmrecv.h" #include "iosp.h" #include "gtmio.h" #include "gtm_logicals.h" #include "trans_log_name.h" #include "gtmmsg.h" #include "repl_sem.h" #include "repl_instance.h" #include "ftok_sems.h" #include "error.h" #include "gds_rundown.h" #include "buddy_list.h" /* needed for muprec.h */ #include "hashtab_int4.h" /* needed for muprec.h */ #include "hashtab_int8.h" /* needed for muprec.h */ #include "hashtab_mname.h" /* needed for muprec.h */ #include "muprec.h" #include "have_crit.h" #include "anticipatory_freeze.h" #ifdef __MVS__ #include "gtm_zos_io.h" #endif GBLREF jnlpool_addrs jnlpool; GBLREF recvpool_addrs recvpool; GBLREF boolean_t in_repl_inst_edit; /* Used by an assert in repl_inst_read/repl_inst_write */ GBLREF boolean_t in_repl_inst_create; /* Used by repl_inst_read/repl_inst_write */ GBLREF boolean_t in_mupip_ftok; /* Used by an assert in repl_inst_read */ GBLREF jnl_gbls_t jgbl; GBLREF gd_addr *gd_header; GBLREF gd_region *gv_cur_region; GBLREF sgmnt_addrs *cs_addrs; GBLREF sgmnt_data_ptr_t cs_data; GBLREF bool in_backup; GBLREF int4 strm_index; GBLREF boolean_t is_src_server; GBLREF boolean_t holds_sem[NUM_SEM_SETS][NUM_SRC_SEMS]; GBLREF boolean_t is_rcvr_server; ZOS_ONLY(error_def(ERR_BADTAG);) error_def(ERR_LOGTOOLONG); error_def(ERR_NOTALLDBOPN); error_def(ERR_REPLFTOKSEM); error_def(ERR_REPLINSTACC); error_def(ERR_REPLINSTCLOSE); error_def(ERR_REPLINSTCREATE); error_def(ERR_REPLINSTFMT); error_def(ERR_REPLINSTNOHIST); error_def(ERR_REPLINSTOPEN); error_def(ERR_REPLINSTREAD); error_def(ERR_REPLINSTSEQORD); error_def(ERR_REPLINSTUNDEF); error_def(ERR_REPLINSTWRITE); error_def(ERR_SYSCALL); error_def(ERR_TEXT); /* Description: * Get the environment of replication instance. * Parameters: * fn : repl instance file name it gets * fn_len: length of fn. * bufsize: the buffer size caller gives. If exceeded, it trucates file name. * Return Value: * TRUE, on success * FALSE, otherwise. */ boolean_t repl_inst_get_name(char *fn, unsigned int *fn_len, unsigned int bufsize, instname_act error_action) { char temp_inst_fn[MAX_FN_LEN + 1]; mstr log_nam, trans_name; uint4 ustatus; int4 status; boolean_t ret; log_nam.addr = GTM_REPL_INSTANCE; log_nam.len = SIZEOF(GTM_REPL_INSTANCE) - 1; trans_name.addr = temp_inst_fn; ret = FALSE; GET_INSTFILE_NAME(do_sendmsg_on_log2long, issue_gtm_putmsg); if (FALSE == ret) { if (issue_rts_error == error_action) { if (SS_LOG2LONG == status) rts_error(VARLSTCNT(5) ERR_LOGTOOLONG, 3, log_nam.len, log_nam.addr, SIZEOF(temp_inst_fn) - 1); else rts_error(VARLSTCNT(1) ERR_REPLINSTUNDEF); } else if (issue_gtm_putmsg == error_action) { if (SS_LOG2LONG == status) gtm_putmsg(VARLSTCNT(5) ERR_LOGTOOLONG, 3, log_nam.len, log_nam.addr, SIZEOF(temp_inst_fn) - 1); else gtm_putmsg(VARLSTCNT(1) ERR_REPLINSTUNDEF); } } return ret; } /* Description: * Reads "buflen" bytes of data into "buff" from the file "fn" at offset "offset" * Parameters: * fn : Instance file name. * offset: Offset at which to read * buff : Buffer to read into * buflen: Number of bytes to read * Return Value: * None */ void repl_inst_read(char *fn, off_t offset, sm_uc_ptr_t buff, size_t buflen) { int status, fd; size_t actual_readlen; unix_db_info *udi; gd_region *reg; repl_inst_hdr_ptr_t replhdr; /* Assert that except for MUPIP REPLIC -INSTANCE_CREATE or -EDITINSTANCE or MUPIP FTOK, all callers hold the FTOK semaphore * on the replication instance file OR the journal pool lock. Note that the instance file might be pointed to by one of the * two region pointers "jnlpool.jnlpool_dummy_reg" or "recvpool.recvpool_dummy_reg" depending on whether the journal pool * or the receive pool was attached to first by this particular process. If both of them are non-NULL, both the region * pointers should be identical. This is also asserted below. * Note: Typically, journal pool lock should have sufficed. However, in certain places like jnlpool_init and recvpool_init, * the journal pool is not yet created and hence grab_lock/rel_lock does not make sense. In those cases we need the FTOK * lock on the instance file. The ONLY exception to this is ROLLBACK in which case it does NOT hold the FTOK semaphore and * since it is NOT necessary for ROLLBACK to have a journal pool open, grab_lock will not be done either. Assert * accordingly. */ assert((NULL == jnlpool.jnlpool_dummy_reg) || (NULL == recvpool.recvpool_dummy_reg) || jnlpool.jnlpool_dummy_reg == recvpool.recvpool_dummy_reg); reg = jnlpool.jnlpool_dummy_reg; if (NULL == reg) reg = recvpool.recvpool_dummy_reg; assert((NULL == reg) && (in_repl_inst_create || in_repl_inst_edit || in_mupip_ftok) || (NULL != reg) && !in_repl_inst_create && !in_repl_inst_edit && !in_mupip_ftok); if (NULL != reg) { udi = FILE_INFO(reg); assert(udi->grabbed_ftok_sem || ((NULL != jnlpool.jnlpool_ctl) && udi->s_addrs.now_crit) || jgbl.mur_rollback); } OPENFILE(fn, O_RDONLY, fd); if (FD_INVALID == fd) rts_error(VARLSTCNT(5) ERR_REPLINSTOPEN, 2, LEN_AND_STR(fn), errno); assert(0 < buflen); if (0 != offset) { LSEEKREAD(fd, offset, buff, buflen, status); } else { /* Read starts from the replication instance file header. Assert that the entire file header was requested. */ assert(REPL_INST_HDR_SIZE <= buflen); /* Use LSEEKREAD_AVAILABLE macro instead of LSEEKREAD. This is because if we are not able to read the entire * fileheader, we still want to see if the "label" field of the file header got read in which case we can * do the format check first. It is important to do the format check before checking "status" returned from * LSEEKREAD* macros since the inability to read the entire file header might actually be due to the * older format replication instance file being smaller than even the newer format instance file header. */ LSEEKREAD_AVAILABLE(fd, offset, buff, buflen, actual_readlen, status); if (GDS_REPL_INST_LABEL_SZ <= actual_readlen) { /* Have read the entire label in the instance file header. Check if it is the right version */ if (memcmp(buff, GDS_REPL_INST_LABEL, GDS_REPL_INST_LABEL_SZ - 1)) { rts_error(VARLSTCNT(8) ERR_REPLINSTFMT, 6, LEN_AND_STR(fn), GDS_REPL_INST_LABEL_SZ - 1, GDS_REPL_INST_LABEL, GDS_REPL_INST_LABEL_SZ - 1, buff); } } if (0 == status) { /* Check a few other fields in the file-header for compatibility */ assert(actual_readlen == buflen); replhdr = (repl_inst_hdr_ptr_t)buff; /* Check endianness match */ if (GTM_IS_LITTLE_ENDIAN != replhdr->is_little_endian) { rts_error(VARLSTCNT(8) ERR_REPLINSTFMT, 6, LEN_AND_STR(fn), LEN_AND_LIT(ENDIANTHIS), LEN_AND_LIT(ENDIANOTHER)); } /* Check 64bitness match */ if (GTM_IS_64BIT != replhdr->is_64bit) { rts_error(VARLSTCNT(8) ERR_REPLINSTFMT, 6, LEN_AND_STR(fn), LEN_AND_LIT(GTM_BITNESS_THIS), LEN_AND_LIT(GTM_BITNESS_OTHER)); } /* At the time of this writing, the only minor version supported is 1. * Whenever this gets updated, we need to add code to do the online upgrade. * Add an assert as a reminder to do this. */ assert(1 == replhdr->replinst_minorver); /* Check if on-the-fly minor-version upgrade is necessary */ if (GDS_REPL_INST_MINOR_LABEL != replhdr->replinst_minorver) assert(FALSE); } } assert((0 == status) || in_repl_inst_edit); if (0 != status) { if (-1 == status) rts_error(VARLSTCNT(6) ERR_REPLINSTREAD, 4, buflen, (qw_off_t *)&offset, LEN_AND_STR(fn)); else rts_error(VARLSTCNT(7) ERR_REPLINSTREAD, 4, buflen, (qw_off_t *)&offset, LEN_AND_STR(fn), status); } CLOSEFILE_RESET(fd, status); /* resets "fd" to FD_INVALID */ assert(0 == status); if (0 != status) rts_error(VARLSTCNT(5) ERR_REPLINSTCLOSE, 2, LEN_AND_STR(fn), status); } /* Description: * Writes "buflen" bytes of data from "buff" into the file "fn" at offset "offset" * Parameters: * fn : Instance file name. * offset: Offset at which to write * buff : Buffer to write from * buflen: Number of bytes to write * Return Value: * None. */ void repl_inst_write(char *fn, off_t offset, sm_uc_ptr_t buff, size_t buflen) { int status, fd, oflag; unix_db_info *udi; gd_region *reg; ZOS_ONLY(int realfiletag;) /* Assert that except for MUPIP REPLIC -INSTANCE_CREATE or -EDITINSTANCE, all callers hold the FTOK semaphore on the * replication instance file OR the journal pool lock. Note that the instance file might be pointed to by one of the * two region pointers "jnlpool.jnlpool_dummy_reg" or "recvpool.recvpool_dummy_reg" depending on whether the journal pool * or the receive pool was attached to first by this particular process. If both of them are non-NULL, both the region * pointers should be identical. This is also asserted below. * Note: Typically, journal pool lock should have sufficed. However, in certain places like jnlpool_init and recvpool_init, * the journal pool is not yet created and hence grab_lock/rel_lock does not make sense. In those case we need the FTOK * lock on the instance file. The ONLY exception to this is ROLLBACK in which case it does NOT hold the FTOK semaphore and * since it is NOT necessary for ROLLBACK to have a journal pool open, grab_lock will not be done either. Assert * accordingly. */ assert((NULL == jnlpool.jnlpool_dummy_reg) || (NULL == recvpool.recvpool_dummy_reg) || jnlpool.jnlpool_dummy_reg == recvpool.recvpool_dummy_reg); DEBUG_ONLY( reg = jnlpool.jnlpool_dummy_reg; if (NULL == reg) reg = recvpool.recvpool_dummy_reg; ) assert((NULL == reg) && (in_repl_inst_create || in_repl_inst_edit) || (NULL != reg) && !in_repl_inst_create && !in_repl_inst_edit); DEBUG_ONLY( if (NULL != reg) { udi = FILE_INFO(reg); assert(udi->grabbed_ftok_sem || ((NULL != jnlpool.jnlpool_ctl) && udi->s_addrs.now_crit) || jgbl.mur_rollback); } ) oflag = O_RDWR; if (in_repl_inst_create) oflag |= (O_CREAT | O_EXCL); OPENFILE3(fn, oflag, 0666, fd); if (FD_INVALID == fd) { if (!in_repl_inst_create) rts_error(VARLSTCNT(5) ERR_REPLINSTOPEN, 2, LEN_AND_STR(fn), errno); else rts_error(VARLSTCNT(5) ERR_REPLINSTCREATE, 2, LEN_AND_STR(fn), errno); } #ifdef __MVS__ if (-1 == (in_repl_inst_create ? gtm_zos_set_tag(fd, TAG_BINARY, TAG_NOTTEXT, TAG_FORCE, &realfiletag) : gtm_zos_tag_to_policy(fd, TAG_BINARY, &realfiletag))) TAG_POLICY_GTM_PUTMSG(fn, errno, realfiletag, TAG_BINARY); #endif assert(0 < buflen); REPL_INST_LSEEKWRITE(fd, offset, buff, buflen, status); assert(0 == status); if (0 != status) rts_error(VARLSTCNT(7) ERR_REPLINSTWRITE, 4, buflen, (qw_off_t *)&offset, LEN_AND_STR(fn), status); CLOSEFILE_RESET(fd, status); /* resets "fd" to FD_INVALID */ assert(0 == status); if (0 != status) rts_error(VARLSTCNT(5) ERR_REPLINSTCLOSE, 2, LEN_AND_STR(fn), status); } /* Description: * Hardens all pending writes for the instance file to disk * Parameters: * fn : Instance file name. * Return Value: * None. */ void repl_inst_sync(char *fn) { int status, fd, oflag; unix_db_info *udi; gd_region *reg; /* Assert that except for MUPIP REPLIC -INSTANCE_CREATE or -EDITINSTANCE, all callers hold the FTOK semaphore * on the replication instance file. Note that the instance file might be pointed to by one of the two region * pointers "jnlpool.jnlpool_dummy_reg" or "recvpool.recvpool_dummy_reg" depending on whether the journal pool * or the receive pool was attached to first by this particular process. If both of them are non-NULL, both the * region pointers should be identical. This is also asserted below. */ assert((NULL == jnlpool.jnlpool_dummy_reg) || (NULL == recvpool.recvpool_dummy_reg) || jnlpool.jnlpool_dummy_reg == recvpool.recvpool_dummy_reg); DEBUG_ONLY( reg = jnlpool.jnlpool_dummy_reg; if (NULL == reg) reg = recvpool.recvpool_dummy_reg; ) DEBUG_ONLY( assert(NULL != reg); udi = FILE_INFO(reg); assert((NULL != jnlpool.jnlpool_ctl) && udi->s_addrs.now_crit); ) oflag = O_RDWR; OPENFILE3(fn, oflag, 0666, fd); if (FD_INVALID == fd) rts_error(VARLSTCNT(5) ERR_REPLINSTOPEN, 2, LEN_AND_STR(fn), errno); GTM_REPL_INST_FSYNC(fd, status); assert(0 == status); if (0 != status) rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("fsync()"), CALLFROM, errno); CLOSEFILE_RESET(fd, status); /* resets "fd" to FD_INVALID */ assert(0 == status); if (0 != status) rts_error(VARLSTCNT(5) ERR_REPLINSTCLOSE, 2, LEN_AND_STR(fn), status); } /* Description: * Reset journal pool shmid and semid in replication instance file. * Parameters: * None * Return Value: * None */ void repl_inst_jnlpool_reset(void) { repl_inst_hdr repl_instance; unix_db_info *udi; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->grabbed_ftok_sem); if (NULL != jnlpool.repl_inst_filehdr) { /* If journal pool exists, reset sem/shm ids in the file header in the journal pool and flush changes to disk */ jnlpool.repl_inst_filehdr->jnlpool_semid = INVALID_SEMID; jnlpool.repl_inst_filehdr->jnlpool_shmid = INVALID_SHMID; jnlpool.repl_inst_filehdr->jnlpool_semid_ctime = 0; jnlpool.repl_inst_filehdr->jnlpool_shmid_ctime = 0; repl_inst_flush_filehdr(); } else { /* If journal pool does not exist, reset sem/shm ids directly in the replication instance file header on disk */ repl_inst_read((char *)udi->fn, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); repl_instance.jnlpool_semid = INVALID_SEMID; repl_instance.jnlpool_shmid = INVALID_SHMID; repl_instance.jnlpool_semid_ctime = 0; repl_instance.jnlpool_shmid_ctime = 0; repl_inst_write((char *)udi->fn, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } } /* Description: * Reset receiver pool shmid and semid in replication instance file. * Parameters: * None * Return Value: * None */ void repl_inst_recvpool_reset(void) { repl_inst_hdr repl_instance; unix_db_info *udi; udi = FILE_INFO(recvpool.recvpool_dummy_reg); assert(udi->grabbed_ftok_sem); if (NULL != jnlpool.repl_inst_filehdr) { /* If journal pool exists, reset sem/shm ids in the file header in the journal pool and flush changes to disk */ jnlpool.repl_inst_filehdr->recvpool_semid = INVALID_SEMID; jnlpool.repl_inst_filehdr->recvpool_shmid = INVALID_SHMID; jnlpool.repl_inst_filehdr->recvpool_semid_ctime = 0; jnlpool.repl_inst_filehdr->recvpool_shmid_ctime = 0; repl_inst_flush_filehdr(); } else { /* If journal pool does not exist, reset sem/shm ids directly in the replication instance file header on disk */ repl_inst_read((char *)udi->fn, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); repl_instance.recvpool_semid = INVALID_SEMID; repl_instance.recvpool_shmid = INVALID_SHMID; repl_instance.recvpool_semid_ctime = 0; repl_instance.recvpool_shmid_ctime = 0; repl_inst_write((char *)udi->fn, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } } /* Wrapper routine to GRAB the ftok semaphore lock of the replication instance file and to test for errors */ void repl_inst_ftok_sem_lock(void) { gd_region *reg; unix_db_info *udi; assert(!jgbl.mur_rollback); /* Rollback already has standalone access and will not ask for ftok lock */ assert((NULL != jnlpool.jnlpool_dummy_reg) || (NULL != recvpool.recvpool_dummy_reg)); assert((NULL == jnlpool.jnlpool_dummy_reg) || (NULL == recvpool.recvpool_dummy_reg) || (recvpool.recvpool_dummy_reg == jnlpool.jnlpool_dummy_reg)); reg = jnlpool.jnlpool_dummy_reg; if (NULL == reg) reg = recvpool.recvpool_dummy_reg; assert(NULL != reg); udi = FILE_INFO(reg); assert(!udi->grabbed_ftok_sem); if (!udi->grabbed_ftok_sem) { assert(0 == have_crit(CRIT_HAVE_ANY_REG)); if (!ftok_sem_lock(reg, FALSE, FALSE)) { assert(FALSE); rts_error(VARLSTCNT(4) ERR_REPLFTOKSEM, 2, LEN_AND_STR(udi->fn)); } } assert(udi->grabbed_ftok_sem); } /* Wrapper routine to RELEASE the ftok semaphore lock of the replication instance file and to test for errors */ void repl_inst_ftok_sem_release(void) { gd_region *reg; unix_db_info *udi; assert(!jgbl.mur_rollback); /* Rollback already has standalone access and will not ask for ftok lock */ assert((NULL != jnlpool.jnlpool_dummy_reg) || (NULL != recvpool.recvpool_dummy_reg)); assert((NULL == jnlpool.jnlpool_dummy_reg) || (NULL == recvpool.recvpool_dummy_reg) || (recvpool.recvpool_dummy_reg == jnlpool.jnlpool_dummy_reg)); reg = jnlpool.jnlpool_dummy_reg; if (NULL == reg) reg = recvpool.recvpool_dummy_reg; assert(NULL != reg); udi = FILE_INFO(reg); assert(udi->grabbed_ftok_sem); if (udi->grabbed_ftok_sem) /* Be safe in PRO and avoid releasing if we do not hold the ftok semaphore */ { assert(0 == have_crit(CRIT_HAVE_ANY_REG)); if (!ftok_sem_release(reg, FALSE, FALSE)) { assert(FALSE); rts_error(VARLSTCNT(4) ERR_REPLFTOKSEM, 2, LEN_AND_STR(udi->fn)); } } assert(!udi->grabbed_ftok_sem); } /* Description: * Get the 'n'th histinfo record from the instance file. * Parameters: * index : The number of the histinfo record to be read. 0 for the first histinfo record, 1 for the second and so on... * histinfo : A pointer to the repl_histinfo structure to be filled in. * Return Value: * 0, on success * ERR_REPLINSTNOHIST, if "index" is not a valid histinfo index. */ int4 repl_inst_histinfo_get(int4 index, repl_histinfo *histinfo) { off_t offset; unix_db_info *udi; repl_inst_hdr_ptr_t repl_inst_filehdr; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->s_addrs.now_crit || jgbl.mur_rollback); if (0 > index) return ERR_REPLINSTNOHIST; repl_inst_filehdr = jnlpool.repl_inst_filehdr; assert(NULL != repl_inst_filehdr); assert(index < repl_inst_filehdr->num_histinfo); /* assert that no caller should request a get of an unused (but allocated) histinfo */ if (index >= repl_inst_filehdr->num_alloc_histinfo) return ERR_REPLINSTNOHIST; offset = REPL_INST_HISTINFO_START + (index * SIZEOF(repl_histinfo)); repl_inst_read((char *)udi->fn, offset, (sm_uc_ptr_t)histinfo, SIZEOF(repl_histinfo)); assert(histinfo->histinfo_num == index); return 0; } /* * Parameters: * seqno : The journal seqno that is to be searched in the instance file history. * strm_idx : -1, 0, 1, 2, ... 15 indicating the stream # within which to search. * : -1 (aka INVALID_SUPPL_STRM) implies search across ALL streams. * histinfo : A pointer to the repl_histinfo to be filled in. Contents might have been modified even on error return. * Description: * If strm_idx=-1 * ----------------- * Given an input "seqno", locate the histinfo record (from ANY stream) in the instance file whose "start_seqno" * corresponds to "seqno-1". * If strm_idx=0 * ---------------- * Given an input "seqno", locate the histinfo record (from 0th stream) in the instance file whose "start_seqno" * corresponds to "seqno-1". * If strm_idx=1,2,...,15 * ------------------------- * Given an input "seqno", locate the histinfo record (from "strm_index"th stream) in the instance file * whose "strm_seqno" (not start_seqno) corresponds to "seqno-1". * Return Value: * 0, on success * ERR_REPLINSTNOHIST, if "seqno" is NOT present in the instance file history range. There are two cases to consider here. * If there was an error fetching a history record, "histinfo->histinfo_num" will be set to INVALID_HISTINFO_NUM. * Otherwise, if we ran out of history records, "histinfo" will point to the 0th history record corresponding to "strm_idx". */ int4 repl_inst_histinfo_find_seqno(seq_num seqno, int4 strm_idx, repl_histinfo *histinfo) { unix_db_info *udi; int4 histnum, status; seq_num cur_seqno; # ifdef DEBUG seq_num prev_seqno; int4 prev_histnum; # endif repl_inst_hdr_ptr_t inst_hdr; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->s_addrs.now_crit || jgbl.mur_rollback); assert(0 != seqno); inst_hdr = jnlpool.repl_inst_filehdr; assert(NULL != inst_hdr); assert((INVALID_SUPPL_STRM == strm_idx) || inst_hdr->is_supplementary && (0 <= strm_idx) && (MAX_SUPPL_STRMS > strm_idx)); assert(inst_hdr->num_histinfo <= inst_hdr->num_alloc_histinfo); if (INVALID_SUPPL_STRM == strm_idx) histnum = inst_hdr->num_histinfo - 1; else histnum = inst_hdr->last_histinfo_num[strm_idx]; assert(-1 == INVALID_HISTINFO_NUM); /* so we can safely decrement 0 and reach -1 i.e. an invalid history number */ DEBUG_ONLY(prev_seqno = 0;) do { assert(histnum < inst_hdr->num_histinfo); assert(INVALID_HISTINFO_NUM <= histnum); if (INVALID_HISTINFO_NUM == histnum) return ERR_REPLINSTNOHIST; status = repl_inst_histinfo_get(histnum, histinfo); if (0 != status) { assert(FALSE); histinfo->histinfo_num = INVALID_HISTINFO_NUM; /* signal to caller this is an out-of-design situation */ return ERR_REPLINSTNOHIST; } assert((INVALID_SUPPL_STRM == strm_idx) || (strm_idx == histinfo->strm_index)); cur_seqno = (0 < strm_idx) ? histinfo->strm_seqno : histinfo->start_seqno; assert(cur_seqno); assert((0 == prev_seqno) || (prev_seqno > cur_seqno) || ((INVALID_SUPPL_STRM == strm_idx) && (prev_seqno == cur_seqno))); DEBUG_ONLY(prev_seqno = cur_seqno;) if (seqno > cur_seqno) break; DEBUG_ONLY(prev_histnum = histnum;) histnum = (INVALID_SUPPL_STRM == strm_idx) ? (histnum - 1) : histinfo->prev_histinfo_num; } while (TRUE); return 0; } /* This function finds the histinfo in the local replication instance file corresponding to seqno "seqno-1". * It is a wrapper on top of the function "repl_inst_histinfo_find_seqno" which additionally does error checking. * For the case where "repl_inst_histinfo_find_seqno" returns 0 with a -1 histinfo_num, this function returns ERR_REPLINSTNOHIST. */ int4 repl_inst_wrapper_histinfo_find_seqno(seq_num seqno, int4 strm_idx, repl_histinfo *local_histinfo) { unix_db_info *udi; char histdetail[256]; int4 status; repl_histinfo *next_histinfo; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->s_addrs.now_crit || jgbl.mur_rollback); assert(NULL != jnlpool.repl_inst_filehdr); /* journal pool should be set up */ assert((is_src_server && ((INVALID_SUPPL_STRM == strm_index) || (0 == strm_index))) || (!is_src_server && ((INVALID_SUPPL_STRM == strm_index) || ((0 <= strm_index) && (MAX_SUPPL_STRMS > strm_index))))); status = repl_inst_histinfo_find_seqno(seqno, strm_idx, local_histinfo); assert((0 == status) || (ERR_REPLINSTNOHIST == status)); /* the only error returned by "repl_inst_histinfo_find_seqno" */ if (0 != status) { status = ERR_REPLINSTNOHIST; SPRINTF(histdetail, "seqno "INT8_FMT" "INT8_FMTX, seqno - 1, seqno - 1); gtm_putmsg(VARLSTCNT(6) ERR_REPLINSTNOHIST, 4, LEN_AND_STR(histdetail), LEN_AND_STR(udi->fn)); } else assert(0 <= local_histinfo->histinfo_num); return status; } /* Description: * Add a new histinfo record to the replication instance file. * Parameters: * histinfo : A pointer to the histinfo structure to be added to the instance file. * Return Value: * None * Errors: * Issues ERR_REPLINSTSEQORD error if new histinfo will cause seqno to be out of order. */ void repl_inst_histinfo_add(repl_histinfo *histinfo) { boolean_t is_supplementary, start_seqno_equal; int4 histinfo_num, strm_histinfo_num, prev_histinfo_num, status; int strm_idx, idx; off_t offset; repl_histinfo *last_histinfo, last_histrec, *last_strm_histinfo, last_strm_histrec; repl_histinfo last2_histinfo, *prev_strm_histinfo, prev_strm_histrec; seq_num histinfo_strm_seqno, prev_strm_seqno; unix_db_info *udi; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->s_addrs.now_crit); assert(jnlpool.repl_inst_filehdr->num_histinfo <= jnlpool.repl_inst_filehdr->num_alloc_histinfo); histinfo_num = jnlpool.repl_inst_filehdr->num_histinfo; assert(0 <= histinfo_num); strm_idx = histinfo->strm_index; /* Assert that the very first history record in any instance file (irrespective of whether the * instance is a root primary or propagating primary) should correspond to stream-0. */ assert((0 < histinfo_num) || (0 == strm_idx)); is_supplementary = jnlpool.repl_inst_filehdr->is_supplementary; assert(!is_supplementary && (0 == strm_idx) || (is_supplementary && (0 <= strm_idx) && (MAX_SUPPL_STRMS > strm_idx))); /* If -updateresync is specified and instance is not supplementary, then there better be NO history records */ assert((HISTINFO_TYPE_UPDRESYNC != histinfo->history_type) || is_supplementary || (0 == histinfo_num)); if (strm_idx && !jnlpool.jnlpool_ctl->upd_disabled) { /* A non-supplementary stream history record is being written into a supplementary root primary instance. * Convert the history record as appropriate. See below macro definition for more comments on the conversion. */ CONVERT_NONSUPPL2SUPPL_HISTINFO(histinfo, jnlpool.jnlpool_ctl) } if (0 < histinfo_num) { last_histinfo = &last_histrec; status = repl_inst_histinfo_get(histinfo_num - 1, last_histinfo); assert(0 == status); /* Since histinfo_num-1 we are passing is >=0 and < num_histinfo */ assert(jnlpool.jnlpool_ctl->last_histinfo_seqno == last_histinfo->start_seqno); if (histinfo->start_seqno < last_histinfo->start_seqno) { /* cannot create histinfo with out-of-order start_seqno */ rts_error(VARLSTCNT(8) ERR_REPLINSTSEQORD, 6, LEN_AND_LIT("New history record"), &histinfo->start_seqno, &last_histinfo->start_seqno, LEN_AND_STR(udi->fn)); } } strm_histinfo_num = jnlpool.repl_inst_filehdr->last_histinfo_num[strm_idx]; prev_histinfo_num = strm_histinfo_num; if (0 <= strm_histinfo_num) { assert(strm_histinfo_num < histinfo_num); if (strm_histinfo_num != (histinfo_num - 1)) { last_strm_histinfo = &last_strm_histrec; status = repl_inst_histinfo_get(strm_histinfo_num, last_strm_histinfo); assert(0 == status); /* Since the strm_histinfo_num we are passing is >=0 and < num_histinfo */ } else { /* Had read this history record just now from the instance file. Use it and avoid another read */ last_strm_histinfo = last_histinfo; } assert(strm_idx == last_strm_histinfo->strm_index); /* Check if the history record to be added has the same histinfo content as the last history record * already present in the instance file (in the stream of interest). This is possible in case of a secondary * where the receiver was receiving journal records (from the primary) for a while, was shut down and then * restarted. Same instance is sending information so no new histinfo information needed. Return right away. * The only exception is if this is a supplementary instance and the new history record is an UPDATERESYNC * type of record in which case it is possible the two histories have the histinfo content identical but * have different start_seqnos. In this case, some updates went in between the two histories so we want * to record the input history as a separate record instead of returning (since this signals the beginning * of a new stream of updates). */ if ((!is_supplementary || (HISTINFO_TYPE_UPDRESYNC != histinfo->history_type)) && !STRCMP(last_strm_histinfo->root_primary_instname, histinfo->root_primary_instname) && (last_strm_histinfo->root_primary_cycle == histinfo->root_primary_cycle) && (last_strm_histinfo->creator_pid == histinfo->creator_pid) && (last_strm_histinfo->created_time == histinfo->created_time)) { return; } assert((histinfo->start_seqno != last_strm_histinfo->start_seqno) || (histinfo->strm_seqno == last_strm_histinfo->strm_seqno) || (HISTINFO_TYPE_NORESYNC == histinfo->history_type) || (HISTINFO_TYPE_UPDRESYNC == histinfo->history_type)); /* If stream seqnos match between input history and last stream specific history in the instance file, * make sure the to-be-written history record skips past the last stream specific history record (as we * expect a decreasing sequence of strm_seqnos in the "prev_histinfo_num" linked list of history records). * The only exception is if we are a supplementary instance and this is stream # 0. In that case, only if * the start_seqno is also equal, will we skip. This is because if start_seqno is not equal, the stream # 0 * history records identify a range of updates that happened (even if the updates happened in non-zero * stream #s) and that is used by history record matching between two supplementary instances at replication * connection time. * The same skipping logic applies to "start_seqno" in case the instance is non-supplementary (in which case * the "strm_seqno" field is 0). */ histinfo_strm_seqno = histinfo->strm_seqno; prev_strm_seqno = last_strm_histinfo->strm_seqno; if (histinfo_strm_seqno == prev_strm_seqno) { start_seqno_equal = (histinfo->start_seqno == last_strm_histinfo->start_seqno); if (histinfo_strm_seqno && strm_idx || start_seqno_equal) { assert(prev_histinfo_num > last_strm_histinfo->prev_histinfo_num); prev_histinfo_num = last_strm_histinfo->prev_histinfo_num; } if (start_seqno_equal && (strm_histinfo_num == (histinfo_num - 1))) { /* Starting seqno of the last histinfo in the instance file matches the input histinfo. * This means there are no journal records corresponding to the input stream in the journal * files after the last histinfo (which happens to be same as the input stream) was written * in the instance file. Overwrite the last histinfo with the new histinfo information before * writing new journal records. */ histinfo_num--; } } else if (HISTINFO_TYPE_NORESYNC == histinfo->history_type) { /* Determine the correct value of "prev_histinfo_num" */ prev_strm_histinfo = &prev_strm_histrec; prev_strm_histrec = *last_strm_histinfo; assert(prev_strm_seqno == prev_strm_histinfo->strm_seqno); while (histinfo_strm_seqno <= prev_strm_seqno) { prev_histinfo_num = prev_strm_histinfo->prev_histinfo_num; assert(INVALID_HISTINFO_NUM != prev_histinfo_num); if (INVALID_HISTINFO_NUM == prev_histinfo_num) break; status = repl_inst_histinfo_get(prev_histinfo_num, prev_strm_histinfo); assert(0 == status); /* Since prev_histinfo_num we are passing is >=0 and < num_histinfo */ assert(prev_strm_seqno > prev_strm_histinfo->strm_seqno); prev_strm_seqno = prev_strm_histinfo->strm_seqno; } } } /* Assert that the history record we are going to add is in sync with the current seqno state of the instance */ assert(jnlpool.jnlpool_ctl->jnl_seqno == histinfo->start_seqno); assert(jnlpool.jnlpool_ctl->strm_seqno[histinfo->strm_index] == histinfo->strm_seqno); offset = REPL_INST_HISTINFO_START + (SIZEOF(repl_histinfo) * (off_t)histinfo_num); /* Initialize the following members of the repl_histinfo structure. Everything else should be initialized by caller. * histinfo_num * prev_histinfo_num * last_histinfo_num[] */ histinfo->histinfo_num = histinfo_num; histinfo->prev_histinfo_num = (HISTINFO_TYPE_UPDRESYNC == histinfo->history_type) ? INVALID_HISTINFO_NUM : prev_histinfo_num; assert(histinfo->prev_histinfo_num < histinfo->histinfo_num); for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) { assert((jnlpool.repl_inst_filehdr->last_histinfo_num[idx] < histinfo_num) || (idx == strm_idx) && (jnlpool.repl_inst_filehdr->last_histinfo_num[idx] == histinfo_num)); histinfo->last_histinfo_num[idx] = jnlpool.repl_inst_filehdr->last_histinfo_num[idx]; } if (strm_histinfo_num == histinfo_num) { /* The last history record in the instance file is going to be overwritten with another history record of * the same stream. In this case, jnlpool.repl_inst_filehdr->last_histinfo_num[strm_idx] would not reflect a * state of the instance file BEFORE this history record was added. So find the correct value. Thankfully * the last history record (that we are about to overwrite) already has this value so copy it over. */ histinfo->last_histinfo_num[strm_idx] = last_histinfo->last_histinfo_num[strm_idx]; } assert(strm_histinfo_num == jnlpool.repl_inst_filehdr->last_histinfo_num[strm_idx]); assert(strm_histinfo_num <= histinfo_num); assert(strm_histinfo_num >= prev_histinfo_num); assert(histinfo_num > prev_histinfo_num); assert((INVALID_HISTINFO_NUM == histinfo->prev_histinfo_num) || (0 <= histinfo->prev_histinfo_num)); assert(is_supplementary || (prev_histinfo_num == (histinfo_num - 1))); # ifdef DEBUG /* Assert that the prev_histinfo_num list of history records have decreasing "start_seqno" and "strm_seqno" values. * The only exception is stream # 0 for a supplementary instance as described in a previous comment in this function. */ if (INVALID_HISTINFO_NUM != histinfo->prev_histinfo_num) { assert(histinfo->prev_histinfo_num == prev_histinfo_num); status = repl_inst_histinfo_get(prev_histinfo_num, &last2_histinfo); assert(0 == status); /* Since the strm_histinfo_num we are passing is >=0 and < num_histinfo */ assert(strm_idx == last2_histinfo.strm_index); /* they both better have the same stream # */ assert(histinfo->start_seqno > last2_histinfo.start_seqno); assert(!histinfo->strm_seqno || (histinfo->strm_seqno > last2_histinfo.strm_seqno) || (0 == strm_idx)); } /* Assert that the last_histinfo_num fields reflect a state of the instance file that does not include the about-to-be * added history record. This ensures the instance file header will get restored to a valid state in case of a rollback * that truncates exactly at this history record boundary. */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) assert(histinfo->last_histinfo_num[idx] < histinfo_num); # endif /* Assert that if this is not the first history record being written into the instance file * it should have a valid 0th stream history record number. This is relied upon by "gtmsource_send_new_histrec" */ assert((0 == histinfo_num) || (INVALID_HISTINFO_NUM != histinfo->last_histinfo_num[0])); repl_inst_write(udi->fn, offset, (sm_uc_ptr_t)histinfo, SIZEOF(repl_histinfo)); /* Update stream specific history number fields in the file header to reflect the latest history addition to this stream */ jnlpool.repl_inst_filehdr->last_histinfo_num[strm_idx] = histinfo_num; /* If -updateresync history record for a non-zero stream #, then initialize strm_group_info in file header */ if ((0 < strm_idx) && (HISTINFO_TYPE_UPDRESYNC == histinfo->history_type)) jnlpool.repl_inst_filehdr->strm_group_info[strm_idx - 1] = histinfo->lms_group; histinfo_num++; if (jnlpool.repl_inst_filehdr->num_alloc_histinfo < histinfo_num) jnlpool.repl_inst_filehdr->num_alloc_histinfo = histinfo_num; jnlpool.repl_inst_filehdr->num_histinfo = histinfo_num; repl_inst_flush_filehdr(); jnlpool.jnlpool_ctl->last_histinfo_seqno = histinfo->start_seqno; repl_inst_sync(udi->fn); /* Harden the new histinfo to disk before any logical records for this arrive. */ return; } /* Description: * Given an input "rollback_seqno", virtually truncate all histinfo records that correspond to seqnos >= "rollback_seqno" * This function also updates other fields (unrelated to histinfo truncation) in the file header * to reflect a clean shutdown by MUPIP JOURNAL ROLLBACK. This function is also invoked by MUPIP BACKUP in order * to ensure the backed up instance file is initialized to reflect a clean shutdown. * Parameters: * rollback_seqno : The seqno after which all histinfo records have to be truncated. * Note: In case of a supplementary instance file, this function expects the caller to have * set "inst_hdr->strm_seqno[]" to reflect the "rollback_seqno". * Return Value: * Sequence number (start_seqno) of the last history record in the instance file * Errors: * Issues ERR_REPLINSTNOHIST message if the call to "repl_inst_histinfo_find_seqno" returned an error. */ seq_num repl_inst_histinfo_truncate(seq_num rollback_seqno) { char histdetail[256]; int4 status, index, num_histinfo, last_histnum; int idx; repl_histinfo temphistinfo, nexthistinfo, strmhistinfo; repl_inst_hdr_ptr_t inst_hdr; unix_db_info *udi; seq_num last_histinfo_seqno = 0; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(in_backup || jgbl.mur_rollback); /* Only ROLLBACK or BACKUP calls this function */ assert(udi->s_addrs.now_crit || jgbl.mur_rollback); inst_hdr = jnlpool.repl_inst_filehdr; assert(NULL != inst_hdr); /* Should have been set when mupip rollback invoked "mu_replpool_grab_sem" */ num_histinfo = inst_hdr->num_histinfo; if (0 != num_histinfo) { status = repl_inst_histinfo_find_seqno(rollback_seqno, INVALID_SUPPL_STRM, &temphistinfo); if (0 != status) { assert(ERR_REPLINSTNOHIST == status); /* the only error returned by "repl_inst_histinfo_find_seqno" */ if ((INVALID_HISTINFO_NUM == temphistinfo.histinfo_num) || (temphistinfo.start_seqno != rollback_seqno)) { /* The truncation seqno is not the starting seqno of the instance file. In that case, issue * a RELINSTNOHIST warning message even though rollback is going to proceed anycase. */ assert(FALSE); NON_GTM64_ONLY(SPRINTF(histdetail, "seqno [0x%llx]", rollback_seqno - 1)); GTM64_ONLY(SPRINTF(histdetail, "seqno [0x%lx]", rollback_seqno - 1)); gtm_putmsg(VARLSTCNT(6) MAKE_MSG_WARNING(ERR_REPLINSTNOHIST), 4, LEN_AND_STR(histdetail), LEN_AND_STR(udi->fn)); } index = -1; /* Since we are rolling back all history records in the instance file, * clear all of "strm_group_info[]" and "last_histinfo_num[]" arrays. * The following logic is similar to that in "repl_inst_create" to initialize the above 2 fields. * Note that we keep "jnl_seqno" and "strm_seqno" set to whatever value it came in with (as opposed * to setting it to 0). This is different from what is done in "repl_inst_create" because we want * to keep these set to a non-zero value if possible (see detailed comment below where "jnl_seqno" * gets set). Keeping "jnl_seqno" at a non-zero value necessitates keeping "strm_seqno" at a non-zero * value as well in order to avoid REPLINSTDBSTRM errors at source server startup. */ assert(MAX_SUPPL_STRMS == ARRAYSIZE(inst_hdr->last_histinfo_num)); for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) inst_hdr->last_histinfo_num[idx] = INVALID_HISTINFO_NUM; if (inst_hdr->is_supplementary) { assert(MAX_SUPPL_STRMS == ARRAYSIZE(inst_hdr->strm_seqno)); assert(SIZEOF(seq_num) == SIZEOF(inst_hdr->strm_seqno[0])); assert((MAX_SUPPL_STRMS - 1) == ARRAYSIZE(inst_hdr->strm_group_info)); assert(SIZEOF(repl_inst_uuid) == SIZEOF(inst_hdr->strm_group_info[0])); /* Keep the strm_seqno 0 for those streams which this instance has never used/communicated. For all * other stream#, set the strm_seqno to 1 if the current value of strm_seqno is 0. If the current * value of strm_seqno is non-zero, let it stay as it is (see comment above about strm_seqno). * This way, if this instance reconnects after the ROLLBACK to the same instance it was * communicating before, we avoid issuing REPLINSTNOHIST thereby making it user-friendly. * Note: The LMS group info for stream# "i" is found in strm_group_info[i - 1] (used below) */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) { if ((idx == 0) || (IS_REPL_INST_UUID_NON_NULL(inst_hdr->strm_group_info[idx - 1]))) { if (0 == inst_hdr->strm_seqno[idx]) inst_hdr->strm_seqno[idx] = 1; } # ifdef DEBUG else assert(0 == inst_hdr->strm_seqno[idx]); # endif } /* Leave the LMS group information as-is in the instance file header. By doing so, we avoid cases * where receiver server continuing after the rollback issues an INSUNKNOWN error. While this is * a valid error, we try to make it as user-friendly as possible. */ } } else { index = temphistinfo.histinfo_num; assert(temphistinfo.start_seqno < rollback_seqno); assert(0 <= index); assert(index <= (num_histinfo - 1)); last_histinfo_seqno = temphistinfo.start_seqno; if (index < (num_histinfo - 1)) { status = repl_inst_histinfo_get(index + 1, &nexthistinfo); assert(0 == status); /* Since the histinfo_num we are passing is >=0 and <= num_histinfo */ assert(nexthistinfo.start_seqno >= rollback_seqno); assert(nexthistinfo.histinfo_num == (index + 1)); /* Copy over information from this history record back to the instance file header */ assert(SIZEOF(inst_hdr->last_histinfo_num) == SIZEOF(nexthistinfo.last_histinfo_num)); memcpy(inst_hdr->last_histinfo_num, nexthistinfo.last_histinfo_num, SIZEOF(nexthistinfo.last_histinfo_num)); if (inst_hdr->is_supplementary) { /* inst_hdr->strm_seqno[] is already set by caller */ assert((MAX_SUPPL_STRMS - 1) == ARRAYSIZE(inst_hdr->strm_group_info)); for (idx = 0; idx < (MAX_SUPPL_STRMS - 1); idx++) { last_histnum = nexthistinfo.last_histinfo_num[idx + 1]; assert(INVALID_HISTINFO_NUM <= last_histnum); assert(last_histnum < nexthistinfo.histinfo_num); if (INVALID_HISTINFO_NUM != last_histnum) { status = repl_inst_histinfo_get(last_histnum, &strmhistinfo); assert(0 == status); assert(strmhistinfo.histinfo_num == last_histnum); assert(strmhistinfo.start_seqno < rollback_seqno); assert(strmhistinfo.strm_index); assert(MAX_SUPPL_STRMS > strmhistinfo.strm_index); assert(IS_REPL_INST_UUID_NON_NULL(strmhistinfo.lms_group)); inst_hdr->strm_group_info[idx] = strmhistinfo.lms_group; } else if (IS_REPL_INST_UUID_NON_NULL(inst_hdr->strm_group_info[idx])) { /* stream# (idx + 1) has a non-zero UUID information in the file header * but all the history records corresponding to this stream are now * truncated. This also implies that strm_seqno of this stream is reset * to zero by ROLLBACK. To avoid REPLINSTNOHIST next time a communication * happens with the instance corresponding to stream# idx + 1, set the * strm_seqno to 1. * Note: The LMS group info for stream-i is found in strm_group_info[i - 1] */ inst_hdr->strm_seqno[idx + 1] = 1; /* Also, leave the LMS group information for stream# idx + 1 as-is in the * instance file header By doing so, we avoid cases where receiver server * continuing after the rollback issues an INSUNKNOWN error. While this is * a valid error, we try to mae it as user-friendly as possible. */ } } } } /* else index == "num_histinfo - 1" so no changes needed to "last_histinfo_num[]" * or "strm_seqno[]" or "strm_group_info[]" arrays. */ } index++; assert((index == inst_hdr->num_histinfo) || ((inst_hdr->num_histinfo >= 0) && (inst_hdr->num_alloc_histinfo > index))); inst_hdr->num_histinfo = index; } /* Reset "jnl_seqno" to the rollback seqno so future REPLINSTDBMATCH errors are avoided in "gtmsource_seqno_init". * Note that it is possible inst_hdr->num_histinfo is 0 at this point (i.e. no history records). In that case, * repl_inst_create sets the "jnl_seqno" to 0 whereas we might set it here to a potentially non-zero value. * That is because repl_inst_create does not go through the database and get the max of the reg_seqnos to figure * out the instance jnl_seqno. Hence it sets it to a value of 0 indicating the source server that starts up the * instance to fill it in with a non-zero value. On the other hand, rollback or backup (both of which can call * this function "repl_inst_histinfo_truncate") know exactly what the instance seqno is and so can safely set the * "jnl_seqno" to a non-zero value even though there are no history records. Setting it to a non-zero value whenever * possible is useful for example when we ship a backup of a freshly created live non-supplementary instance (with * jnl_seqno of 1) to be used as input to the -updateresync qualifier of a receiver startup on a supplementary * instance. In this case, if the backup had a jnl_seqno of 0, the startup would fail. But since it has a non-zero * "jnl_seqno" (even though there are no history records), the initial handshake between the non-supplementary and * supplementary instances is possible (they avoid history record exchanges due to jnl_seqno == 1). A zero jnl_seqno * would have resulted in a UPDSYNCINSTFILE error in the initial handshake. */ inst_hdr->jnl_seqno = rollback_seqno; /* Reset sem/shm ids to reflect a clean shutdown so future REPLREQRUNDOWN errors are avoided at "jnlpool_init" time */ if (!jgbl.mur_rollback) { /* Reset semid/sem_ctime fields in the instance file header. */ /* Reset "crash" to FALSE so future REPLREQROLLBACK errors are avoided at "jnlpool_init" time */ inst_hdr->crash = FALSE; inst_hdr->jnlpool_semid = INVALID_SEMID; inst_hdr->jnlpool_shmid = INVALID_SHMID; inst_hdr->jnlpool_semid_ctime = 0; inst_hdr->jnlpool_shmid_ctime = 0; inst_hdr->recvpool_semid = INVALID_SEMID; /* Just in case it is not already reset */ inst_hdr->recvpool_shmid = INVALID_SHMID; /* Just in case it is not already reset */ inst_hdr->recvpool_semid_ctime = 0; inst_hdr->recvpool_shmid_ctime = 0; } /* else for rollback, we reset the IPC fields in mu_replpool_release_sem() and crash in mur_close_files */ /* Flush all file header changes in jnlpool.repl_inst_filehdr to disk */ repl_inst_flush_filehdr(); assert((0 == inst_hdr->num_histinfo) || (0 < last_histinfo_seqno)); return last_histinfo_seqno; } /* Description: * Flushes the instance file header pointed to by "jnlpool.repl_inst_filehdr" to disk. * Parameters: * None * Return Value: * None */ void repl_inst_flush_filehdr() { unix_db_info *udi; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); /* We could come here from several paths. If journal pool exists, we would have done a grab_lock. This covers most of the * cases. If the journal pool doesn't exist, then we could come here from one of the following places * * ROLLBACK (online/noonline): * We already hold standalone access on the journal pool and if the journal pool exists, we also hold the journal pool * lock * * MUPIP RUNDOWN -> mu_rndwn_repl_instance: * We hold the ftok on the instance file and have already made sure that no one else is attached to the journal pool. Even * though we don't hold the access control on the journal pool, no one else can startup at this point because they need * the ftok for which they will have to wait. * * gtmsource_shutdown -> repl_inst_jnlpool_reset: * We hold the ftok on the instance file and have already made sure that no one else is attached to the journal pool. Even * though we don't hold the access control on the journal pool, no one else can startup at this point because they need * the ftok for which they will have to wait. * * gtmrecv_shutdown -> repl_inst_recvpool_reset: * Same as above. * So, in all cases, we are guaranteed that the following code is mutually exclusive (which is what we want). */ assert(udi->s_addrs.now_crit || udi->grabbed_ftok_sem || (jgbl.mur_rollback && holds_sem[SOURCE][JNL_POOL_ACCESS_SEM])); if (jnlpool.jnlpool_dummy_reg->open) COPY_JCTL_STRMSEQNO_TO_INSTHDR_IF_NEEDED; /* Keep the file header copy of "strm_seqno" uptodate with jnlpool_ctl */ assert((NULL == jnlpool.jnlpool_ctl) || udi->s_addrs.now_crit); assert(NULL != jnlpool.repl_inst_filehdr); /* flush the instance file header */ repl_inst_write(udi->fn, (off_t)0, (sm_uc_ptr_t)jnlpool.repl_inst_filehdr, REPL_INST_HDR_SIZE); } /* Description: * Flushes the "gtmsrc_lcl" structure corresponding to the jnlpool.gtmsource_local structure for the * calling source server. Updates "gtmsource_local->last_flush_resync_seqno" to equal "gtmsource_local->read_jnl_seqno" * Parameters: * None * Return Value: * None */ void repl_inst_flush_gtmsrc_lcl() { unix_db_info *udi; int4 index; off_t offset; gtmsrc_lcl_ptr_t gtmsrclcl_ptr; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(!jgbl.mur_rollback); /* Rollback should never reach here */ assert(udi->s_addrs.now_crit); assert(NULL != jnlpool.gtmsource_local); index = jnlpool.gtmsource_local->gtmsrc_lcl_array_index; assert(0 <= index); assert(jnlpool.gtmsource_local == &jnlpool.gtmsource_local_array[index]); gtmsrclcl_ptr = &jnlpool.gtmsrc_lcl_array[index]; assert(jnlpool.jnlpool_dummy_reg->open); /* journal pool exists and this process has done "jnlpool_init" */ /* Copy each field from "gtmsource_local" to "gtmsrc_lcl" before flushing it to disk. * Do not need the journal pool lock, as we are the only ones reading/updating the below fields * in "gtmsource_local" or "gtmsrc_lcl". */ COPY_GTMSOURCELOCAL_TO_GTMSRCLCL(jnlpool.gtmsource_local, gtmsrclcl_ptr); offset = REPL_INST_HDR_SIZE + (SIZEOF(gtmsrc_lcl) * (off_t)index); repl_inst_write(udi->fn, offset, (sm_uc_ptr_t)gtmsrclcl_ptr, SIZEOF(gtmsrc_lcl)); jnlpool.gtmsource_local->last_flush_resync_seqno = jnlpool.gtmsource_local->read_jnl_seqno; } /* Description: * Flushes the "repl_inst_hdr" and "gtmsrc_lcl" sections in the journal pool to the on disk copy of the instance file. * Parameters: * None * Return Value: * None */ void repl_inst_flush_jnlpool(boolean_t reset_replpool_fields, boolean_t reset_crash) { unix_db_info *udi; int4 index; gtmsrc_lcl_ptr_t gtmsrclcl_ptr; gtmsource_local_ptr_t gtmsourcelocal_ptr; assert(NULL != jnlpool.jnlpool_dummy_reg); udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); /* This function should be invoked only if the caller determines this is last process attached to the journal pool. * Since the ftok lock on the instance file is already held, no other process will be allowed to attach to the * journal pool and hence this is the only process having access to the journal pool during this function. The only * exception is if it is invoked from mur_open_files for Online Rollback. But, in that case Online Rollback will be * holding the access control. Any process calling this function, needs the access control semaphore and hence will * wait for Online Rollback to complete. */ assert(udi->grabbed_ftok_sem || (jgbl.onlnrlbk && udi->s_addrs.now_crit)); assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); assert(NULL != jnlpool.gtmsource_local_array); assert(NULL != jnlpool.gtmsrc_lcl_array); assert(NULL != jnlpool.repl_inst_filehdr); assert(NULL != jnlpool.jnlpool_ctl); assert((sm_uc_ptr_t)jnlpool.gtmsrc_lcl_array == (sm_uc_ptr_t)jnlpool.repl_inst_filehdr + REPL_INST_HDR_SIZE); /* Reset the instance file header fields (if needed) before flushing and removing the journal pool shared memory */ if (reset_crash) jnlpool.repl_inst_filehdr->crash = FALSE; if (!jgbl.onlnrlbk) { if (reset_replpool_fields) { jnlpool.repl_inst_filehdr->jnlpool_semid = INVALID_SEMID; jnlpool.repl_inst_filehdr->jnlpool_shmid = INVALID_SHMID; jnlpool.repl_inst_filehdr->recvpool_semid = INVALID_SEMID; /* Just in case it is not already reset */ jnlpool.repl_inst_filehdr->recvpool_shmid = INVALID_SHMID; /* Just in case it is not already reset */ } } /* If the source server that created the journal pool died before it was completely initialized in "gtmsource_seqno_init" * do not copy seqnos from the journal pool into the instance file header. Instead keep the instance file header unchanged. */ if (jnlpool.jnlpool_ctl->pool_initialized) { assert(jnlpool.jnlpool_ctl->start_jnl_seqno); assert(jnlpool.jnlpool_ctl->jnl_seqno); jnlpool.repl_inst_filehdr->jnl_seqno = jnlpool.jnlpool_ctl->jnl_seqno; COPY_JCTL_STRMSEQNO_TO_INSTHDR_IF_NEEDED; /* Keep the file header copy of "strm_seqno" uptodate with jnlpool_ctl */ /* Copy all "gtmsource_local" to corresponding "gtmsrc_lcl" structures before flushing to instance file */ gtmsourcelocal_ptr = &jnlpool.gtmsource_local_array[0]; gtmsrclcl_ptr = &jnlpool.gtmsrc_lcl_array[0]; for (index = 0; index < NUM_GTMSRC_LCL; index++, gtmsourcelocal_ptr++, gtmsrclcl_ptr++) COPY_GTMSOURCELOCAL_TO_GTMSRCLCL(gtmsourcelocal_ptr, gtmsrclcl_ptr); repl_inst_write(udi->fn, (off_t)0, (sm_uc_ptr_t)jnlpool.repl_inst_filehdr, REPL_INST_HDR_SIZE + GTMSRC_LCL_SIZE); } else repl_inst_write(udi->fn, (off_t)0, (sm_uc_ptr_t)jnlpool.repl_inst_filehdr, REPL_INST_HDR_SIZE); } /* This function determines if this replication instance was formerly a root primary. It finds this out by looking at the * last histinfo record in the instance file and comparing the "root_primary_instname" field there with this instance name. * If they are the same, it means the last histinfo was generated by this instance and hence was a root primary then. This * function will only be invoked by a propagating primary instance (RECEIVER SERVER or ROLLBACK -FETCHRESYNC). * * It returns TRUE only if the instance file header field "was_rootprimary" is TRUE and if the last histinfo record was generated * by this instance. It returns FALSE otherwise. */ boolean_t repl_inst_was_rootprimary(void) { int4 histinfo_num, status; repl_histinfo temphistinfo, *last_histinfo = &temphistinfo; boolean_t was_rootprimary, was_crit = FALSE; sgmnt_addrs *csa; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; if (NULL != jnlpool.jnlpool_ctl) { /* If the journal pool is available (indicated by NULL != jnlpool_ctl), we expect jnlpool_dummy_reg to be open. * The only exception is online rollback which doesn't do a jnlpool_init thereby leaving jnlpool_dummy_reg->open * to be FALSE. Assert accordingly. */ assert(((NULL != jnlpool.jnlpool_dummy_reg) && jnlpool.jnlpool_dummy_reg->open) || jgbl.onlnrlbk || (jgbl.mur_rollback && ANTICIPATORY_FREEZE_AVAILABLE)); csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs; ASSERT_VALID_JNLPOOL(csa); assert(csa->now_crit); } else assert(jgbl.mur_rollback); /* ROLLBACK (holding access control lock) can come here without journal pool */ /* If this is a supplementary instance, look at the last history record corresponding to the 0th stream index. * If not, look at the last history record. This is okay since there is no multiple streams in this case. */ histinfo_num = (!jnlpool.repl_inst_filehdr->is_supplementary) ? (jnlpool.repl_inst_filehdr->num_histinfo - 1) : jnlpool.repl_inst_filehdr->last_histinfo_num[0]; was_rootprimary = jnlpool.repl_inst_filehdr->was_rootprimary; assert(histinfo_num < jnlpool.repl_inst_filehdr->num_alloc_histinfo); if (was_rootprimary && (0 <= histinfo_num)) { status = repl_inst_histinfo_get(histinfo_num, last_histinfo); assert(0 == status); /* Since the histinfo_num we are passing is >=0 and < num_histinfo */ was_rootprimary = !STRCMP(last_histinfo->root_primary_instname, jnlpool.repl_inst_filehdr->inst_info.this_instname); } else was_rootprimary = FALSE; return was_rootprimary; } /* This function resets "zqgblmod_seqno" and "zqgblmod_tn" in all replicated database file headers to 0. * This shares a lot of its code with the function "gtmsource_update_zqgblmod_seqno_and_tn". * Any changes there might need to be reflected here. */ int4 repl_inst_reset_zqgblmod_seqno_and_tn(void) { gd_region *reg, *reg_top; int ret; boolean_t all_files_open; sgmnt_addrs *repl_csa; ret = SS_NORMAL; /* assume success */ /* source server calls this from gtmsource_losttncomplete which always holds the journal pool access control semaphore * Assert this. */ assert(is_rcvr_server || holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); if (0 == jnlpool.jnlpool_ctl->max_zqgblmod_seqno) { /* Already reset to 0 by a previous call to this function. No need to do it again. */ return ret; } /* This function is currently ONLY called by receiver server AND mupip replic -source -losttncomplete * both of which should have NO GBLDIR or REGION OPEN at this time. Assert that. */ assert(NULL == gd_header); if (NULL == gd_header) gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */ all_files_open = region_init(FALSE); if (!all_files_open) rts_error(VARLSTCNT(1) ERR_NOTALLDBOPN); repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs; for (reg = gd_header->regions, reg_top = reg + gd_header->n_regions; reg < reg_top; reg++) { assert(reg->open); TP_CHANGE_REG(reg); if (!REPL_ALLOWED(cs_data)) continue; /* csa->hdr->zqgblmod_seqno is modified by the source server and an online rollback (both of these hold the * database crit while doing so). It is also read by fileheader_sync() which does so while holding crit. * To avoid the latter from reading an inconsistent value (i.e neither the pre-update nor the post-update * value, which is possible if the 8-byte operation is not atomic but a sequence of two 4-byte operations * AND if the pre-update and post-update value differ in their most significant 4-bytes) we grab_crit. We * could have used QWCHANGE_IS_READER_CONSISTENT macro (which checks for most significant 4-byte difference) * instead to determine if it is really necessary to grab crit. But, since the update to zqgblmod_seqno is a * rare operation, we decided to play it safe. */ assert(!cs_addrs->hold_onto_crit); /* this ensures we can safely do unconditional grab_crit and rel_crit */ grab_crit(reg); if (cs_addrs->onln_rlbk_cycle != cs_addrs->nl->onln_rlbk_cycle) { /* concurrent online rollback */ assert(is_rcvr_server); SYNC_ONLN_RLBK_CYCLES; rel_crit(reg); ret = -1; /* failure */ break; } cs_addrs->hdr->zqgblmod_seqno = (seq_num)0; cs_addrs->hdr->zqgblmod_tn = (trans_num)0; rel_crit(reg); } assert((SS_NORMAL == ret) || (reg < reg_top)); if (reg >= reg_top) { assert(!repl_csa->hold_onto_crit); /* so we can do unconditional grab_lock and rel_lock */ /* Since the source server holds the access control at this point, a concurrent online rollback is NOT possible. * But, if we are here from receiver code, then we cannot guarantee this. So, get the journal pool lock and if * an online rollback is detected, return without resetting max_zqgblmod_seqno. The caller knows to take appropriate * action (on seeing -1 as the return code). */ grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY); if (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle) { assert(is_rcvr_server); SYNC_ONLN_RLBK_CYCLES; rel_lock(jnlpool.jnlpool_dummy_reg); ret = -1; /* failure */ } else { jnlpool.jnlpool_ctl->max_zqgblmod_seqno = 0; rel_lock(jnlpool.jnlpool_dummy_reg); } } for (reg = gd_header->regions, reg_top = reg + gd_header->n_regions; reg < reg_top; reg++) { /* Rundown all databases that we opened as we dont need them anymore. This is not done in the previous * loop as it has to wait until the ftok semaphore of the instance file has been released as otherwise * an assert in gds_rundown will fail as it tries to get the ftok semaphore of the database while holding * another ftok semaphore already. */ assert(reg->open); TP_CHANGE_REG(reg); assert(!cs_addrs->now_crit); UNIX_ONLY(ret |=) gds_rundown(); } assert(!repl_csa->now_crit); return ret; }