/****************************************************************
 *								*
 *	Copyright 2001, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_stat.h"
|
|
#include "gtm_stdlib.h"
|
|
#include "gtm_fcntl.h"
|
|
#include "gtm_time.h"
|
|
#include "gtm_unistd.h"
|
|
#include "gtm_stdio.h"
|
|
#include "gtm_inet.h"
|
|
|
|
#include <sys/mman.h>
|
|
#include <errno.h>
|
|
#ifdef VMS
|
|
#include <ssdef.h>
|
|
#include <descrip.h> /* Required for gtmsource.h */
|
|
#endif
|
|
|
|
#include "cdb_sc.h"
|
|
#include "gtm_string.h"
|
|
#include "gdsroot.h"
|
|
#include "gdskill.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsblk.h"
|
|
#include "gdsfhead.h"
|
|
#include "gdscc.h"
|
|
#include "copy.h"
|
|
#include "filestruct.h"
|
|
#include "jnl.h"
|
|
#include "buddy_list.h" /* needed for tp.h */
|
|
#include "hashtab_int4.h" /* needed for muprec.h and tp.h */
|
|
#include "hashtab_int8.h" /* needed for muprec.h */
|
|
#include "hashtab_mname.h" /* needed for muprec.h */
|
|
#include "muprec.h"
|
|
#include "tp.h"
|
|
#include "iosp.h"
|
|
#include "gtmrecv.h"
|
|
#include "cli.h"
|
|
#include "error.h"
|
|
#include "repl_dbg.h"
|
|
#include "repl_msg.h"
|
|
#include "gtmsource.h"
|
|
#include "repl_shutdcode.h"
|
|
#include "repl_sp.h"
|
|
#include "jnl_write.h"
|
|
#ifdef UNIX
|
|
#include "repl_instance.h"
|
|
#include "gtmio.h"
|
|
#include "repl_inst_dump.h" /* for "repl_dump_histinfo" prototype */
|
|
#endif
|
|
#ifdef GTM_TRIGGER
|
|
#include <rtnhdr.h> /* for rtn_tabent in gv_trigger.h */
|
|
#include "gv_trigger.h"
|
|
#include "targ_alloc.h"
|
|
#endif
|
|
#ifdef VMS
|
|
#include <fab.h>
|
|
#include <rms.h>
|
|
#include <iodef.h>
|
|
#include <secdef.h>
|
|
#include <psldef.h>
|
|
#include <lckdef.h>
|
|
#include <syidef.h>
|
|
#include <xab.h>
|
|
#include <prtdef.h>
|
|
#endif
|
|
#include "op.h"
|
|
#include "svnames.h" /* for SV_ZTWORMHOLE */
|
|
#include "gvcst_protos.h" /* for gvcst_init prototype */
|
|
#include "read_db_files_from_gld.h"
|
|
#include "updproc.h"
|
|
#include "tp_change_reg.h"
|
|
#include "wcs_flu.h"
|
|
#include "repl_log.h"
|
|
#include "tp_restart.h"
|
|
#include "gtmmsg.h" /* for gtm_putmsg() prototype */
|
|
#include "mu_gv_stack_init.h"
|
|
#include "jnl_typedef.h"
|
|
#include "memcoherency.h"
|
|
#include "aswp.h"
|
|
#include "jnl_get_checksum.h"
|
|
#include "updproc_get_gblname.h"
|
|
#include "wcs_recover.h"
|
|
#include "have_crit.h"
|
|
#include "wbox_test_init.h"
|
|
#include "format_targ_key.h"
|
|
#include "op_tcommit.h"
|
|
#include "error_trap.h"
|
|
#include "tp_frame.h"
|
|
#include "gvcst_jrt_null.h" /* for gvcst_jrt_null prototype */
|
|
#include "preemptive_ch.h"
|
|
|
|
/* Tuning constants for the update process idle/wait loops.
 *
 * MAX_IDLE_HARD_SPINS            : fail-safe count to avoid hanging the CPU in a tight loop while the process is idle.
 * UPDPROC_WAIT_FOR_READJNLSEQNO  : sleep interval (ms) used while polling for upd_proc_local->read_jnl_seqno.
 * UPDPROC_WAIT_FOR_STARTJNLSEQNO : sleep interval (ms) used while polling for recvpool_ctl->jnl_seqno to be reset.
 */
#define MAX_IDLE_HARD_SPINS		1000
#define UPDPROC_WAIT_FOR_READJNLSEQNO	100
#define UPDPROC_WAIT_FOR_STARTJNLSEQNO	100
|
|
|
|
GBLREF uint4 dollar_tlevel;
|
|
#ifdef DEBUG
|
|
GBLREF uint4 dollar_trestart;
|
|
GBLREF boolean_t donot_INVOKE_MUMTSTART;
|
|
#endif
|
|
GBLREF gv_key *gv_currkey;
|
|
GBLREF gd_region *gv_cur_region;
|
|
GBLREF sgmnt_addrs *cs_addrs;
|
|
GBLREF sgmnt_data_ptr_t cs_data;
|
|
GBLREF recvpool_addrs recvpool;
|
|
GBLREF jnlpool_addrs jnlpool;
|
|
GBLREF jnlpool_ctl_ptr_t jnlpool_ctl;
|
|
GBLREF boolean_t is_updproc;
|
|
GBLREF seq_num seq_num_zero, seq_num_one;
|
|
GBLREF gd_addr *gd_header;
|
|
GBLREF FILE *updproc_log_fp;
|
|
GBLREF void (*call_on_signal)();
|
|
GBLREF sgm_info *first_sgm_info;
|
|
GBLREF unsigned int t_tries;
|
|
GBLREF unsigned char t_fail_hist[CDB_MAX_TRIES];
|
|
GBLREF struct_jrec_tcom tcom_record;
|
|
GBLREF gv_namehead *reset_gv_target;
|
|
#ifdef VMS
|
|
GBLREF struct chf$signal_array *tp_restart_fail_sig;
|
|
GBLREF boolean_t tp_restart_fail_sig_used;
|
|
GBLREF boolean_t secondary_side_std_null_coll;
|
|
#endif
|
|
GBLREF boolean_t is_replicator;
|
|
GBLREF jnl_gbls_t jgbl;
|
|
GBLREF boolean_t disk_blk_read;
|
|
GBLREF seq_num lastlog_seqno;
|
|
GBLREF uint4 log_interval;
|
|
#ifdef GTM_TRIGGER
|
|
DEBUG_ONLY(GBLREF ch_ret_type (*ch_at_trigger_init)();)
|
|
GBLREF int tprestart_state; /* When triggers restart, multiple states possible. See tp_restart.h */
|
|
GBLREF dollar_ecode_type dollar_ecode; /* structure containing $ECODE related information */
|
|
GBLREF mval dollar_ztwormhole;
|
|
#endif
|
|
GBLREF boolean_t skip_dbtriggers;
|
|
GBLREF gv_namehead *gv_target;
|
|
GBLREF boolean_t gv_play_duplicate_kills;
|
|
#ifdef UNIX
|
|
GBLREF int4 strm_index;
|
|
STATICDEF boolean_t set_onln_rlbk_flg;
|
|
#endif
|
|
|
|
LITREF mval literal_hasht;
|
|
static boolean_t updproc_continue = TRUE;
|
|
|
|
error_def(ERR_GBLOFLOW);
|
|
error_def(ERR_GVIS);
|
|
error_def(ERR_REC2BIG);
|
|
error_def(ERR_RECVPOOLSETUP);
|
|
error_def(ERR_REPEATERROR);
|
|
error_def(ERR_REPLONLNRLBK);
|
|
error_def(ERR_SECONDAHEAD);
|
|
error_def(ERR_STRMSEQMISMTCH);
|
|
error_def(ERR_TEXT);
|
|
error_def(ERR_TPRETRY);
|
|
error_def(ERR_TRIGDEFNOSYNC);
|
|
error_def(ERR_UPDREPLSTATEOFF);
|
|
|
|
/* DO_JNL_ENSURE_OPEN(CSA, JPC)
 *
 * Does "jnl_ensure_open" and other pre-requisites. This code is very similar to t_end.c.
 *	CSA : must be the address of the s_addrs of gv_cur_region (asserted), because gv_cur_region is used directly below.
 *	JPC : must be CSA->jnl (asserted).
 * Sets and adjusts the global journal record time and then, if journaling is enabled for CSA, calls jnl_ensure_open().
 * On a zero status it calls jnl_put_jrt_pini() if JPC->pini_addr is still 0; on a non-zero status it issues an
 * rts_error with that status (including JPC->status as an extra argument when that is not SS_NORMAL).
 * Both asserts are pure comparisons: they have no side effects on the macro arguments.
 */
#define DO_JNL_ENSURE_OPEN(CSA, JPC) \
{ \
	jnl_buffer_ptr_t	jbp; \
	uint4			jnl_status; \
	\
	assert((CSA) == &FILE_INFO(gv_cur_region)->s_addrs);	/* so we can use gv_cur_region below */ \
	assert((JPC) == (CSA)->jnl); \
	SET_GBL_JREC_TIME;	/* needed for jnl_put_jrt_pini() */ \
	jbp = (JPC)->jnl_buff; \
	/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order \
	 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write \
	 * journal records (if it decides to switch to a new journal file). \
	 */ \
	ADJUST_GBL_JREC_TIME(jgbl, jbp); \
	/* Make sure timestamp of this seqno is >= timestamp of previous seqno. Note: The below \
	 * macro invocation should be done AFTER the ADJUST_GBL_JREC_TIME call as the below resets \
	 * jpl->prev_jnlseqno_time. Doing it the other way around would mean the reset will happen \
	 * with a potentially lower value than the final adjusted time written in the jnl record. \
	 */ \
	ADJUST_GBL_JREC_TIME_JNLPOOL(jgbl, jnlpool_ctl); \
	if (JNL_ENABLED(CSA)) \
	{ \
		jnl_status = jnl_ensure_open(); \
		if (0 == jnl_status) \
		{ \
			if (0 == (JPC)->pini_addr) \
				jnl_put_jrt_pini(CSA); \
		} else \
		{ \
			if (SS_NORMAL != (JPC)->status) \
				rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR((CSA)->hdr), \
					DB_LEN_STR(gv_cur_region), (JPC)->status); \
			else \
				rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR((CSA)->hdr), \
					DB_LEN_STR(gv_cur_region)); \
		} \
	} \
}
|
|
|
|
#ifdef UNIX
|
|
/* UPDPROC_ONLN_RLBK_CLNUP(REG)
 *
 * Cleanup done when a concurrent online rollback is detected while holding crit/lock on REG (asserted).
 * Syncs the online rollback cycles, releases REG (rel_lock for the journal pool dummy region, rel_crit for any
 * other region), resets all gv_target clues and then issues ERR_REPLONLNRLBK. Control does not come back to the
 * invoker: the rts_error transfers control to updproc_ch.
 */
# define UPDPROC_ONLN_RLBK_CLNUP(REG) \
{ \
	sgmnt_addrs	*csa; \
	\
	assert(0 != have_crit(CRIT_HAVE_ANY_REG)); \
	csa = &FILE_INFO(REG)->s_addrs; \
	assert(csa->now_crit); \
	SYNC_ONLN_RLBK_CYCLES; \
	if (jnlpool.jnlpool_dummy_reg != (REG)) \
		rel_crit(REG); \
	else \
		rel_lock(REG); \
	RESET_ALL_GVT_CLUES; \
	rts_error(VARLSTCNT(1) ERR_REPLONLNRLBK);	/* transfers control back to updproc_ch */ \
}
|
|
|
|
/* WAIT_FOR_ZERO_RECVPOOL_JNL_SEQNO
 *
 * The receiver server eventually sees upd_proc_local->onln_rlbk_flg being set, drains the replication pipe, closes
 * the connection and restarts. But before that it also resets recvpool_ctl->jnl_seqno to 0. So poll (sleeping
 * UPDPROC_WAIT_FOR_STARTJNLSEQNO between checks) until recvpool_ctl->jnl_seqno is 0 to be sure that the receiver
 * server did acknowledge the flag.
 *
 * Caution: this macro relies on the invoker's "recvpool_ctl" and "upd_proc_local" variables and, if a shutdown is
 * requested while waiting, it calls updproc_end() and RETURNS SS_NORMAL from the invoking function.
 */
#define WAIT_FOR_ZERO_RECVPOOL_JNL_SEQNO \
{ \
	repl_log(updproc_log_fp, TRUE, TRUE, "REPL INFO - Waiting for receiver server to reset recvpool_ctl->jnl_seqno\n"); \
	while (0 != recvpool_ctl->jnl_seqno) \
	{ \
		SHORT_SLEEP(UPDPROC_WAIT_FOR_STARTJNLSEQNO); \
		if (SHUTDOWN != upd_proc_local->upd_proc_shutdown) \
			continue;	/* no shutdown requested; keep polling */ \
		updproc_end(); \
		return SS_NORMAL; \
	} \
}
|
|
|
|
/* LOG_ONLINE_ROLLBACK_EVENT
 *
 * Writes to the update process log that processing starts afresh due to an online rollback, followed by the current
 * journal pool seqno, the update process read seqno and the receive pool seqno (asserted to be non-zero).
 * Relies on the invoker's "upd_proc_local" and "recvpool_ctl" variables.
 */
#define LOG_ONLINE_ROLLBACK_EVENT \
{ \
	repl_log(updproc_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n"); \
	repl_log(updproc_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n", \
		jnlpool.jnlpool_ctl->jnl_seqno); \
	repl_log(updproc_log_fp, TRUE, TRUE, "REPL INFO - Current Update process Read Seqno : %llu\n", \
		upd_proc_local->read_jnl_seqno); \
	assert(0 != recvpool_ctl->jnl_seqno); \
	repl_log(updproc_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n", \
		recvpool_ctl->jnl_seqno); \
}
|
|
#endif
|
|
|
|
CONDITION_HANDLER(updproc_ch)
|
|
{
|
|
int rc;
|
|
unsigned char seq_num_str[32], *seq_num_ptr;
|
|
unsigned char seq_num_strx[32], *seq_num_ptrx;
|
|
|
|
START_CH;
|
|
if ((int)ERR_TPRETRY == SIGNAL)
|
|
{
|
|
# if defined(DEBUG) && defined(DEBUG_UPDPROC_TPRETRY)
|
|
assert(FALSE);
|
|
# endif
|
|
repl_log(updproc_log_fp, TRUE, TRUE, " ----> TPRETRY for sequence number "INT8_FMT" "INT8_FMTX"\n",
|
|
INT8_PRINT(recvpool.upd_proc_local->read_jnl_seqno),
|
|
INT8_PRINTX(recvpool.upd_proc_local->read_jnl_seqno));
|
|
/* This is a kludge. We can come here from 2 places.
|
|
* ( i) From a call to t_retry which does a rts_error(ERR_TPRETRY).
|
|
* (ii) From updproc_actions() where immediately after op_tcommit we detect that dollar_tlevel is non-zero.
|
|
* In the first case, we need to do a tp_restart. In the second, op_tcommit would have already done it for us.
|
|
* The way we detect the second case is from the value of first_sgm_info since it is NULLified in tp_restart.
|
|
*/
|
|
if (first_sgm_info GTMTRIG_ONLY( || (TPRESTART_STATE_NORMAL != tprestart_state)))
|
|
{
|
|
VMS_ONLY(assert(FALSE == tp_restart_fail_sig_used);)
|
|
rc = tp_restart(1, TP_RESTART_HANDLES_ERRORS); /* any nested errors will set SIGNAL accordingly */
|
|
assert(0 == rc); /* No partials restarts can happen at this final level */
|
|
GTMTRIG_ONLY(assert(TPRESTART_STATE_NORMAL == tprestart_state));
|
|
NON_GTMTRIG_ONLY(assert(INVALID_GV_TARGET == reset_gv_target);)
|
|
reset_gv_target = INVALID_GV_TARGET; /* see "trigger_item_tpwrap_ch" similar code for why this is needed */
|
|
# ifdef UNIX
|
|
if (ERR_REPLONLNRLBK == SIGNAL) /* tp_restart did rts_error(ERR_REPLONLNRLBK) */
|
|
set_onln_rlbk_flg = TRUE;
|
|
if ((ERR_TPRETRY == SIGNAL) || (ERR_REPLONLNRLBK == SIGNAL))
|
|
# elif defined VMS
|
|
if (!tp_restart_fail_sig_used) /* If tp_restart ran clean */
|
|
# else
|
|
# error unsupported platform
|
|
# endif
|
|
{
|
|
UNWIND(NULL, NULL);
|
|
}
|
|
# ifdef VMS
|
|
else
|
|
{ /* Otherwise tp_restart had a signal that we must now deal with.
|
|
* replace the TPRETRY information with that saved from tp_restart.
|
|
* first assert that we have room for these arguments and proper setup
|
|
*/
|
|
assert(TPRESTART_ARG_CNT >= tp_restart_fail_sig->chf$is_sig_args);
|
|
memcpy(sig, tp_restart_fail_sig, (tp_restart_fail_sig->chf$l_sig_args + 1) * SIZEOF(int));
|
|
tp_restart_fail_sig_used = FALSE;
|
|
}
|
|
# endif
|
|
} else
|
|
{
|
|
UNWIND(NULL, NULL);
|
|
}
|
|
}
|
|
# ifdef GTM_TRIGGER
|
|
else if (ERR_REPEATERROR == SIGNAL)
|
|
SIGNAL = dollar_ecode.error_last_ecode; /* Error rethrown from a trigger */
|
|
# endif
|
|
# ifdef UNIX
|
|
else if (ERR_REPLONLNRLBK == SIGNAL)
|
|
{
|
|
preemptive_ch(SEVERITY);
|
|
assert(INVALID_GV_TARGET == reset_gv_target);
|
|
set_onln_rlbk_flg = TRUE;
|
|
UNWIND(NULL, NULL);
|
|
}
|
|
# endif
|
|
NEXTCH;
|
|
}
|
|
|
|
/* updproc performs its main processing in a function call invoked within a loop.
|
|
* Unless there is a TPRESTART, the processing remains in the updproc_actions loop,
|
|
* but when a resource conflict triggers a TPRESTART, the condition handler drops
|
|
* back to the outer loop in updproc, which reissues the call.
|
|
*/
|
|
int updproc(void)
|
|
{
|
|
seq_num jnl_seqno; /* the current jnl_seq no of the Update process */
|
|
seq_num start_jnl_seqno;
|
|
uint4 status;
|
|
gld_dbname_list *gld_db_files, *curr;
|
|
recvpool_ctl_ptr_t recvpool_ctl;
|
|
# ifdef VMS
|
|
char proc_name[PROC_NAME_MAXLEN + 1];
|
|
struct dsc$descriptor_s proc_name_desc;
|
|
# endif
|
|
upd_proc_local_ptr_t upd_proc_local;
|
|
sgmnt_addrs *repl_csa;
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
call_on_signal = updproc_sigstop;
|
|
GTMTRIG_DBG_ONLY(ch_at_trigger_init = &updproc_ch);
|
|
# ifdef VMS
|
|
/* Get a meaningful process name */
|
|
proc_name_desc.dsc$w_length = get_proc_name(LIT_AND_LEN("GTMUPD"), getpid(), proc_name);
|
|
proc_name_desc.dsc$a_pointer = proc_name;
|
|
proc_name_desc.dsc$b_dtype = DSC$K_DTYPE_T;
|
|
proc_name_desc.dsc$b_class = DSC$K_CLASS_S;
|
|
if (SS$_NORMAL != (status = sys$setprn(&proc_name_desc)))
|
|
rts_error(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
|
|
RTS_ERROR_LITERAL("Unable to change update process name"), status);
|
|
# else
|
|
/* In the update process, we want every replicated update from an originating instances to end up in a replicated region
|
|
* on this (the receiving) instance. If not, we will issue a UPDREPLSTATEOFF error. But it is possible that replication
|
|
* was turned ON at that time (since we dont hold crit at the time we do the UPDREPLSTATEOFF check) but later got turned
|
|
* OFF (for example due to no disk space for journal files etc.) just before the actual application of the update.
|
|
* In that case, the UPDREPLSTATEOFF error would not have been issued but there is still an issue in that an update from
|
|
* the primary got applied to a non-replicated database on the secondary. We prevent this out-of-sync situation from
|
|
* happening in the first place, by ensuring "jnl_file_lost" (the function that is invoked to turn journaling OFF) issues
|
|
* a runtime error and does not turn journaling off.
|
|
*/
|
|
TREF(error_on_jnl_file_lost) = JNL_FILE_LOST_ERRORS;
|
|
# endif
|
|
is_updproc = TRUE;
|
|
is_replicator = TRUE; /* as update process goes through t_end() and can write jnl recs to the jnlpool for replicated db */
|
|
gv_play_duplicate_kills = TRUE; /* needed to ensure seqnos are kept in sync between source and receiver instances */
|
|
NON_GTMTRIG_ONLY(skip_dbtriggers = TRUE;)
|
|
memset((uchar_ptr_t)&recvpool, 0, SIZEOF(recvpool)); /* For util_base_ch and mupip_exit */
|
|
if (updproc_init(&gld_db_files, &start_jnl_seqno) == UPDPROC_EXISTS) /* we got the global directory header already */
|
|
rts_error(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Update Process already exists"));
|
|
OPERATOR_LOG_MSG;
|
|
/* Initialization of all the relevant global datastructures and allocation for TP */
|
|
repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;
|
|
mu_gv_stack_init();
|
|
upd_proc_local = recvpool.upd_proc_local;
|
|
recvpool_ctl = recvpool.recvpool_ctl;
|
|
while (TRUE)
|
|
{
|
|
upd_proc_local->read = 0;
|
|
jnl_seqno = start_jnl_seqno;
|
|
upd_proc_local->read_jnl_seqno = 0;
|
|
SHM_WRITE_MEMORY_BARRIER; /* Ensure upd_proc_local->read_jnl_seqno update is visible to the receiver server
|
|
* BEFORE recvpool_ctl->jnl_seqno update. This way we ensure whenever it gets to the
|
|
* stage where it sets upd_proc_local->read_jnl_seqno to a non-zero value (possible
|
|
* in the case of a non-supplementary -> supplementary replication connection),
|
|
* its update will happen AFTER of the update process update to "read_jnl_seqno"
|
|
* and not the other way around.
|
|
*/
|
|
recvpool_ctl->jnl_seqno = jnl_seqno;
|
|
# ifdef UNIX
|
|
/* At this point, the receiver server will see the non-zero jnl_seqno and start communicating with a source server.
|
|
* But if this is a supplementary root primary instance, the current instance jnl_seqno is not what the receiver
|
|
* will ask the source to start sending transactions from. It has to first figure out what non-supplementary
|
|
* stream# (strm_index) the connecting source maps to and find out the current strm_seqno[strm_index] in the
|
|
* instance file header. All this processing involves the receiver server. Until it finishes this processing, the
|
|
* update process cannot proceed. Wait for this to finish and update local copy of seqnos accordingly. The receiver
|
|
* server will set upd_proc_local->read_jnl_seqno to a non-zero value to signal completion of this step.
|
|
*/
|
|
if (jnlpool.repl_inst_filehdr->is_supplementary && !jnlpool.jnlpool_ctl->upd_disabled)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Waiting for Receiver Server to write read_jnl_seqno\n");
|
|
while (!upd_proc_local->read_jnl_seqno)
|
|
{
|
|
SHORT_SLEEP(UPDPROC_WAIT_FOR_READJNLSEQNO);
|
|
if (SHUTDOWN == upd_proc_local->upd_proc_shutdown)
|
|
{
|
|
updproc_end();
|
|
return(SS_NORMAL);
|
|
}
|
|
if (GTMRECV_NO_RESTART != recvpool.gtmrecv_local->restart)
|
|
{ /* wait for restart to become GTMRECV_NO_RESTART (set by the Receiver Server) */
|
|
if (GTMRECV_RCVR_RESTARTED == recvpool.gtmrecv_local->restart)
|
|
{
|
|
recvpool_ctl->jnl_seqno = jnl_seqno;
|
|
assert(0 == upd_proc_local->read);
|
|
recvpool.gtmrecv_local->restart = GTMRECV_UPD_RESTARTED;
|
|
recvpool.upd_helper_ctl->first_done = FALSE;
|
|
recvpool.upd_helper_ctl->pre_read_offset = 0;
|
|
}
|
|
}
|
|
if (repl_csa->onln_rlbk_cycle != jnlpool_ctl->onln_rlbk_cycle)
|
|
{ /* A concurrent online rollback. Handle it */
|
|
LOG_ONLINE_ROLLBACK_EVENT;
|
|
assert(!repl_csa->now_crit && !set_onln_rlbk_flg);
|
|
grab_lock(jnlpool.jnlpool_dummy_reg, GRAB_LOCK_ONLY);
|
|
SYNC_ONLN_RLBK_CYCLES;
|
|
rel_lock(jnlpool.jnlpool_dummy_reg);
|
|
upd_proc_local->onln_rlbk_flg = TRUE; /* let receiver know about the online rollback */
|
|
WAIT_FOR_ZERO_RECVPOOL_JNL_SEQNO;
|
|
upd_proc_local->read = 0;
|
|
recvpool_ctl->jnl_seqno = jnl_seqno;
|
|
}
|
|
}
|
|
/* Ensure all updates done by the receiver server BEFORE setting
|
|
* upd_proc_local->read_jnl_seqno are visible once we see the updated read_jnl_seqno.
|
|
*/
|
|
SHM_READ_MEMORY_BARRIER;
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Receiver Server wrote upd_proc_local->read_jnl_seqno = "
|
|
"%llu [0x%llx]\n", upd_proc_local->read_jnl_seqno, upd_proc_local->read_jnl_seqno);
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Receiver Server wrote stream # = %d\n",
|
|
recvpool.gtmrecv_local->strm_index);
|
|
jnl_seqno = upd_proc_local->read_jnl_seqno;
|
|
assert(0 == GET_STRM_INDEX(jnl_seqno));
|
|
start_jnl_seqno = jnl_seqno;
|
|
strm_index = recvpool.gtmrecv_local->strm_index;
|
|
}
|
|
# endif
|
|
upd_proc_local->read_jnl_seqno = jnl_seqno;
|
|
/* Check if the secondary is ahead of the primary */
|
|
# ifdef VMS
|
|
if ((jnlpool_ctl->jnl_seqno > start_jnl_seqno) && jnlpool_ctl->upd_disabled)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"JNLSEQNO last updated by update process = "INT8_FMT" "INT8_FMTX"\n",
|
|
INT8_PRINT(start_jnl_seqno), INT8_PRINTX(start_jnl_seqno));
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"JNLSEQNO of last transaction written to journal pool = "INT8_FMT" "INT8_FMTX"\n",
|
|
INT8_PRINT(jnlpool_ctl->jnl_seqno), INT8_PRINTX(jnlpool_ctl->jnl_seqno));
|
|
rts_error(VARLSTCNT(1) ERR_SECONDAHEAD);
|
|
}
|
|
# else
|
|
/* The SECONDAHEAD check is performed in the receiver server after it has connected with the source
|
|
* server. This is because the check is relevant only if the source server is dualsite. That is
|
|
* not known now but instead at connection time. Hence the deferred check.
|
|
* Note: In the case of supplementary root primary instances, the jnl_seqno of the jnlpool is the
|
|
* unified seqno across all the non-supplementary streams as well as the local stream of updates.
|
|
* But the receive pool jnl_seqno is the seqno of the specific non-supplementary stream. So those
|
|
* cannot be compared at all. But they are guaranteed to be equal in all other cases. Assert that.
|
|
*/
|
|
assert((jnlpool_ctl->jnl_seqno == start_jnl_seqno)
|
|
|| (jnlpool.repl_inst_filehdr->is_supplementary && !jnlpool.jnlpool_ctl->upd_disabled));
|
|
# endif
|
|
UNIX_ONLY(assert(updproc_continue && !set_onln_rlbk_flg));
|
|
while (updproc_continue UNIX_ONLY(&& !set_onln_rlbk_flg))
|
|
updproc_actions(gld_db_files);
|
|
# ifdef UNIX
|
|
if (set_onln_rlbk_flg)
|
|
{ /* A concurrent online rollback happened which drove the updproc_ch and called us. Need to let the receiver
|
|
* server know about it and set up the sequence numbers
|
|
*/
|
|
LOG_ONLINE_ROLLBACK_EVENT;
|
|
upd_proc_local->onln_rlbk_flg = TRUE; /* let receiver know about the online rollback */
|
|
WAIT_FOR_ZERO_RECVPOOL_JNL_SEQNO;
|
|
set_onln_rlbk_flg = FALSE;
|
|
/* Since we are going to start afresh, do a OP_TROLLBACK if we are in TP. This brings the global variables
|
|
* dollar_tlevel, dollar_trestart all to a known state thereby erasing any history of lingering TP artifacts
|
|
*/
|
|
if (dollar_tlevel)
|
|
{
|
|
OP_TROLLBACK(0);
|
|
assert(!dollar_tlevel);
|
|
assert(!dollar_trestart);
|
|
}
|
|
start_jnl_seqno = jnlpool.jnlpool_ctl->jnl_seqno; /* needed when we go back to the beginning of the loop */
|
|
} else
|
|
# endif
|
|
if (!updproc_continue)
|
|
break;
|
|
}
|
|
updproc_end();
|
|
return(SS_NORMAL);
|
|
}
|
|
|
|
void updproc_actions(gld_dbname_list *gld_db_files)
|
|
{
|
|
mval ts_mv, val_mv;
|
|
jnl_record *rec;
|
|
uint4 temp_write, temp_read;
|
|
enum jnl_record_type rectype;
|
|
int4 upd_rec_seqno = 0; /* the total no of journal records excluding TCOM records */
|
|
int4 tupd_num; /* the number of tset/tkill/tzkill records encountered */
|
|
int4 tcom_num; /* the number of tcom records encountered */
|
|
seq_num jnl_seqno, tmpseqno; /* the current jnl_seq no of the Update process */
|
|
seq_num last_errored_seqno = 0;
|
|
int key_len, rec_len, backptr;
|
|
char fn[MAX_FN_LEN];
|
|
sm_uc_ptr_t readaddrs; /* start of current rec in pool */
|
|
boolean_t incr_seqno;
|
|
seq_num jnlpool_ctl_seqno, rec_strm_seqno, strm_seqno;
|
|
char *val_ptr;
|
|
jnl_string *keystr;
|
|
mstr mname;
|
|
char *key, *keytop;
|
|
gv_key *gv_failed_key = NULL, *gv_failed_key_ptr;
|
|
unsigned char *endBuff, fmtBuff[MAX_ZWR_KEY_SZ];
|
|
enum upd_bad_trans_type bad_trans_type;
|
|
recvpool_ctl_ptr_t recvpool_ctl;
|
|
upd_proc_local_ptr_t upd_proc_local;
|
|
gtmrecv_local_ptr_t gtmrecv_local;
|
|
upd_helper_ctl_ptr_t upd_helper_ctl;
|
|
sgmnt_addrs *csa, *repl_csa;
|
|
sgmnt_data_ptr_t csd;
|
|
char gv_mname[MAX_KEY_SZ];
|
|
unsigned char buff[MAX_ZWR_KEY_SZ], *end, scan_char, next_char;
|
|
boolean_t log_switched = FALSE;
|
|
# ifdef UNIX
|
|
boolean_t suppl_root_primary, suppl_propagate_primary;
|
|
# endif
|
|
jnl_private_control *jpc;
|
|
gld_dbname_list *curr;
|
|
gd_region *save_reg;
|
|
int4 wtstart_errno;
|
|
boolean_t buffers_flushed;
|
|
uint4 idle_flush_count = 0; /* Number of times buffers were flushed without an intermediate sleep */
|
|
uint4 write_wrap, cntr, last_nullsubs, last_subs, keyend;
|
|
UNIX_ONLY(
|
|
repl_histinfo histinfo;
|
|
repl_old_triple_jnl_ptr_t input_old_triple;
|
|
repl_histrec_jnl_ptr_t input_histjrec;
|
|
uint4 expected_rec_len;
|
|
)
|
|
# ifdef GTM_TRIGGER
|
|
uint4 nodeflags;
|
|
boolean_t primary_has_trigdef, secondary_has_trigdef;
|
|
const char *trigdef_inst = NULL, *no_trigdef_inst = NULL;
|
|
# endif
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
# ifdef UNIX
|
|
assert((NULL != jnlpool.jnlpool_dummy_reg) && jnlpool.jnlpool_dummy_reg->open);
|
|
repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;
|
|
DEBUG_ONLY(
|
|
assert(!repl_csa->hold_onto_crit); /* so we can do unconditional grab_lock/rel_lock below */
|
|
ASSERT_VALID_JNLPOOL(repl_csa);
|
|
)
|
|
# endif
|
|
ESTABLISH(updproc_ch);
|
|
recvpool_ctl = recvpool.recvpool_ctl;
|
|
upd_proc_local = recvpool.upd_proc_local;
|
|
gtmrecv_local = recvpool.gtmrecv_local;
|
|
upd_helper_ctl = recvpool.upd_helper_ctl;
|
|
temp_read = upd_proc_local->read;
|
|
temp_write = recvpool_ctl->write;
|
|
readaddrs = recvpool.recvdata_base + temp_read;
|
|
upd_rec_seqno = tupd_num = tcom_num = 0;
|
|
jnl_seqno = upd_proc_local->read_jnl_seqno;
|
|
log_interval = upd_proc_local->log_interval;
|
|
lastlog_seqno = jnl_seqno - log_interval; /* to ensure we log the first transaction */
|
|
# ifdef UNIX
|
|
if (jnlpool.repl_inst_filehdr->is_supplementary)
|
|
{
|
|
if (!jnlpool.jnlpool_ctl->upd_disabled)
|
|
{
|
|
suppl_root_primary = TRUE;
|
|
suppl_propagate_primary = FALSE;
|
|
} else
|
|
{
|
|
suppl_root_primary = FALSE;
|
|
suppl_propagate_primary = TRUE;
|
|
}
|
|
} else
|
|
{
|
|
suppl_root_primary = FALSE;
|
|
suppl_propagate_primary = FALSE;
|
|
}
|
|
# endif
|
|
while (TRUE)
|
|
{
|
|
incr_seqno = FALSE;
|
|
UNIX_ONLY(assert(0 == GET_STRM_INDEX(jnl_seqno));)
|
|
if (SHUTDOWN == upd_proc_local->upd_proc_shutdown)
|
|
break;
|
|
if (GTMRECV_NO_RESTART != gtmrecv_local->restart)
|
|
{ /* wait for restart to become GTMRECV_NO_RESTART (set by the Receiver Server) */
|
|
if (GTMRECV_RCVR_RESTARTED == gtmrecv_local->restart)
|
|
{
|
|
recvpool.recvpool_ctl->jnl_seqno = jnl_seqno;
|
|
readaddrs = recvpool.recvdata_base;
|
|
upd_proc_local->read = 0;
|
|
gtmrecv_local->restart = GTMRECV_UPD_RESTARTED;
|
|
upd_helper_ctl->first_done = FALSE;
|
|
upd_helper_ctl->pre_read_offset = 0;
|
|
temp_read = 0;
|
|
temp_write = 0;
|
|
if (0 < tupd_num)
|
|
{
|
|
assert(donot_INVOKE_MUMTSTART);
|
|
assert(dollar_tlevel);
|
|
OP_TROLLBACK(0);
|
|
tupd_num = 0;
|
|
}
|
|
}
|
|
SHORT_SLEEP(10);
|
|
continue;
|
|
}
|
|
if (upd_proc_local->changelog)
|
|
{
|
|
if (upd_proc_local->changelog & REPLIC_CHANGE_UPD_LOGINTERVAL)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Changing log interval from %u to %u\n",
|
|
log_interval, upd_proc_local->log_interval);
|
|
log_interval = upd_proc_local->log_interval;
|
|
lastlog_seqno = jnl_seqno - log_interval; /* force logging of the first transaction after the
|
|
* change in log interval */
|
|
}
|
|
if (upd_proc_local->changelog & REPLIC_CHANGE_LOGFILE)
|
|
{
|
|
log_switched = TRUE;
|
|
upd_log_init(UPDPROC);
|
|
}
|
|
if ( log_switched == TRUE )
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Change log to %s successful\n",
|
|
recvpool.upd_proc_local->log_file);
|
|
upd_proc_local->changelog = 0;
|
|
}
|
|
/* Assert that dollar_tlevel & tupd_num are in sync with each other. The only exception is if dollar_tlevel
|
|
* is non-zero, it is possible tupd_num is 0 in case we come here after a TP restart.
|
|
*/
|
|
assert((dollar_tlevel && (tupd_num || dollar_trestart)) || !dollar_tlevel && !tupd_num);
|
|
if (upd_proc_local->bad_trans UNIX_ONLY(|| upd_proc_local->onln_rlbk_flg)
|
|
|| (!dollar_tlevel && (FALSE == recvpool.recvpool_ctl->wrapped)
|
|
&& (temp_write = recvpool.recvpool_ctl->write) == upd_proc_local->read))
|
|
{ /* bad-trans OR online rollback OR nothing to process. In case of bad_trans or online rollback, wait for
|
|
* the receiver to reset the corresponding fields. In case there is nothing to process, wait until data
|
|
* is available in the receive pool.
|
|
* Note that we check two shared memory fields "recvpool_ctl->wrapped" and "recvpool_ctl->write" in
|
|
* that order. The receiver server updates them in the opposite order when it sets "wrapped" to TRUE.
|
|
* So it is possible we read an uptodate value of "write" but an outofdate value of "wrapped". This
|
|
* means we could incorrectly conclude the pool is empty when actually it is full. But we expect these
|
|
* situations to be rare enough that it is ok to do an idle buffer flush in this case. We will eventually
|
|
* get to see the uptodate value of "wrapped" at which point we will move on to process the transactions.
|
|
*/
|
|
assert((0 == recvpool.recvpool_ctl->jnl_seqno) || (jnl_seqno <= recvpool.recvpool_ctl->jnl_seqno));
|
|
/* the 0 == check takes care of the startup case where jnl_seqno is 0 in the recvpool_ctl */
|
|
/* If any dirty buffers, write them out since nothing else is happening now.
|
|
* wcs_wtstart only writes a few buffer a call and putting the call in a loop would flush the
|
|
* entire dirty buffer set, but there is no need for a loop around the call since we're already in
|
|
* the main updproc loop and this is the only place it really sleeps. */
|
|
save_reg = gv_cur_region;
|
|
/* Make sure cs_addrs and cs_data are in sync with gv_cur_region before TP_CHANGE_REG changes them */
|
|
assert((NULL == gv_cur_region) || (cs_addrs == &FILE_INFO(gv_cur_region)->s_addrs));
|
|
assert((NULL == gv_cur_region) || (cs_data == cs_addrs->hdr));
|
|
buffers_flushed = FALSE;
|
|
for (curr = gld_db_files; NULL != curr; curr = curr->next)
|
|
{
|
|
TP_CHANGE_REG(curr->gd);
|
|
if (cs_addrs->nl->wcs_active_lvl && (FALSE == gv_cur_region->read_only))
|
|
{
|
|
DCLAST_WCS_WTSTART(gv_cur_region, 0, wtstart_errno);
|
|
/* DCLAST_WCS_WTSTART macro does not set the wtstart_errno variable in VMS. But in
|
|
* any case, we do not support database file extensions with MM on VMS. So we could
|
|
* never get a ERR_GBLOFLOW error there. Therefore the file extension check below is
|
|
* done only in Unix.
|
|
*/
|
|
# ifdef UNIX
|
|
if ((dba_mm == cs_data->acc_meth) && (ERR_GBLOFLOW == wtstart_errno))
|
|
{
|
|
assert(!cs_addrs->hold_onto_crit);
|
|
grab_crit(gv_cur_region);
|
|
if (cs_addrs->onln_rlbk_cycle != cs_addrs->nl->onln_rlbk_cycle)
|
|
UPDPROC_ONLN_RLBK_CLNUP(gv_cur_region); /* No return */
|
|
wcs_recover(gv_cur_region);
|
|
rel_crit(gv_cur_region);
|
|
}
|
|
# endif
|
|
buffers_flushed = TRUE;
|
|
}
|
|
}
|
|
TP_CHANGE_REG(save_reg);
|
|
/* To avoid a potential infinite cpu-bound loop if the wcs_active_lvl field is bogus and wcs_wtstart()
|
|
* returns immediately, we count the number of times a buffer flush has (potentially) occurred. When
|
|
* this count gets to an arbitrarily "large" value or there were no buffers to flush, we will sleep
|
|
* and start the count over again. If there are more than the "large" number of dirty buffers to flush,
|
|
* this logic only causes an extra benign sleep whenever the count is "large". */
|
|
if (buffers_flushed && (MAX_IDLE_HARD_SPINS > idle_flush_count))
|
|
idle_flush_count++;
|
|
else
|
|
{
|
|
idle_flush_count = 0;
|
|
SHORT_SLEEP(10);
|
|
}
|
|
# ifdef UNIX
|
|
if (!upd_proc_local->onln_rlbk_flg && (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle))
|
|
{ /* A concurrent online rollback happened. Start afresh */
|
|
grab_lock(jnlpool.jnlpool_dummy_reg, GRAB_LOCK_ONLY);
|
|
UPDPROC_ONLN_RLBK_CLNUP(jnlpool.jnlpool_dummy_reg); /* No return */
|
|
} /* else onln_rlbk_flag is already set and the receiver should take the next appropriate action */
|
|
# endif
|
|
continue;
|
|
}
|
|
idle_flush_count = 0;
|
|
/* The update process reads "recvpool_ctl->write" first and assumes that all data in the receive pool
|
|
* that it then reads (upto the "write" offset) is valid. In order for this assumption to hold good, the
|
|
* receiver server needs to do a write memory barrier after updating the receive pool data but before
|
|
* updating "write". The update process does a read memory barrier after reading "write" but before reading
|
|
* the receive pool data. Not enforcing this read order would mean we would have seen an updated "write"
|
|
* but not yet see the updated receive pool data which would mean whatever data we read from the receive pool
|
|
* will be stale (which if successfully processed could cause primary-secondary dbs to get out of sync).
|
|
*/
|
|
SHM_READ_MEMORY_BARRIER;
|
|
/* To take the wrapping of buffer in case of over flow ------------ */
|
|
/* assume receiver will update wrapped even for exact overflows */
|
|
write_wrap = recvpool.recvpool_ctl->write_wrap;
|
|
if (temp_read >= write_wrap)
|
|
{
|
|
assert(temp_read == write_wrap);
|
|
if (0 < tupd_num) /* receive pool cannot wrap in the middle of TP */
|
|
GTMASSERT; /* see process_tr_buff in gtmrecv_process for why */
|
|
if (FALSE == recvpool.recvpool_ctl->wrapped)
|
|
{ /* Update process in keeping up with receiver server, notices that there was a wrap
|
|
* (thru write and write_wrap). It has to wait till receiver sets wrapped.
|
|
*/
|
|
SHORT_SLEEP(1);
|
|
continue;
|
|
}
|
|
DEBUG_ONLY(
|
|
repl_log(updproc_log_fp, TRUE, FALSE,
|
|
"-- Wrapping -- read = %ld :: write_wrap = %ld :: upd_jnl_seqno = "INT8_FMT" "INT8_FMTX"\n",
|
|
temp_read, write_wrap, INT8_PRINT(jnl_seqno),INT8_PRINTX(jnl_seqno));
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"-------------> wrapped = %ld :: write = %ld :: recv_jnl_seqno = "INT8_FMT" "INT8_FMTX"\n",
|
|
recvpool.recvpool_ctl->wrapped, recvpool.recvpool_ctl->write,
|
|
INT8_PRINT(recvpool.recvpool_ctl->jnl_seqno),
|
|
INT8_PRINTX(recvpool.recvpool_ctl->jnl_seqno));
|
|
)
|
|
/* The update process reads (a) "recvpool_ctl->wrapped" first and then reads (b) "recvpool_ctl->write".
|
|
* If "wrapped" is TRUE, it assumes that "write" will never hold a stale value that corresponds to a
|
|
* previous state of "wrapped". For this assumption to hold good, the receiver server needs to do a write
|
|
* memory barrier after updating "write" but before updating "wrapped". The update process will do a read
|
|
* memory barrier after reading "wrapped" but before reading "write". This way we are guaranteed the
|
|
* update process will never see a value of "write" that is at the tail end of the receive pool once it
|
|
* sees "wrapped" as TRUE. Not having this guarantee means we would have wrapped to read from the
|
|
* beginning of the receive pool but will continue to see "write" at the tail of the receive pool and will
|
|
* therefore treat almost the entire receive pool as containing unprocessed data when it is not the case.
|
|
*/
|
|
SHM_READ_MEMORY_BARRIER;
|
|
temp_read = 0;
|
|
upd_proc_local->read = 0;
|
|
recvpool.recvpool_ctl->wrapped = FALSE;
|
|
readaddrs = recvpool.recvdata_base;
|
|
temp_write = recvpool.recvpool_ctl->write;
|
|
if (0 == temp_write)
|
|
continue; /* Receiver server wrapped but hasn't yet written anything into the pool */
|
|
}
|
|
rec = (jnl_record *)readaddrs;
|
|
rectype = (enum jnl_record_type)rec->prefix.jrec_type;
|
|
rec_len = rec->prefix.forwptr;
|
|
assert(IS_REPLICATED(rectype));
|
|
# ifdef UNIX
|
|
if ((JRT_TRIPLE == rectype) || (JRT_HISTREC == rectype))
|
|
{ /* Source server has sent a REPL_TRIPLE or REPL_HISTREC message in the middle of logical journal
|
|
* records. Construct the repl_histrec structure from the input message and add this history
|
|
* record to the replication instance file. In case of REPL_TRIPLE message, the source server
|
|
* is running a pre-supplementary version of GT.M so null-fill those fields in the history
|
|
* record that did not exist in that older version.
|
|
*/
|
|
if (JRT_TRIPLE == rectype)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Processing REPL_TRIPLE message\n");
|
|
input_old_triple = (repl_old_triple_jnl_ptr_t)readaddrs;
|
|
histinfo.start_seqno = input_old_triple->start_seqno;
|
|
histinfo.strm_seqno = 0;
|
|
memcpy(histinfo.root_primary_instname, input_old_triple->instname, MAX_INSTNAME_LEN - 1);
|
|
histinfo.root_primary_cycle = input_old_triple->cycle;
|
|
histinfo.creator_pid = 0;
|
|
histinfo.created_time = 0;
|
|
histinfo.strm_index = 0;
|
|
histinfo.history_type = HISTINFO_TYPE_NORMAL;
|
|
NULL_INITIALIZE_REPL_INST_UUID(histinfo.lms_group);
|
|
/* The following fields will be initialized in the "repl_inst_histinfo_add" function call below.
|
|
* histinfo.histinfo_num
|
|
* histinfo.prev_histinfo_num
|
|
* histinfo.last_histinfo_num[]
|
|
*/
|
|
expected_rec_len = SIZEOF(repl_old_triple_jnl_t);
|
|
} else
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Processing REPL_HISTREC message\n");
|
|
input_histjrec = (repl_histrec_jnl_ptr_t)readaddrs;
|
|
histinfo = input_histjrec->histcontent;
|
|
expected_rec_len = SIZEOF(repl_histrec_jnl_t);
|
|
}
|
|
if (expected_rec_len != rec_len)
|
|
{
|
|
bad_trans_type = upd_bad_histinfo_len;
|
|
assert(FALSE);
|
|
} else if (histinfo.start_seqno != recvpool.upd_proc_local->read_jnl_seqno)
|
|
{
|
|
bad_trans_type = upd_bad_histinfo_start_seqno1;
|
|
assert(FALSE);
|
|
} else if (histinfo.start_seqno > recvpool_ctl->jnl_seqno)
|
|
{
|
|
bad_trans_type = upd_bad_histinfo_start_seqno2;
|
|
assert(FALSE);
|
|
} else
|
|
bad_trans_type = upd_good_record;
|
|
if (upd_good_record != bad_trans_type)
|
|
{ /* Signal a BADTRANS */
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"-> Bad trans :: bad_trans_type = %ld type = %ld len = %ld "
|
|
"start_seqno = %llu [0x%llx] upd_read_seqno = %llu [0x%llx] "
|
|
"recvpool_jnl_seqno = %llu [0x%llx]\n",
|
|
bad_trans_type, rectype, rec_len, histinfo.start_seqno, histinfo.start_seqno,
|
|
recvpool.upd_proc_local->read_jnl_seqno, recvpool.upd_proc_local->read_jnl_seqno,
|
|
recvpool_ctl->jnl_seqno, recvpool_ctl->jnl_seqno);
|
|
upd_proc_local->bad_trans = TRUE;
|
|
assert(!dollar_tlevel);
|
|
if (dollar_tlevel)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "OP_TROLLBACK IS CALLED "
|
|
"-->Bad trans :: dollar_tlevel = %ld\n", dollar_tlevel);
|
|
OP_TROLLBACK(0);
|
|
}
|
|
readaddrs = recvpool.recvdata_base;
|
|
upd_proc_local->read = 0;
|
|
temp_read = 0;
|
|
temp_write = 0;
|
|
upd_rec_seqno = tupd_num = tcom_num = 0;
|
|
continue;
|
|
}
|
|
if (jnlpool.repl_inst_filehdr->is_supplementary)
|
|
{
|
|
assert(0 <= histinfo.strm_index);
|
|
assert((0 == histinfo.strm_index) || IS_REPL_INST_UUID_NON_NULL(histinfo.lms_group));
|
|
assert((0 != histinfo.strm_index) || IS_REPL_INST_UUID_NULL(histinfo.lms_group));
|
|
/* Check if this is an -UPDATERESYNC type of history record that starts a new stream of updates
|
|
* (or a -NORESYNC which creates a discontinuity from the previous stream).
|
|
* If so, reset all stream-related information (corresponding to the prior stream) in the db file
|
|
* hdr and journal pool. Otherwise if we crash after having processed one logical record after the
|
|
* history record, it is possible the regions unaffected by the logical update might have some
|
|
* garbage "strm_seqno" value which might affect the max_strm_seqno determination in case we do a
|
|
* rollback (this uses the strm_seqno values from the db file header) right after the crash.
|
|
*/
|
|
if ((HISTINFO_TYPE_NORESYNC == histinfo.history_type)
|
|
|| (HISTINFO_TYPE_UPDRESYNC == histinfo.history_type))
|
|
{ /* Also write an EPOCH with the new strm_seqno. This will help journal recovery know a fresh
|
|
* stream of updates begins and that the sequence of strm_seqnos might see a discontinuity.
|
|
*/
|
|
assert((0 < histinfo.strm_index) && (MAX_SUPPL_STRMS > histinfo.strm_index));
|
|
/* Assert strm_seqno is non-zero as this is going to be filled in cs_data and jnlpool_ctl.
|
|
* Note that in case of a root primary supplementary instance, the history record is
|
|
* received from a non-supplementary instance whose history record has a zero strm_seqno
|
|
* so use the start_seqno instead as the strm_seqno. On the other hand, in case of a
|
|
* propagating primary supplementary, the history record is received from a supplementary
|
|
* instance in which case the strm_seqno field would have been properly populated so use it.
|
|
*/
|
|
strm_seqno = (suppl_propagate_primary ? histinfo.strm_seqno : histinfo.start_seqno);
|
|
assert(strm_seqno);
|
|
save_reg = gv_cur_region;
|
|
for (curr = gld_db_files; NULL != curr; curr = curr->next)
|
|
{
|
|
TP_CHANGE_REG(curr->gd);
|
|
assert(!gv_cur_region->read_only);
|
|
grab_crit(gv_cur_region);
|
|
if (cs_addrs->onln_rlbk_cycle != cs_addrs->nl->onln_rlbk_cycle)
|
|
UPDPROC_ONLN_RLBK_CLNUP(gv_cur_region); /* No return */
|
|
cs_data->strm_reg_seqno[histinfo.strm_index] = strm_seqno;
|
|
/* Before doing "wcs_flu", do a "jnl_ensure_open" to ensure the journal file
|
|
* is open in shared memory as otherwise we might skip writing the EPOCH
|
|
* record in case this is the first history record that is being received on
|
|
* this instance and no other process on this instance has opened the
|
|
* journal file yet. But before doing jnl_ensure_open, do some time adjustments
|
|
* like is done in t_end.c (see comments there for details).
|
|
*/
|
|
jpc = cs_addrs->jnl;
|
|
DO_JNL_ENSURE_OPEN(cs_addrs, jpc); /* does "jnl_ensure_open" and pre-requisites */
|
|
wcs_flu(WCSFLU_FSYNC_DB | WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH);
|
|
rel_crit(gv_cur_region);
|
|
}
|
|
TP_CHANGE_REG(save_reg);
|
|
grab_lock(jnlpool.jnlpool_dummy_reg, GRAB_LOCK_ONLY);
|
|
if (repl_csa->onln_rlbk_cycle != jnlpool_ctl->onln_rlbk_cycle)
|
|
UPDPROC_ONLN_RLBK_CLNUP(jnlpool.jnlpool_dummy_reg); /* No return */
|
|
jnlpool_ctl->strm_seqno[histinfo.strm_index] = strm_seqno;
|
|
rel_lock(jnlpool.jnlpool_dummy_reg);
|
|
/* Ideally we also want to do a "repl_inst_flush_filehdr" to make the strm_seqno in the
|
|
* instance file header also uptodate but that is anyways going to be done as part of
|
|
* the "repl_inst_histinfo_add" call below so we skip doing it separately here.
|
|
*/
|
|
}
|
|
assert(jnlpool.jnlpool_ctl->upd_disabled || (strm_index == histinfo.strm_index));
|
|
}
|
|
/* Now that we have constructed the history, add it to the instance file. */
|
|
grab_lock(jnlpool.jnlpool_dummy_reg, GRAB_LOCK_ONLY);
|
|
if (repl_csa->onln_rlbk_cycle != jnlpool_ctl->onln_rlbk_cycle)
|
|
UPDPROC_ONLN_RLBK_CLNUP(jnlpool.jnlpool_dummy_reg); /* No return */
|
|
repl_inst_histinfo_add(&histinfo);
|
|
rel_lock(jnlpool.jnlpool_dummy_reg);
|
|
/* Dump the history contents AFTER the "repl_inst_histinfo_add" and "wcs_flu" above.
|
|
* This way, in case of a -updateresync or -noresync history record addition,
|
|
* (particularly for the first A->P connection), the user is guaranteed that
|
|
* seeing the history dump message indicates the receiver on P can be cleanly shutdown
|
|
* and a restart of the receiver does not need to use the -updateresync.
|
|
*/
|
|
repl_dump_histinfo(updproc_log_fp, TRUE, TRUE, "New History Content", &histinfo);
|
|
/* Update pointers to indicate this record is now processed and move on to the next. */
|
|
readaddrs += rec_len;
|
|
temp_read += rec_len;
|
|
upd_proc_local->read = temp_read;
|
|
continue;
|
|
}
|
|
# endif
|
|
/* NOTE: All journal sequence number fields are at same offset */
|
|
if (ROUND_DOWN2(rec_len, JNL_REC_START_BNDRY) != rec_len)
|
|
{ /* We need that so REC_LEN_FROM_SUFFIX does not access unaligned int */
|
|
bad_trans_type = upd_bad_forwptr;
|
|
assert(FALSE);
|
|
} else if ((0 == rec_len) || (rec_len != (backptr = REC_LEN_FROM_SUFFIX(readaddrs, rec_len))))
|
|
{
|
|
bad_trans_type = upd_bad_backptr;
|
|
assert(FALSE);
|
|
} else if (!IS_REPLICATED(rectype))
|
|
{
|
|
bad_trans_type = upd_rec_not_replicated;
|
|
assert(FALSE);
|
|
} else if (jnl_seqno != rec->jrec_null.jnl_seqno)
|
|
{
|
|
bad_trans_type = upd_bad_jnl_seqno;
|
|
assert(FALSE);
|
|
} else
|
|
{
|
|
bad_trans_type = upd_good_record;
|
|
assert((jnl_seqno < recvpool.recvpool_ctl->jnl_seqno) ||
|
|
(0 == recvpool.recvpool_ctl->jnl_seqno));
|
|
if (JRT_TCOM == rectype)
|
|
{
|
|
assert(0 != upd_rec_seqno); /* we know TCOM is not created without a SET/KILL/ZKILL */
|
|
if (0 != upd_rec_seqno)
|
|
tcom_num++;
|
|
} else if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
|
|
{
|
|
assert(!IS_ZTP(rectype));
|
|
keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
|
|
GTMTRIG_ONLY(
|
|
nodeflags = keystr->nodeflags;
|
|
TRIG_PROCESS_JNL_STR_NODEFLAGS(nodeflags);
|
|
)
|
|
key_len = keystr->length; /* local copy of shared recvpool key */
|
|
if ((MAX_KEY_SZ >= key_len && 0 < key_len && 0 == keystr->text[key_len - 1]) &&
|
|
(upd_good_record == updproc_get_gblname(keystr->text, key_len, gv_mname, &mname)))
|
|
{
|
|
if (IS_SET(rectype))
|
|
{
|
|
val_mv.mvtype = MV_STR;
|
|
val_ptr = &keystr->text[keystr->length];
|
|
GET_MSTR_LEN(val_mv.str.len, val_ptr); /* length of value validated later */
|
|
val_mv.str.addr = val_ptr + SIZEOF(mstr_len_t);
|
|
}
|
|
if (IS_FENCED(rectype))
|
|
{
|
|
if (IS_TP(rectype))
|
|
{
|
|
assert(0 <= tupd_num);
|
|
assert(0 == tcom_num);
|
|
if (0 > tupd_num || 0 != tcom_num)
|
|
{
|
|
bad_trans_type = upd_fence_bad_t_num;
|
|
assert(FALSE);
|
|
} else if (IS_TUPD(rectype))
|
|
{
|
|
ts_mv.mvtype = MV_STR;
|
|
ts_mv.str.len = 0;
|
|
ts_mv.str.addr = NULL;
|
|
assert((!dollar_tlevel && !tupd_num) || dollar_tlevel
|
|
&& (tupd_num || dollar_trestart));
|
|
if (!dollar_tlevel)
|
|
{
|
|
assert(!donot_INVOKE_MUMTSTART);
|
|
DEBUG_ONLY(donot_INVOKE_MUMTSTART = TRUE);
|
|
op_tstart(IMPLICIT_TSTART, TRUE, &ts_mv, 0);
|
|
/* 0 ==> save no locals but RESTART OK */
|
|
}
|
|
tupd_num++;
|
|
}
|
|
upd_rec_seqno++;
|
|
} else if (IS_FUPD(rectype))
|
|
op_ztstart();
|
|
} else if (0 != tupd_num)
|
|
{
|
|
bad_trans_type = upd_nofence_bad_tupd_num;
|
|
assert(FALSE);
|
|
}
|
|
} else
|
|
{
|
|
bad_trans_type = upd_bad_key;
|
|
assert(FALSE);
|
|
}
|
|
} else if (IS_ZTWORM(rectype))
|
|
{
|
|
assert(IS_FENCED(rectype));
|
|
assert(IS_TP(rectype));
|
|
assert(0 <= tupd_num);
|
|
assert(0 == tcom_num);
|
|
if (0 > tupd_num || 0 != tcom_num)
|
|
{
|
|
bad_trans_type = upd_fence_bad_ztworm_t_num;
|
|
assert(FALSE);
|
|
} else if (IS_TUPD(rectype))
|
|
{
|
|
ts_mv.mvtype = MV_STR;
|
|
ts_mv.str.len = 0;
|
|
ts_mv.str.addr = NULL;
|
|
assert((!dollar_tlevel && !tupd_num) || dollar_tlevel && (tupd_num || dollar_trestart));
|
|
if (!dollar_tlevel)
|
|
/* 0 ==> save no locals but RESTART OK */
|
|
op_tstart(IMPLICIT_TSTART, TRUE, &ts_mv, 0);
|
|
tupd_num++;
|
|
}
|
|
upd_rec_seqno++;
|
|
}
|
|
}
|
|
if (upd_good_record == bad_trans_type)
|
|
{
|
|
UNIX_ONLY(
|
|
if (suppl_propagate_primary)
|
|
{
|
|
rec_strm_seqno = GET_STRM_SEQNO(rec);
|
|
strm_index = GET_STRM_INDEX(rec_strm_seqno);
|
|
rec_strm_seqno = GET_STRM_SEQ60(rec_strm_seqno);
|
|
strm_seqno = jnlpool_ctl->strm_seqno[strm_index];
|
|
if (rec_strm_seqno != strm_seqno)
|
|
{
|
|
assert(FALSE);
|
|
rts_error(VARLSTCNT(5) ERR_STRMSEQMISMTCH, 3,
|
|
strm_index, &rec_strm_seqno, &strm_seqno);
|
|
}
|
|
}
|
|
)
|
|
if (JRT_NULL == rectype)
|
|
{ /* Play the NULL transaction into the database and journal files */
|
|
save_reg = gv_cur_region;
|
|
gv_cur_region = gld_db_files->gd;
|
|
tp_change_reg();
|
|
DEBUG_ONLY(csa = cs_addrs;)
|
|
assert(!csa->hold_onto_crit);
|
|
assert(!csa->now_crit);
|
|
gvcst_jrt_null();
|
|
incr_seqno = TRUE;
|
|
/* Restore gv_cur_region to what it was before the NULL record processing started. op_tstart
|
|
* relies on the fact that gv_target->gd_csa and cs_addrs be in sync. If prior to NULL record
|
|
* processing, gv_target->gd_csa pointed to AREG and after NULL record processing, gv_cur_region
|
|
* points to DEFAULT then the op_tstart assert DBG_CHECK_GVTARGET_CSADDRS_IN_SYNC will fail.
|
|
*/
|
|
TP_CHANGE_REG(save_reg);
|
|
} else if (JRT_TCOM == rectype)
|
|
{
|
|
if (tcom_num == tupd_num)
|
|
{
|
|
assert(0 != tcom_num);
|
|
memcpy(tcom_record.jnl_tid, rec->jrec_tcom.jnl_tid, TID_STR_SIZE);
|
|
op_tcommit();
|
|
if (dollar_tlevel)
|
|
{ /* op_tcommit restarted the transaction - do update process special
|
|
* handling for tpretry. The error below has special handling in a
|
|
* few condition handlers because it not so much signals an error
|
|
* as it does drive the necessary mechanisms to invoke a restart.
|
|
* Consequently this error can be overridden by a "real" error.
|
|
* For VMS, the extra parameters are specified to provide "placeholders"
|
|
* on the stack in the signal array if a real error needs to be
|
|
* overlayed in place of this one (see code in updproc_ch).
|
|
* Defined in tp.h, the below issues ERR_TPRETRY.
|
|
*/
|
|
INVOKE_RESTART;
|
|
}
|
|
DEBUG_ONLY(donot_INVOKE_MUMTSTART = FALSE);
|
|
/* Following two changes to dollar_ztwormhole reset the value of $ztwormhole to
|
|
* "undefined" which is important since it points into the receiver pool area
|
|
* that is about to be allowed to be overwritten. Prior to the next reference,
|
|
* to dollar_ztwormhole, it should be included in a journal record. Note since
|
|
* the ISV must always be defined op_svget will turn this state into an empty
|
|
* string if somehow $ZTWORMHOLE is referenced before the replicating instance
|
|
* receives a new value (like in a jobexam dump).
|
|
*/
|
|
GTMTRIG_ONLY(dollar_ztwormhole.mvtype = 0);
|
|
GTMTRIG_ONLY(dollar_ztwormhole.str.len = 0);
|
|
tcom_num = tupd_num = upd_rec_seqno = 0;
|
|
incr_seqno = TRUE;
|
|
}
|
|
} else if (JRT_ZTCOM == rectype)
|
|
{
|
|
assert(dollar_tlevel);
|
|
op_ztcommit(1);
|
|
incr_seqno = TRUE;
|
|
} else if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
|
|
{
|
|
key = keystr->text;
|
|
UPD_GV_BIND_NAME_APPROPRIATE(gd_header, mname, key, key_len); /* if ^#t do special processing */
|
|
/* the above would have set gv_target and gv_cur_region appropriately */
|
|
csa = &FILE_INFO(gv_cur_region)->s_addrs;
|
|
if (!REPL_ALLOWED(csa))
|
|
{ /* Replication/Journaling is NOT enabled on the database file that the current
|
|
* global maps to on the secondary even though it was enabled on the corresponding
|
|
* database file on the primary. Do NOT allow this update to happen as otherwise
|
|
* the journal seqno on the secondary database will get out-of-sync with that of
|
|
* the primary database.
|
|
*/
|
|
gtm_putmsg(VARLSTCNT(6) ERR_UPDREPLSTATEOFF, 4,
|
|
mname.len, mname.addr, DB_LEN_STR(gv_cur_region));
|
|
/* Shut down the update process normally */
|
|
upd_proc_local->upd_proc_shutdown = SHUTDOWN;
|
|
break;
|
|
}
|
|
memcpy(gv_currkey->base, keystr->text, keystr->length);
|
|
gv_currkey->base[keystr->length] = 0; /* second null of a key terminator */
|
|
gv_currkey->end = keystr->length;
|
|
if (gv_currkey->end + 1 > gv_cur_region->max_key_size)
|
|
{
|
|
bad_trans_type = upd_bad_key_size;
|
|
assert(gtm_white_box_test_case_enabled
|
|
&& (WBTEST_UPD_PROCESS_ERROR == gtm_white_box_test_case_number));
|
|
} else
|
|
{
|
|
/* Scan the global for two reasons :
|
|
* (a) Need to setup gv_currkey->prev as update process can invoke triggers which
|
|
* could use naked references which relies on gv_currkey->prev being properly set
|
|
* (b) Set gv_some_subsc_null and gv_last_subsc_null accordingly to issue GTM-E-NULSUBSC
|
|
* if needed.
|
|
*/
|
|
key = (char *)(gv_currkey->base);
|
|
keyend = gv_currkey->end;
|
|
cntr = last_nullsubs = last_subs = 0;
|
|
assert((0 < keyend) && (KEY_DELIMITER != *key)); /* we better not have an empty key */
|
|
assert((KEY_DELIMITER == key[keyend])
|
|
&& (KEY_DELIMITER == key[keyend - 1]));
|
|
keytop = (char *)((&gv_currkey->base[0]) + keyend);
|
|
scan_char = (unsigned char)(*key);
|
|
while (cntr < keyend)
|
|
{
|
|
if (key >= keytop)
|
|
{ /* We should never come here as this would mean that we are attempting
|
|
* to cross gv_currkey->base boundary. */
|
|
assert(FALSE);
|
|
break;
|
|
}
|
|
assert(scan_char == (unsigned char)(*key)); /* ensure that scan_char was updated in
|
|
* the previous iteration*/
|
|
next_char = (unsigned char)(*(key + 1));
|
|
/* null subscripts are identified based on whether the region has standard null
|
|
* collation or default null collation. If former, the sequence is 0x01 0x00.
|
|
* If latter, the sequence is 0xFF 0x00
|
|
*/
|
|
if (last_subs == cntr)
|
|
{ /* Beginning of a new subscript. Ensure that if we have standard null
|
|
* collation, then we better don't see 0xFF 0x00 at the start of a
|
|
* subscript. Similarly, if we have default null collation, we better not
|
|
* see 0x01 0x00 at the start of a subscript
|
|
*/
|
|
if (KEY_DELIMITER == next_char)
|
|
{
|
|
if (STR_SUB_PREFIX == scan_char)
|
|
{
|
|
VMS_ONLY(assert(!secondary_side_std_null_coll);)
|
|
UNIX_ONLY(
|
|
assert(!recvpool_ctl->this_side.is_std_null_coll);)
|
|
last_nullsubs = cntr;
|
|
} else if (SUBSCRIPT_STDCOL_NULL == scan_char)
|
|
{
|
|
VMS_ONLY(assert(secondary_side_std_null_coll);)
|
|
UNIX_ONLY(assert(recvpool_ctl->this_side.is_std_null_coll);)
|
|
last_nullsubs = cntr;
|
|
}
|
|
}
|
|
}
|
|
cntr++;
|
|
if ((KEY_DELIMITER == scan_char) && (KEY_DELIMITER != next_char))
|
|
{ /* New subscript. Note down the position for gv_currkey->prev. */
|
|
last_subs = cntr;
|
|
}
|
|
key++;
|
|
scan_char = next_char; /* set scan_char for next iteration */
|
|
}
|
|
assert(cntr == keyend);
|
|
assert(last_subs < keyend);
|
|
assert(last_nullsubs < keyend);
|
|
TREF(gv_some_subsc_null) = (last_nullsubs && (last_nullsubs < last_subs));
|
|
TREF(gv_last_subsc_null) = (last_nullsubs && (last_nullsubs == last_subs));
|
|
gv_currkey->prev = last_subs;
|
|
if (IS_KILL(rectype))
|
|
op_gvkill();
|
|
else if (IS_ZKILL(rectype))
|
|
op_gvzwithdraw();
|
|
# ifdef GTM_TRIGGER
|
|
else if (IS_ZTRIG(rectype))
|
|
op_ztrigger();
|
|
# endif
|
|
else
|
|
{
|
|
assert(IS_SET(rectype));
|
|
if (VMS_ONLY(keystr->length + 1 + SIZEOF(rec_hdr) +) val_mv.str.len >
|
|
gv_cur_region->max_rec_size)
|
|
{
|
|
bad_trans_type = upd_bad_val_size;
|
|
assert(gtm_white_box_test_case_enabled
|
|
&& (WBTEST_UPD_PROCESS_ERROR == gtm_white_box_test_case_number));
|
|
} else
|
|
op_gvput(&val_mv);
|
|
}
|
|
# ifdef GTM_TRIGGER
|
|
if (!gv_target->trig_mismatch_test_done)
|
|
{
|
|
gv_target->trig_mismatch_test_done = TRUE; /* reset only in targ_alloc */
|
|
primary_has_trigdef = (0 != (nodeflags & JS_HAS_TRIGGER_MASK));
|
|
secondary_has_trigdef = (NULL != gv_target->gvt_trigger);
|
|
if (primary_has_trigdef != secondary_has_trigdef)
|
|
{ /* trigger definitions out-of-sync between the primary and secondary.
|
|
* Issue warning in operator log
|
|
*/
|
|
if (primary_has_trigdef)
|
|
{
|
|
trigdef_inst = "originating";
|
|
no_trigdef_inst = "replicating";
|
|
} else
|
|
{
|
|
trigdef_inst = "replicating";
|
|
no_trigdef_inst = "originating";
|
|
}
|
|
send_msg(VARLSTCNT(9) ERR_TRIGDEFNOSYNC, 7, mname.len, mname.addr,
|
|
LEN_AND_STR(trigdef_inst), LEN_AND_STR(no_trigdef_inst),
|
|
&jnl_seqno);
|
|
}
|
|
}
|
|
# endif
|
|
if ((upd_good_record == bad_trans_type) && !IS_TP(rectype))
|
|
incr_seqno = TRUE;
|
|
if (disk_blk_read || 0 >= csa->n_pre_read_trigger)
|
|
{
|
|
csd = csa->hdr;
|
|
upd_helper_ctl->first_done = FALSE;
|
|
upd_helper_ctl->pre_read_offset = temp_read + rec_len;
|
|
REPL_DPRINT2("pre_read_offset = %x\n", upd_helper_ctl->pre_read_offset);
|
|
csa->n_pre_read_trigger = (int)((csd->n_bts * (float)csd->reserved_for_upd /
|
|
csd->avg_blks_per_100gbl) * csd->pre_read_trigger_factor / 100.0);
|
|
} else
|
|
csa->n_pre_read_trigger--;
|
|
disk_blk_read = FALSE;
|
|
}
|
|
} else if (IS_ZTWORM(rectype))
|
|
{
|
|
assert(dollar_tlevel); /* op_tstart should already have been done */
|
|
val_mv.mvtype = MV_STR;
|
|
val_mv.str.len = rec->jrec_ztworm.ztworm_str.length;
|
|
val_mv.str.addr = &rec->jrec_ztworm.ztworm_str.text[0];
|
|
op_svput(SV_ZTWORMHOLE, &val_mv);
|
|
}
|
|
}
|
|
if (upd_good_record != bad_trans_type)
|
|
{
|
|
tmpseqno = IS_REPLICATED(rectype) ? rec->jrec_null.jnl_seqno : jnl_seqno;
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"-> Bad trans :: bad_trans_type = %ld type = %ld len = %ld backptr = %ld jnl_seqno = %llu "
|
|
"[0x%llx]\n", bad_trans_type, rectype, rec_len, backptr, tmpseqno, tmpseqno);
|
|
upd_proc_local->bad_trans = TRUE;
|
|
/* We don't expect to be holding crit on any region in case of a bad_trans.
|
|
* Nevertheless release crit in PRO just in case we hold it.
|
|
*/
|
|
assert(0 == have_crit(CRIT_HAVE_ANY_REG));
|
|
if (dollar_tlevel)
|
|
{ /* Copy gv_currkey into temp variable before TROLLBACK overwrites the current variable. */
|
|
gv_failed_key = (gv_key *)malloc(SIZEOF(gv_key) + gv_currkey->end);
|
|
memcpy(gv_failed_key, gv_currkey, SIZEOF(gv_key) + gv_currkey->end);
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"OP_TROLLBACK IS CALLED -->Bad trans :: dollar_tlevel = %ld\n", dollar_tlevel);
|
|
OP_TROLLBACK(0); /* this should also release crit (if any) on all regions in TP */
|
|
assert(!dollar_tlevel);
|
|
} else
|
|
{ /* Non-TP : Release crit if any */
|
|
save_reg = gv_cur_region;
|
|
if ((NULL != save_reg) && save_reg->open)
|
|
{
|
|
csa = (sgmnt_addrs *)&FILE_INFO(save_reg)->s_addrs;
|
|
assert(NULL != csa); /* since save_reg->open is TRUE */
|
|
if (csa->now_crit)
|
|
rel_crit(save_reg);
|
|
}
|
|
}
|
|
assert(0 == have_crit(CRIT_HAVE_ANY_REG));
|
|
readaddrs = recvpool.recvdata_base;
|
|
upd_proc_local->read = 0;
|
|
temp_read = 0;
|
|
temp_write = 0;
|
|
upd_rec_seqno = tupd_num = tcom_num = 0;
|
|
/* KEY2BIG and REC2BIG are cases for which we need to make sure it is not a transmission hiccup before we
|
|
* go ahead and do the rts_error(GVSUBOFLOW) or rts_error(REC2BIG). That is the reason we give those two
|
|
* errors a second chance. Other errors are handled by either throwing an rts_error or asking for an
|
|
* unconditional re-transmission (as opposed to only two attempts for GVSUBOFLOW and REC2BIG). By asking for
|
|
* a re-transmission we increase our confidence level that this is a configuration issue (with smaller
|
|
* keysize on the secondary) and proceed with an rts_error if we see the same symptom. It is possible we
|
|
* might have two successive transmissions having the exact same corruption, but that is highly unlikely.
|
|
*/
|
|
if (last_errored_seqno == jnl_seqno)
|
|
{
|
|
last_errored_seqno = 0;
|
|
switch(bad_trans_type)
|
|
{
|
|
case upd_bad_key_size: /* Not using ISSUE_GVSUBOFLOW_ERROR in order to free gv_failed_key */
|
|
gv_failed_key_ptr = ((NULL == gv_failed_key) ? gv_currkey : gv_failed_key);
|
|
/* Assert that input key to format_targ_key is double null terminated */
|
|
assert(KEY_DELIMITER == gv_failed_key_ptr->base[gv_failed_key_ptr->end]);
|
|
/* Note: might update "endBuff" */
|
|
endBuff = format_targ_key(fmtBuff, ARRAYSIZE(fmtBuff), gv_failed_key_ptr, TRUE);
|
|
GV_SET_LAST_SUBSCRIPT_INCOMPLETE(fmtBuff, endBuff);
|
|
if (NULL != gv_failed_key) /* Free memory if it has been used */
|
|
free(gv_failed_key);
|
|
rts_error(VARLSTCNT(6) ERR_GVSUBOFLOW, 0, ERR_GVIS, 2, endBuff - fmtBuff, fmtBuff);
|
|
break;
|
|
case upd_bad_val_size:
|
|
if (0 == (end = format_targ_key(buff, MAX_ZWR_KEY_SZ,
|
|
((NULL == gv_failed_key) ? gv_currkey : gv_failed_key), TRUE)))
|
|
end = &buff[MAX_ZWR_KEY_SZ - 1];
|
|
if (NULL != gv_failed_key) /* Free memory if it has been used */
|
|
free(gv_failed_key);
|
|
rts_error(VARLSTCNT(10) ERR_REC2BIG, 4,
|
|
VMS_ONLY(gv_currkey->end + 1 + SIZEOF(rec_hdr) +) val_mv.str.len,
|
|
(int4)gv_cur_region->max_rec_size, REG_LEN_STR(gv_cur_region),
|
|
ERR_GVIS, 2, end - buff, buff);
|
|
break;
|
|
}
|
|
} else
|
|
{
|
|
gv_currkey->base[0] = KEY_DELIMITER;
|
|
last_errored_seqno = jnl_seqno;
|
|
/* Free memory on the first unsuccessful attempt if gv_failed_key has been previously used */
|
|
if (NULL != gv_failed_key)
|
|
free(gv_failed_key);
|
|
}
|
|
continue;
|
|
}
|
|
if (incr_seqno)
|
|
{
|
|
if (jnl_seqno - lastlog_seqno >= log_interval)
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE, "Rectype = %d Committed Jnl seq no is : "
|
|
INT8_FMT" "INT8_FMTX"\n", rectype, INT8_PRINT(jnl_seqno), INT8_PRINTX(jnl_seqno));
|
|
lastlog_seqno = jnl_seqno;
|
|
}
|
|
upd_proc_local->read_jnl_seqno = ++jnl_seqno;
|
|
/* Determine if all updates that we play from the receive pool do end up incrementing the jnl_seqno of the
|
|
* journal pool. In case of a root primary supplementary instance, we need to do this check only for the
|
|
* non-supplementary stream of interest (strm_index) that this update process is processing.
|
|
*/
|
|
UNIX_ONLY(
|
|
assert(!suppl_root_primary || ((0 < strm_index) && (MAX_SUPPL_STRMS > strm_index)));
|
|
jnlpool_ctl_seqno = (!suppl_root_primary ? jnlpool_ctl->jnl_seqno
|
|
: jnlpool_ctl->strm_seqno[strm_index]);
|
|
)
|
|
VMS_ONLY(jnlpool_ctl_seqno = jnlpool_ctl->jnl_seqno;)
|
|
if (jnlpool_ctl_seqno)
|
|
{ /* Now that the update process has played an incoming seqno, we expect it to have incremented
|
|
* the corresponding jnl_seqno and strm_seqno fields in the current instance's journal pool
|
|
* as well. Not doing so will cause the source and receiver instances to go out of sync.
|
|
* We know of 4 ways in which this can occur and all of them have already been handled.
|
|
* 1) UPDREPLSTATEOFF error.
|
|
* 2) error_on_jnl_file_lost = JNL_FILE_LOST_ERRORS;
|
|
* 3) Duplicate KILL is journaled and increments seqno even though it does not touch db.
|
|
* 4) A concurrent online rollback on this instance
|
|
* First 3 cases are not expected in a typical update process. But, the 4th case is expected in a
|
|
* typical run. So, as long as the out-of-sync is due to the first 3 cases, stop right away and get
|
|
* a core dump for further analysis. In case of an online rollback, it is okay for us to continue
|
|
* to the next iteration which will eventually detect the online rollback (as part of commit or
|
|
* before that in the idle wait loop) and take appropriate action.
|
|
*/
|
|
if ((upd_proc_local->read_jnl_seqno != jnlpool_ctl_seqno)
|
|
UNIX_ONLY(&& (repl_csa->onln_rlbk_cycle == jnlpool_ctl->onln_rlbk_cycle)))
|
|
{
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"JNLSEQNO last updated by update process = "INT8_FMT" "INT8_FMTX"\n",
|
|
INT8_PRINT(upd_proc_local->read_jnl_seqno),
|
|
INT8_PRINTX(upd_proc_local->read_jnl_seqno));
|
|
repl_log(updproc_log_fp, TRUE, TRUE,
|
|
"JNLSEQNO of last transaction written to journal pool = "INT8_FMT" "INT8_FMTX"\n",
|
|
INT8_PRINT(jnlpool_ctl_seqno), INT8_PRINTX(jnlpool_ctl_seqno));
|
|
GTMASSERT;
|
|
}
|
|
}
|
|
}
|
|
readaddrs += rec_len;
|
|
temp_read += rec_len;
|
|
if (0 == tupd_num)
|
|
upd_proc_local->read = temp_read;
|
|
}
|
|
REVERT; /* of updproc_ch() */
|
|
updproc_continue = FALSE;
|
|
}
|