fis-gtm/sr_port/anticipatory_freeze.h

345 lines
13 KiB
C

/****************************************************************
* *
* Copyright 2012, 2013 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#ifndef _ANTICIPATORY_FREEZE_H
#define _ANTICIPATORY_FREEZE_H
#ifdef UNIX
#include "gtm_time.h" /* needed for GET_CUR_TIME */
#include "gdsroot.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "repl_msg.h" /* needed for gtmsource.h */
#include "gtmsource.h" /* needed for jnlpool_addrs typedef */
#include "sleep_cnt.h" /* needed for SLEEP_INSTFREEZEWAIT macro */
#include "wait_for_disk_space.h" /* needed by DB_LSEEKWRITE macro for prototype */
#include "gtmimagename.h" /* needed for IS_GTM_IMAGE */
/* Freeze-on-error entry points (defined elsewhere). Within this header they are only reached through the
 * function pointers declared further down: is_anticipatory_freeze_needed by CHECK_IF_FREEZE_ON_ERROR_NEEDED
 * and set_anticipatory_freeze by FREEZE_INSTANCE_IF_NEEDED.
 */
boolean_t is_anticipatory_freeze_needed(sgmnt_addrs *csa, int msg_id);
void set_anticipatory_freeze(sgmnt_addrs *csa, int msg_id);
/* NOTE(review): not referenced by any macro in this header; presumably loads the set of errors that
 * trigger a freeze - confirm against its definition.
 */
boolean_t init_anticipatory_freeze_errors(void);
/* Define function pointers to certain functions to keep executables like gtmsecshr from unnecessarily
 * linking with these functions (which causes the database/replication stuff to be pulled in).
 * ENABLE_FREEZE_ON_ERROR points them at the functions above; CHECK_IF_FREEZE_ON_ERROR_NEEDED tests the
 * "needed" pointer for NULL before calling through it.
 */
typedef boolean_t (*is_anticipatory_freeze_needed_t)(sgmnt_addrs *csa, int msgid);
typedef void (*set_anticipatory_freeze_t)(sgmnt_addrs *csa, int msg_id);
GBLREF is_anticipatory_freeze_needed_t is_anticipatory_freeze_needed_fnptr;
GBLREF set_anticipatory_freeze_t set_anticipatory_freeze_fnptr;
/* pool_init is not referenced by the macros in this header; mupip_jnl_recover is read by INSTANCE_FREEZE_HONORED */
GBLREF boolean_t pool_init;
GBLREF boolean_t mupip_jnl_recover;
#ifdef DEBUG
/* Written by the debug-only FAKE_ENOSPC macro with one of the LSEEKWRITE_IS_TO_* values */
GBLREF uint4 lseekwrite_target;
#endif
/* Message ids referenced by the macros below (ERR_MUNOACTION is declared here but not used in this header) */
error_def(ERR_MUINSTFROZEN);
error_def(ERR_MUINSTUNFROZEN);
error_def(ERR_MUNOACTION);
error_def(ERR_REPLINSTFREEZECOMMENT);
error_def(ERR_REPLINSTFROZEN);
error_def(ERR_REPLINSTUNFROZEN);
error_def(ERR_TEXT);
/* Arm freeze-on-error handling for the current process. When ANTICIPATORY_FREEZE_AVAILABLE holds, both
 * global function pointers are pointed at the real implementations so that they can be invoked
 * indirectly later (in send_msg and rts_error). Otherwise the pointers are left untouched.
 * ANTICIPATORY_FREEZE_AVAILABLE is built on TREF, so the invoking scope has to provide whatever TREF needs.
 */
#define ENABLE_FREEZE_ON_ERROR	\
{	\
	if (ANTICIPATORY_FREEZE_AVAILABLE)	\
	{	\
		set_anticipatory_freeze_fnptr = &set_anticipatory_freeze;	\
		is_anticipatory_freeze_needed_fnptr = &is_anticipatory_freeze_needed;	\
	}	\
}
/* Decide whether reporting error MSG_ID should lead to an instance freeze. The freeze itself is NOT done here.
 *	CSA		- pointer (cast to sgmnt_addrs *) handed to the "is freeze needed" function
 *	MSG_ID		- id of the error being reported
 *	FREEZE_NEEDED	- lvalue, in/out: if already TRUE nothing is checked; set to TRUE when a freeze is called for
 *	FREEZE_MSG_ID	- lvalue, out: set to MSG_ID whenever FREEZE_NEEDED is set by this macro
 * The check is made only if freeze-on-error is available, the function pointer has been set up
 * (see ENABLE_FREEZE_ON_ERROR) and the instance is attached and currently not frozen.
 * All macro arguments are parenthesized so that expression arguments bind as the caller intended.
 */
#define CHECK_IF_FREEZE_ON_ERROR_NEEDED(CSA, MSG_ID, FREEZE_NEEDED, FREEZE_MSG_ID)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	DCL_THREADGBL_ACCESS;	\
	\
	SETUP_THREADGBL_ACCESS;	\
	if (!(FREEZE_NEEDED) && ANTICIPATORY_FREEZE_AVAILABLE && (NULL != is_anticipatory_freeze_needed_fnptr))	\
	{	/* NOT gtmsecshr (which never sets up the function pointer) */	\
		if (IS_REPL_INST_UNFROZEN	\
			&& (*is_anticipatory_freeze_needed_fnptr)((sgmnt_addrs *)(CSA), (MSG_ID)))	\
		{	\
			(FREEZE_NEEDED) = TRUE;	\
			(FREEZE_MSG_ID) = (MSG_ID);	\
		}	\
	}	\
}
/* Carry out the freeze decided on by CHECK_IF_FREEZE_ON_ERROR_NEEDED.
 *	CSA		- pointer (cast to sgmnt_addrs *) handed to the "set freeze" function
 *	FREEZE_NEEDED	- when false, the macro does nothing
 *	FREEZE_MSG_ID	- message id handed to the "set freeze" function
 * After the freeze is set, REPLINSTFROZEN (with the instance name) and REPLINSTFREEZECOMMENT (with the
 * freeze comment stored in the journal pool) are sent through send_msg_csa.
 * The function pointer is only assert-checked, so FREEZE_NEEDED must not be TRUE unless
 * freeze-on-error has been enabled in this process.
 */
#define FREEZE_INSTANCE_IF_NEEDED(CSA, FREEZE_NEEDED, FREEZE_MSG_ID)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	if (FREEZE_NEEDED)	\
	{	\
		assert(NULL != set_anticipatory_freeze_fnptr);	\
		(*set_anticipatory_freeze_fnptr)((sgmnt_addrs *)(CSA), (FREEZE_MSG_ID));	\
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_REPLINSTFROZEN, 1,	\
				jnlpool.repl_inst_filehdr->inst_info.this_instname);	\
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1,	\
				jnlpool.jnlpool_ctl->freeze_comment);	\
	}	\
}
/* Lift an instance freeze if one is in effect.
 *	FREEZE_CLEARED	- lvalue, out: set to TRUE only when a freeze was actually cleared; otherwise left alone
 * Nothing happens when the process is not attached to the journal pool or the instance is not frozen.
 * No message is issued here; see REPORT_INSTANCE_UNFROZEN for that.
 */
#define CLEAR_ANTICIPATORY_FREEZE(FREEZE_CLEARED)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	if (IS_REPL_INST_FROZEN)	\
	{	/* record the outcome for the caller and reset the freeze indicator in the journal pool */	\
		FREEZE_CLEARED = TRUE;	\
		jnlpool.jnlpool_ctl->freeze = 0;	\
	}	\
}
/* Companion of CLEAR_ANTICIPATORY_FREEZE: when FREEZE_CLEARED is true, send REPLINSTUNFROZEN, carrying the
 * instance name taken from the replication instance file header, through send_msg_csa.
 */
#define REPORT_INSTANCE_UNFROZEN(FREEZE_CLEARED)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	if (FREEZE_CLEARED)	\
	{	\
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_REPLINSTUNFROZEN, 1,	\
				jnlpool.repl_inst_filehdr->inst_info.this_instname);	\
	}	\
}
/* NOTE(review): AFREEZE_MASK is not used by any macro in this header; its consumer lives elsewhere */
#define AFREEZE_MASK				0x01
/* TRUE when TREF(gtm_custom_errors) has a non-zero length. Needs thread-global access at the point of use. */
#define ANTICIPATORY_FREEZE_AVAILABLE		(0 != (TREF(gtm_custom_errors)).len)
/* TRUE when an instance freeze applies to the database described by CSA: the process is attached to the
 * journal pool AND (replication is allowed for the database, OR this is a recover/rollback, OR an online
 * rollback pid is recorded in the database's node_local). CSA must be non-NULL (checked by DBG_ASSERT).
 * CSA is parenthesized before being cast so that expression arguments are cast as a whole.
 * Expects "jnlpool" to be visible at the point of use.
 */
#define INSTANCE_FREEZE_HONORED(CSA)		(DBG_ASSERT(NULL != (CSA))	\
						((NULL != jnlpool.jnlpool_ctl)	\
							&& ((REPL_ALLOWED(((sgmnt_addrs *)(CSA))->hdr))	\
								|| mupip_jnl_recover	/* recover or rollback */	\
								|| ((sgmnt_addrs *)(CSA))->nl->onln_rlbk_pid)))
/* TRUE when, in addition to INSTANCE_FREEZE_HONORED, freeze-on-error is available and the database file
 * header has freeze_on_fail set.
 */
#define ANTICIPATORY_FREEZE_ENABLED(CSA)	(INSTANCE_FREEZE_HONORED(CSA)	\
							&& ANTICIPATORY_FREEZE_AVAILABLE	\
							&& (((sgmnt_addrs *)(CSA))->hdr->freeze_on_fail))
/* Both of the following are FALSE while the process is not attached to the journal pool */
#define IS_REPL_INST_FROZEN			((NULL != jnlpool.jnlpool_ctl) && jnlpool.jnlpool_ctl->freeze)
#define IS_REPL_INST_UNFROZEN			((NULL != jnlpool.jnlpool_ctl) && !jnlpool.jnlpool_ctl->freeze)
/* printf-style template used by GENERATE_INST_FROZEN_COMMENT: process id, then the tag of the triggering error */
#define INST_FROZEN_COMMENT			"PID %d encountered %s; Instance frozen"
/* Resolve message id MSG_ID to its message information.
 *	MSG_ID	- message id to look up
 *	ERRMSG	- out: filled in by GET_MSG_INFO from the control structure that err_check returns
 * err_check is expected to recognize MSG_ID; that is only verified by an assert.
 */
#define MSGID_TO_ERRMSG(MSG_ID, ERRMSG)	\
{	\
	const err_ctl	*ctl = err_check(MSG_ID);	\
	\
	assert(NULL != ctl);	\
	GET_MSG_INFO(MSG_ID, ctl, ERRMSG);	\
}
/* Build the instance freeze comment for error MSG_ID.
 *	BUF, BUF_LEN	- destination buffer and its size, handed to SNPRINTF
 *	MSG_ID		- id of the error that caused the freeze; its "tag" field goes into the comment
 * The text follows the INST_FROZEN_COMMENT template: this process's id, then the error's tag.
 */
#define GENERATE_INST_FROZEN_COMMENT(BUF, BUF_LEN, MSG_ID)	\
{	\
	GBLREF uint4	process_id;	\
	const err_msg	*msginfo;	\
	\
	MSGID_TO_ERRMSG(MSG_ID, msginfo);	\
	/* INST_FROZEN_COMMENT prints the pid with %d whereas process_id is declared uint4 (presumably	\
	 * unsigned), so convert explicitly to the type the conversion specifier expects.	\
	 */	\
	SNPRINTF(BUF, BUF_LEN, INST_FROZEN_COMMENT, (int)process_id, msginfo->tag);	\
}
/* This is the version of the macro which waits for the instance freeze to be lifted assuming the process has
 * already attached to the journal pool. The wait is done only if the instance freeze applies to the input
 * database, which is what INSTANCE_FREEZE_HONORED(CSA) checks below (journal pool attached and the database
 * is replicated, or this is a recover/rollback, or an online rollback pid is recorded for the database).
 * Images other than GT.M (IS_GTM_IMAGE is false) print MUINSTFROZEN before the wait and MUINSTUNFROZEN after
 * it, each with the current time, the instance name and DB_LEN_STR of the region. The macro does not itself
 * check that the instance is frozen; WAIT_FOR_REPL_INST_UNFREEZE_SAFE does that before invoking it.
 * The polling is done by WAIT_FOR_REPL_INST_UNFREEZE_NOCSA.
 * Note: Do not use "hiber_start" as that uses timers and if we are already in a timer handler now, nested timers
 * won't work. Since SHORT_SLEEP allows a max of 1000, the poll interval (SLEEP_INSTFREEZEWAIT) was chosen as
 * 500, i.e. half a second (NOTE(review): value per the original comment - confirm against sleep_cnt.h).
 * "jnlpool" is declared locally, like in the sibling macros, so the macro does not depend on the invoking
 * scope to have it visible. CSA is parenthesized before being cast.
 */
#define WAIT_FOR_REPL_INST_UNFREEZE(CSA)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	gd_region		*reg;	\
	char			*time_ptr, time_str[CTIME_BEFORE_NL + 2]; /* for GET_CUR_TIME macro */	\
	now_t			now;	\
	DCL_THREADGBL_ACCESS;	\
	\
	SETUP_THREADGBL_ACCESS;	\
	assert(NULL != (CSA));	\
	if (INSTANCE_FREEZE_HONORED(CSA))	\
	{	\
		reg = ((sgmnt_addrs *)(CSA))->region;	\
		if (!IS_GTM_IMAGE)	\
		{	/* announce the wait */	\
			GET_CUR_TIME;	\
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_MUINSTFROZEN, 5, CTIME_BEFORE_NL, time_ptr,	\
					jnlpool.repl_inst_filehdr->inst_info.this_instname, DB_LEN_STR(reg));	\
		}	\
		WAIT_FOR_REPL_INST_UNFREEZE_NOCSA;	\
		if (!IS_GTM_IMAGE)	\
		{	/* announce that the wait is over */	\
			GET_CUR_TIME;	\
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_MUINSTUNFROZEN, 5, CTIME_BEFORE_NL, time_ptr,	\
					jnlpool.repl_inst_filehdr->inst_info.this_instname, DB_LEN_STR(reg));	\
		}	\
	}	\
}
/* Safer flavor of WAIT_FOR_REPL_INST_UNFREEZE for callers that cannot be sure the process has attached to
 * the journal pool yet. IS_REPL_INST_FROZEN is FALSE while unattached, so in that case the instance is
 * treated as not frozen and there is no wait. CSA must be non-NULL (asserted).
 */
#define WAIT_FOR_REPL_INST_UNFREEZE_SAFE(CSA)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	assert(NULL != CSA);	\
	if (IS_REPL_INST_FROZEN)	\
	{	\
		WAIT_FOR_REPL_INST_UNFREEZE(CSA);	\
	}	\
}
/* The macros below are the counterparts of the ones above for callers that have no CSA to check against.
 * WAIT_FOR_REPL_INST_UNFREEZE_NOCSA requires the process to be attached to the journal pool (asserted) and
 * polls the journal pool's freeze indicator, sleeping SLEEP_INSTFREEZEWAIT (through SHORT_SLEEP) between checks.
 * If exit_state turns non-zero during the wait, forced_exit_err is reported through both send_msg_csa and
 * gtm_putmsg_csa and the process exits with status -exi_condition.
 */
#define WAIT_FOR_REPL_INST_UNFREEZE_NOCSA	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	GBLREF volatile int4	exit_state;	\
	GBLREF int4		exi_condition;	\
	GBLREF int4		forced_exit_err;	\
	\
	assert(NULL != jnlpool.jnlpool_ctl);	\
	/* No database is involved here, so the wait lasts for as long as the instance stays frozen */	\
	while (jnlpool.jnlpool_ctl->freeze)	\
	{	\
		if (0 != exit_state)	\
		{	/* an exit is pending: report it and terminate rather than keep waiting */	\
			send_msg_csa(CSA_ARG(NULL) VARLSTCNT(1) forced_exit_err);	\
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) forced_exit_err);	\
			exit(-exi_condition);	\
		}	\
		SHORT_SLEEP(SLEEP_INSTFREEZEWAIT);	\
		DEBUG_ONLY(CLEAR_FAKE_ENOSPC_IF_MASTER_DEAD);	\
	}	\
}
/* Safe flavor of WAIT_FOR_REPL_INST_UNFREEZE_NOCSA: waits only when the process is attached to the journal
 * pool and the instance is frozen (IS_REPL_INST_FROZEN); otherwise it is a no-op.
 */
#define WAIT_FOR_REPL_INST_UNFREEZE_NOCSA_SAFE	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	if (IS_REPL_INST_FROZEN)	\
	{	\
		WAIT_FOR_REPL_INST_UNFREEZE_NOCSA;	\
	}	\
}
/* GTM_DB_FSYNC/GTM_JNL_FSYNC are similar to GTM_FSYNC except that the fsync is held back (the process hangs)
 * while the instance is detected to be frozen; the fsync proceeds once the freeze clears.
 * CSA identifies the database the fsync is for. It may be NULL only while the process is not attached to the
 * journal pool (asserted), in which case neither the wait nor the statistics update happens.
 * GTM_REPL_INST_FSYNC is different in that instance freezes are currently not honored for replication
 * instance file writes.
 *	CSA	- database the fsync is for, or NULL
 *	FD, RC	- handed to GTM_FSYNC as is
 * When CSA has a node_local attached, its n_db_fsync counter is bumped by one before the fsync.
 */
#define GTM_DB_FSYNC(CSA, FD, RC)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	node_local_ptr_t	cnl;	\
	\
	assert((NULL != CSA) || (NULL == jnlpool.jnlpool_ctl));	\
	if (NULL != CSA)	\
	{	\
		WAIT_FOR_REPL_INST_UNFREEZE_SAFE(CSA);	\
		cnl = (CSA)->nl;	\
		if (NULL != cnl)	\
		{	\
			INCR_GVSTATS_COUNTER((CSA), cnl, n_db_fsync, 1);	\
		}	\
	}	\
	GTM_FSYNC(FD, RC);	\
}
/* Journal file counterpart of GTM_DB_FSYNC: wait out an instance freeze (only when CSA is non-NULL), bump the
 * n_jnl_fsync counter when CSA has a node_local attached, then do the GTM_FSYNC.
 *	CSA	- database whose journal is being synced; may be NULL only while the process is not attached
 *		  to the journal pool (asserted)
 *	FD, RC	- handed to GTM_FSYNC as is
 */
#define GTM_JNL_FSYNC(CSA, FD, RC)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	node_local_ptr_t	cnl;	\
	\
	assert((NULL != CSA) || (NULL == jnlpool.jnlpool_ctl));	\
	if (NULL != CSA)	\
	{	\
		WAIT_FOR_REPL_INST_UNFREEZE_SAFE(CSA);	\
		cnl = (CSA)->nl;	\
		if (NULL != cnl)	\
		{	\
			INCR_GVSTATS_COUNTER((CSA), cnl, n_jnl_fsync, 1);	\
		}	\
	}	\
	GTM_FSYNC(FD, RC);	\
}
/* fsync of a replication instance file: plain GTM_FSYNC, i.e. no wait for an instance freeze to clear */
#define GTM_REPL_INST_FSYNC(FD, RC) GTM_FSYNC(FD, RC)
/* Identify what kind of file an lseek+write is aimed at. DB_LSEEKWRITE and JNL_LSEEKWRITE pass the DB and JNL
 * values to DO_LSEEKWRITE, from where (debug builds only) FAKE_ENOSPC stores them in lseekwrite_target.
 */
#define LSEEKWRITE_IS_TO_NONE 0
#define LSEEKWRITE_IS_TO_DB 1
#define LSEEKWRITE_IS_TO_JNL 2
#ifdef DEBUG
/* Debug-only fault injection: make a write look as if it had failed with ENOSPC.
 *	CSA			- database involved (cast to sgmnt_addrs *); nothing is done when NULL
 *	FAKE_WHICH_ENOSPC	- name of the node_local field (fake_db_enospc or fake_jnl_enospc) that requests the fault
 *	LSEEKWRITE_TARGET	- LSEEKWRITE_IS_TO_* value stored in lseekwrite_target when the fault is injected
 *	LCL_STATUS		- lvalue, in/out: overwritten with ENOSPC when the fault is injected
 * Two independent triggers exist:
 *   - white-box test WBTEST_RECOVER_ENOSPC: every invocation bumps gtm_wbox_input_test_case_count; once that
 *     count reaches a non-zero gtm_white_box_test_case_count, ENOSPC is injected (with a one-time ERR_TEXT
 *     message when the two counts are exactly equal);
 *   - otherwise, for any image but DSE, when the process is attached to the journal pool and the node_local
 *     field named by FAKE_WHICH_ENOSPC is set.
 * In non-debug builds the macro expands to an empty block.
 */
#define FAKE_ENOSPC(CSA, FAKE_WHICH_ENOSPC, LSEEKWRITE_TARGET, LCL_STATUS)	\
{	\
	GBLREF jnlpool_addrs	jnlpool;	\
	\
	if (NULL != (CSA))	\
	{	\
		if (WBTEST_ENABLED(WBTEST_RECOVER_ENOSPC))	\
		{	/* This test case is only used by mupip */	\
			gtm_wbox_input_test_case_count++;	\
			if ((0 != gtm_white_box_test_case_count)	\
				&& (gtm_white_box_test_case_count <= gtm_wbox_input_test_case_count))	\
			{	\
				(LCL_STATUS) = ENOSPC;	\
				if (gtm_white_box_test_case_count == gtm_wbox_input_test_case_count)	\
				{	\
					send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2,	\
						LEN_AND_LIT("Turning on fake ENOSPC for exit status test"));	\
				}	\
			}	\
		} else if (!IS_DSE_IMAGE	/* DSE does not freeze so let it work as normal */	\
			&& ((NULL != jnlpool.jnlpool_ctl) && (NULL != ((sgmnt_addrs *)(CSA))->nl))	\
			&& ((sgmnt_addrs *)(CSA))->nl->FAKE_WHICH_ENOSPC)	\
		{	\
			(LCL_STATUS) = ENOSPC;	\
			lseekwrite_target = (LSEEKWRITE_TARGET);	\
		}	\
	}	\
}
/* Debug-only hook invoked (under DEBUG_ONLY) from the WAIT_FOR_REPL_INST_UNFREEZE_NOCSA polling loop */
void clear_fake_enospc_if_master_dead(void);
#define CLEAR_FAKE_ENOSPC_IF_MASTER_DEAD	clear_fake_enospc_if_master_dead()
#else
/* Parameter list mirrors the debug variant; none of the arguments is evaluated */
#define FAKE_ENOSPC(CSA, FAKE_WHICH_ENOSPC, LSEEKWRITE_TARGET, LCL_STATUS)	{}
#endif
/* lseek+write to a database file: DO_LSEEKWRITE with the database flavor of the debug fault-injection field
 * (fake_db_enospc) and target id (LSEEKWRITE_IS_TO_DB).
 */
#define DB_LSEEKWRITE(CSA, DB_FN, FD, NEW_EOF, BUFF, SIZE, STATUS)	\
	DO_LSEEKWRITE(CSA, DB_FN, FD, NEW_EOF, BUFF, SIZE, STATUS, fake_db_enospc, LSEEKWRITE_IS_TO_DB)
/* lseek+write to a journal file: DO_LSEEKWRITE with the journal flavor of the debug fault-injection field
 * (fake_jnl_enospc) and target id (LSEEKWRITE_IS_TO_JNL).
 */
#define JNL_LSEEKWRITE(CSA, JNL_FN, FD, NEW_EOF, BUFF, SIZE, STATUS)	\
	DO_LSEEKWRITE(CSA, JNL_FN, FD, NEW_EOF, BUFF, SIZE, STATUS, fake_jnl_enospc, LSEEKWRITE_IS_TO_JNL)
/* Common body of DB_LSEEKWRITE and JNL_LSEEKWRITE.
 *	csa			- database involved (sgmnt_addrs pointer), or NULL
 *	fnptr			- name of the file being written (handed to wait_for_disk_space as char *)
 *	fd, new_eof, buff, size	- handed to LSEEKWRITE, and to wait_for_disk_space on ENOSPC
 *	status			- lvalue, out: final status of the operation
 *	FAKE_WHICH_ENOSPC	- node_local field name consulted by FAKE_ENOSPC (debug builds only)
 *	LSEEKWRITE_TARGET	- LSEEKWRITE_IS_TO_* value recorded by FAKE_ENOSPC (debug builds only)
 * Sequence: wait out an instance freeze (only when csa is non-NULL), do the LSEEKWRITE, give FAKE_ENOSPC the
 * chance to override the status, and on ENOSPC hand over to wait_for_disk_space which updates the status.
 * Arguments are parenthesized wherever they are cast or compared so that expression arguments bind correctly.
 * Caution: the macro has a local named lcl_status; do not pass a variable of that name as "status".
 */
#define DO_LSEEKWRITE(csa, fnptr, fd, new_eof, buff, size, status, FAKE_WHICH_ENOSPC, LSEEKWRITE_TARGET)	\
{	\
	int	lcl_status;	\
	\
	if (NULL != (csa))	\
	{	\
		WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);	\
	}	\
	LSEEKWRITE(fd, new_eof, buff, size, lcl_status);	\
	FAKE_ENOSPC(csa, FAKE_WHICH_ENOSPC, LSEEKWRITE_TARGET, lcl_status);	\
	if (ENOSPC == lcl_status)	\
	{	\
		wait_for_disk_space(csa, (char *)(fnptr), fd, (off_t)(new_eof), (char *)(buff), (size_t)(size),	\
					&lcl_status);	\
		/* With fault injection requested, wait_for_disk_space must not come back still reporting ENOSPC */	\
		assert((NULL == (csa)) || (NULL == ((sgmnt_addrs *)(csa))->nl)	\
			|| !((sgmnt_addrs *)(csa))->nl->FAKE_WHICH_ENOSPC || (ENOSPC != lcl_status));	\
	}	\
	(status) = lcl_status;	\
}
/* Currently, writes to replication instance files do NOT trigger instance freeze behavior.
 * Neither does a pre-existing instance freeze affect replication instance file writes.
 * Hence this is defined as a simple LSEEKWRITE.
 */
#define REPL_INST_LSEEKWRITE LSEEKWRITE
/* Expands to a call of repl_inst_get_name; its result is the value of the macro.
 * The expansion refers to "replpool_id", "full_len" and "return_on_error", none of which is declared in this
 * header, so all three must be visible at the point of use.
 */
#define REPL_INST_AVAILABLE (repl_inst_get_name((char *)replpool_id.instfilename, &full_len, SIZEOF(replpool_id.instfilename), \
return_on_error))
#else /* #ifdef UNIX */
/* Non-UNIX builds: freeze-on-error is never available, so the predicates are constant FALSE */
# define ANTICIPATORY_FREEZE_AVAILABLE FALSE
# define ANTICIPATORY_FREEZE_ENABLED(CSA) FALSE
# define REPL_INST_AVAILABLE FALSE
/* The wait macros expand to nothing. Note that, unlike their UNIX counterparts, they are object-like (no
 * parameter list): an invocation written as NAME(csa); therefore leaves the expression statement (csa); behind.
 */
# define WAIT_FOR_REPL_INST_UNFREEZE
# define WAIT_FOR_REPL_INST_UNFREEZE_SAFE
#endif /* #ifdef UNIX */
#endif /* #ifndef _ANTICIPATORY_FREEZE_H */