231 lines
7.8 KiB
C
231 lines
7.8 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2013 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_stat.h"
|
|
#include "gtm_fcntl.h"
|
|
|
|
#include "gdsroot.h"
|
|
#include "gdsbt.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsfhead.h"
|
|
#include "filestruct.h"
|
|
#include "jnl.h"
|
|
#include "gtmio.h" /* for CLOSEFILE used by the F_CLOSE macro in JNL_FD_CLOSE */
|
|
#include "repl_sp.h" /* for F_CLOSE used by the JNL_FD_CLOSE macro */
|
|
#include "iosp.h" /* for SS_NORMAL used by the JNL_FD_CLOSE macro */
|
|
|
|
#include "heartbeat_timer.h"
|
|
#include "gt_timer.h"
|
|
#include "gtmimagename.h"
|
|
#include "dpgbldir.h"
|
|
#include "have_crit.h"
|
|
#include "anticipatory_freeze.h"
|
|
|
|
#ifdef DEBUG
|
|
STATICDEF uint4 next_heartbeat_counter = 1; /* the heartbeat_counter at which enospc manipulations need to happen */
|
|
GBLREF boolean_t is_jnlpool_creator; /* The purpose of this check is to make only one process responsible for
|
|
* setting fake ENOSPC flags. If we had multiple processes messing with FREEZE,
|
|
* the test would freeze more often and eventually do nothing but freeze.
|
|
*/
|
|
STATICDEF uint4 syslog_deferred = 0;
|
|
#endif
|
|
|
|
GBLREF volatile uint4 heartbeat_counter;
|
|
GBLREF boolean_t is_src_server;
|
|
GBLREF enum gtmImageTypes image_type;
|
|
GBLREF uint4 process_id;
|
|
GBLREF jnlpool_addrs jnlpool;
|
|
GBLREF volatile int4 gtmMallocDepth;
|
|
|
|
#ifdef DEBUG
|
|
|
|
#define ENOSPC_FROZEN_DURATION 2 /* 16 seconds */
|
|
#define ENOSPC_UNFROZEN_DURATION (2 * (rand() % 120 + 1)) /* 16 seconds to 32 minutes */
|
|
#define MAX_REGIONS 50
|
|
|
|
#define NONE 0 /* 1/2 probability */
|
|
#define DB_ON 1 /* 1/6 probability */
|
|
#define JNL_ON 2 /* 1/6 probability */
|
|
#define DB_AND_JNL_ON 3 /* 1/6 probability */
|
|
#define MAX_ENOSPC_TARGET 4
|
|
|
|
error_def(ERR_TEXT);
|
|
error_def(ERR_FAKENOSPCLEARED);
|
|
|
|
void choose_random_reg_list(char *enospc_enable_list, int n_reg)
|
|
{
|
|
int i;
|
|
|
|
assert(MAX_REGIONS >= n_reg);
|
|
for (i = 0; i < n_reg; i++)
|
|
{
|
|
if(rand() % 2) /* Should we trigger fake ENOSPC on this region */
|
|
enospc_enable_list[i] = rand() % (MAX_ENOSPC_TARGET - 1) + 1; /* What kind? */
|
|
else
|
|
enospc_enable_list[i] = NONE;
|
|
}
|
|
}
|
|
|
|
void set_enospc_if_needed()
|
|
{
|
|
gd_addr *addr_ptr;
|
|
char enospc_enable_list[MAX_REGIONS];
|
|
boolean_t ok_to_interrupt, is_time_to_act;
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
|
|
if (TREF(gtm_test_fake_enospc) && is_jnlpool_creator && ANTICIPATORY_FREEZE_AVAILABLE)
|
|
{
|
|
ok_to_interrupt = (INTRPT_OK_TO_INTERRUPT == intrpt_ok_state) && (0 == gtmMallocDepth);
|
|
is_time_to_act = (next_heartbeat_counter == heartbeat_counter);
|
|
if (syslog_deferred && ok_to_interrupt)
|
|
{
|
|
send_msg_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_FAKENOSPCLEARED, 1, (heartbeat_counter - syslog_deferred));
|
|
syslog_deferred = 0;
|
|
}
|
|
if (!is_time_to_act || syslog_deferred || (!ok_to_interrupt && !IS_REPL_INST_FROZEN))
|
|
{
|
|
/* We have to skip this because we have just fallen into deferred zone or we are currently in it */
|
|
if (is_time_to_act)
|
|
next_heartbeat_counter++; /* Try again in the next heartbeat */
|
|
return;
|
|
}
|
|
assert(0 == syslog_deferred);
|
|
srand(time(NULL));
|
|
addr_ptr = get_next_gdr(NULL);
|
|
if (NULL == addr_ptr) /* Ensure that there is a global directory to operate on. */
|
|
return;
|
|
assert(NULL == get_next_gdr(addr_ptr));
|
|
/* Randomly simulate ENOSPC or free space. NO more than 50 regions are allowed to avoid unnecessary
|
|
* malloc/frees in debug-only code
|
|
*/
|
|
assert(MAX_REGIONS >= addr_ptr->n_regions);
|
|
if (!IS_REPL_INST_FROZEN)
|
|
{ /* We are in an UNFROZEN state, and about to be FROZEN due to ENOSPC */
|
|
choose_random_reg_list(enospc_enable_list, addr_ptr->n_regions);
|
|
next_heartbeat_counter = heartbeat_counter + ENOSPC_FROZEN_DURATION;
|
|
} else
|
|
{ /* We are in a FROZEN state, and about to be UNFROZEN due to free space */
|
|
memset(enospc_enable_list, 0, MAX_REGIONS);
|
|
next_heartbeat_counter = heartbeat_counter + ENOSPC_UNFROZEN_DURATION;
|
|
if (!ok_to_interrupt)
|
|
syslog_deferred = heartbeat_counter;
|
|
}
|
|
set_enospc_flags(addr_ptr, enospc_enable_list, ok_to_interrupt);
|
|
}
|
|
}
|
|
|
|
/* Apply the per-region fake ENOSPC choices in enospc_enable_list to the regions of a global directory.
 *
 * addr_ptr		- global directory whose regions are walked
 * enospc_enable_list	- one entry per region, in region order; each is NONE, DB_ON, JNL_ON or DB_AND_JNL_ON
 * ok_to_interrupt	- when TRUE, a syslog message describing the change is sent for every region that is updated
 *
 * A region is skipped when its access method is neither BG nor MM, when its csa or csa->nl is NULL, or when
 * ANTICIPATORY_FREEZE_ENABLED(csa) is false. For every other region the fake_db_enospc and fake_jnl_enospc
 * fields of csa->nl are set according to the list entry.
 */
void set_enospc_flags(gd_addr *addr_ptr, char enospc_enable_list[], boolean_t ok_to_interrupt)
{
	gd_region	*reg, *reg_top;
	int		reg_idx;
	sgmnt_addrs	*csa;
	const char	*syslog_msg;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;

	reg = addr_ptr->regions;
	reg_top = reg + addr_ptr->n_regions;
	for (reg_idx = 0; reg < reg_top; reg++, reg_idx++)
	{	/* reg_idx advances in step with reg so that list entries stay aligned with regions that get skipped */
		if (!((dba_bg == reg->dyn.addr->acc_meth) || (dba_mm == reg->dyn.addr->acc_meth)))
			continue;
		csa = REG2CSA(reg);
		if ((NULL == csa) || (NULL == csa->nl) || !(ANTICIPATORY_FREEZE_ENABLED(csa)))
			continue;
		switch (enospc_enable_list[reg_idx])
		{
			case DB_AND_JNL_ON:
				syslog_msg = "Turning on fake ENOSPC for both database and journal file.";
				csa->nl->fake_db_enospc = TRUE;
				csa->nl->fake_jnl_enospc = TRUE;
				break;
			case JNL_ON:
				syslog_msg = "Turning on fake ENOSPC only for journal file.";
				csa->nl->fake_db_enospc = FALSE;
				csa->nl->fake_jnl_enospc = TRUE;
				break;
			case DB_ON:
				syslog_msg = "Turning on fake ENOSPC only for database file.";
				csa->nl->fake_db_enospc = TRUE;
				csa->nl->fake_jnl_enospc = FALSE;
				break;
			case NONE:
				syslog_msg = "Turning off fake ENOSPC for both database and journal file.";
				csa->nl->fake_db_enospc = FALSE;
				csa->nl->fake_jnl_enospc = FALSE;
				break;
			default:	/* The list is only expected to hold the four values handled above */
				assert(FALSE);
		}
		if (!ok_to_interrupt)
			continue;	/* Flags are updated but no message is sent */
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TEXT, 2, DB_LEN_STR(reg), ERR_TEXT, 2,
				LEN_AND_STR(syslog_msg));
	}
}
|
|
#endif
|
|
|
|
void heartbeat_timer(void)
|
|
{
|
|
gd_addr *addr_ptr;
|
|
sgmnt_addrs *csa;
|
|
jnl_private_control *jpc;
|
|
gd_region *r_local, *r_top;
|
|
int rc;
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
|
|
/* It will take heartbeat_counter about 1014 years to overflow. */
|
|
heartbeat_counter++;
|
|
DEBUG_ONLY(set_enospc_if_needed());
|
|
/* Check every 1 minute if we have an older generation journal file open. If so, close it.
|
|
* The only exceptions are
|
|
* a) The source server can have older generations open and they should not be closed.
|
|
* b) If we are in the process of switching to a new journal file while we get interrupted
|
|
* by the heartbeat timer, we should not close the older generation journal file
|
|
* as it will anyways be closed by the mainline code. But identifying that we are in
|
|
* the midst of a journal file switch is tricky so we check if the process is in
|
|
* crit for this region and if so we skip the close this time and wait for the next heartbeat.
|
|
*/
|
|
if ((INTRPT_OK_TO_INTERRUPT == intrpt_ok_state) && !is_src_server
|
|
&& (0 == heartbeat_counter % NUM_HEARTBEATS_FOR_OLDERJNL_CHECK))
|
|
{
|
|
for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
|
|
{
|
|
for (r_local = addr_ptr->regions, r_top = r_local + addr_ptr->n_regions; r_local < r_top; r_local++)
|
|
{
|
|
if (!r_local->open || r_local->was_open)
|
|
continue;
|
|
if ((dba_bg != r_local->dyn.addr->acc_meth) && (dba_mm != r_local->dyn.addr->acc_meth))
|
|
continue;
|
|
csa = REG2CSA(r_local);
|
|
if (csa->now_crit)
|
|
continue;
|
|
jpc = csa->jnl;
|
|
if ((NULL != jpc) && (NOJNL != jpc->channel) && JNL_FILE_SWITCHED(jpc))
|
|
{ /* The journal file we have as open is not the latest generation journal file. Close it */
|
|
/* Assert that we never have an active write on a previous generation journal file. */
|
|
assert(process_id != jpc->jnl_buff->io_in_prog_latch.u.parts.latch_pid);
|
|
JNL_FD_CLOSE(jpc->channel, rc); /* sets jpc->channel to NOJNL */
|
|
jpc->pini_addr = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
start_timer((TID)heartbeat_timer, HEARTBEAT_INTERVAL, heartbeat_timer, 0, NULL);
|
|
}
|