fis-gtm/sr_unix/wcs_write_in_progress_wait.c

105 lines
3.4 KiB
C
Raw Permalink Normal View History

/****************************************************************
* *
* Copyright 2007, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "interlock.h"
#include "gdsbgtr.h"
#include "sleep_cnt.h"
#include "wbox_test_init.h"
#include "copy.h"
/* Include prototypes */
#include "caller_id.h"
#include "send_msg.h"
#include "wcs_sleep.h"
#include "is_proc_alive.h"
#include "wcs_write_in_progress_wait.h"
#include "add_inter.h"
#include "gtm_c_stack_trace.h"
GBLREF gd_region *gv_cur_region; /* for the LOCK_HIST macro used in LOCK_BUFF_FOR_UPDATE macro */
GBLREF uint4 process_id; /* for the LOCK_HIST macro used in LOCK_BUFF_FOR_UPDATE macro */
error_def (ERR_WRITEWAITPID);
/* Waits for a concurrently running write (of a global buffer to disk) to complete.
*
* Returns TRUE if write completes within timeout of approx. 1 minute.
* Returns FALSE otherwise.
*/
boolean_t wcs_write_in_progress_wait(node_local_ptr_t cnl, cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
uint4 lcnt;
int4 n;
for (lcnt = 1; ; lcnt++)
{ /* the design here is that either this process owns the block, or the writer does.
* if the writer does, it must be allowed to finish its write; then it will release the block
* and the next LOCK will establish ownership
*/
LOCK_BUFF_FOR_UPDATE(cr, n, &cnl->db_latch);
/* This destroys evidence of writer ownership, but this is really a test that
* there was no prior owner. It will only be true if the writer has cleared it.
*/
if (OWN_BUFF(n))
break;
else
{
GTM_WHITE_BOX_TEST(wbox_test_code, lcnt, (2 * BUF_OWNER_STUCK));
/* We have noticed the below assert to fail occasionally on some platforms
* We suspect it is because of waiting for another writer that is in jnl_fsync
* (as part of flushing a global buffer) which takes more than a minute to finish.
* To avoid false failures (where the other writer finishes its job in a little over
* a minute) we wait for twice the time in the debug version.
*/
DEBUG_ONLY(
if ((BUF_OWNER_STUCK == lcnt) && cr->epid)
GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
)
if (BUF_OWNER_STUCK DEBUG_ONLY( * 2) < lcnt)
{ /* sick of waiting */
if (0 == cr->dirty)
{ /* someone dropped something; assume it was the writer and go on */
LOCK_NEW_BUFF_FOR_UPDATE(cr);
break;
} else
{
if (cr->epid)
{
#ifdef DEBUG
GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, TWICE);
send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, TWICE, \
cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#else
GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, ONCE, \
cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#endif
}
return FALSE;
}
}
if (WRITER_STILL_OWNS_BUFF(cr, n))
wcs_sleep(lcnt);
}
} /* end of for loop to control buffer */
return TRUE;
}