fis-gtm/sr_unix/wcs_get_space.c

337 lines
13 KiB
C

/****************************************************************
* *
* Copyright 2007, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_facility.h"
#include "gdsroot.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "interlock.h"
#include "jnl.h"
#include "sleep_cnt.h"
#include "gdsbgtr.h"
#include "wbox_test_init.h"
/* Include prototypes */
#include "send_msg.h"
#include "wcs_get_space.h"
#include "gtmmsg.h"
#include "gt_timer.h"
#include "wcs_sleep.h"
#include "relqop.h"
#include "error.h" /* for gtm_fork_n_core() prototype */
#include "rel_quant.h"
#include "performcaslatchcheck.h"
#include "wcs_phase2_commit_wait.h"
#include "wcs_recover.h"
#include "gtm_c_stack_trace.h"
/* Failure-diagnostic globals: filled in by this module so they can be inspected in a core file */
GBLDEF cache_rec_ptr_t get_space_fail_cr; /* gbldefed to be accessible in a pro core */
GBLDEF wcs_conflict_trace_t *get_space_fail_array; /* gbldefed to be accessible in a pro core */
GBLDEF int4 get_space_fail_arridx; /* gbldefed to be accessible in a pro core */
GBLREF sgmnt_addrs *cs_addrs;
GBLREF sgmnt_data_ptr_t cs_data;
GBLREF gd_region *gv_cur_region; /* needed for the JNL_ENSURE_OPEN_WCS_WTSTART macro */
GBLREF int num_additional_processors; /* when zero, only a single spin is done per queue-latch retry */
GBLREF uint4 process_id; /* reported in WAITDSKSPACE messages and in stack-trace requests */
GBLREF volatile int4 fast_lock_count; /* incremented while the queue latch is being obtained, decremented afterwards */
/* Error codes referenced by this module */
error_def(ERR_DBFILERR);
error_def(ERR_WAITDSKSPACE);
error_def(ERR_GBLOFLOW);
/* Number of entries in the on-stack wcs_conflict_trace[] array kept by wcs_get_space() */
#define WCS_CONFLICT_TRACE_ARRAYSIZE 64
/* One trace entry is recorded every LCNT_INTERVAL iterations of the specific-buffer wait loop (which runs for less
 * than UNIX_GETSPACEWAIT iterations), so that the whole wait fits in WCS_CONFLICT_TRACE_ARRAYSIZE entries.
 */
#define LCNT_INTERVAL DIVIDE_ROUND_UP(UNIX_GETSPACEWAIT, WCS_CONFLICT_TRACE_ARRAYSIZE)
/* Common failure exit. Asserts (so debug builds stop here), records the cache-record and the conflict trace array in
 * globals so they can be found in a core, takes a core snapshot if TREF(gtm_environment_init) is set, and then
 * RETURNS FALSE FROM THE INVOKING FUNCTION (hidden control flow: nothing after the macro invocation is executed).
 *	TRACEARRAY : conflict trace array to remember in get_space_fail_array
 *	CR         : cache-record to remember in get_space_fail_cr
 * Macro parameters are parenthesized so that any expression can safely be passed.
 */
#define WCS_GET_SPACE_RETURN_FAIL(TRACEARRAY, CR) \
{ \
	assert(FALSE); /* We have failed */ \
	get_space_fail_cr = (CR); \
	get_space_fail_array = (TRACEARRAY); \
	if (TREF(gtm_environment_init)) \
		gtm_fork_n_core(); /* take a snapshot in case running in-house */ \
	return FALSE; \
}
/* Return the pid currently recorded in the journal buffer's io_in_prog latch (respectively fsync_in_prog latch),
 * or -1 if CSA has no journal structure attached (CSA->jnl is NULL).
 * CSA is parenthesized so that expressions such as "&x" or "p + 1" can be passed without mis-parsing.
 */
#define GET_IO_LATCH_PID(CSA) ((CSA)->jnl ? (CSA)->jnl->jnl_buff->io_in_prog_latch.u.parts.latch_pid : -1)
#define GET_FSYNC_LATCH_PID(CSA) ((CSA)->jnl ? (CSA)->jnl->jnl_buff->fsync_in_prog_latch.u.parts.latch_pid : -1)
/* Request C-stack traces (through GET_C_STACK_FROM_SCRIPT) of every process that may be holding us up:
 *	- the process recorded in CR->epid, if non-zero
 *	- the holder of the journal io_in_prog latch, if any
 *	- the holder of the journal fsync_in_prog latch, if any
 * STUCK_CNT is passed through unchanged to GET_C_STACK_FROM_SCRIPT.
 * Note: the closing brace deliberately has NO line continuation, so the definition ends there and does not
 * swallow whatever line follows it.
 */
#define INVOKE_C_STACK_APPROPRIATE(CR, CSA, STUCK_CNT) \
{ \
	int4 io_latch_pid, fsync_latch_pid; \
	\
	if ((CR)->epid) \
	{ \
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_CR", process_id, (CR)->epid, STUCK_CNT); \
	} \
	if (0 < (io_latch_pid = GET_IO_LATCH_PID(CSA))) \
	{ \
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_IO_PROG", process_id, io_latch_pid, STUCK_CNT); \
	} \
	if (0 < (fsync_latch_pid = GET_FSYNC_LATCH_PID(CSA))) \
	{ \
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_FSYNC_PROG", process_id, fsync_latch_pid, STUCK_CNT); \
	} \
}
/* go after a specific number of buffers or a particular buffer */
/* not called if UNTARGETED_MSYNC and MM mode */
/* wcs_get_space
 *
 * Parameters:
 *	reg	: region whose buffers are to be flushed
 *	needed	: if non-zero, the number of free buffers (cnl->wc_in_free) the caller wants available
 *	cr	: if needed is zero, the specific cache-record that has to become non-dirty
 *		  (at least one of needed/cr must be supplied; this is asserted)
 *
 * Returns:
 *	TRUE	: the request was satisfied; ALSO returned unconditionally when the caller does not hold crit,
 *		  in which case writers are merely started and the outcome is not checked.
 *	FALSE	: the wait was given up (phase2 commit wait failed, csd->wc_blocked was seen set, or the wait
 *		  loops ran out). The timeout exits go through WCS_GET_SPACE_RETURN_FAIL which also records
 *		  diagnostic globals.
 *	If the last flush attempt reported ENOSPC, ERR_WAITDSKSPACE is raised through rts_error instead.
 *
 * Side effects: lowers csd->flush_trigger by one step (only when crit is held); resets and updates
 * get_space_fail_arridx; fast_lock_count is incremented around the queue-latch phase and is decremented again
 * on every exit path.
 */
bool wcs_get_space(gd_region *reg, int needed, cache_rec_ptr_t cr)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	cache_que_head_ptr_t	q0, base;
	int4			n, save_errno = 0, k, i, dummy_errno, max_count, count;
	int			maxspins, retries, spins;
	uint4			lcnt, size, to_wait = 0, to_msg, this_idx;	/* to_wait is reported at the end even if
										 * the disk-space wait loop never ran */
	wcs_conflict_trace_t	wcs_conflict_trace[WCS_CONFLICT_TRACE_ARRAYSIZE];
	boolean_t		is_mm;
	cache_rec		cr_contents;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert((0 != needed) || (NULL != cr));
	get_space_fail_arridx = 0;
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	cnl = csa->nl;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	if (FALSE == csa->now_crit)
	{	/* Without crit we only kick off enough flushes to cover "needed" buffers and do not check the result */
		assert(0 != needed);	/* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu); 0 < lcnt; lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensure jnl is open, invokes wcs_wtstart() and checks for errors etc. */
		return TRUE;
	}
	UNTARGETED_MSYNC_ONLY(assert(!is_mm);)
	/* Lower the flush trigger by one step (at least 1) but never below MIN_FLUSH_TRIGGER */
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger / STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	/* Routine actually serves two purposes:
	 *	1 - Free up required number of buffers or
	 *	2 - Free up a specific buffer
	 * Do a different kind of loop depending on which is our current calling.
	 */
	if (0 != needed)
	{
		BG_TRACE_ANY(csa, bufct_buffer_flush);
		for (lcnt = 1; (cnl->wc_in_free < needed) && (BUF_OWNER_STUCK > lcnt); ++lcnt)
		{
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, needed, save_errno);
			if (is_mm && (ERR_GBLOFLOW == save_errno))
				wcs_recover(reg);
			if (cnl->wc_in_free < needed)
			{
				if ((ENOSPC == save_errno) && (csd->wait_disk_space > 0))
				{
					/* not enough disk space to flush the buffers to regain them
					 * so wait for it to become available,
					 * and if it takes too long, just
					 * quit. Unfortunately, quitting would
					 * invoke the recovery logic which
					 * should be of no help to this
					 * situation. Then what?
					 */
					lcnt = BUF_OWNER_STUCK;	/* makes this the last iteration of the enclosing loop */
					to_wait = csd->wait_disk_space;
					to_msg = (to_wait / 8) ? (to_wait / 8) : 1; /* output error message around 8 times */
					while ((0 < to_wait) && (ENOSPC == save_errno))
					{
						if ((to_wait == csd->wait_disk_space)
							|| (0 == to_wait % to_msg))
						{
							send_msg(VARLSTCNT(7) ERR_WAITDSKSPACE, 4,
								process_id, to_wait, DB_LEN_STR(reg), save_errno);
							gtm_putmsg(VARLSTCNT(7) ERR_WAITDSKSPACE, 4,
								process_id, to_wait, DB_LEN_STR(reg), save_errno);
						}
						hiber_start(1000);
						to_wait--;
						JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, needed, save_errno);
						if (is_mm && (ERR_GBLOFLOW == save_errno))
							wcs_recover(reg);
						if (cnl->wc_in_free >= needed)
							break;
					}
				}
				wcs_sleep(lcnt);
			} else
				return TRUE;
			BG_TRACE_ANY(csa, bufct_buffer_flush_loop);
		}
		if (cnl->wc_in_free >= needed)
			return TRUE;
	} else
	{	/* Wait for a specific buffer to be flushed. We attempt to speed this along by shuffling the entry
		 * we want to the front of the queue before we call routines to do some writing.
		 * Formerly we used to wait for this buffer to be flushed irrespective of its position in the active queue.
		 * We keep this code commented just in case this needs to be resurrected in the future.
		 */
#		ifdef old_code
		BG_TRACE_ANY(csa, spcfc_buffer_flush);
		for (lcnt = 1; (0 != cr->dirty) && (BUF_OWNER_STUCK > lcnt); ++lcnt)
		{
			for (; 0 != cr->dirty && 0 != csa->acc_meth.bg.cache_state->cacheq_active.fl;)
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, save_errno);
			if (0 != cr->dirty)
				wcs_sleep(lcnt);
			else
				return TRUE;
			BG_TRACE_ANY(csa, spcfc_buffer_flush_loop);
		}
		if (0 == cr->dirty)
			return TRUE;
#		endif
		assert(csa->now_crit);		/* must be crit to play with queues when not the writer */
		BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush);
		++fast_lock_count;			/* Disable wcs_stale for duration */
		if (!is_mm)		/* Determine queue base to use */
		{
			base = &csa->acc_meth.bg.cache_state->cacheq_active;
			/* If another process is concurrently finishing up phase2 of commit, wait for that to complete first. */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
			{	/* Undo the increment done above before bailing out, like every other exit from this branch
				 * does; otherwise fast_lock_count would stay elevated after we return.
				 */
				--fast_lock_count;
				assert(0 <= fast_lock_count);
				return FALSE;	/* assumption is that caller will set wc_blocked and trigger cache recovery */
			}
		} else
			base = &csa->acc_meth.mm.mmblk_state->mmblkq_active;
		maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
		for (retries = LOCK_TRIES - 1; retries > 0 ; retries--)
		{
			for (spins = maxspins; spins > 0 ; spins--)
			{
				if (GET_SWAPLOCK(&base->latch)) /* Lock queue to prevent interference */
				{
					if (0 != cr->state_que.fl)
					{	/* If it is still in the active queue, then insert it at the head of the queue */
						csa->wbuf_dqd++;
						q0 = (cache_que_head_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
						csa->wbuf_dqd--;
						VERIFY_QUEUE(base);
					}
					/* release the queue header lock so that the writers can proceed */
					RELEASE_SWAPLOCK(&base->latch);
					--fast_lock_count;
					assert(0 <= fast_lock_count);
					/* Fire off a writer to write it out. Another writer may grab our cache
					 * record so we have to be willing to wait for him to flush it.
					 * Flush this one buffer the first time through.
					 * If this didn't work, flush normal amount next time in the loop.
					 */
					JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 1, save_errno);
					if (is_mm && (ERR_GBLOFLOW == save_errno))
						wcs_recover(reg);
					for (lcnt = 1; (0 != cr->dirty) && (UNIX_GETSPACEWAIT > lcnt); ++lcnt)
					{
						if (0 == (lcnt % LCNT_INTERVAL))
						{	/* Periodically snapshot who seems to be in our way (for failure analysis) */
							this_idx = (lcnt / LCNT_INTERVAL);
							assert(this_idx < WCS_CONFLICT_TRACE_ARRAYSIZE);
							wcs_conflict_trace[this_idx].wcs_active_lvl = cnl->wcs_active_lvl;
							wcs_conflict_trace[this_idx].io_in_prog_pid = GET_IO_LATCH_PID(csa);
							wcs_conflict_trace[this_idx].fsync_in_prog_pid = GET_FSYNC_LATCH_PID(csa);
						}
						get_space_fail_arridx = lcnt;
						max_count = ROUND_UP(cnl->wcs_active_lvl, csd->n_wrt_per_flu);
						/* Check if cache recovery is needed (could be set by another process in
						 * secshr_db_clnup finishing off a phase2 commit). If so, no point invoking
						 * wcs_wtstart as it will return right away. Instead return FALSE so
						 * cache-recovery can be triggered by the caller.
						 */
						if (csd->wc_blocked)
						{
							assert(gtm_white_box_test_case_enabled);
							return FALSE;
						}
						/* loop till the active queue is exhausted */
						for (count = 0; 0 != cr->dirty && 0 != cnl->wcs_active_lvl &&
							     max_count > count; count++)
						{
							BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush_retries);
							JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, save_errno);
							if (is_mm && (ERR_GBLOFLOW == save_errno))
								wcs_recover(reg);
						}
						/* Usually we want to sleep only if we need to wait on someone else
						 * i.e. (i) if we are waiting for another process' fsync to complete
						 *		We have seen jnl_fsync() to take more than a minute.
						 *		Hence we wait for a max. of 2 mins (UNIX_GETSPACEWAIT).
						 *     (ii) if some concurrent writer has taken this cache-record out.
						 *    (iii) if someone else is holding the io_in_prog lock.
						 * Right now we know of only one case where there is no point in waiting
						 *   which is if the cache-record is out of the active queue and is dirty.
						 * But since that is quite rare and we don't lose much in that case by
						 *   sleeping we do an unconditional sleep (only if cr is dirty).
						 */
						if (!cr->dirty)
							return TRUE;
						else
						{
							DEBUG_ONLY(cr_contents = *cr;)
							/* Assert that if the cache-record is dirty, it better be in the
							 * active queue or be in the process of getting flushed by a concurrent
							 * writer or phase2 of the commit is in progress. If none of this is
							 * true, it should have become non-dirty by now even though we found it
							 * dirty a few lines above. Note that the cache-record could be in the
							 * process of being released by a concurrent writer; This is done by
							 * resetting 3 fields cr->epid, cr->dirty, cr->interlock; Since the write
							 * interlock is the last field to be released, check that BEFORE dirty.
							 */
							assert(cr_contents.state_que.fl || cr_contents.epid || cnl->in_wtstart
								|| cr_contents.in_tend
								|| (LATCH_CLEAR != WRITE_LATCH_VAL(&cr_contents))
								|| !cr_contents.dirty);
							wcs_sleep(lcnt);
						}
						BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush_loop);
					}
					if (0 == cr->dirty)
						return TRUE;
					/* Waited the maximum time and the buffer is still dirty: collect diagnostics and fail */
					INVOKE_C_STACK_APPROPRIATE(cr, csa, 1);
					WCS_GET_SPACE_RETURN_FAIL(wcs_conflict_trace, cr);
				} else
				{	/* buffer was locked */
					if (0 == cr->dirty)
					{
						BG_TRACE_ANY(csa, spcfc_buffer_flushed_during_lockwait);
						--fast_lock_count;
						assert(0 <= fast_lock_count);
						return TRUE;
					}
				}
			}
			if (retries & 0x3)	/* On all but every 4th pass, do a simple rel_quant */
				rel_quant();	/* Release processor to holder of lock (hopefully) */
			else
			{	/* On every 4th pass, we bide for awhile */
				wcs_sleep(LOCK_SLEEP);
				/* If near end of loop, see if target is dead and/or wake it up */
				if (RETRY_CASLATCH_CUTOFF == retries)
					performCASLatchCheck(&base->latch, TRUE);
			}
		}
		/* Never obtained the queue latch */
		--fast_lock_count;
		assert(0 <= fast_lock_count);
		if (0 == cr->dirty)
			return TRUE;
	}
	/* Request could not be satisfied */
	if (ENOSPC == save_errno)
		rts_error(VARLSTCNT(7) ERR_WAITDSKSPACE, 4, process_id, to_wait, DB_LEN_STR(reg), save_errno);
	else
		assert(FALSE);
	INVOKE_C_STACK_APPROPRIATE(cr, csa, 2);
	WCS_GET_SPACE_RETURN_FAIL(wcs_conflict_trace, cr);
}