fis-gtm/sr_port/db_csh_getn.c


/****************************************************************
 *								*
 *	Copyright 2001, 2011 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
#include "mdef.h"
#include <signal.h> /* needed for VSIG_ATOMIC_T */
#include "gdsroot.h"
#include "gdsblk.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "gdskill.h"
#include "gdscc.h"
#include "filestruct.h"
#include "interlock.h"
#include "jnl.h"
#include "buddy_list.h" /* needed for tp.h */
#include "hashtab.h" /* needed for cws_insert.h */
#include "hashtab_int4.h" /* needed for tp.h and cws_insert.h */
#include "tp.h"
#include "gdsbgtr.h"
#include "min_max.h"
#include "sleep_cnt.h"
#include "send_msg.h"
#include "relqop.h"
#include "is_proc_alive.h"
#include "cache.h"
#include "longset.h" /* needed for cws_insert.h */
#include "cws_insert.h"
#include "wcs_sleep.h"
#include "wcs_get_space.h"
#include "wcs_timer_start.h"
#include "add_inter.h"
#include "wbox_test_init.h"
#include "have_crit.h"
#include "memcoherency.h"
#include "gtm_c_stack_trace.h"
GBLREF sgmnt_addrs *cs_addrs;
GBLREF gd_region *gv_cur_region;
GBLREF uint4 process_id;
GBLREF uint4 image_count;
GBLREF unsigned int t_tries;
GBLREF uint4 dollar_tlevel;
GBLREF sgm_info *sgm_info_ptr;
GBLREF boolean_t mu_reorg_process;
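
/* Wait helper used while another process has a read in progress on the cache record: on the first
 * iteration, record the wait in the BG trace and remember the pid of the reader we first observed
 * (first_r_epid); every iteration then sleeps for a period scaled by the iteration count.
 */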
#define TRACE_AND_SLEEP(ocnt)				\
{							\
	if (1 == ocnt)					\
	{						\
		BG_TRACE_PRO(db_csh_getn_rip_wait);	\
		first_r_epid = latest_r_epid;		\
	}						\
	wcs_sleep(ocnt);				\
}
error_def(ERR_BUFRDTIMEOUT);
error_def(ERR_INVALIDRIP);
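
/* db_csh_getn: return a free cache record into which the requested block can be read, using an
 * approximate-LRU sweep over the cache record array. The caller must hold crit on the region
 * (asserted below). On success, the returned cache record has read_in_progress locked, r_epid set
 * to this process and cr->blk set to "block". If no record can be freed (e.g. a dirty buffer cannot
 * be flushed out), (cache_rec_ptr_t)CR_NOTVALID is returned so the caller can force a cache recovery.
 */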
cache_rec_ptr_t db_csh_getn(block_id block)
{
	cache_rec_ptr_t		hdr, q0, start_cr, cr;
	bt_rec_ptr_t		bt;
	unsigned int		lcnt, ocnt;
	int			rip, max_ent, pass1, pass2, pass3;
	int4			flsh_trigger;
	uint4			first_r_epid, latest_r_epid;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*tp_srch_status;
	ht_ent_int4		*tabent;

	csa = cs_addrs;
	csd = csa->hdr;
	assert(csa->now_crit);
	assert(csa == &FILE_INFO(gv_cur_region)->s_addrs);
	max_ent = csd->n_bts;
	cr = (cache_rec_ptr_t)GDS_REL2ABS(csa->nl->cur_lru_cache_rec_off);
	hdr = csa->acc_meth.bg.cache_state->cache_array + (block % csd->bt_buckets);
	start_cr = csa->acc_meth.bg.cache_state->cache_array + csd->bt_buckets;
	pass1 = max_ent;	/* skip referred or dirty or read-into cache records */
	pass2 = 2 * max_ent;	/* skip referred cache records */
	pass3 = 3 * max_ent;	/* skip nothing */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getns, 1);
	DEFER_INTERRUPTS(INTRPT_IN_DB_CSH_GETN);
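	/* Sweep the cache records with up to three passes over the array, relaxing the reuse criteria each
	 * pass: pass 1 skips records that are referred, dirty or being read in; pass 2 still skips referred
	 * and read-in-progress records but tries to flush dirty ones via wcs_get_space; pass 3 attempts
	 * reuse even if the record is referred and waits out a stuck reader.
	 */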
	for (lcnt = 0; ; lcnt++)
	{
		if (lcnt > pass3)
		{
			BG_TRACE_PRO(wc_blocked_db_csh_getn_loopexceed);
			assert(FALSE);
			break;
		}
		cr++;
		if (cr == start_cr + max_ent)
			cr = start_cr;
		VMS_ONLY(
			if ((lcnt == pass1) || (lcnt == pass2))
				wcs_wtfini(gv_cur_region);
		)
		if (cr->refer && (lcnt < pass2))
		{	/* in passes 1 & 2, set refer to FALSE and skip; in the third pass attempt reuse even if TRUE == refer */
			cr->refer = FALSE;
			continue;
		}
		if (cr->in_cw_set || cr->in_tend)
		{	/* some process already has this pinned for reading and/or updating. skip it. */
			cr->refer = TRUE;
			continue;
		}
		if (CDB_STAGNATE <= t_tries || mu_reorg_process)
		{	/* Prevent stepping on self when holding crit for the entire transaction.
			 * This is done by looking up the block in sgm_info_ptr->blks_in_use and in cw_stagnate.
			 * The two hashtable lookups are not equivalent since, in TP, sgm_info_ptr->blks_in_use
			 * is updated to the latest cw_stagnate list of blocks only in "tp_hist".
			 * Also note that the lookup in sgm_info_ptr->blks_in_use reuses blocks that don't have cse's.
			 * This is to allow big-read TP transactions which may use up more than the available global buffers.
			 * There is one issue here in that a block that has only been read so far may be stepped upon here
			 * but may later be needed for update. It is handled by updating the block's corresponding
			 * entry in the set of histories (the sgm_info_ptr->first_tp_hist[index] structure) to hold the
			 * "cr" and "cycle" of the t_qread done for the block when it is intended to be changed for the
			 * first time within the transaction, since otherwise the transaction would restart due to a
			 * cdb_sc_lostcr status. Note that "tn" (read_tn of the block) in first_tp_hist will still
			 * remain the "tn" when the block was first read within this transaction, to ensure the block
			 * hasn't been modified since the start of the transaction. Once we intend on changing the
			 * block, i.e. srch_blk_status->cse is non-NULL, we ensure in the code below not to step on it.
			 * ["tp_hist" is the routine that updates the "cr", "cycle" and "tn" of the block.]
			 * Note that usually in a transaction the first_tp_hist[] structure holds the "cr", "cycle" and "tn"
			 * of the first t_qread of the block within that transaction. The above is the only exception.
			 * Also note that for blocks in cw_stagnate (i.e. the current TP mini-action), we don't reuse any of
			 * them even if they don't have a cse. This is to ensure that the current action doesn't
			 * encounter a restart due to cdb_sc_lostcr in "tp_hist" even in the fourth retry.
			 */
			tp_srch_status = NULL;
			if (dollar_tlevel && (NULL != (tabent = lookup_hashtab_int4(sgm_info_ptr->blks_in_use, (uint4 *)&cr->blk)))
				&& (tp_srch_status = (srch_blk_status *)tabent->value) && (tp_srch_status->cse))
			{	/* this process is already using the block - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != lookup_hashtab_int4(&cw_stagnate, (uint4 *)&cr->blk))
			{	/* this process is already using the block for the current gvcst_search - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != tp_srch_status)
			{	/* About to reuse a buffer that is part of the read-set of the current TP transaction.
				 * Reset the clue, as otherwise the next global reference of that global would use an
				 * out-of-date clue. Even though tp_srch_status is available right after the
				 * sgm_info_ptr->blks_in_use hashtable check, we don't want to reset the clue in case the
				 * cw_stagnate hashtable check causes the same cr to be skipped from reuse. Hence the
				 * placement of this reset logic AFTER the cw_stagnate check.
				 */
				tp_srch_status->blk_target->clue.end = 0;
			}
		}
		if (cr->dirty)
		{	/* Note that in Unix, it is possible that we see a stale value of cr->dirty (possible if a
			 * concurrent "wcs_wtstart" has reset dirty to 0 but that update did not reach us yet). In this
			 * case the call to "wcs_get_space" below will do the necessary memory barrier instructions
			 * (through calls to "aswp") which will allow us to see the non-stale value of cr->dirty.
			 *
			 * It is also possible that cr->dirty is non-zero but < cr->flushed_dirty_tn. In this case, the
			 * wcs_get_space done below will return FALSE, forcing a cache-rebuild which will fix this situation.
			 *
			 * In VMS, another process cannot be concurrently resetting cr->dirty to 0, as the resetting routine
			 * is "wcs_wtfini", which is executed in crit, which another process cannot be in as we are in crit now.
			 */
			if (gv_cur_region->read_only)
				continue;
			if (lcnt < pass1)
			{
				if (!csa->timer && (csa->nl->wcs_timers < 1))
					wcs_timer_start(gv_cur_region, FALSE);
				continue;
			}
			BG_TRACE_PRO(db_csh_getn_flush_dirty);
			if (FALSE == wcs_get_space(gv_cur_region, 0, cr))
			{	/* failed to flush it out - force a rebuild */
				BG_TRACE_PRO(wc_blocked_db_csh_getn_wcsstarvewrt);
				assert(csd->wc_blocked);	/* only reason we currently know why wcs_get_space could fail */
				assert(gtm_white_box_test_case_enabled);
				break;
			}
			assert(0 == cr->dirty);
		}
		UNIX_ONLY(
			/* The cache-record is not free for reuse until the write-latch value becomes LATCH_CLEAR.
			 * In VMS, resetting the write-latch value occurs in "wcs_wtfini", which runs in crit, so we are
			 * fine. In Unix, this resetting is done by "wcs_wtstart", which is out-of-crit. Therefore, we
			 * need to wait for this value to be LATCH_CLEAR before reusing this cache-record.
			 * Note that we are examining the write-latch value without holding the interlock. It is ok to do
			 * this because the only two routines that modify the latch value are "bg_update" and
			 * "wcs_wtstart". The former cannot be concurrently executing because we are in crit.
			 * The latter will not update the latch value unless this cache-record is dirty. But in that
			 * case we would most likely have gone through the if (cr->dirty) check above. Most likely,
			 * because there is one rare possibility where a concurrent "wcs_wtstart" has set cr->dirty
			 * to 0 but not yet cleared the latch. In that case we wait for the latch to be cleared.
			 * In all other cases, nobody has been modifying the latch since we got crit, and therefore
			 * it is safe to observe the value of the latch without holding the interlock.
			 */
			if (LATCH_CLEAR != WRITE_LATCH_VAL(cr))
			{	/* possible if a concurrent "wcs_wtstart" has set cr->dirty to 0 but not yet
				 * cleared the latch. this should be very rare though.
				 */
				if (lcnt < pass2)
					continue;	/* try to find some other cache-record to reuse until the 3rd pass */
				for (ocnt = 1; (MAXWRTLATCHWAIT >= ocnt) && (LATCH_CLEAR != WRITE_LATCH_VAL(cr)); ocnt++)
					wcs_sleep(SLEEP_WRTLATCHWAIT);	/* since it is a short lock, sleep the minimum */
				if (MAXWRTLATCHWAIT <= ocnt)
				{
					BG_TRACE_PRO(db_csh_getn_wrt_latch_stuck);
					assert(FALSE);
					continue;
				}
			}
		)
		/* Note that before setting up a buffer for the requested block, we should make sure the cache-record's
		 * read_in_progress is set. This is so that no one else in t_qread gets access to this empty buffer.
		 * By "setting up a buffer" is meant assigning cr->blk as well as inserting the cr in the blkques
		 * through "shuffqth" below.
		 * Note that "t_qread" has special code to handle read_in_progress.
		 */
		LOCK_BUFF_FOR_READ(cr, rip);
		if (0 != rip)
		{
			if (lcnt < pass2)
			{	/* Someone is reading into this cache record; leave it alone for two passes.
				 * This is because if somebody is reading it, it is most likely to be referred to very soon.
				 * If we replace this, we will definitely be causing a restart for the reader.
				 * Instead of that, see if some other cache record fits in for us.
				 */
				RELEASE_BUFF_READ_LOCK(cr);
				continue;
			}
			for (ocnt = 1; 0 != rip && BUF_OWNER_STUCK >= ocnt; ocnt++)
			{
				RELEASE_BUFF_READ_LOCK(cr);
				/* The owner has been unable to complete the read - check for some things before going to
				 * sleep. Since cr->r_epid can be changing concurrently, take a local copy before using it
				 * below, particularly before calling is_proc_alive, as we don't want to call it with a
				 * 0 r_epid.
				 */
				latest_r_epid = cr->r_epid;
				if (cr->read_in_progress < -1)
				{
					BG_TRACE_PRO(db_csh_getn_out_of_design);	/* outside of design; clear to known state */
					send_msg(VARLSTCNT(4) ERR_INVALIDRIP, 2, DB_LEN_STR(gv_cur_region));
					assert(cr->r_epid == 0);
					cr->r_epid = 0;
					INTERLOCK_INIT(cr);
				} else if (0 != latest_r_epid)
				{
					if (is_proc_alive(latest_r_epid, cr->image_count))
					{
#						ifdef DEBUG
						if ((BUF_OWNER_STUCK / 2) == ocnt)
							GET_C_STACK_FROM_SCRIPT("BUFRDTIMEOUT", process_id, latest_r_epid, ONCE);
#						endif
						TRACE_AND_SLEEP(ocnt);
					} else
					{
						cr->r_epid = 0;
						INTERLOCK_INIT(cr);	/* Process gone, release that process's lock */
					}
				} else
				{
					TRACE_AND_SLEEP(ocnt);
				}
				LOCK_BUFF_FOR_READ(cr, rip);
			}
			if ((BUF_OWNER_STUCK < ocnt) && (0 != rip))
			{
				BG_TRACE_PRO(db_csh_getn_buf_owner_stuck);
				if (0 != latest_r_epid)
				{
					if (first_r_epid != latest_r_epid)
						GTMASSERT;
					GET_C_STACK_FROM_SCRIPT("BUFRDTIMEOUT", process_id, latest_r_epid,
						DEBUG_ONLY(TWICE) PRO_ONLY(ONCE));
					RELEASE_BUFF_READ_LOCK(cr);
					send_msg(VARLSTCNT(8) ERR_BUFRDTIMEOUT, 6, process_id,
						cr->blk, cr, first_r_epid, DB_LEN_STR(gv_cur_region));
					continue;
				}
				cr->r_epid = 0;
				INTERLOCK_INIT(cr);
				LOCK_BUFF_FOR_READ(cr, rip);
				assert(0 == rip);	/* Since we hold crit, we expect to get the lock */
				if (0 != rip)
					continue;
				/* We successfully obtained the lock, so we can fall out of this block */
			}
		}
		assert(0 == rip);
		/* no other process "owns" the block */
		if (CDB_STAGNATE <= t_tries || mu_reorg_process)
		{	/* this should probably use cr->in_cw_set with a condition handler to clean up */
			CWS_INSERT(block);
		}
		assert(LATCH_CLEAR == WRITE_LATCH_VAL(cr));
		/* got a block - set it up */
		assert(0 == cr->epid);
		assert(0 == cr->r_epid);
		cr->r_epid = process_id;	/* establish ownership */
		cr->image_count = image_count;
		cr->blk = block;
		/* We want cr->read_in_progress to be locked BEFORE cr->cycle is incremented. t_qread relies on this
		 * order. Enforce this order with a write memory barrier. Not doing so might cause the incremented
		 * cr->cycle to be seen by another process even though it sees the unlocked state of
		 * cr->read_in_progress. This could cause t_qread to incorrectly return with an up-to-date cr->cycle
		 * even though the buffer is still being read in from disk, and this could cause db integ errors as
		 * validation (in t_end/tp_tend, which relies on cr->cycle) would detect no problems even though there
		 * is one. Note that this memory barrier is still needed even though there is a memory barrier
		 * connotation in the LOCK_BUFF_FOR_READ() macro above: LOCK_BUFF_FOR_READ() does a read-type memory
		 * barrier whereas here we need a write barrier.
		 */
		SHM_WRITE_MEMORY_BARRIER;
		cr->cycle++;
		cr->jnl_addr = 0;
		cr->refer = TRUE;
		if (cr->bt_index != 0)
		{
			bt = (bt_rec_ptr_t)GDS_REL2ABS(cr->bt_index);
			bt->cache_index = CR_NOTVALID;
			cr->bt_index = 0;
		}
		q0 = (cache_rec_ptr_t)((sm_uc_ptr_t)cr + cr->blkque.fl);
		shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)hdr);
		assert(0 == cr->dirty);
		csa->nl->cur_lru_cache_rec_off = GDS_ABS2REL(cr);
		if (lcnt > pass1)
			csa->nl->cache_hits = 0;
		csa->nl->cache_hits++;
		if (csa->nl->cache_hits > csd->n_bts)
		{
			flsh_trigger = csd->flush_trigger;
			csd->flush_trigger = MIN(flsh_trigger + MAX(flsh_trigger / STEP_FACTOR, 1), MAX_FLUSH_TRIGGER(csd->n_bts));
			csa->nl->cache_hits = 0;
		}
		INCR_DB_CSH_COUNTER(csa, n_db_csh_getn_lcnt, lcnt);
		ENABLE_INTERRUPTS(INTRPT_IN_DB_CSH_GETN);
		return cr;
	}
	/* force a cache recovery */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getn_lcnt, lcnt);
	csa->nl->cur_lru_cache_rec_off = GDS_ABS2REL(cr);
	ENABLE_INTERRUPTS(INTRPT_IN_DB_CSH_GETN);
	return (cache_rec_ptr_t)CR_NOTVALID;
}
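
#if 0
/* Illustrative sketch only, not part of the original file: how a reader in the style of "t_qread"
 * might consume db_csh_getn()'s return value. The helper name read_block_into_cache is hypothetical,
 * the disk read is elided, and the real t_qread protocol (cycle/validation handling, wc_blocked
 * signalling on CR_NOTVALID) is intentionally omitted.
 */
static cache_rec_ptr_t read_block_into_cache(block_id block)
{
	cache_rec_ptr_t	cr;

	assert(cs_addrs->now_crit);		/* db_csh_getn requires crit to be held on the region */
	cr = db_csh_getn(block);
	if ((cache_rec_ptr_t)CR_NOTVALID == cr)
		return NULL;			/* caller would set wc_blocked and force a cache recovery */
	/* At this point cr->read_in_progress is locked and cr->r_epid == process_id, so concurrent
	 * t_qread calls wait on this record instead of handing out the still-empty buffer.
	 */
	/* ... read the block image from disk into the buffer associated with cr ... */
	RELEASE_BUFF_READ_LOCK(cr);		/* publish the buffer once the read completes */
	return cr;
}
#endif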