fis-gtm/sr_port/gvcst_put.c

/****************************************************************
 *								*
 *	Copyright 2001, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include <stddef.h>
#include "gtm_stdio.h"
#include "gtm_stdlib.h"
#include "gtm_string.h"
#include "gtm_inet.h"	/* Required for gtmsource.h */

#ifdef VMS
#include <descrip.h> /* Required for gtmsource.h */
#endif

#include "gdsroot.h"
#include "gdskill.h"
#include "gdsblk.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "cdb_sc.h"
#include "min_max.h"		/* needed for gdsblkops.h */
#include "gdsblkops.h"
#include "jnl.h"
#include "gdscc.h"
#include "copy.h"
#include "buddy_list.h"		/* needed for tp.h */
#include "hashtab_int4.h"	/* needed for tp.h */
#include "tp.h"
#include "rc_oflow.h"
#include "repl_msg.h"
#include "gtmsource.h"
#include "rtnhdr.h"
#include "stack_frame.h"
#ifdef GTM_TRIGGER
# include "gv_trigger.h"
# include "gtm_trigger.h"
# include "gv_trigger_protos.h"
# include "subscript.h"
# include "mv_stent.h"
# include "stringpool.h"
#endif
#include "tp_frame.h"
#include "tp_restart.h"

/* Include prototypes */
#include "t_write.h"
#include "t_write_root.h"
#include "t_end.h"
#include "t_retry.h"
#include "t_begin.h"
#include "t_create.h"
#include "gvcst_blk_build.h"
#include "gvcst_expand_key.h"
#include "gvcst_protos.h"	/* for gvcst_search,gvcst_search_blk,gvcst_put prototype */
#include "op.h"			/* for op_add & op_tstart prototype */
#include "format_targ_key.h"	/* for format_targ_key prototype */
#include "gvsub2str.h"		/* for gvsub2str prototype */
#include "tp_set_sgm.h"		/* for tp_set_sgm prototype */
#include "op_tcommit.h"		/* for op_tcommit prototype */
#include "have_crit.h"

#ifdef GTM_TRIGGER
LITREF	mval	literal_null;
LITREF	mval	literal_one;
LITREF	mval	literal_zero;
#endif

/* Globals that will not change in value across nested trigger calls of gvcst_put OR even if they might change in value,
 * the change is such that they dont need save/restore logic surrounding the "gtm_trigger" call. Any new GBLREFs that are
 * added in this module need to be examined for interference between gvcst_put and nested trigger call and any save/restore
 * logic (if needed) should be appropriately added surrounding the "gtm_trigger" invocation.
 */
GBLREF	boolean_t		gvdupsetnoop; /* if TRUE, duplicate SETs update journal but not database (except for curr_tn++) */
GBLREF	boolean_t		horiz_growth;
GBLREF	boolean_t		in_gvcst_incr;
GBLREF	char			*update_array, *update_array_ptr;
GBLREF	gv_key			*gv_altkey;
GBLREF	gv_namehead		*reset_gv_target;
GBLREF	inctn_opcode_t		inctn_opcode;
GBLREF	int			gv_fillfactor;
GBLREF	int			rc_set_fragment;	/* Contains offset within data at which data fragment starts */
GBLREF	int4			gv_keysize;
GBLREF	int4			prev_first_off, prev_next_off;
GBLREF	uint4			update_trans;
GBLREF	jnl_format_buffer	*non_tp_jfb_ptr;
GBLREF	jnl_gbls_t		jgbl;
GBLREF	jnlpool_addrs		jnlpool;
GBLREF	uint4			dollar_tlevel;
GBLREF	uint4			process_id;
GBLREF	uint4			update_array_size, cumul_update_array_size;	/* the current total size of the update array */
GBLREF	unsigned char		t_fail_hist[CDB_MAX_TRIES];
GBLREF	unsigned int		t_tries;
GBLREF	cw_set_element		cw_set[CDB_CW_SET_SIZE];/* create write set. */
GBLREF	boolean_t		skip_dbtriggers;	/* see gbldefs.c for description of this global */
GBLREF	stack_frame		*frame_pointer;
#ifdef GTM_TRIGGER
GBLREF	int			tprestart_state;
GBLREF	int4			gtm_trigger_depth;
GBLREF	int4			tstart_trigger_depth;
GBLREF	boolean_t		skip_INVOKE_RESTART;
GBLREF	boolean_t		ztwormhole_used;	/* TRUE if $ztwormhole was used by trigger code */
#endif
#ifdef DEBUG
GBLREF	boolean_t		skip_block_chain_tail_check;
#endif

/* Globals that could change in value across nested trigger calls of gvcst_put AND need to be saved/restored */
GBLREF	boolean_t		is_dollar_incr;
GBLREF	gd_region		*gv_cur_region;
GBLREF	gv_key			*gv_currkey;
GBLREF	gv_namehead		*gv_target;
GBLREF	mval			*post_incr_mval;
GBLREF	mval			increment_delta_mval;
GBLREF	sgm_info		*sgm_info_ptr;
GBLREF	sgmnt_addrs		*cs_addrs;
GBLREF	sgmnt_data_ptr_t	cs_data;

GTMTRIG_ONLY(error_def(ERR_DBROLLEDBACK);)
error_def(ERR_GVINCRISOLATION);
error_def(ERR_GVIS);
error_def(ERR_GVPUTFAIL);
error_def(ERR_REC2BIG);
error_def(ERR_RSVDBYTE2HIGH);
error_def(ERR_TPRETRY);

/* Before issuing an error, add GVT to the list of known gvts in this TP transaction in case it is not already done.
 * This GVT addition is usually done by "tp_hist" but that function has most likely not yet been invoked in gvcst_put.
 * Doing this addition will ensure we remember to reset any non-zero clue in dir_tree as part of tp_clean_up when a TROLLBACK
 * or TRESTART (implicit or explicit) occurs. Not doing so could cause transfer of control from the current gvcst_put action
 * to a user-defined error trap which if it does further database references, it could end up using invalid clues from GVT
 * and potentially incorrectly commit the transaction causing db integ errors as well.
 */
#define	ENSURE_VALUE_WITHIN_MAX_REC_SIZE(value, GVT)								\
{														\
	if (dollar_tlevel)											\
		ADD_TO_GVT_TP_LIST(GVT);	/* note: macro also updates read_local_tn if necessary */	\
	if (gv_currkey->end + 1 + value.len + SIZEOF(rec_hdr) > gv_cur_region->max_rec_size)			\
	{                                                                                                       \
		if (0 == (end = format_targ_key(buff, MAX_ZWR_KEY_SZ, gv_currkey, TRUE)))			\
			end = &buff[MAX_ZWR_KEY_SZ - 1];                                                        \
		rts_error(VARLSTCNT(10) ERR_REC2BIG, 4, gv_currkey->end + 1  + value.len + SIZEOF(rec_hdr),	\
			  (int4)gv_cur_region->max_rec_size,                                                    \
			  REG_LEN_STR(gv_cur_region), ERR_GVIS, 2, end - buff, buff);				\
	}                                                                                                       \
}

/* See comment before ENSURE_VALUE_WITHIN_MAX_REC_SIZE macro definition for why the ADD_TO_GVT_TP_LIST call below is necessary */
#define	ISSUE_RSVDBYTE2HIGH_ERROR(GVT)										\
{														\
	if (dollar_tlevel)											\
		ADD_TO_GVT_TP_LIST(GVT);	/* note: macro also updates read_local_tn if necessary */	\
	/* The record that is newly inserted/updated does not fit by itself in a separate block			\
	 * if the current reserved-bytes for this database is taken into account. Cannot go on.			\
	 */													\
	if (0 == (end = format_targ_key(buff, MAX_ZWR_KEY_SZ, gv_currkey, TRUE)))				\
		end = &buff[MAX_ZWR_KEY_SZ - 1];								\
	rts_error(VARLSTCNT(11) ERR_RSVDBYTE2HIGH, 5, new_blk_size_single,					\
		REG_LEN_STR(gv_cur_region), blk_size, blk_reserved_bytes,					\
		ERR_GVIS, 2, end - buff, buff);									\
}

#define	RESTORE_ZERO_GVT_ROOT_ON_RETRY(LCL_ROOT, GV_TARGET, TP_ROOT, DIR_HIST, DIR_TREE)			\
{														\
	if (!LCL_ROOT)												\
	{													\
		assert(NULL != DIR_HIST);									\
		assert(DIR_TREE == GV_TARGET->gd_csa->dir_tree);						\
		/* t_retry only resets gv_target->clue and not the clue of the directory tree.			\
		 * But DIR_HIST non-null implies the directory tree was used in a gvcst_search and hence	\
		 * was validated (in t_end/tp_hist),so we need to reset its clue before the next try.		\
		 */												\
		DIR_TREE->clue.end = 0;										\
		/* Check if LCL_ROOT & GV_TARGET->root are in sync. If not make them so. */			\
		if (GV_TARGET->root)										\
		{	/* We had reset the root block from zero to a non-zero value within			\
			 * this function, but since we are restarting, we can no longer be			\
			 * sure of the validity of the root block. Reset it to 0 so it will			\
			 * be re-determined in the next global reference.					\
			 */											\
			assert((TP_ROOT == GV_TARGET->root)							\
				|| ((0 == TP_ROOT) GTMTRIG_ONLY(&& (0 < gvtr_parms.num_triggers_invoked))));	\
			GV_TARGET->root = 0;									\
		}												\
	}													\
}

#ifdef DEBUG
# define DBG_SAVE_VAL_AT_FUN_ENTRY							\
{	/* Save copy of "val" at function entry.					\
	 * Make sure this is not touched by any nested trigger code */			\
	dbg_lcl_val = val;								\
	dbg_vallen = val->str.len;							\
	memcpy(dbg_valbuff, val->str.addr, MIN(ARRAYSIZE(dbg_valbuff), dbg_vallen));	\
}

# define DBG_CHECK_VAL_AT_FUN_EXIT										\
{	/* Check "val" is same as what it was at function entry.(i.e. was not touched by nested trigger code).	\
	 * The only exception is if $ZTVAL changed "val" in which case gvcst_put would have been redone. */	\
	assert(dbg_vallen == dbg_lcl_val->str.len);								\
	assert(0 == memcmp(dbg_valbuff, dbg_lcl_val->str.addr, MIN(ARRAYSIZE(dbg_valbuff), dbg_vallen)));	\
}
#else
# define DBG_SAVE_VAL_AT_FUN_ENTRY
# define DBG_CHECK_VAL_AT_FUN_EXIT
#endif

#define	GOTO_RETRY									\
{											\
	GTMTRIG_DBG_ONLY(dbg_trace_array[dbg_num_iters].retry_line = __LINE__);		\
	goto retry;									\
}

void	gvcst_put(mval *val)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	int4			blk_size, blk_fill_size, blk_reserved_bytes;
	const int4		zeroes = 0;
	boolean_t		jnl_format_done;
	blk_segment		*bs1, *bs_ptr, *new_blk_bs;
	block_id		allocation_clue, tp_root, gvt_for_root, blk_num, last_split_blk_num[MAX_BT_DEPTH];
	block_index		left_hand_index, ins_chain_index, root_blk_cw_index, next_blk_index;
	block_offset		next_offset, first_offset, ins_off1, ins_off2, old_curr_chain_next_off;
	cw_set_element		*cse, *cse_new, *old_cse;
	gv_namehead		*save_targ, *split_targ, *dir_tree;
	enum cdb_sc		status;
	gv_key			*temp_key;
	mstr			value;
	off_chain		chain1, curr_chain, prev_chain, chain2;
	rec_hdr_ptr_t		curr_rec_hdr, extra_rec_hdr, next_rec_hdr, new_star_hdr, rp;
	srch_blk_status		*bh, *bq, *tp_srch_status;
	srch_hist		*dir_hist;
	int			cur_blk_size, blk_seg_cnt, delta, i, j, left_hand_offset, n, ins_chain_offset,
				new_blk_size_l, new_blk_size_r, new_blk_size_single, new_blk_size, blk_reserved_size,
				last_possible_left_offset, new_rec_size, next_rec_shrink, next_rec_shrink1,
				offset_sum, rec_cmpc, target_key_size, tp_lev, undo_index, cur_val_offset, curr_offset, bh_level;
	uint4			segment_update_array_size, key_top, cp2_len, bs1_2_len, bs1_3_len;
	char			*va, last_split_direction[MAX_BT_DEPTH];
	sm_uc_ptr_t		cp1, cp2, curr;
	unsigned short		extra_record_orig_size, rec_size, temp_short;
	unsigned int		prev_rec_offset, prev_rec_match, curr_rec_offset, curr_rec_match;
	boolean_t		copy_extra_record, level_0, new_rec, no_pointers, succeeded, key_exists;
	boolean_t		make_it_null, gbl_target_was_set, duplicate_set, new_rec_goes_to_right, need_extra_block_split;
	key_cum_value		*tempkv;
	jnl_format_buffer	*jfb, *ztworm_jfb;
	jnl_action		*ja;
	mval			*set_val;	/* actual right-hand-side value of the SET or $INCR command */
	ht_ent_int4		*tabent;
	unsigned char		buff[MAX_ZWR_KEY_SZ], *end, old_ch, new_ch;
	sm_uc_ptr_t		buffaddr;
	block_id		lcl_root, last_split_bnum;
	sgm_info		*si;
	uint4			nodeflags;
	boolean_t		write_logical_jnlrecs, can_write_logical_jnlrecs, blk_match, is_split_dir_left;
	int			split_depth;
	mval			*ja_val;
	int			rc;
	int4			cse_first_off;
	enum split_dir		last_split_dir;
#	ifdef GTM_TRIGGER
	boolean_t		is_tpwrap;
	boolean_t		ztval_gvcst_put_redo, skip_hasht_read;
	gtm_trigger_parms	trigparms;
	gvt_trigger_t		*gvt_trigger;
	gvtr_invoke_parms_t	gvtr_parms;
	int			gtm_trig_status;
	int4			data_len;
	unsigned char		*save_msp;
	mv_stent		*save_mv_chain;
	mval			*ztold_mval = NULL;
	mval			*ztval_mval;
	boolean_t		lcl_implicit_tstart;		/* local copy of the global variable "implicit_tstart" */
	mval			lcl_increment_delta_mval;	/* local copy of "increment_delta_mval" */
	boolean_t		lcl_is_dollar_incr;		/* local copy of is_dollar_incr taken at start of module.
								 * used to restore is_dollar_incr in case of TP restarts */
	mval			*lcl_post_incr_mval;		/* local copy of "post_incr_mval" at function entry.
								 * used to restore "post_incr_mval" in case of TP restarts */
	mval			*lcl_val;			/* local copy of "val" at function entry.
								 * used to restore "val" in case of TP restarts */
	DEBUG_ONLY(enum cdb_sc	save_cdb_status;)
#	endif
#	ifdef DEBUG
	char			dbg_valbuff[256];
	mstr_len_t		dbg_vallen;
	mval			*dbg_lcl_val;
	int			dbg_num_iters = -1;	/* number of iterations through gvcst_put */
	int			lcl_dollar_tlevel, lcl_t_tries;
	typedef struct
	{
		unsigned int	t_tries;
		int		retry_line;
		boolean_t	is_fresh_tn_start;
		boolean_t	is_dollar_incr;
		boolean_t	ztval_gvcst_put_redo;
		boolean_t	is_extra_block_split;
		mval		*val;
		boolean_t	lcl_implicit_tstart;
	} dbg_trace;
	/* We want to capture all pertinent information across each iteration of gvcst_put.
	 * There are 3 things that can contribute to a new iteration.
	 * 	a) restarts from the primary set.
	 * 		Max of 4 iterations.
	 * 	b) extra_block_split from the primary set. It can have its own set of restarts too.
	 * 		Max of 4 iterations per extra_block_split.
	 * 		The # of extra block splits could be arbitrary in case of non-TP but cannot be more than 1 for TP
	 * 		because in TP, we would have grabbed crit in the final retry and prevent any more concurrent updates.
	 * 	c) ztval_gvcst_put_redo. This in turn can have its own set of restarts and extra_block_split iterations.
	 * 		Could take a max of (a) + (b) = 4 + 4 = 8 iterations.
	 * 	Total of 16 max iterations. If ever a transaction goes for more than this # of iterations (theoretically
	 * 		possible in non-TP if a lot of extra block splits occur), we assert fail.
	 */
	dbg_trace		dbg_trace_array[16];
	boolean_t		is_fresh_tn_start;
	boolean_t		is_mm;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	is_dollar_incr = in_gvcst_incr;
	in_gvcst_incr = FALSE;
	csa = cs_addrs;
	csd = csa->hdr;
	cnl = csa->nl;
	assert(csd == cs_data);
	DEBUG_ONLY(is_mm = (dba_mm == csd->acc_meth);)
#	ifdef GTM_TRIGGER
	TRIG_CHECK_REPLSTATE_MATCHES_EXPLICIT_UPDATE(gv_cur_region, csa);
	if (IS_EXPLICIT_UPDATE)
	{	/* This is an explicit update. Set ztwormhole_used to FALSE. Note that we initialize this only at the
		 * beginning of the transaction and not at the beginning of each try/retry. If the application used
		 * $ztwormhole in any retsarting try of the transaction, we consider it necessary to write the
		 * TZTWORM/UZTWORM record even though it was not used in the succeeding/committing try.
		 */
		ztwormhole_used = FALSE;
	}
#	endif
	JNLPOOL_INIT_IF_NEEDED(csa, csd, cnl);
	blk_size = csd->blk_size;
	blk_reserved_bytes = csd->reserved_bytes;
	blk_fill_size = (blk_size * gv_fillfactor) / 100 - blk_reserved_bytes;
	jnl_format_done = FALSE; /* do "jnl_format" only once per logical non-tp transaction irrespective of number of retries */
	GTMTRIG_ONLY(
		ztval_gvcst_put_redo = FALSE;
		skip_hasht_read = FALSE;
	)
	assert(('\0' != gv_currkey->base[0]) && gv_currkey->end);
	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC;
	/* this needs to be initialized before any code that does a "goto retry" since this gets used there */
	save_targ = gv_target;
	gbl_target_was_set = (INVALID_GV_TARGET != reset_gv_target);
	if (INVALID_GV_TARGET != reset_gv_target)
		gbl_target_was_set = TRUE;
	else
	{
		gbl_target_was_set = FALSE;
		reset_gv_target = save_targ;
	}
	DBG_SAVE_VAL_AT_FUN_ENTRY;
	GTMTRIG_ONLY(
		lcl_implicit_tstart = FALSE;
		DEBUG_ONLY(gvtr_parms.num_triggers_invoked = -1;) /* set to an out-of-design value; checked by an assert */
	)
	DEBUG_ONLY(
		status = cdb_sc_normal;
		lcl_dollar_tlevel = dollar_tlevel;
	)
fresh_tn_start:
	DEBUG_ONLY(lcl_t_tries = -1;)
	DEBUG_ONLY(is_fresh_tn_start = TRUE;)
	assert(!jnl_format_done || (dollar_tlevel GTMTRIG_ONLY(&& ztval_gvcst_put_redo)));
	T_BEGIN_SETORKILL_NONTP_OR_TP(ERR_GVPUTFAIL);
tn_restart:
	/* t_tries should never decrease - it either increases or stays the same. If should decrease we could live-lock with
	 * an oscillating t_tries and never reach CDB_STAGNATE (go from optimistic to pessimistic concurrency). Since we
	 * typically do a normal increment and then, for certain conditions, do a complementary decrement, we assert that
	 * the net effect is never a decrease.
	 */
	assert(csa == cs_addrs);	/* no amount of retries should change cs_addrs from what it was at entry into gvcst_put */
	assert((((int)t_tries) > lcl_t_tries) || (CDB_STAGNATE == t_tries));
	DEBUG_ONLY(lcl_t_tries = t_tries;) /* update lcl_t_tries */
	DEBUG_ONLY(
		dbg_num_iters++;
		assert(dbg_num_iters < ARRAYSIZE(dbg_trace_array));
		dbg_trace_array[dbg_num_iters].is_fresh_tn_start = is_fresh_tn_start;
		dbg_trace_array[dbg_num_iters].t_tries = t_tries;
		is_fresh_tn_start = FALSE;
		dbg_trace_array[dbg_num_iters].is_dollar_incr = is_dollar_incr;
		GTMTRIG_ONLY(dbg_trace_array[dbg_num_iters].ztval_gvcst_put_redo = ztval_gvcst_put_redo;)
		dbg_trace_array[dbg_num_iters].val = val;
		GTMTRIG_ONLY(dbg_trace_array[dbg_num_iters].lcl_implicit_tstart = lcl_implicit_tstart;)
		dbg_trace_array[dbg_num_iters].is_extra_block_split = FALSE;
		dbg_trace_array[dbg_num_iters].retry_line = 0;
		split_targ = NULL;
	)
	/* If MM and file extension occurred, reset csd to cs_data to avoid out-of-date value. If BG we dont need the reset
	 * but if checks are costlier than unconditional sets in a pipelined architecture so we choose not to do the if.
	 */
	assert(is_mm || (csd == cs_data));
	csd = cs_data;
#	ifdef GTM_TRIGGER
	gvtr_parms.num_triggers_invoked = 0;	/* clear any leftover value */
	assert(!ztval_gvcst_put_redo || IS_PTR_INSIDE_M_STACK(val));
	is_tpwrap = FALSE;
	if (!skip_dbtriggers && !skip_hasht_read)
	{
		GVTR_INIT_AND_TPWRAP_IF_NEEDED(csa, csd, gv_target, gvt_trigger, lcl_implicit_tstart, is_tpwrap, ERR_GVPUTFAIL);
		assert(gvt_trigger == gv_target->gvt_trigger);
		assert(gv_target == save_targ); /* gv_target should NOT have been mutated by the trigger reads */
		if (is_tpwrap)
		{	/* The above call to GVTR_INIT* macro created a TP transaction (by invoking op_tstart).
			 * Save all pertinent global variable information that needs to be restored in case of
			 * a restart.  Note that the restart could happen in a nested trigger so these global
			 * variables could have changed in value from what they were at gvcst_put entry, hence
			 * the need to save/restore them.  If this is not an implicitly tp wrapped transaction,
			 * there is no need to do this save/restore because a restart will transfer control
			 * back to the M code corresponding to the start of the transaction which would
			 * automatically initialize these global variables to the appropriate values.
			 */
			assert(lcl_implicit_tstart);
			lcl_is_dollar_incr = is_dollar_incr;
			lcl_val = val;
			lcl_post_incr_mval = post_incr_mval;
			lcl_increment_delta_mval = increment_delta_mval;
		}
		if (NULL != gvt_trigger)
			PUSH_ZTOLDMVAL_ON_M_STACK(ztold_mval, save_msp, save_mv_chain);
	}
#	endif
	assert(csd == cs_data);	/* assert csd is in sync with cs_data even if there were MM db file extensions */
	si = sgm_info_ptr;	/* Cannot be moved before GVTR_INIT_AND_TPWRAP_IF_NEEDED macro since we could enter gvcst_put
				 * with sgm_info_ptr NULL but could tpwrap a non-tp transaction due to triggers. In that case
				 * we want the updated sgm_info_ptr to be noted down in si and used later.
				 */
	assert((NULL == si) || (si->update_trans));
	assert(NULL != update_array);
	assert(NULL != update_array_ptr);
	assert(0 != update_array_size);
	assert(update_array + update_array_size >= update_array_ptr);
	/* When the following two asserts trip, we should change the data types of prev_first_off
	 * and prev_next_off, so they satisfy the assert.
	 */
	assert(SIZEOF(prev_first_off) >= SIZEOF(block_offset));
	assert(SIZEOF(prev_next_off) >= SIZEOF(block_offset));
	prev_first_off = prev_next_off = PREV_OFF_INVALID;
	horiz_growth = FALSE;
	assert(t_tries < CDB_STAGNATE || csa->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	/* level_0 == true and no_pointers == false means that this is a directory tree data block containing pointers to roots */
	level_0 = no_pointers = TRUE;
	assert(gv_altkey->top == gv_currkey->top);
	assert(gv_altkey->top == gv_keysize);
	assert(gv_currkey->end < gv_currkey->top);
	assert(gv_altkey->end < gv_altkey->top);
	temp_key = gv_currkey;
	dir_hist = NULL;
	ins_chain_index = 0;
	lcl_root = gv_target->root;
	tp_root = lcl_root;
	if (!dollar_tlevel)
	{
		CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	} else
	{
		segment_update_array_size = UA_NON_BM_SIZE(csd);
		ENSURE_UPDATE_ARRAY_SPACE(segment_update_array_size);
		curr_chain = *(off_chain *)&lcl_root;
		if (curr_chain.flag == 1)
		{
			tp_get_cw(si->first_cw_set, (int)curr_chain.cw_index, &cse);
			tp_root = cse->blk;
			assert(tp_root);
		}
	}
	if (0 == tp_root)
	{	/* Global does not exist as far as we know. Creating a new one requires validating the directory tree path which
		 * led us to this conclusion. So scan the directory tree here and validate its history at the end of this function.
		 * If we decide to restart due to a concurrency conflict, remember to reset gv_target->root to 0 before restarting.
		 */
		gv_target = dir_tree = csa->dir_tree;
		SET_GV_ALTKEY_TO_GBLNAME_FROM_GV_CURRKEY;	/* set up gv_altkey to be just the gblname */
		dir_hist = &gv_target->hist;
		status = gvcst_search(gv_altkey, NULL);
		RESET_GV_TARGET_LCL(save_targ);
		if (cdb_sc_normal != status)
			GOTO_RETRY;
		if (gv_altkey->end + 1 == dir_hist->h[0].curr_rec.match)
		{
			GET_LONG(tp_root, (dir_hist->h[0].buffaddr + SIZEOF(rec_hdr)
					   + dir_hist->h[0].curr_rec.offset + gv_altkey->end + 1
					   - ((rec_hdr_ptr_t)(dir_hist->h[0].buffaddr + dir_hist->h[0].curr_rec.offset))->cmpc));
			if (dollar_tlevel)
			{
				gvt_for_root = dir_hist->h[0].blk_num;
				curr_chain = *(off_chain *)&gvt_for_root;
				if (curr_chain.flag == 1)
					tp_get_cw(si->first_cw_set, curr_chain.cw_index, &cse);
				else
				{
					if (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&gvt_for_root)))
						tp_srch_status = tabent->value;
					else
						tp_srch_status = NULL;
					cse = tp_srch_status ? tp_srch_status->cse : NULL;
				}
				assert(!cse || !cse->high_tlevel);
			}
			assert(0 == gv_target->root);
			gv_target->root = tp_root;
		}
	}
	blk_reserved_size = blk_size - blk_reserved_bytes;
	if (0 == tp_root)
	{	/* there is no entry in the GVT (and no root), so create a new empty tree and put the name in the GVT */
		/* Create the data block */
		key_exists = FALSE;
		if (is_dollar_incr)
		{	/* The global variable that is being $INCREMENTed does not exist.
			 * $INCREMENT() should not signal UNDEF error but proceed with an implicit $GET().
			 */
			assert(dollar_tlevel ? si->update_trans : update_trans);
			*post_incr_mval = *val;
			MV_FORCE_NUM(post_incr_mval);
			post_incr_mval->mvtype &= ~MV_STR;	/* needed to force any alphanumeric string to numeric */
			MV_FORCE_STR(post_incr_mval);
			assert(post_incr_mval->str.len);
			value = post_incr_mval->str;
			/* The MAX_REC_SIZE check could not be done in op_gvincr (like is done in op_gvput) because
			 * the post-increment value is not known until here. so do the check here.
			 */
			ENSURE_VALUE_WITHIN_MAX_REC_SIZE(value, dir_tree);
		} else
			value = val->str;
		/* Potential size of a GVT leaf block containing just the new/updated record */
		new_blk_size_single = SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + temp_key->end + 1 + value.len;
		if (new_blk_size_single > blk_reserved_size)
		{	/* The record that is newly inserted/updated does not fit by itself in a separate block
			 * if the current reserved-bytes for this database is taken into account. Cannot go on.
			 */
			ISSUE_RSVDBYTE2HIGH_ERROR(dir_tree);
		}
		BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
		curr_rec_hdr->rsiz = SIZEOF(rec_hdr) + temp_key->end + 1 + value.len;
		curr_rec_hdr->cmpc = 0;
		BLK_INIT(bs_ptr, new_blk_bs);
		BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
		BLK_ADDR(cp1, temp_key->end + 1, unsigned char);
		memcpy(cp1, temp_key->base, temp_key->end + 1);
		BLK_SEG(bs_ptr, cp1, temp_key->end + 1);
		if (0 != value.len)
		{
			BLK_ADDR(va, value.len, char);
			memcpy(va, value.addr, value.len);
			BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
		}
		if (0 == BLK_FINI(bs_ptr, new_blk_bs))
		{
			assert(CDB_STAGNATE > t_tries);
			status = cdb_sc_mkblk;
			GOTO_RETRY;
		}
		assert(new_blk_bs[0].len <= blk_reserved_size); /* Assert that new block has space for reserved bytes */
		/* Create the index block */
		BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
		curr_rec_hdr->rsiz = BSTAR_REC_SIZE;
		curr_rec_hdr->cmpc = 0;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
		BLK_SEG(bs_ptr, (unsigned char *)&zeroes, SIZEOF(block_id));
		if (0 == BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			status = cdb_sc_mkblk;
			GOTO_RETRY;
		}
		assert(bs1[0].len <= blk_reserved_size); /* Assert that new block has space for reserved bytes */
        	allocation_clue = ALLOCATION_CLUE(csd->trans_hist.total_blks);
		next_blk_index = t_create(allocation_clue, (uchar_ptr_t)new_blk_bs, 0, 0, 0);
		++allocation_clue;
		ins_chain_index = t_create(allocation_clue, (uchar_ptr_t)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), next_blk_index, 1);
		root_blk_cw_index = ins_chain_index;
		temp_key = gv_altkey;
		gv_target->hist.h[0].blk_num = HIST_TERMINATOR;
		gv_target = dir_tree;
		bh = &gv_target->hist.h[0];
		value.len = SIZEOF(block_id);
		value.addr = (char *)&zeroes;
		no_pointers = FALSE;
	} else
	{
		if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL)))
			GOTO_RETRY;
		target_key_size = gv_currkey->end + 1;
		bh = &gv_target->hist.h[0];
		key_exists = (target_key_size == bh->curr_rec.match);
		if (is_dollar_incr)
		{
			if (key_exists)
			{	/* $INCR is being done on an existing global variable key in the database.
				 * the value to set the key to has to be determined by adding the existing value
				 * with the increment passed as the input parameter "val" (of type (mval *)) to gvcst_put
				 */
				if (cdb_sc_normal != (status = gvincr_compute_post_incr(bh)))
				{
					assert(CDB_STAGNATE > t_tries);
					GOTO_RETRY;
				}
			} else
			{	/* The global variable that is being $INCREMENTed does not exist.  $INCREMENT() should not
				 * signal UNDEF error but proceed with an implicit $GET() */
				*post_incr_mval = *val;
				MV_FORCE_NUM(post_incr_mval);
				post_incr_mval->mvtype &= ~MV_STR;	/* needed to force any alphanumeric string to numeric */
				MV_FORCE_STR(post_incr_mval);
				assert(post_incr_mval->str.len);
			}
			assert(MV_IS_STRING(post_incr_mval));
			assert(dollar_tlevel ? si->update_trans : update_trans);
			value = post_incr_mval->str;
			/* The MAX_REC_SIZE check could not be done in op_gvincr (like is done in op_gvput) because
			 * the post-increment value is not known until here. so do the check here.
			 */
			ENSURE_VALUE_WITHIN_MAX_REC_SIZE(value, gv_target);

		} else
			value = val->str;
	}
	/* --------------------------------------------------------------------------------------------
	 * The code for the non-block-split case is very similar to the code in recompute_upd_array.
	 * Any changes in either place should be reflected in the other.
	 * --------------------------------------------------------------------------------------------
	 */
	need_extra_block_split = FALSE; /* Assume we don't require an additional block split (most common case) */
	duplicate_set = FALSE; /* Assume this is NOT a duplicate set (most common case) */
	split_depth = 0;
	split_targ = gv_target;
	for (succeeded = FALSE; !succeeded; no_pointers = level_0 = FALSE)
	{
		buffaddr = bh->buffaddr;
		cur_blk_size = ((blk_hdr_ptr_t)buffaddr)->bsiz;
		target_key_size = temp_key->end + 1;
		/* Potential size of a block containing just the new/updated record */
		new_blk_size_single = SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + target_key_size + value.len;
		if (new_blk_size_single > blk_reserved_size)
		{	/* The record that is newly inserted/updated does not fit by itself in a separate block
			 * if the current reserved-bytes for this database is taken into account. If this is not a
			 * GVT leaf block, this situation is then possible if we are not in the final retry (and hence
			 * dont hold crit on the region) and "temp_key->end" (and in turn "target_key_size") was
			 * computed from a stale copy (due to concurrent updates or buffer reuse) of the global buffer
			 * (effectively a restartable situation). If so, restart. If not issue error.
			 */
			if (no_pointers || (CDB_STAGNATE <= t_tries))
			{
				ISSUE_RSVDBYTE2HIGH_ERROR(gv_target);
			} else
			{
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
		}
		curr_rec_match = bh->curr_rec.match;
		curr_rec_offset = bh->curr_rec.offset;
		new_rec = (target_key_size != curr_rec_match);
                if (!new_rec && !no_pointers)
                {
                        assert(CDB_STAGNATE > t_tries);
                        status = cdb_sc_lostcr;         /* will a new cdb_sc status be better */
                        GOTO_RETRY;
                }
		rp = (rec_hdr_ptr_t)(buffaddr + curr_rec_offset);
		if (curr_rec_offset == cur_blk_size)
		{
			if ((FALSE == new_rec) && dollar_tlevel)
			{
				assert(CDB_STAGNATE > t_tries);
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
			rec_cmpc = 0;
			rec_size = 0;
		} else
		{
			GET_USHORT(rec_size, &rp->rsiz);
			rec_cmpc = rp->cmpc;
			if ((sm_uc_ptr_t)rp + rec_size > (sm_uc_ptr_t)buffaddr + cur_blk_size)
			{
				assert(CDB_STAGNATE > t_tries);
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
		}
		prev_rec_match = bh->prev_rec.match;
		if (new_rec)
		{
			new_rec_size = SIZEOF(rec_hdr) + target_key_size - prev_rec_match + value.len;
			if (cur_blk_size <= (signed int)curr_rec_offset) /* typecast necessary to enforce "signed int" comparison */
				next_rec_shrink = 0;
			else
				next_rec_shrink = curr_rec_match - rec_cmpc;
			delta = new_rec_size - next_rec_shrink;
		} else
		{
			if (rec_cmpc != prev_rec_match)
			{
				assert(CDB_STAGNATE > t_tries);
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
			assert(target_key_size > rec_cmpc);
			cur_val_offset = SIZEOF(rec_hdr) + (target_key_size - rec_cmpc);
#			ifdef GTM_TRIGGER
			if (no_pointers && (NULL != ztold_mval) && !skip_hasht_read)
			{	/* Complete initialization of ztold_mval */
				assert(!skip_dbtriggers);
				data_len = rec_size - cur_val_offset;
				if (0 > data_len)
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_rmisalign;
					GOTO_RETRY;
				}
				if (data_len)
				{
					ENSURE_STP_FREE_SPACE(data_len);
					ztold_mval->str.addr = (char *)stringpool.free;
					memcpy(ztold_mval->str.addr, (sm_uc_ptr_t)rp + cur_val_offset, data_len);
					stringpool.free += data_len;
				}
				ztold_mval->str.len = data_len;
				ztold_mval->mvtype = MV_STR;	/* ztold_mval is now completely initialized */
			}
#			endif
			new_rec_size = cur_val_offset + value.len;
			delta = new_rec_size - rec_size;
			if (!delta && value.len
				&& !memcmp(value.addr, (sm_uc_ptr_t)rp + new_rec_size - value.len, value.len))
			{
				duplicate_set = TRUE;
				if (gvdupsetnoop)
				{	/* We do not want to touch the DB Blocks in case of a duplicate set unless the
					 * dupsetnoop optimization is disabled. Since it is enabled, let us break right away.
					 */
					succeeded = TRUE;
					break;	/* duplicate SET */
				}
			}
			next_rec_shrink = 0;
		}
		blk_num = bh->blk_num;
		bh_level = bh->level;
		if (dollar_tlevel)
		{
			if ((SIZEOF(rec_hdr) + target_key_size - prev_rec_match + value.len) != new_rec_size)
			{
				assert(CDB_STAGNATE > t_tries);
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
			chain1 = *(off_chain *)&blk_num;
			if ((1 == chain1.flag) && ((int)chain1.cw_index >= si->cw_set_depth))
			{
				assert(si->tp_csa == csa);
				assert(FALSE == csa->now_crit);
				status = cdb_sc_blknumerr;
				GOTO_RETRY;
			}
		}
		next_rec_shrink1 = next_rec_shrink;
		/* Potential size of the current block including the new/updated record */
		new_blk_size = cur_blk_size + delta;
		/* It is possible due to concurrency issues (for example if the buffer that we are planning on updating
		 * in shared memory got reused for a different block) that "new_blk_size" is lesser than "new_blk_size_single"
		 * In those cases, we will go into the non-block-split case but eventually we will restart.
		 */
		assert((new_blk_size >= new_blk_size_single) || (CDB_STAGNATE > t_tries));
		if ((new_blk_size <= blk_fill_size) || (new_blk_size <= new_blk_size_single))
		{	/* Update can be done without overflowing the block's fillfactor OR the record to be updated
			 * is the only record in the new block. Do not split block in either case. This means we might
			 * not honour the desired FillFactor if the only record in a block exceeds the blk_fill_size,
			 * but in this case we are guaranteed the block has room for the current reserved bytes.
			 */
			if (no_pointers)	/* level zero (normal) data block: no deferred pointer chains */
				ins_chain_offset = 0;
			else			/* index or directory level block */
				ins_chain_offset =(int)((sm_uc_ptr_t)rp - buffaddr + new_rec_size - SIZEOF(block_id));
			BLK_INIT(bs_ptr, bs1);
			if (0 == rc_set_fragment)
			{
				BLK_SEG(bs_ptr, buffaddr + SIZEOF(blk_hdr), curr_rec_offset - SIZEOF(blk_hdr));
				BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
				curr_rec_hdr->rsiz = new_rec_size;
				curr_rec_hdr->cmpc = prev_rec_match;
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
				BLK_ADDR(cp1, target_key_size - prev_rec_match, unsigned char);
				memcpy(cp1, temp_key->base + prev_rec_match, target_key_size - prev_rec_match);
				BLK_SEG(bs_ptr, cp1, target_key_size - prev_rec_match);
				if (0 != value.len)
				{
					BLK_ADDR(va, value.len, char);
					memcpy(va, value.addr, value.len);
					BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
				}
				if (!new_rec)
					rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size);
				n = (int)(cur_blk_size - ((sm_uc_ptr_t)rp - buffaddr));
				if (n > 0)
				{
					if (new_rec)
					{
						BLK_ADDR(next_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
						next_rec_hdr->rsiz = rec_size - next_rec_shrink;
						next_rec_hdr->cmpc = curr_rec_match;
						BLK_SEG(bs_ptr, (sm_uc_ptr_t)next_rec_hdr, SIZEOF(rec_hdr));
						next_rec_shrink += SIZEOF(rec_hdr);
					}
					if (n >= next_rec_shrink)
					{
						BLK_SEG(bs_ptr, (sm_uc_ptr_t)rp + next_rec_shrink, n - next_rec_shrink);
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						status = cdb_sc_mkblk;
						GOTO_RETRY;
					}
				}
			} else
			{	/* With GT.M TRIGGERS, it is not clear how the RC protocol will work. The below assert is to
				 * be informed whenever such usage happens (expected to be really rare) and handle it right
				 * then instead of worrying about it during the initial trigger implementation.
				 */
				assert(FALSE);
				curr_rec_hdr = (rec_hdr_ptr_t)(buffaddr + curr_rec_offset);
				/* First piece is block prior to record + key + data prior to fragment */
				BLK_SEG(bs_ptr,
					buffaddr + SIZEOF(blk_hdr),
					curr_rec_offset - SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + rc_set_fragment
						+ gv_currkey->end + 1 - curr_rec_hdr->cmpc);
				/* Second piece is fragment itself */
				BLK_ADDR(va, value.len, char);
				memcpy(va, value.addr, value.len);
				BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
				/* Third piece is data after fragment + rest of block after record */
				n = (int)(cur_blk_size - ((sm_uc_ptr_t)curr_rec_hdr - buffaddr) - SIZEOF(rec_hdr)
					- (gv_currkey->end + 1 - curr_rec_hdr->cmpc) - rc_set_fragment - value.len);
				if (0 < n)
					BLK_SEG(bs_ptr,
						(sm_uc_ptr_t)curr_rec_hdr + gv_currkey->end + 1 - curr_rec_hdr->cmpc
							+ rc_set_fragment + value.len,
						n);
			}
			if (0 == BLK_FINI(bs_ptr, bs1))
			{
				assert(CDB_STAGNATE > t_tries);
				status = cdb_sc_mkblk;
				GOTO_RETRY;
			}
			assert(bs1[0].len <= blk_reserved_size); /* Assert that new block has space for reserved bytes */
			cse = t_write(bh, (unsigned char *)bs1, ins_chain_offset, ins_chain_index, bh_level,
				FALSE, FALSE, GDS_WRITE_PLAIN);
			assert(!dollar_tlevel || !cse->high_tlevel);
			if ((0 != ins_chain_offset) && (NULL != cse) && (0 != cse->first_off))
			{	/* formerly tp_offset_chain - inserts a new_entry in the chain */
				assert((NULL != cse->new_buff) || horiz_growth && cse->low_tlevel->new_buff
									&& (buffaddr == cse->low_tlevel->new_buff));
				assert(0 == cse->next_off);
				assert(ins_chain_offset > (signed)SIZEOF(blk_hdr));	/* we want signed comparison */
				assert((curr_rec_offset - SIZEOF(off_chain)) == (ins_chain_offset - new_rec_size));
				offset_sum = cse->first_off;
				curr = buffaddr + offset_sum;
				/* The typecast is needed below to enforce a "signed int" (versus "unsigned int") comparison */
				if (offset_sum >= (signed int)curr_rec_offset)
				{	/* the new record is prior to the first existing chain record, id the new one as first */
					/* first_off-------------v--------------------v
					 * [blk_hdr]...[new rec ( )]...[existing rec ( )]... */
					cse->next_off = cse->first_off - (ins_chain_offset - new_rec_size) - next_rec_shrink1;
					cse->first_off = ins_chain_offset;
				} else
				{
					if (horiz_growth)
					{
						old_cse = cse->low_tlevel;
						assert(old_cse->first_off);
						assert(old_cse && old_cse->done);
						assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]);
					}
					/* find chain records before and after the new one */
					for ( ; ; curr += curr_chain.next_off)
					{	/* try to make offset_sum identify the first chain entry after the new record */
						GET_LONGP(&curr_chain, curr);
						assert(curr_chain.flag == 1);
						if (0 == curr_chain.next_off)
							break;
						offset_sum += curr_chain.next_off;
						/* The typecast is needed below to enforce a "signed int" comparison */
						if (offset_sum >= (signed int)curr_rec_offset)
							break;
					}
					/* store the next_off in old_cse before changing it in the buffer (for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = curr_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(curr - buffaddr);
						assert(old_cse->undo_offset[0]);
					}
					if (0 == curr_chain.next_off)
					{	/* the last chain record precedes the new record: just update it */
						/* 			   ---|---------------v
						 * [blk_hdr]...[existing rec ( )]...[new rec ( )]... */
						curr_chain.next_off = ins_chain_offset - offset_sum;
						GET_LONGP(curr, &curr_chain);
					} else
					{	/* update the chain record before the new one */
						/* 			   ---|---------------v--------------------v
						 * [blk_hdr]...[existing rec ( )]...[new rec ( )]...[existing rec ( )] */
						curr_chain.next_off = (unsigned int)(ins_chain_offset - (curr - buffaddr));
						GET_LONGP(curr, &curr_chain);
						cse->next_off = offset_sum - (ins_chain_offset - new_rec_size) - next_rec_shrink1;
					}
				}
				assert((ins_chain_offset + (int)cse->next_off) <=
				       (delta + (sm_long_t)cur_blk_size - SIZEOF(off_chain)));
			}
			succeeded = TRUE;
			if (level_0)
			{
				if (new_rec)
				{	/* New record insertion at leaf level. gvcst_search would have already updated clue to
					 * reflect the new key, but we need to fix the search history to keep it in sync with clue.
					 * This search history (and clue) will be used by the NEXT call to gvcst_search.
					 * Note that clue.end could be 0 at this point (see "Clue less than first rec, invalidate"
					 * comment in gvcst_search) in which case the below assignment is unnecessary (though does
					 * not hurt) but we want to avoid the if check (since we expect clue to be non-zero mostly).
					 */
					assert((0 == gv_target->clue.end) || (gv_target->clue.end + 1 == target_key_size));
					assert(1 < target_key_size);
					assert(bh->curr_rec.match != target_key_size);
					bh->curr_rec.match = target_key_size;
				}
				/* -------------------------------------------------------------------------------------------------
				 * We have to maintain information for future recomputation only if the following are satisfied
				 *	1) The block is a leaf-level block
				 *	2) We are in TP (indicated by non-null cse)
				 *	3) The global has NOISOLATION turned ON
				 *	4) The cw_set_element hasn't encountered a block-split or a kill
				 *	5) We don't need an extra_block_split
				 *
				 * We can also add an optimization that only cse's of mode gds_t_write need to have such updations,
				 *	but because of the belief that for a nonisolated variable, we will very rarely encounter a
				 *	situation where a created block (in TP) will have some new keys added to it, and that adding
				 *	the check slows down the normal code, we don't do that check here.
				 * -------------------------------------------------------------------------------------------------
				 */
				if (cse && gv_target->noisolation && !cse->write_type && !need_extra_block_split)
				{
					assert(dollar_tlevel);
					if (is_dollar_incr)
					{
						ADD_TO_GVT_TP_LIST(gv_target); /* See comment in ENSURE_VALUE_WITHIN_MAX_REC_SIZE
									  * macro definition for why this macro call is necessary */
						rts_error(VARLSTCNT(4) ERR_GVINCRISOLATION, 2,
							gv_target->gvname.var_name.len, gv_target->gvname.var_name.addr);
					}
					if (NULL == cse->recompute_list_tail ||
						0 != memcmp(gv_currkey->base, cse->recompute_list_tail->key.base,
							gv_currkey->top))
					{
						tempkv = (key_cum_value *)get_new_element(si->recompute_list, 1);
						tempkv->key = *gv_currkey;
						tempkv->next = NULL;
						memcpy(tempkv->key.base, gv_currkey->base, gv_currkey->end + 1);
						if (NULL == cse->recompute_list_head)
						{
							assert(NULL == cse->recompute_list_tail);
							cse->recompute_list_head = tempkv;
						} else
							cse->recompute_list_tail->next = tempkv;
						cse->recompute_list_tail = tempkv;
					} else
						tempkv = cse->recompute_list_tail;
					assert(0 == val->str.len
						|| ((val->str.len == bs1[4].len)
							&& 0 == memcmp(val->str.addr, bs1[4].addr, val->str.len)));
					tempkv->value.len = val->str.len;	/* bs1[4].addr is undefined if val->str.len is 0 */
					tempkv->value.addr = (char *)bs1[4].addr;/* 	but not used in that case, so ok */
				}

			}
		} else
		{	/* Block split required */
			split_depth++;
			gv_target->clue.end = 0;	/* invalidate clue */
			/* Potential size of the left and right blocks, including the new record */
			new_blk_size_l = curr_rec_offset + new_rec_size;
			new_blk_size_r = SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + target_key_size + value.len + cur_blk_size
						- curr_rec_offset - (new_rec ? next_rec_shrink : rec_size);
			assert(new_blk_size_single <= blk_reserved_size);
			assert(blk_reserved_size >= blk_fill_size);
			extra_record_orig_size = 0;
			prev_rec_offset = bh->prev_rec.offset;
			assert(new_blk_size_single <= new_blk_size_r);
			/* Decide which side (left or right) the new record goes. Ensure either side has at least one record.
			 * This means we might not honor the desired FillFactor if the only record in a block exceeds the
			 * blk_fill_size, but in this case we are guaranteed the block has room for the current reserved bytes.
			 * The typecast of curr_rec_offset is needed below to enforce a "signed int" comparison.
			 */
			if (new_blk_size_r > blk_fill_size)
			{
				new_rec_goes_to_right = (new_blk_size_r == new_blk_size_single);
				last_split_dir = NEWREC_DIR_FORCED;	/* no choice in split direction */
			} else if (new_blk_size_l > blk_fill_size)
			{
				new_rec_goes_to_right = TRUE;
				last_split_dir = NEWREC_DIR_FORCED;	/* no choice in split direction */
			} else
			{	/* new_rec can go in either direction without any issues of fitting in.
				 * This is where we need to use a few heuristics to ensure good block space utilization.
				 * We note down which direction (left or right) the new record went in after the split.
				 * We use that as the heuristic to identify the direction of data loading and do the
				 * splits accordingly for future updates.
				 */
				last_split_dir = (enum split_dir)gv_target->last_split_direction[bh_level];
				if (NEWREC_DIR_FORCED == last_split_dir)
				{	/* dont have prior information to use heuristic. choose whichever side is less full.
					 * if this turns out to not be the correct choice, we will correct ourselves at the
					 * time of the next block split at the same level.
					 */
					last_split_dir = (new_blk_size_l < new_blk_size_r) ? NEWREC_DIR_LEFT : NEWREC_DIR_RIGHT;
				} else
				{	/* Last block split at this level chose a specific direction for new_rec. See if
					 * that heuristic worked. This is done by checking if the block # that new_rec went
					 * into previously is the same block that is being split now. If so, that means the
					 * previous choice of direction was actually not optimal. So try the other direction now.
					 */
					last_split_bnum = gv_target->last_split_blk_num[bh_level];
					if (dollar_tlevel)
					{
						chain2 = *(off_chain *)&last_split_bnum;
						if (chain1.flag == chain2.flag)
						{
							if (!chain1.flag)
								blk_match = (blk_num == last_split_bnum);
							else
							{
								assert(chain1.cw_index < si->cw_set_depth);
								blk_match = (chain1.cw_index == chain2.cw_index);
							}
						} else
							blk_match = FALSE;
					} else
					{
						DEBUG_ONLY(chain1 = *(off_chain *)&last_split_bnum;)
						assert(!chain1.flag);
						blk_match = (blk_num == last_split_bnum);
					}
					is_split_dir_left = (NEWREC_DIR_LEFT == last_split_dir);
					if (blk_match)	/* switch direction since last choice did not seem to have worked */
						last_split_dir = is_split_dir_left ? NEWREC_DIR_RIGHT : NEWREC_DIR_LEFT;
					else
					{	/* blk# did not match means there is a high likelihood that the current split
						 * is happening in the OTHER sibling block from the previous block split operation
						 * at the same level. There is no easy way of confirming this so we assume the
						 * heuristic is doing its job, unless we see evidence otherwise. And that evidence
						 * is IF the block sizes of the left and right halves dont match the direction of
						 * choice (e.g. if we choose NEWREC_DIR_LEFT, we expect the right block to be
						 * almost full and the left block to be almost empty and vice versa).
						 * In this case too switch the direction.
						 */
						if (is_split_dir_left)
						{
							if (new_blk_size_l > new_blk_size_r)
								last_split_dir = NEWREC_DIR_RIGHT;
						} else
						{
							if (new_blk_size_l < new_blk_size_r)
								last_split_dir = NEWREC_DIR_LEFT;
						}
					}
				}
				new_rec_goes_to_right = (NEWREC_DIR_RIGHT == last_split_dir);
			}
			last_split_direction[bh_level] = (char)last_split_dir;
			if (new_rec_goes_to_right)
			{	/* Left side of this block will be split off into a new block.
				 * The new record and the right side of this block will remain in this block.
				 */
				/* prepare new block */
				BLK_INIT(bs_ptr, bs1);
				if (level_0)
				{
					BLK_SEG(bs_ptr, buffaddr + SIZEOF(blk_hdr), curr_rec_offset - SIZEOF(blk_hdr));
				} else
				{	/* for index records, the record before the split becomes a new *-key */
					/* Note:  If the block split was caused by our appending the new record
					 * to the end of the block, this code causes the record PRIOR to the
					 * current *-key to become the new *-key.
					 */
					BLK_SEG(bs_ptr, buffaddr + SIZEOF(blk_hdr), prev_rec_offset - SIZEOF(blk_hdr));
					BLK_ADDR(new_star_hdr, SIZEOF(rec_hdr), rec_hdr);
					new_star_hdr->rsiz = BSTAR_REC_SIZE;
					new_star_hdr->cmpc = 0;
					BLK_SEG(bs_ptr, (sm_uc_ptr_t)new_star_hdr, SIZEOF(rec_hdr));
					BLK_SEG(bs_ptr, (sm_uc_ptr_t)rp - SIZEOF(block_id), SIZEOF(block_id));
				}
				new_blk_bs = bs1;
				if (0 == BLK_FINI(bs_ptr,bs1))
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				/* We want to assert that the left block has enough space for reserved bytes but
				 * it is possible that it DOES NOT have enough space for reserved bytes if the pre-split
				 * block was previously populated with a very low reserved bytes setting and if the current
				 * reserved bytes setting is much higher than what the chosen split point would free up.
				 * This is an issue waiting to be fixed by C9K01-003221. Until then the following assert
				 * has to remain commented out.
				 *
				 * assert(bs1[0].len <= blk_reserved_size);
				 */
				/* prepare the existing block */
				BLK_INIT(bs_ptr, bs1);
				ins_chain_offset = no_pointers ? 0 : (int)(SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + target_key_size);
				left_hand_offset = left_hand_index
						 = 0;
				if (!new_rec)
					rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size);
				BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
				curr_rec_hdr->rsiz = target_key_size + SIZEOF(rec_hdr) + value.len;
				curr_rec_hdr->cmpc = 0;
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
				BLK_ADDR(cp1, target_key_size, unsigned char);
				memcpy(cp1, temp_key->base, target_key_size);
				BLK_SEG(bs_ptr, cp1, target_key_size);
				if (0 != value.len)
				{
					BLK_ADDR(va, value.len, char);
					memcpy(va, value.addr, value.len);
					BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
				}
				if (buffaddr + cur_blk_size > (sm_uc_ptr_t)rp)
				{
					BLK_ADDR(next_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
					GET_USHORT(next_rec_hdr->rsiz, &rp->rsiz);
					next_rec_hdr->rsiz -= next_rec_shrink;
					next_rec_hdr->cmpc = new_rec ? curr_rec_match : rp->cmpc;
					BLK_SEG(bs_ptr, (sm_uc_ptr_t)next_rec_hdr, SIZEOF(rec_hdr));
					next_rec_shrink += SIZEOF(rec_hdr);
					n = cur_blk_size - INTCAST(((sm_uc_ptr_t)rp - buffaddr)) - next_rec_shrink;
					if (0 > n)	/* want signed compare as 'n' can be negative */
					{
						assert(CDB_STAGNATE > t_tries);
						status = cdb_sc_mkblk;
						GOTO_RETRY;
					}
					BLK_SEG(bs_ptr, (sm_uc_ptr_t)rp + next_rec_shrink, n);
				}
				if (0 == BLK_FINI(bs_ptr, bs1))
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				assert(bs1[0].len <= blk_reserved_size); /* Assert that right block has space for reserved bytes */
				assert(gv_altkey->top == gv_currkey->top);
				assert(gv_altkey->end < gv_altkey->top);
				temp_key = gv_altkey;
				if (cdb_sc_normal != (status = gvcst_expand_key((blk_hdr_ptr_t)buffaddr, prev_rec_offset,
						temp_key)))
					GOTO_RETRY;
			} else
			{	/* Insert in left hand (new) block */
				if (!level_0)
				{	/* In case of an index block, as long as the current record is not a *-record
					 * (i.e. last record in the block) and copying an extra record into the left
					 * block does not cause it to exceed the fill factor, copy an additional record.
					 * Not doing the extra record copy for index blocks (was the case pre-V54002) has
					 * been seen to create suboptimally filled index blocks (as low as 15% fillfactor)
					 * depending on the patterns of updates.
					 */
					assert(new_rec);
					copy_extra_record = ((BSTAR_REC_SIZE != rec_size)
									&& ((new_blk_size_l + BSTAR_REC_SIZE) <= blk_fill_size));
				} else
				{
					copy_extra_record = ((0 == prev_rec_offset) && (NEWREC_DIR_LEFT == last_split_dir)
								&& new_rec && (SIZEOF(blk_hdr) < cur_blk_size));
				}
				BLK_INIT(bs_ptr, bs1);
				if (no_pointers)
					left_hand_offset = 0;
				else
				{
					left_hand_offset = curr_rec_offset + SIZEOF(rec_hdr);
					if (level_0 || copy_extra_record)
						left_hand_offset += target_key_size - prev_rec_match;
				}
				left_hand_index = ins_chain_index;
				ins_chain_index = ins_chain_offset = 0;
				BLK_SEG(bs_ptr, buffaddr + SIZEOF(blk_hdr), curr_rec_offset - SIZEOF(blk_hdr));
				if (level_0)
				{	/* After the initial split, will this record fit into the new left block?
					 * If not, this pass will make room and we will do another block split on the next pass.
					 */
					assert((blk_seg_cnt + SIZEOF(rec_hdr) + target_key_size - prev_rec_match + value.len)
						== new_blk_size_l);
					assert((new_blk_size_single <= new_blk_size_l) || (CDB_STAGNATE > t_tries));
					assert((new_blk_size_single != new_blk_size_l)
						|| ((0 == prev_rec_offset) && (SIZEOF(blk_hdr) == curr_rec_offset)));
					assert((new_blk_size_single >= new_blk_size_l)
						|| ((SIZEOF(blk_hdr) <= prev_rec_offset) && (SIZEOF(blk_hdr) < curr_rec_offset)));
					if ((new_blk_size_l > blk_fill_size) && (new_blk_size_l > new_blk_size_single))
					{	/* There is at least one existing record to the left of the split point.
						 * Do the initial split this pass and make an extra split next pass.
						 */
						need_extra_block_split = TRUE;
						DEBUG_ONLY(dbg_trace_array[dbg_num_iters].is_extra_block_split = TRUE;)
					} else
					{
						BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
						curr_rec_hdr->rsiz = new_rec_size;
						curr_rec_hdr->cmpc = prev_rec_match;
						BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
						BLK_ADDR(cp1, target_key_size - prev_rec_match, unsigned char);
						memcpy(cp1, temp_key->base + prev_rec_match, target_key_size - prev_rec_match);
						BLK_SEG(bs_ptr, cp1, target_key_size - prev_rec_match);
						if (0 != value.len)
						{
							BLK_ADDR(va, value.len, char);
							memcpy(va, value.addr, value.len);
							BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
						}
						if (copy_extra_record)
						{
							n = rec_size - curr_rec_match;
			 				/* typecast needed below to enforce a "signed int" comparison */
							if ((n + (signed int)curr_rec_offset + new_rec_size) > blk_fill_size)
								copy_extra_record = FALSE;
							else
							{
								BLK_ADDR(extra_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
								extra_rec_hdr->rsiz = n;
								extra_rec_hdr->cmpc = curr_rec_match;
								BLK_SEG(bs_ptr, (sm_uc_ptr_t)extra_rec_hdr, SIZEOF(rec_hdr));
								if (n < (signed)SIZEOF(rec_hdr)) /* want signed compare */
								{				     /* as 'n' can be negative */
									assert(CDB_STAGNATE > t_tries);
									status = cdb_sc_mkblk;
									GOTO_RETRY;
								}
								BLK_SEG(bs_ptr,
									buffaddr + SIZEOF(blk_hdr) + SIZEOF(rec_hdr)
										+ curr_rec_match,
									n - SIZEOF(rec_hdr));
								new_blk_size_l += n;
							}
						}
					}
				} else
				{
					if (copy_extra_record)
					{
						BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
						curr_rec_hdr->rsiz = new_rec_size;
						curr_rec_hdr->cmpc = prev_rec_match;
						BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
						BLK_ADDR(cp1, target_key_size - prev_rec_match, unsigned char);
						memcpy(cp1, temp_key->base + prev_rec_match, target_key_size - prev_rec_match);
						BLK_SEG(bs_ptr, cp1, target_key_size - prev_rec_match);
						assert(value.len);
						BLK_ADDR(va, value.len, char);
						memcpy(va, value.addr, value.len);
						BLK_SEG(bs_ptr, (unsigned char *)va, value.len);
						new_blk_size_l += BSTAR_REC_SIZE;
					} else
						new_blk_size_l = curr_rec_offset + BSTAR_REC_SIZE;
					BLK_ADDR(new_star_hdr, SIZEOF(rec_hdr), rec_hdr);
					new_star_hdr->rsiz = BSTAR_REC_SIZE;
					new_star_hdr->cmpc = 0;
					BLK_SEG(bs_ptr, (sm_uc_ptr_t)new_star_hdr, SIZEOF(rec_hdr));
					if (!copy_extra_record)
					{
						BLK_SEG(bs_ptr, (unsigned char *)&zeroes, SIZEOF(block_id));
					} else
						BLK_SEG(bs_ptr, (sm_uc_ptr_t)rp + rec_size - SIZEOF(block_id), SIZEOF(block_id));
				}
				new_blk_bs = bs1;
				if (0 == BLK_FINI(bs_ptr, bs1))
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				/* We want to assert that the left block has enough space for reserved bytes but
				 * it is possible that it DOES NOT have enough space for reserved bytes if the pre-split
				 * block was previously populated with a very low reserved bytes setting and if the current
				 * reserved bytes setting is much higher than what the chosen split point would free up.
				 * This is an issue waiting to be fixed by C9K01-003221. Until then the following assert
				 * has to remain commented out.
				 *
				 * assert(bs1[0].len <= blk_reserved_size);
				 */
				/* assert that both !new_rec and copy_extra_record can never be TRUE at the same time */
				assert(new_rec || !copy_extra_record);
				if (!new_rec || copy_extra_record)
				{	/* Should guard for empty block??? */
					rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size);
					rec_cmpc = rp->cmpc;
					temp_short = rec_size;
					GET_USHORT(rec_size, &rp->rsiz);
				}
				if (copy_extra_record)
				{
				        extra_record_orig_size = temp_short;
					assert(gv_altkey->top == gv_currkey->top);
					assert(gv_altkey->end < gv_altkey->top);
					temp_key = gv_altkey;
					if (cdb_sc_normal !=
						(status = gvcst_expand_key((blk_hdr_ptr_t)buffaddr, curr_rec_offset, temp_key)))
						GOTO_RETRY;
				} else if (temp_key != gv_altkey)
				{
					memcpy(gv_altkey, temp_key, SIZEOF(gv_key) + temp_key->end);
					temp_key = gv_altkey;
				}
				rec_size += rec_cmpc;
				BLK_INIT(bs_ptr, bs1);
				BLK_ADDR(next_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
				next_rec_hdr->rsiz = rec_size;
				next_rec_hdr->cmpc = 0;
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)next_rec_hdr, SIZEOF(rec_hdr));
				BLK_ADDR(cp1, rec_cmpc, unsigned char);
				memcpy(cp1, temp_key->base, rec_cmpc);
				BLK_SEG(bs_ptr, cp1, rec_cmpc);
				n = cur_blk_size - INTCAST(((sm_uc_ptr_t)rp - buffaddr)) - SIZEOF(rec_hdr);
				if (0 > n)	/* want signed compare as 'n' can be negative */
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)(rp + 1), n);
				if (0 == BLK_FINI(bs_ptr, bs1))
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				/* We want to assert that the right block has enough space for reserved bytes but
				 * it is possible that it DOES NOT have enough space for reserved bytes if the pre-split
				 * block was previously populated with a very low reserved bytes setting and if the current
				 * reserved bytes setting is much higher than what the chosen split point would free up.
				 * This is an issue waiting to be fixed by C9K01-003221. Until then the following assert
				 * has to remain commented out.
				 *
				 * assert(bs1[0].len <= blk_reserved_size);
				 */
			}
			next_blk_index = t_create(blk_num, (uchar_ptr_t)new_blk_bs, left_hand_offset, left_hand_index, bh_level);
			if (!no_pointers && dollar_tlevel)
			{	/* there may be chains */
				assert(new_rec);
				curr_chain = *(off_chain *)&blk_num;
				if (curr_chain.flag == 1)
					tp_get_cw(si->first_cw_set, curr_chain.cw_index, &cse);
				else
				{
					if (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&blk_num)))
						tp_srch_status = tabent->value;
					else
						tp_srch_status = NULL;
					cse = tp_srch_status ? tp_srch_status->cse : NULL;
				}
				assert(!cse || !cse->high_tlevel);
			        if ((NULL != cse) && (0 != cse->first_off))
				{	/* there is an existing chain: fix to account for the split */
					assert(NULL != cse->new_buff);
					assert(cse->done);
					assert(0 == cse->next_off);
					cse_new = si->last_cw_set;
					assert(!cse_new->high_tlevel);
					assert(0 == cse_new->next_off);
					assert(0 == cse_new->first_off);
					assert(cse_new->ins_off == left_hand_offset);
					assert(cse_new->index == left_hand_index);
					assert(cse_new->level == cse->level);
					cse_first_off = (int4)cse->first_off;
					offset_sum = cse_first_off;
					curr = buffaddr + offset_sum;
					GET_LONGP(&curr_chain, curr);
					assert(curr_chain.flag == 1);
					last_possible_left_offset = curr_rec_offset + extra_record_orig_size - SIZEOF(off_chain);
					/* some of the following logic used to be in tp_split_chain which was nixed */
					if (offset_sum <= last_possible_left_offset)
					{	/* the split falls within or after the chain; otherwise entire chain stays right */
						assert((cse_first_off < curr_rec_offset)
							|| (cse_first_off == last_possible_left_offset));
						if (left_hand_offset && (curr_rec_offset < cse_first_off))
						{	/* We are inserting the new record (with the to-be-filled child block
							 * number) AND an extra record in the left block and the TP block
							 * chain of the block to be split starts AFTER the new record's offset
							 * in the current block. This means the left block (cse_new) will have a
							 * block chain starting with the newly inserted record's block pointer.
							 */
							cse_new->first_off = left_hand_offset;
						} else
						{
							cse_new->first_off = cse_first_off;
							assert(0 == cse_new->next_off);
						}
						if (level_0)	/* if no *-key issue stop after, rather than at, a match */
							last_possible_left_offset += SIZEOF(off_chain);
						if (offset_sum < last_possible_left_offset)
						{	/* it's not an immediate hit */
							for ( ; ; curr += curr_chain.next_off, GET_LONGP(&curr_chain, curr))
							{	/* follow chain upto split point */
								assert(1 == curr_chain.flag);
								if (0 == curr_chain.next_off)
									break;
								offset_sum += curr_chain.next_off;
								if (offset_sum >= last_possible_left_offset)
									break;
							}	/* end of search chain loop */
						}
						assert(curr >= (buffaddr + cse_first_off));
						if (level_0)	/* restore match point to "normal" */
							last_possible_left_offset -= SIZEOF(off_chain);
						if ((offset_sum == last_possible_left_offset) && !level_0)
						{	/* The last record in the left side of the pre-split block is where
							 * the search stopped. If no extra record copy was done, then this
							 * record will end up BEFORE the inserted record in the post-split
							 * left block. Otherwise this will be AFTER the inserted record.
							 *
							 * In case of copy_extra_record, the extra record will become the *-key
							 *                        ---|------------v-----------------v
							 *     [blk_hdr]...[curr rec( )][new rec ( )] [extra rec (*-key)]
							 *
							 * In case of no extra record copy, the new record will become the *-key
							 *                        ---|-------------------v
							 *     [blk_hdr]...[curr rec( )][new rec (*-key)( )]
							 *
							 * Take this into account during the calculations below.
							 */
							assert(cse_first_off <= last_possible_left_offset);
							if (left_hand_offset)
							{
								assert(!ins_chain_offset);
								if (!extra_record_orig_size && (offset_sum != cse_first_off))
								{	/* bring curr up to the match */
									curr += curr_chain.next_off;
									GET_LONGP(&curr_chain, curr);
								}
								curr_offset = curr - buffaddr;
								undo_index = 0;
								if (curr_offset < curr_rec_offset)
								{	/* The chain starts before the curr_rec_offset. Fix
									 * next_off field from the last element in the chain
									 * before this offset.
									 */
									prev_chain = curr_chain;
									assert(extra_record_orig_size
										|| (BSTAR_REC_SIZE
											== (left_hand_offset - curr_offset)));
									prev_chain.next_off = left_hand_offset - curr_offset;
									assert((curr_offset + prev_chain.next_off)
										<= (new_blk_size_l - SIZEOF(off_chain)));
									if (dollar_tlevel != cse->t_level)
									{
										assert(dollar_tlevel > cse->t_level);
										assert(!cse->undo_next_off[0]
												&& !cse->undo_offset[0]);
										assert(!cse->undo_next_off[1]
												&& !cse->undo_offset[1]);
										cse->undo_next_off[0] = curr_chain.next_off;
										cse->undo_offset[0] = (block_offset)curr_offset;
										undo_index = 1;
									}
									GET_LONGP(curr, &prev_chain);
								}
								if (extra_record_orig_size)
								{
									if (offset_sum != cse_first_off)
									{	/* bring curr up to the match */
										curr += curr_chain.next_off;
										curr_offset += curr_chain.next_off;
										GET_LONGP(&curr_chain, curr);
									}
									if (dollar_tlevel != cse->t_level)
									{
										assert(dollar_tlevel > cse->t_level);
										assert(!cse->undo_next_off[undo_index] &&
											!cse->undo_offset[undo_index]);
										cse->undo_next_off[undo_index] =
													curr_chain.next_off;
										cse->undo_offset[undo_index] =
													(block_offset)curr_offset;
									}
									prev_chain = curr_chain;
									prev_chain.next_off = 0;
									GET_LONGP(curr, &prev_chain);
									cse_new->next_off = BSTAR_REC_SIZE;
								}
								offset_sum += curr_chain.next_off;
							} else
							{
								undo_index = 0;
								/* the last record turns into the *-key */
								if (offset_sum == cse_first_off)
								{	/* it's all there is */
									/* first_off --------------------v
									 * [blk_hdr]...[curr rec (*-key)( )] */
									assert(prev_rec_offset >= SIZEOF(blk_hdr));
									cse_new->first_off = (block_offset)(prev_rec_offset +
													    SIZEOF(rec_hdr));
								} else
								{	/* update the next_off of the previous chain record */
									/*		      ---|--------------------v
									 * [blk_hdr]...[prev rec( )][curr rec (*-key)( )] */
									assert((buffaddr + prev_rec_offset) > curr);
									prev_chain = curr_chain;
									assert((offset_sum - prev_chain.next_off) /* check old */
										== (curr - buffaddr)); /* method equivalent */
									prev_chain.next_off = (unsigned int)(
										(prev_rec_offset + (unsigned int)(SIZEOF(rec_hdr))
										 - (curr - buffaddr)));
									assert((curr - buffaddr + prev_chain.next_off)
										<= ((new_blk_size_l < blk_reserved_size
										? new_blk_size_l : blk_reserved_size)
										- SIZEOF(off_chain)));
									if (dollar_tlevel != cse->t_level)
									{
										assert(dollar_tlevel > cse->t_level);
										assert(!cse->undo_next_off[0]
											&& !cse->undo_offset[0]);
										assert(!cse->undo_next_off[1]
											&& !cse->undo_offset[1]);
										cse->undo_next_off[0] = curr_chain.next_off;
										cse->undo_offset[0] = (block_offset)(curr -
														     buffaddr);
										undo_index = 1;
									}
									GET_LONGP(curr, &prev_chain);
									/* bring curr up to the match */
									curr += curr_chain.next_off;
									GET_LONGP(&curr_chain, curr);
								}
								offset_sum += curr_chain.next_off;
								if (dollar_tlevel != cse->t_level)
								{
									assert(dollar_tlevel > cse->t_level);
									assert(!cse->undo_next_off[undo_index] &&
										!cse->undo_offset[undo_index]);
									cse->undo_next_off[undo_index] = curr_chain.next_off;
									cse->undo_offset[undo_index] = (block_offset)(curr -
														      buffaddr);
								}
								curr_chain.next_off = 0;
								GET_LONGP(curr, &curr_chain);
							}
						} else
						{	/* found the split and no *-key issue: just terminate before the split */
							if (offset_sum == cse_first_off)
								offset_sum += curr_chain.next_off;	/* put it in the lead */
							old_curr_chain_next_off = curr_chain.next_off;
							if (left_hand_offset)
							{	/* there's a new chain rec in left */
								curr_offset = curr - buffaddr;
								if (extra_record_orig_size
									&& (curr_offset == last_possible_left_offset))
								{
									assert(level_0);	/* else *-key issues */
									cse_new->next_off = extra_record_orig_size
													- next_rec_shrink1;
								}
								assert(!ins_chain_offset);
								/* put the new one at the end of the chain */
								/*		      ---|---------------v
								 * [blk_hdr]...[curr rec( )]...[new rec ( )] */
								/* the new rec may or may not be a *-key */
								assert((offset_sum - curr_chain.next_off) == curr_offset);
								assert(left_hand_offset > curr_offset);
								curr_chain.next_off = (block_offset)(left_hand_offset
												- curr_offset);
							} else
								curr_chain.next_off = 0;
							assert((curr - buffaddr + curr_chain.next_off)
									<= ((new_blk_size_l < blk_reserved_size
									? new_blk_size_l : blk_reserved_size) - SIZEOF(off_chain)));
							if (dollar_tlevel != cse->t_level)
							{
								assert(dollar_tlevel > cse->t_level);
								assert(!cse->undo_next_off[0] && !cse->undo_offset[0]);
								assert(!cse->undo_next_off[1] && !cse->undo_offset[1]);
								cse->undo_next_off[0] = old_curr_chain_next_off;
								cse->undo_offset[0] = (block_offset)(curr - buffaddr);
							}
							GET_LONGP(curr, &curr_chain);
						}	/* end of *-key or not alternatives */
						assert((left_hand_offset + (int)cse_new->next_off) <=
							((new_blk_size_l < blk_reserved_size ? new_blk_size_l : blk_reserved_size)
								- SIZEOF(off_chain)));
					}	/* end of buffer and cse_new adjustments */
					prev_first_off = cse_first_off;
					if (ins_chain_offset)
					{	/* if there is a new chain rec in the old block, put it first */
						/* first_off---------v
						 * [blk_hdr][new rec( )]... */
						assert(!left_hand_offset);
						assert(0 == extra_record_orig_size);
						assert(ins_chain_offset >= (SIZEOF(blk_hdr) + SIZEOF(rec_hdr)));
						cse->first_off = ins_chain_offset;
						assert(0 == cse->next_off);
						if (offset_sum > last_possible_left_offset)
						{	/* there are existing chain records after the split */
							/* first_off---------v--------------------v
							 * [blk_hdr][new rec( )]...[existing rec ( )] */
							prev_next_off = cse->next_off;
							cse->next_off = offset_sum - last_possible_left_offset - next_rec_shrink1;
							assert((int)(cse->next_off + ins_chain_offset) < new_blk_size_r);
						}
					} else if (offset_sum <= last_possible_left_offset)
					{	/* the last chain record went left with the split */
						cse->first_off = 0;
					} else
					{	/* just adjust the anchor for the split */
						/* first_off------------------v
						 * [blk_hdr]...[existing rec ( )] */
						assert(offset_sum >= (int)cse_first_off);
						cse->first_off =  (block_offset)(offset_sum - last_possible_left_offset + rec_cmpc
								+ SIZEOF(blk_hdr) - SIZEOF(off_chain));
						assert(cse->first_off >= (SIZEOF(blk_hdr) + SIZEOF(rec_hdr)));
					}
					assert((ins_chain_offset + (int)cse->next_off) <=
						((new_blk_size_r < blk_reserved_size ? new_blk_size_r : blk_reserved_size)
							- SIZEOF(off_chain)));
				}	/* end of of split processing */
			}	/* end of tp only code */
			if (!dollar_tlevel)
				cse = NULL;
			else
			{
				cse_new = si->last_cw_set;
				assert(!cse_new->high_tlevel);
				gvcst_blk_build(cse_new, NULL, 0);
				cse_new->done = TRUE;
			}
			/* Record block split heuristic info that will be used in next block split */
			if (!new_rec_goes_to_right)
			{
				chain1.flag = 1;
				chain1.cw_index = next_blk_index;
				chain1.next_off = 0;
				assert(SIZEOF(gv_target->last_split_blk_num[bh_level]) == SIZEOF(off_chain));
				last_split_blk_num[bh_level] = *(block_id *)&chain1;
			} else
				last_split_blk_num[bh_level] = blk_num;
			assert(temp_key == gv_altkey);
			/* If new_rec_goes_to_right is TRUE, then it almost always implies that the left side of
			 * the block is almost full (i.e. adding the new record there caused it to exceed the fill
			 * factor) therefore direct all future updates to keys in between (which lie between the
			 * last key of the left block and the first key of the right block) to the right block.
			 *
			 * If not, direct those updates to the left block thereby preventing it from staying at a
			 * low capacity for a long period of time.
			 *
			 * This direction of future updates is implemented by controlling what key gets passed for
			 * record addition into the parent index block. For directing all in-between updates to the
			 * right block, pass in the last key of the left block to the parent index block. For directing
			 * all in-between updates to the left block, back off 1 spot from the first key of the right
			 * block and pass that to the parent index block.
			 *
			 * Doing this backoff accurately would imply finding the last non-zero byte in the key and taking
			 * 1 off from it. In case the length of the right key is less than the left key, it is possible
			 * that this backoff causes the new key to be less than even the left key (e.g. if left side has
			 * "C2 13 93 00" as key sequence corresponding to the number 1292 and right side has "C2 14 00"
			 * corresponding to the number 1300, taking one off the right side would give "C2 13 00" which corresponds
			 * to the number 12 and is lesser than the left side). In this case, we would have to start adding in
			 * FF bytes to the key as much as possible until we reached the left key length. In the above example,
			 * we would get "C2 13 FF 00".
			 *
			 * In the end, because of the complexities involved in getting an accurate backoff (see above paragraph),
			 * we instead implement a simplified backoff by examining just the first byte that differs and the
			 * immediately following byte (if needed). If it turns out that we cannot get a backoff with just
			 * those 2 bytes (should be rare), we then let the left key go unmodified. In such cases, we expect
			 * not many intervening possible keys and and therefore it does not matter that much whether we pass
			 * the left or (right-1) key to the parent.
			 *
			 * temp_key already holds the key corresponding to the last record of the left block.
			 * bs1[2] and bs1[3] hold the key corresponding to the first record of the right block.
			 */
			if (level_0)
			{	/* Determine key for record to pass on to parent index block */
				cp1 = temp_key->base;
				cp2 = (unsigned char *)bs1[2].addr;
				bs1_2_len = bs1[2].len;
				for (i = 0; (i < bs1_2_len) && (*cp2 == *cp1); ++i)
				{
					++cp2;
					++cp1;
				}
				if (i == bs1_2_len)
				{
					cp2 = (unsigned char *)bs1[3].addr;
					bs1_3_len = bs1[3].len;
					for (j = 0; (j < bs1_3_len) && (*cp2 == *cp1); ++j)
					{
						++cp2;
						++cp1;
					}
				}
				n = (int)((sm_long_t)*cp2 - (sm_long_t)*cp1);
				if (0 > n)
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				} else if (1 < n)
				{
					temp_key->end = cp1 - temp_key->base + 2;
					if (temp_key->end < temp_key->top)
					{
						*cp1++ += (!new_rec_goes_to_right ?  (n - 1) : 1);
						*cp1++ = 0;
						*cp1 = 0;
					} else
					{
						temp_key->end = temp_key->prev;
						assert(temp_key->end < temp_key->top);
						assert(CDB_STAGNATE > t_tries);
						status = cdb_sc_mkblk;
						GOTO_RETRY;
					}
				} else if (1 == n)
				{
					cp1++;
					if ((cp1 - temp_key->base + 2) < temp_key->top)
					{
						if (i == (bs1_2_len - 1))
							cp2 = (unsigned char *)bs1[3].addr;
						else
							cp2++;
						if ((STR_SUB_MAXVAL != *cp1) || (KEY_DELIMITER != *cp2))
						{
							if (!new_rec_goes_to_right)
							{
								old_ch = *cp2;
								new_ch = old_ch - 1;
								*cp1 = new_ch;
								if (KEY_DELIMITER != old_ch)
									*(cp1 - 1) = *(cp2 - 1);
							} else
							{
								old_ch = *cp1;
								new_ch = old_ch + 1;
								*cp1 = new_ch;
								if (STR_SUB_MAXVAL == old_ch)
									*(cp1 - 1) = *(cp2 - 1);
							}
							cp1++;
							if (KEY_DELIMITER == new_ch)
								temp_key->end--;
							else
								*cp1++ = KEY_DELIMITER;
							*cp1 = KEY_DELIMITER;
							temp_key->end = cp1 - temp_key->base;
						}
					} else
					{
						temp_key->end = temp_key->prev;
						assert(temp_key->end < temp_key->top);
						assert(CDB_STAGNATE > t_tries);
						status = cdb_sc_mkblk;
						GOTO_RETRY;
					}
				}
			}
			assert(temp_key->end < temp_key->top);
			assert(1 <= temp_key->end);
			assert(KEY_DELIMITER == temp_key->base[temp_key->end]);
			assert(KEY_DELIMITER == temp_key->base[temp_key->end - 1]);
			assert((2 > temp_key->end) || (KEY_DELIMITER != temp_key->base[temp_key->end - 2]));
			bq = bh + 1;
			if (HIST_TERMINATOR != bq->blk_num)
			{	/* Not root;  write blocks and continue */
				if (cdb_sc_normal != (status = gvcst_search_blk(temp_key, bq)))
					GOTO_RETRY;
				cse = t_write(bh, (unsigned char *)bs1, ins_chain_offset,
							ins_chain_index, bh_level, TRUE, FALSE, GDS_WRITE_PLAIN);
				assert(!dollar_tlevel || !cse->high_tlevel);
				if (cse)
				{
					assert(dollar_tlevel);
					cse->write_type |= GDS_WRITE_BLOCK_SPLIT;
				}
				value.len = SIZEOF(block_id);
				value.addr = (char *)&zeroes;
				++bh;
				ins_chain_index = next_blk_index;
			} else
			{	/* Create new root */
				if ((bh_level + 1) == MAX_BT_DEPTH)
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_maxlvl;
					GOTO_RETRY;
				}
				ins_chain_index = t_create(blk_num, (uchar_ptr_t)bs1, ins_chain_offset, ins_chain_index, bh_level);
				make_it_null = FALSE;
				if (NULL != cse)
				{     /* adjust block to use the buffer and offsets worked out for the old root */
					assert(cse->done);
				        assert(NULL != cse->new_buff);
					cse_new = si->last_cw_set;
					assert(!cse_new->high_tlevel);
					cse_new->blk_target = cse->blk_target;
					cse_new->first_off = cse->first_off;
					cse_new->next_off = cse->next_off;
							/* to be able to incrementally rollback, we need another copy of new_buff,
							 * pointer copying wouldn't suffice
							 */
					cse_new->new_buff = (unsigned char *)get_new_free_element(si->new_buff_list);
					memcpy(cse_new->new_buff, cse->new_buff, ((blk_hdr_ptr_t)cse->new_buff)->bsiz);
					cse_new->old_block = NULL;
					make_it_null = TRUE;
				}
				/* Build the right child of the new root right now since it is possible that before commit the
				 * root block may have been recycled in the global buffer which wouldn't cause a restart since
				 * it has been built already (see the gvcst_blk_build below). Otherwise, we may be relying
				 * on incorrect data in the root block when we build this right child finally in bg_update.
				 * Note that this needs to be done only in TP since only tp_tend allows for a block with a
				 * cse not to be in the global buffer if a new_buff already exists.
				 */
				if (dollar_tlevel)
				{
					DEBUG_ONLY(tp_get_cw(si->first_cw_set, ins_chain_index, &cse_new);)
					assert(cse_new == si->last_cw_set);
					cse_new = si->last_cw_set;
					assert(FALSE == cse_new->done);
					assert(!cse_new->high_tlevel);
					gvcst_blk_build(cse_new, NULL, 0);
					cse_new->done = TRUE;
				}
				target_key_size = temp_key->end + 1;
				BLK_INIT(bs_ptr, bs1);
				BLK_ADDR(curr_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
				curr_rec_hdr->rsiz = target_key_size + SIZEOF(rec_hdr) + SIZEOF(block_id);
				curr_rec_hdr->cmpc = 0;
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)curr_rec_hdr, SIZEOF(rec_hdr));
				BLK_ADDR(cp1, target_key_size, unsigned char);
				memcpy(cp1, temp_key->base, target_key_size);
				BLK_SEG(bs_ptr, cp1, target_key_size);
				BLK_SEG(bs_ptr, (unsigned char *)&zeroes, SIZEOF(block_id));
				BLK_ADDR(next_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
				next_rec_hdr->rsiz = BSTAR_REC_SIZE;
				next_rec_hdr->cmpc = 0;
				BLK_SEG(bs_ptr, (sm_uc_ptr_t)next_rec_hdr, SIZEOF(rec_hdr));
				BLK_SEG(bs_ptr, (unsigned char *)&zeroes, SIZEOF(block_id));
				if (0 == BLK_FINI(bs_ptr, bs1))
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_mkblk;
					GOTO_RETRY;
				}
				assert(bs1[0].len <= blk_reserved_size); /* Assert that new block has space for reserved bytes */
				ins_off1 = (block_offset)(SIZEOF(blk_hdr) + SIZEOF(rec_hdr) + target_key_size);
				ins_off2 = (block_offset)(SIZEOF(blk_hdr) + (2 * SIZEOF(rec_hdr)) + SIZEOF(block_id) +
							  target_key_size);
				assert(ins_off1 < ins_off2);
				/* Since a new root block is not created but two new children are created, this update to the
				 * root block should disable the "indexmod" optimization (C9B11-001813).
				 */
				cse = t_write(bh, (unsigned char *)bs1, ins_off1, next_blk_index,
							bh_level + 1, TRUE, FALSE, GDS_WRITE_KILLTN);
				if (make_it_null)
					cse->new_buff = NULL;
				assert(!dollar_tlevel || !cse->high_tlevel);
				if (!dollar_tlevel)
				{	/* create a sibling cw-set-element to store ins_off2/ins_chain_index */
					t_write_root(ins_off2, ins_chain_index);
				} else
				{
					cse->write_type |= GDS_WRITE_BLOCK_SPLIT;
					assert(NULL == cse->new_buff);
					cse->first_off = 0;
					cse->next_off = ins_off2 - ins_off1;
					/* the following is the only place where the buffer is not completely built by
					 * gvcst_blk_build. this means that the block chain seen by gvcst_blk_build will
					 * have a bad value (that is fixed below) at the end of the list. therefore the
					 * block chain integrity checking code in gvcst_blk_build will error out normally
					 * in this case. signal that routine to skip checking just this tail element.
					 */
					DEBUG_ONLY(skip_block_chain_tail_check = TRUE;)
					gvcst_blk_build(cse, NULL, 0);
					DEBUG_ONLY(skip_block_chain_tail_check = FALSE;)
					curr_chain.flag = 1;
					curr_chain.cw_index = ins_chain_index;
					curr_chain.next_off = 0;
					curr = cse->new_buff + ins_off2;
					GET_LONGP(curr, &curr_chain);
					cse->done = TRUE;
					gv_target->clue.end = 0;
				}
				succeeded = TRUE;
			}
		}
	}
	assert(succeeded);
	horiz_growth = FALSE;
	assert((csa->dir_tree == gv_target) || tp_root);
	RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
	/* The only case where gv_target is still csa->dir_tree after the above RESET macro is if op_gvput was invoked
	 * with gv_target being set to cs_addrs->dir_tree. In that case gbl_target_was_set would have been set to TRUE. Assert.
	 */
	assert((csa->dir_tree != gv_target) || gbl_target_was_set);
	/* Format the journal records only once for non-TP (irrespective of number of restarts).
	 * We remember this through the variable "jnl_format_done". If TRUE, we do not redo the jnl_format.
	 * The only exception is if we are in $INCREMENT in which case we need to reformat since the
	 *	current value (and hence the post-increment value) of the key might be different in different tries.
	 *	In this case, the restart code checks and resets "jnl_format_done" to FALSE.
	 */
	if (!dollar_tlevel)
	{
		nodeflags = 0;
		if (skip_dbtriggers)
			nodeflags |= JS_SKIP_TRIGGERS_MASK;
		if (duplicate_set)
			nodeflags |= JS_IS_DUPLICATE;
		assert(!jnl_format_done || !is_dollar_incr && (JNL_SET == non_tp_jfb_ptr->ja.operation));
		if (need_extra_block_split)
                        inctn_opcode = inctn_gvcstput_extra_blk_split;
		else if (JNL_WRITE_LOGICAL_RECS(csa) && !jnl_format_done)
		{
			jfb = jnl_format(JNL_SET, gv_currkey, (!is_dollar_incr ? val : post_incr_mval), nodeflags);
			assert(NULL != jfb);
			jnl_format_done = TRUE;
		}
                succeeded = ((trans_num)0 != t_end(&gv_target->hist, dir_hist, TN_NOT_SPECIFIED));
                inctn_opcode = inctn_invalid_op;
		if (succeeded)
		{
			if (NULL != dir_hist)
			{	/* The Global Variable Tree was created in this transaction. So clear its gv_target to be safe.
				 * The directory tree though will have a non-zero value and that can stay as it is since it
				 * was validated in this transaction and was found good enough for us to commit.
				 */
				assert(dir_tree != gv_target);
				gv_target->clue.end = 0;
			}
		} else
		{	/* "t_retry" would have already been invoked by "t_end".
			 * So instead of going to "retry:", do only whatever steps from there are necessary here.
			 */
			RESTORE_ZERO_GVT_ROOT_ON_RETRY(lcl_root, gv_target, tp_root, dir_hist, dir_tree);
			jnl_format_done = FALSE;	/* need to reformat jnl records for $INCR even in case of non-TP */
			GTMTRIG_DBG_ONLY(dbg_trace_array[dbg_num_iters].retry_line = __LINE__);
			goto tn_restart;
		}
	} else
	{
		status = tp_hist(dir_hist);
		if (NULL != dir_hist)
		{	/* Note that although "tp_hist" processes the "dir_hist" history, it only adds "gv_target" to gvt_tp_list.
			 * But csa->dir_tree might have had clue, blk-split related info etc. modified as part of this
			 * gvcst_put invocation that might also need cleanup (just like any other gv_target) so add
			 * csa->dir_tree to gvt_tp_list (if not already done).
			 */
			assert(dir_tree == csa->dir_tree);
			ADD_TO_GVT_TP_LIST(dir_tree);	/* note: macro also updates read_local_tn if necessary */
		}
		if (cdb_sc_normal != status)
			GOTO_RETRY;
		jnl_format_done = FALSE;
	}
	if (succeeded)
	{
		if (0 == tp_root)
		{	/* Fill in gv_target->root with newly created root block value.
			 * Previously, root remained at 0 at the end of the transaction and it was left to the
			 * NEXT transaction to do a gvcst_root_search and determine the new root block.
			 * This was fine until recently when op_gvrectarg was reworked to NOT do a gvcst_root_search
			 * (to avoid potential TP restarts while unwinding the M stack). This meant that gv_target->root
			 * needed to be kept uptodate as otherwise it was possible for gv_target->root to be stale
			 * after a op_gvrectarg causing incorrect behavior of following M code (see v52000/C9B10001765
			 * subtest for example where $order(^gvn,$$extrinsic) is done and extrinsic CREATES <^gvn>).
			 */
			GTMTRIG_ONLY(assert(!ztval_gvcst_put_redo);)
			assert(0 == gv_target->root);
			if (!dollar_tlevel)
			{
				tp_root = cw_set[root_blk_cw_index].blk;
				assert(gds_t_acquired == cw_set[root_blk_cw_index].old_mode);
				assert(gds_t_committed == cw_set[root_blk_cw_index].mode);
				assert(!IS_BITMAP_BLK(tp_root));
			} else
			{
				chain1.flag = 1;
				chain1.cw_index = root_blk_cw_index;
				chain1.next_off = 0;	/* does not matter what value we set this field to */
				assert(SIZEOF(tp_root) == SIZEOF(chain1));
				tp_root = *(block_id *)&chain1;
			}
			gv_target->root = tp_root;
		}
		if (need_extra_block_split)
		{	/* The logical update required an extra block split operation first (which succeeded) so
			 * get back to doing the logical update before doing any trigger invocations etc.
			 */
			GTMTRIG_ONLY(skip_hasht_read = TRUE;)
			goto fresh_tn_start;
		}
		for (bh_level = 0; bh_level < split_depth; bh_level++)
		{
			blk_num = last_split_blk_num[bh_level];
			assert(0 != blk_num);
			split_targ->last_split_blk_num[bh_level] = blk_num;
			assert((NEWREC_DIR_FORCED == last_split_direction[bh_level])
				|| (NEWREC_DIR_LEFT == last_split_direction[bh_level])
				|| (NEWREC_DIR_RIGHT == last_split_direction[bh_level]));
			split_targ->last_split_direction[bh_level] = last_split_direction[bh_level];
			/* Fix blk_num if it was created in this transaction. In case of non-TP, we have the real block number
			 * corresponding to the created block. In case of TP, we can know that only at tp_clean_up time so defer.
			 */
			chain1 = *(off_chain *)&blk_num;
			if (chain1.flag)
			{
				if (!dollar_tlevel)
				{
					assert(chain1.cw_index < ARRAYSIZE(cw_set));
					split_targ->last_split_blk_num[bh_level] = cw_set[chain1.cw_index].blk;
				} else
					split_targ->split_cleanup_needed = TRUE;/* phantom blk# will be fixed at tp_clean_up time */
			}
		}
		if (dollar_tlevel)
		{
			nodeflags = 0;
			if (skip_dbtriggers)
				nodeflags |= JS_SKIP_TRIGGERS_MASK;
			if (duplicate_set)
				nodeflags |= JS_IS_DUPLICATE;
			ja_val = (!is_dollar_incr ? val : post_incr_mval);
			write_logical_jnlrecs = JNL_WRITE_LOGICAL_RECS(csa);
#			ifdef GTM_TRIGGER
			if (!skip_dbtriggers)
			{
				/* Since we are about to invoke the trigger, we better have gv_target->gvt_trigger and
				 * the local variable gvt_trigger in sync. The only exception is when we are here because
				 * of a $ztvalue update and redoing the gvcst_put. In this case, it's possible that
				 * the trigger code that was previously executed deleted the trigger and did an update
				 * on the global which would have set gv_target->gvt_trigger to NULL. Assert accordingly.
				 */
				assert(ztval_gvcst_put_redo || (gvt_trigger == gv_target->gvt_trigger));
				if ((NULL != gvt_trigger) && !ztval_gvcst_put_redo)
				{
					assert(dollar_tlevel);
					/* Format ZTWORM and SET journal records.
					 * "ztworm_jfb", "jfb" and "jnl_format_done" are set by the below macro.
					 */
					JNL_FORMAT_ZTWORM_IF_NEEDED(csa, write_logical_jnlrecs,
							JNL_SET, gv_currkey, ja_val, ztworm_jfb, jfb, jnl_format_done);
					/* Initialize trigger parms that dont depend on the context of the matching trigger */
					trigparms.ztoldval_new = key_exists ? ztold_mval : (mval *)&literal_null;
					PUSH_MV_STENT(MVST_MVAL);	/* protect $ztval from stp_gcol */
					ztval_mval = &mv_chain->mv_st_cont.mvs_mval;
					if (!is_dollar_incr)
						*ztval_mval = *val;
					else
					{
						*ztval_mval = *post_incr_mval;
						/* Since this is pointing to malloced buffer, we need to repoint it to stringpool
						 * to avoid a nested trigger call (that does a $INCR) from overwriting this buffer.
						 * This way buffers corresponding to $ztvals of nested triggers can coexist.
						 */
						s2pool(&ztval_mval->str);
					}
					trigparms.ztvalue_new = ztval_mval;
					trigparms.ztdata_new = key_exists ? &literal_one : &literal_zero;
					gvtr_parms.gvtr_cmd = GVTR_CMDTYPE_SET;
					gvtr_parms.gvt_trigger = gvt_trigger;
					/* Now that we have filled in minimal information, let "gvtr_match_n_invoke" do the rest */
					gtm_trig_status = gvtr_match_n_invoke(&trigparms, &gvtr_parms);
					assert((0 == gtm_trig_status) || (ERR_TPRETRY == gtm_trig_status));
					if (ERR_TPRETRY == gtm_trig_status)
					{	/* A restart has been signaled that we need to handle or complete the handling of.
						 * This restart could have occurred reading the trigger in which case no
						 * tp_restart() has yet been done or it could have occurred in trigger code in
						 * which case we need to finish the incomplete tp_restart. In both cases this
						 * must be an implicitly TP wrapped transaction. Our action is to complete the
						 * necessary tp_restart() logic (t_retry is already completed so should be skipped)
						 * and then re-do the gvcst_put logic.
						 */
						assert(lcl_implicit_tstart);
						assert(CDB_STAGNATE >= t_tries);
						status = cdb_sc_normal;	/* signal "retry:" to avoid t_retry call */
						GOTO_RETRY;
					}
					REMOVE_ZTWORM_JFB_IF_NEEDED(ztworm_jfb, jfb, si);
					if (trigparms.ztvalue_changed)
					{	/* At least one of the invoked triggers changed $ztval.
						 * Redo the gvcst_put with $ztval as the right side of the SET.
						 * Also make sure gtm_trigger calls are NOT done this time around.
						 */
						assert(0 < gvtr_parms.num_triggers_invoked);
						val = trigparms.ztvalue_new;
						MV_FORCE_STR(val); /* in case the updated value happens to be a numeric quantity */
						ztval_gvcst_put_redo = TRUE;
						skip_hasht_read = TRUE;
						/* In case, the current gvcst_put invocation was for $INCR, reset the corresponding
						 * global variable that indicates a $INCR is in progress since the redo of the
						 * gvcst_put is a SET command (no longer $INCR).
						 */
						is_dollar_incr = FALSE;
						/* Dont pop the mvals as we want ztval_mval (which points to the mval containing
						 * "val" for the redo iteration) protected-from-stp_gcol/accessible until the
						 * redo is complete.
						 */
						goto fresh_tn_start;
					}
				}
				POP_MVALS_FROM_M_STACK_IF_NEEDED(ztold_mval, save_msp, save_mv_chain);
					/* pop any stacked mvals before op_tcommit as it does its own popping */
			}
#			endif
			if (write_logical_jnlrecs && !jnl_format_done)
			{
				assert(dollar_tlevel);
#				ifdef GTM_TRIGGER
				/* Do not replicate implicit update or $ztval redo update */
				assert(tstart_trigger_depth <= gtm_trigger_depth);
				if ((gtm_trigger_depth > tstart_trigger_depth) || ztval_gvcst_put_redo)
				{
					/* Ensure that JS_SKIP_TRIGGERS_MASK and JS_NOT_REPLICATED_MASK are mutually exclusive. */
					assert(!(nodeflags & JS_SKIP_TRIGGERS_MASK));
					nodeflags |= JS_NOT_REPLICATED_MASK;
				}
#				endif
				jfb = jnl_format(JNL_SET, gv_currkey, ja_val, nodeflags);
				assert(NULL != jfb);
				jnl_format_done = TRUE;
			}
#			ifdef GTM_TRIGGER
			/* Go ahead with commit of any implicit TP wrapped transaction */
			if (lcl_implicit_tstart)
			{
				GVTR_OP_TCOMMIT(status);
				if (cdb_sc_normal != status)
					GOTO_RETRY;
			}
#			endif
		}
		assert(!JNL_WRITE_LOGICAL_RECS(csa) || jnl_format_done);
		/* Now that the SET/$INCR is finally complete, increment the corresponding GVSTAT counter */
		INCR_GVSTATS_COUNTER(csa, cnl, n_set, 1);
		DBG_CHECK_VAL_AT_FUN_EXIT;
		assert(lcl_dollar_tlevel == dollar_tlevel);
		return;
	}
retry:
	/* Note that it is possible cs_addrs is not equal to csa at this point in case we restarted due to trigger
	 * invocations and in case those triggers referenced globals in different regions. But this should be fixed
	 * by a call to t_retry/tp_restart below (it does a TP_CHANGE_REG(tp_pointer->gd_reg)).
	 */
	RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
	/* Need to restart. If directory tree was used in this transaction, nullify its clue as well (not normally
	 * done by t_retry). The RESTORE_ZERO_GVT_ROOT_ON_RETRY macro call below takes care of that for us.
	 */
	RESTORE_ZERO_GVT_ROOT_ON_RETRY(lcl_root, gv_target, tp_root, dir_hist, dir_tree);
#	ifdef GTM_TRIGGER
	if (lcl_implicit_tstart)
	{
		assert(!skip_dbtriggers);
		assert(!skip_INVOKE_RESTART);
		assert((cdb_sc_normal != status) || (ERR_TPRETRY == gtm_trig_status));
		if (cdb_sc_normal != status)
			skip_INVOKE_RESTART = TRUE; /* causes t_retry to invoke only tp_restart without any rts_error */
		/* else: t_retry has already been done by gtm_trigger so no need to do it again for this try */
		/* If an implicitly TP wrapped transaction is restarting, restore things to what they were
		 * at entry into gvcst_put. Note that we could have done multiple iterations of gvcst_put for
		 * extra_block_split/retry/ztval_gvcst_put_redo.
		 */
		ztval_gvcst_put_redo = FALSE;
		skip_hasht_read = FALSE;
		val = lcl_val;
		/* $increment related fields need to be restored */
		is_dollar_incr = lcl_is_dollar_incr;
		post_incr_mval = lcl_post_incr_mval;
		increment_delta_mval = lcl_increment_delta_mval;
	}
#	endif
	assert((cdb_sc_normal != status) GTMTRIG_ONLY(|| lcl_implicit_tstart));
	if (cdb_sc_normal != status)
	{
		GTMTRIG_ONLY(POP_MVALS_FROM_M_STACK_IF_NEEDED(ztold_mval, save_msp, save_mv_chain));
		t_retry(status);
	} else
	{	/* else: t_retry has already been done so no need to do that again but need to still invoke tp_restart
		 * to complete pending "tprestart_state" related work.
		 */
#		ifdef GTM_TRIGGER
		assert(ERR_TPRETRY == gtm_trig_status);
		TRIGGER_BASE_FRAME_UNWIND_IF_NOMANSLAND;
		POP_MVALS_FROM_M_STACK_IF_NEEDED(ztold_mval, save_msp, save_mv_chain);
#		endif
		rc = tp_restart(1, !TP_RESTART_HANDLES_ERRORS);
		assert(0 == rc GTMTRIG_ONLY(&& TPRESTART_STATE_NORMAL == tprestart_state));
	}
#	ifdef GTM_TRIGGER
	assert(0 < t_tries);
	assert((cdb_sc_normal != status) || (ERR_TPRETRY == gtm_trig_status));
	if (cdb_sc_normal == status)
	{
		DEBUG_ONLY(save_cdb_status = status);
		status = t_fail_hist[t_tries - 1]; /* get the last restart code */
	}
	assert(((cdb_sc_onln_rlbk1 != status) && (cdb_sc_onln_rlbk2 != status))
		|| (TREF(dollar_zonlnrlbk) && !gv_target->root));
	if ((cdb_sc_onln_rlbk2 == status) && lcl_implicit_tstart)
	{	/* Database was taken back to a different logical state and we are an implicit TP transaction.
		 * Issue DBROLLEDBACK error that the application programmer can catch and do the necessary stuff.
		 */
		assert(gtm_trigger_depth == tstart_trigger_depth);
		rts_error(VARLSTCNT(1) ERR_DBROLLEDBACK);
	}
	/* Note: In case of cdb_sc_onln_rlbk1, the restart logic will take care of doing the root search */
#	endif
	GTMTRIG_ONLY(assert(!skip_INVOKE_RESTART);) /* if set to TRUE a few statements above, should have been reset by t_retry */
	/* At this point, we can be in TP only if we implicitly did a tstart in gvcst_put (as part of a trigger update).
	 * Assert that. Since the t_retry/tp_restart would have reset si->update_trans, we need to set it again.
	 * So reinvoke the T_BEGIN call only in case of TP. For non-TP, update_trans is unaffected by t_retry.
	 */
	assert(!dollar_tlevel GTMTRIG_ONLY(|| lcl_implicit_tstart));
	if (dollar_tlevel)
	{
		jnl_format_done = FALSE;	/* need to reformat jnl records unconditionally in case of TP */
		tp_set_sgm();	/* set sgm_info_ptr & first_sgm_info for TP start */
		T_BEGIN_SETORKILL_NONTP_OR_TP(ERR_GVPUTFAIL);	/* set update_trans and t_err for wrapped TP */
	} else if (is_dollar_incr)
		jnl_format_done = FALSE;	/* need to reformat jnl records for $INCR even in case of non-TP */
	assert(dollar_tlevel || update_trans);
	goto tn_restart;
}