254 lines
9.5 KiB
C
254 lines
9.5 KiB
C
/****************************************************************
 *								*
 *	Copyright 2001, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
#include "gdsroot.h"
|
|
#include "gdsblk.h"
|
|
#include "min_max.h" /* needed for gdsblkops.h */
|
|
#include "gdsblkops.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "gdscc.h"
|
|
#include "gdskill.h"
|
|
#include "filestruct.h"
|
|
#include "copy.h"
|
|
#include "jnl.h"
|
|
#include "buddy_list.h" /* needed for tp.h */
|
|
#include "hashtab_int4.h" /* needed for tp.h */
|
|
#include "tp.h"
|
|
#include "gvcst_blk_build.h"
|
|
#include "gtmimagename.h"
|
|
|
|
#ifdef DEBUG
|
|
GBLREF boolean_t skip_block_chain_tail_check;
|
|
#endif
|
|
|
|
GBLREF unsigned char cw_set_depth;
|
|
GBLREF uint4 dollar_tlevel;
|
|
GBLREF sgm_info *sgm_info_ptr;
|
|
GBLREF sgmnt_addrs *cs_addrs;
|
|
GBLREF sgmnt_data_ptr_t cs_data;
|
|
GBLREF boolean_t write_after_image;
|
|
GBLREF unsigned int t_tries;
|
|
#ifdef UNIX
|
|
GBLREF jnl_gbls_t jgbl;
|
|
#endif
|
|
|
|
/* gvcst_blk_build : build a block image in a buffer from the segments described by a cw-set element's update array.
 *
 * Parameters
 *	cse		- cw-set element. cse->upd_addr points at a blk_segment array in which array[0].len is the
 *			  total block size and array[0].addr points at the last segment to copy. cse->mode must not
 *			  be gds_t_writemap. On a TP build with a pending cse->ins_off, the chain record is written
 *			  into the block and cse->index, cse->ins_off and cse->next_off are reset to 0.
 *	base_addr	- destination buffer. NULL means "first private TP build": a buffer is taken from
 *			  sgm_info_ptr->new_buff_list, stored in cse->new_buff, and cse->first_copy is set to TRUE.
 *			  A non-NULL buffer is asserted to be at least 4-byte aligned.
 *	ctn		- transaction number to stamp into the block header. If it is 0 while in TP,
 *			  cs_addrs->ti->curr_tn - 1 is used instead.
 *
 * Returns nothing. The block header (bver, tn, bsiz, levl) is written BEFORE the block contents are copied; see the
 * comment ahead of the header update for why that order must be kept.
 */
void gvcst_blk_build(cw_set_element *cse, sm_uc_ptr_t base_addr, trans_num ctn)
{
	blk_segment	*seg, *stop_ptr, *array;
	off_chain	chain;
	sm_uc_ptr_t	ptr;
	sm_ulong_t	n;
#	ifdef DEBUG
	/* Everything in this group is used only by asserts, DEBUG_ONLY() code or the "# ifdef DEBUG" chain check below */
	boolean_t	integ_error_found;
	int4		offset;
	rec_hdr_ptr_t	rp;
	sm_uc_ptr_t	chainptr, input_base_addr, ptrtop;
	trans_num	blktn;
	unsigned short	nRecLen;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* For a non-TP transaction on a BG database we expect to reach here WITHOUT crit. The exceptions to this are
	 * DSE (write_after_image is TRUE) or ONLINE ROLLBACK which holds crit for the entire duration.
	 * NOTE(review): earlier commentary attributes the non-TP BG case to bg_update_phase2 being the only caller and
	 * operating outside crit; the callers are not visible from this file.
	 */
	assert((dba_bg != cs_data->acc_meth) || dollar_tlevel || !cs_addrs->now_crit || write_after_image
			UNIX_ONLY(|| jgbl.onlnrlbk));
	/* For a non-TP transaction on an MM database we must be holding crit */
	assert((dba_mm != cs_data->acc_meth) || dollar_tlevel || cs_addrs->now_crit);
	assert(cse->mode != gds_t_writemap);
	array = (blk_segment *)cse->upd_addr;
	/* array[0].len is the size of the block being built : at least a block header, at most a database block */
	assert(array->len >= SIZEOF(blk_hdr));
	assert(array->len <= cs_data->blk_size);
	assert((cse->ins_off + SIZEOF(block_id)) <= array->len);
	assert((short)cse->index >= 0);
	assert(!cse->undo_next_off[0] && !cse->undo_offset[0]);
	assert(!cse->undo_next_off[1] && !cse->undo_offset[1]);
	/* Remember what the caller passed in; the debug chain check only tolerates integ errors if this was NULL */
	DEBUG_ONLY(input_base_addr = base_addr;)

	if (base_addr == NULL)
	{	/* it's the first private TP build */
		assert(dollar_tlevel);
		assert(cse->blk_target);
		base_addr = cse->new_buff = (unsigned char *)get_new_free_element(sgm_info_ptr->new_buff_list);
		cse->first_copy = TRUE;
	} else
		assert(0 == ((sm_ulong_t)base_addr & 3));	/* word aligned at least */

	/* The block-transaction-number is modified before the contents of the block are modified. This is
	 * done so as to allow a cdb_sc_blkmod check (done in t_qread, gvcst_search, gvcst_put and tp_hist)
	 * to be done out-of-crit by just checking for the transaction numbers. If the contents of the block
	 * were modified first, there is a possibility that the block-transaction number didn't get updated
	 * although the contents of the block may have changed and basing the decision of block-modified on
	 * just the transaction numbers may not always be correct.
	 * Note that in mm_update and bg_update there is an else block where instead of gvcst_blk_build(),
	 * a memcpy is done. To effect the above change, we also need to switch the order of memcpy and
	 * block-transaction-number-updation in those places.
	 * Note that a similar change is not needed in gvcst_map_build() because that will never be in the
	 * search history for any key.
	 */
	if (!ctn && dollar_tlevel)
	{	/* Subtract one so will pass concurrency control for mm databases.
		 * This block is guaranteed to be in an earlier history from when it was first read,
		 * so this history is superfluous for concurrency control.
		 * The correct tn is put in the block in mm_update or bg_update when the block is copied to the database.
		 */
		ctn = cs_addrs->ti->curr_tn - 1;
	}
	/* Assert that the block's transaction number is LESS than the transaction number corresponding to the blk build.
	 * i.e. no one else should have touched the block contents in shared memory from the time we locked this in phase1
	 * to the time we build it in phase2.
	 * There are a few exceptions.
	 * a) With DSE, it is possible to change the block transaction number and then a DSE or MUPIP command can run
	 *	on the above block with the above condition not true.
	 * b) tp_tend calls gvcst_blk_build for cse's with mode kill_t_write/kill_t_create. For them we build a private
	 *	copy of the block for later use in phase2 of the M-kill. In this case, blktn could be
	 *	uninitialized so cannot do any checks using this value.
	 * c) For MM, we dont have two phase commits so dont do any checks in that case.
	 * d) For acquired blocks, it is possible that some process had read in the uninitialized block from disk
	 *	outside of crit (due to concurrency issues). Therefore the buffer could contain garbage. So we cannot
	 *	rely on the buffer contents to determine the block's transaction number.
	 * e) For VMS, if a twin is created, we explicitly set its buffer tn to be equal to ctn in phase1.
	 *	But since we are not passed the "cr" in this routine, it is not easily possible to check that.
	 *	Hence in case of VMS, we relax the check so buffertn == ctn is allowed.
	 */
	DEBUG_ONLY(blktn = ((blk_hdr_ptr_t)base_addr)->tn);
	assert(!IS_MCODE_RUNNING || !cs_addrs->t_commit_crit || (dba_bg != cs_data->acc_meth) || (n_gds_t_op < cse->mode)
		|| (cse->mode == gds_t_acquired) || (blktn UNIX_ONLY(<) VMS_ONLY(<=) ctn));
	assert((ctn < cs_addrs->ti->early_tn) || write_after_image);
	/* Stamp the header first (see the ordering note above), then copy the contents */
	((blk_hdr_ptr_t)base_addr)->bver = GDSVCURR;
	((blk_hdr_ptr_t)base_addr)->tn = ctn;
	((blk_hdr_ptr_t)base_addr)->bsiz = UINTCAST(array->len);
	((blk_hdr_ptr_t)base_addr)->levl = cse->level;

	if (cse->forward_process)
	{	/* Copy segments in ascending order, filling the buffer from just past the block header towards the end.
		 * array[0].addr points at the last segment to copy.
		 */
		stop_ptr = (blk_segment *)array->addr;
		seg = cse->first_copy ? array + 1: array + 2;
		ptr = base_addr + SIZEOF(blk_hdr);
		if (!cse->first_copy)
		{	/* Not the first copy : array[1] is not copied, only its length is skipped over in the buffer
			 * (presumably that part of the block is already in place - TODO confirm).
			 */
			ptr += ((blk_segment *)(array + 1))->len;
		}
		for ( ; seg <= stop_ptr; )
		{
			assert(0L <= ((INTPTR_T)seg->len));
			DBG_BG_PHASE2_CHECK_CR_IS_PINNED(cs_addrs, seg);
			memmove(ptr, seg->addr, seg->len);
			ptr += seg->len;
			seg++;
		}
	} else
	{	/* Copy segments in descending order, filling the buffer from the end of the block back towards the header.
		 * The walk starts at the last segment (array[0].addr) and stops short of array[0] for a first copy or
		 * array[1] otherwise, so that entry itself is never copied.
		 */
		stop_ptr = cse->first_copy ? array : array + 1;
		seg = (blk_segment *)array->addr;
		ptr = base_addr + array->len;
		while (seg != stop_ptr)
		{
			assert(0L <= ((INTPTR_T)seg->len));
			DBG_BG_PHASE2_CHECK_CR_IS_PINNED(cs_addrs, seg);
			ptr -= (n = seg->len);
			memmove(ptr, seg->addr, n);
			seg--;
		}
	}
	if (dollar_tlevel)
	{
		if (cse->ins_off)
		{	/* if the cw set has a reference to resolve, move it to the block */
			assert(cse->index < sgm_info_ptr->cw_set_depth);
			assert((int)cse->ins_off >= (int)(SIZEOF(blk_hdr) + SIZEOF(rec_hdr)));
			assert((int)(cse->next_off + cse->ins_off + SIZEOF(block_id)) <= array->len);
			if (cse->first_off == 0)
				cse->first_off = cse->ins_off;	/* this becomes the head of the in-block chain */
			chain.flag = 1;
			chain.cw_index = cse->index;
			chain.next_off = cse->next_off;
			ptr = base_addr + cse->ins_off;
			GET_LONGP(ptr, &chain);		/* store the chain record into the block at ins_off */
			/* The reference now lives in the block; clear it from the cse */
			cse->index = 0;
			cse->ins_off = 0;
			cse->next_off = 0;
		}
#		ifdef DEBUG
		if (0 != (offset = cse->first_off))
		{	/* Verify the integrity of the TP chains within a newly created block.
			 * If it is the first TP private build, the update array could have referenced
			 * shared memory global buffers which could have been concurrently updated.
			 * So the integrity of the chain cannot be easily verified. If ever we find
			 * an integ error in the chain, we check if this is the first private TP build
			 * and if so allow it but set a debug flag donot_commit so we never ever commit
			 * this transaction. The hope is that it will instead restart after validation.
			 */
			ptr = base_addr;
			ptrtop = ptr + ((blk_hdr_ptr_t)ptr)->bsiz;
			chainptr = ptr + offset;
			ptr += SIZEOF(blk_hdr);
			integ_error_found = FALSE;
			for ( ; ptr < ptrtop; )
			{	/* Inner loop : step record by record until the record whose trailing off_chain-sized
				 * field is exactly at chainptr. Records before it must not have chain.flag set.
				 */
				do
				{
					GET_USHORT(nRecLen, &((rec_hdr_ptr_t)ptr)->rsiz);
					if (0 == nRecLen)
					{	/* zero-length record : would never advance */
						assert(NULL == input_base_addr);
						integ_error_found = TRUE;
						break;
					}
					ptr += nRecLen;
					if (ptr - SIZEOF(off_chain) == chainptr)
						break;
					if ((ptr - SIZEOF(off_chain)) > chainptr)
					{	/* stepped past the expected chain location */
						assert(NULL == input_base_addr);
						integ_error_found = TRUE;
						break;
					}
					GET_LONGP(&chain, ptr - SIZEOF(off_chain));
					if (chain.flag)
					{	/* a chain record that is not linked into the chain */
						assert(NULL == input_base_addr);
						integ_error_found = TRUE;
						break;
					}
				} while (ptr < ptrtop);
				if (integ_error_found)
					break;
				if (chainptr < ptrtop)
				{	/* Follow the link to the next chain record (or mark the end of the chain) */
					GET_LONGP(&chain, chainptr);
					assert(1 == chain.flag || (skip_block_chain_tail_check && (0 == chain.next_off)));
					assert(chain.cw_index < sgm_info_ptr->cw_set_depth);
					offset = chain.next_off;
					if (0 == offset)
						chainptr = ptrtop;
					else
					{
						chainptr = chainptr + offset;
						assert(chainptr < ptrtop);	/* ensure we have not overrun the buffer */
					}
				}
			}
			if (integ_error_found)
				TREF(donot_commit) |= DONOTCOMMIT_GVCST_BLK_BUILD_TPCHAIN;
			else
				assert(0 == offset);	/* ensure the chain is NULL terminated */
		}
#		endif
	} else
		assert(dollar_tlevel || (cse->index < (int)cw_set_depth));
}