615 lines
26 KiB
C
615 lines
26 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2011 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
/*********************************************************************************
|
|
mu_swap_blk.c:
|
|
This program will swap the working block with a destination block (dest_blk_id).
|
|
The destination block is the block id, where a block should go for
|
|
better performance of database. This destination block Id is picked
|
|
sequentially starting from block number 3.
|
|
It will NOT swap a block with
|
|
a) root block of the GVT and
|
|
b) a bitmap block,
|
|
c) a block from other GVT which should be unchanged as
|
|
a result of mentioning the global in EXCLUDE option
|
|
d) parent block (because it is against the pre-order traversal)
|
|
This module :
|
|
Reads dest_blk_id;
|
|
Reads first key from it or, its descendent;
|
|
Checks if it is part of a dir_tree;
|
|
call gvcst_search for the GVT under which it belongs
|
|
Finally calls t_writes to create blocks.
|
|
************************************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
|
|
#include "cdb_sc.h"
|
|
#include "gdsroot.h"
|
|
#include "gdsblk.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "filestruct.h"
|
|
#include "gdsblkops.h"
|
|
#include "gdskill.h"
|
|
#include "gdscc.h"
|
|
#include "gdsbml.h"
|
|
#include "jnl.h"
|
|
#include "copy.h"
|
|
#include "muextr.h"
|
|
#include "mu_reorg.h"
|
|
#include "hashtab_int4.h"
|
|
#include "cws_insert.h"
|
|
|
|
/* Include prototypes */
|
|
#include "t_qread.h"
|
|
#include "t_write.h"
|
|
#include "t_create.h"
|
|
#include "t_write_map.h"
|
|
#include "mupip_reorg.h"
|
|
#include "gvcst_protos.h" /* for gvcst_search prototype */
|
|
#include "jnl_get_checksum.h"
|
|
|
|
GBLREF gv_namehead *gv_target;
|
|
GBLREF gv_namehead *reset_gv_target;
|
|
GBLREF gv_namehead *reorg_gv_target;
|
|
GBLREF sgmnt_addrs *cs_addrs;
|
|
GBLREF sgmnt_data_ptr_t cs_data;
|
|
GBLREF char *update_array, *update_array_ptr;
|
|
GBLREF uint4 update_array_size; /* for the BLK_* macros */
|
|
GBLREF uint4 t_err;
|
|
GBLREF cw_set_element cw_set[];
|
|
GBLREF unsigned char cw_map_depth;
|
|
GBLREF unsigned char cw_set_depth;
|
|
GBLREF unsigned char rdfail_detail;
|
|
GBLREF unsigned int t_tries;
|
|
GBLREF gv_key *gv_currkey;
|
|
GBLREF hash_table_int4 cw_stagnate;
|
|
|
|
/******************************************************************************************
|
|
Input Parameters:
|
|
level: level of working block
|
|
pdest_blk_id: points to the last destination used for swap (updated on a cdb_sc_normal or cdb_sc_oprnotneeded return)
|
|
Output Parameters:
|
|
kill_set_ptr: Kill set to be freed
|
|
*exclude_glist_ptr: List of globals not to be moved for a swap destination
|
|
Input/Output Parameters:
|
|
gv_target : as working block's history
|
|
reorg_gv_target->hist : as destination block's history
|
|
******************************************************************************************/
|
|
/* mu_swap_blk: prepare the swap of the working block (gv_target->hist.h[level]) with the next usable destination block.
 * Candidate block numbers are advanced from *pdest_blk_id with INCR_BLK_NUM until one is found that is either
 * not BLK_BUSY in its local bitmap (free or recycled) or is a busy, non-root block of a non-excluded GVT.
 * Returns:
 *	cdb_sc_normal		: t_write/t_create/t_write_map have been invoked for the blocks involved in the swap;
 *				  *pdest_blk_id is set to the destination block chosen.
 *	cdb_sc_oprnotneeded	: the candidate reached total_blks or the working block itself; *pdest_blk_id is set
 *				  to that candidate and nothing is written.
 *	any other cdb_sc code	: a block read failed or an inconsistency was detected (cdb_sc_blkmod, cdb_sc_badbitmap,
 *				  cdb_sc_maxlvl, cdb_sc_rdfail, cdb_sc_lostbmlcr, rdfail_detail or a gvcst_search status);
 *				  *pdest_blk_id is left untouched on these paths.
 */
enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr)
|

{
|

unsigned char x_blk_lmap;
|

unsigned short temp_ushort;
|

int rec_size1, rec_size2;
|

int wlevel, nslevel, dest_blk_level;
|

int piece_len1, piece_len2, first_offset, second_offset,
|

work_blk_size, work_parent_size, dest_blk_size, dest_parent_size;
|

int dest_child_cycle;
|

int blk_seg_cnt, blk_size;
|

trans_num ctn;
|

int key_len, key_len_dir;
|

block_id dest_blk_id, work_blk_id, child1, child2;
|

enum cdb_sc status;
|

srch_hist *dest_hist_ptr;
|

cache_rec_ptr_t dest_child_cr;
|

blk_segment *bs1, *bs_ptr;
|

sm_uc_ptr_t saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr,
|

bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1;
|

boolean_t gbl_target_was_set, blk_was_free, deleted;
|

gv_namehead *save_targ;
|

srch_blk_status bmlhist, destblkhist, *hist_ptr;
|

unsigned char save_cw_set_depth;
|

cw_set_element *tmpcse;
|

jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */
|

unsigned int bsiz;
|

/* NOTE(review): temp_ushort, rPtr1 and blk_seg_cnt are never named directly in this function and
 * gbl_target_was_set is only assigned; presumably they are consumed by the GET_RSIZ/GET_LONG/PUT_LONG, BLK_* and
 * RESET_GV_TARGET_LCL_AND_CLR_GBL macros - confirm before removing any of them.
 */
DCL_THREADGBL_ACCESS;
|

|

SETUP_THREADGBL_ACCESS;
|

dest_blk_id = *pdest_blk_id;
|

CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */
|

if (NULL == TREF(gv_reorgkey))
|

GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ));
|

dest_hist_ptr = &(reorg_gv_target->hist);
|

blk_size = cs_data->blk_size;
|

/* Pick up buffer address, size and block number of the working block and its parent from gv_target's history */
work_parent_ptr = gv_target->hist.h[level+1].buffaddr;
|

work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz;
|

work_blk_ptr = gv_target->hist.h[level].buffaddr;
|

work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz;
|

work_blk_id = gv_target->hist.h[level].blk_num;
|

/* A working block holding no more than a block header, or one larger than the database block size, is not swapped */
if (SIZEOF(blk_hdr) >= work_blk_size || blk_size < work_blk_size)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

cws_reorg_remove_index = 0;
|

/*===== Infinite loop to find the destination block =====*/
|

for ( ; ; )
|

{
|

blk_was_free = FALSE;
|

INCR_BLK_NUM(dest_blk_id);
|

/* A Pre-order traversal should not cause a child block to go to its parent.
|

* However, in case it happens because already the organization was like that or for any other reason, skip swap.
|

* If we decide to swap, code below should be changed to take care of the special case.
|

* Still a grand-child can go to its grand-parent. This is rare and following code can handle it.
|

*/
|

if (dest_blk_id == gv_target->hist.h[level+1].blk_num)
|

continue;
|

/* Candidate ran past the last block of the database or reached the working block itself : no swap is needed */
if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id)
|

{
|

*pdest_blk_id = dest_blk_id;
|

return cdb_sc_oprnotneeded;
|

}
|

/* ctn is later stored as the tn of the destination history entries and passed to t_write_map */
ctn = cs_addrs->ti->curr_tn;
|

/* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk")
|

* through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below).
|

* This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to
|

* choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not
|

* deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and
|

* a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon
|

* the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not
|

* be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing
|

* a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array"
|

* variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread
|

* in one iteration down below. And that number is the sum of
|

* + MAX_BT_DEPTH : for the t_qread while loop down the tree done below
|

* + 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below
|

* + 2 : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a
|

* bitmap block done inside the call to get_lmap below
|

* = 3 * MAX_BT_DEPTH + 2
|

* To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2.
|

* This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h
|

*/
|

/* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */
|

/* NOTE(review): the loop below passes array elements [cws_reorg_remove_index] down to [1]; element [0] is never
 * passed. Presumably entries are stored with a pre-increment of the index - confirm against CWS_INSERT.
 */
for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--)
|

{
|

deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]);
|

assert(deleted);
|

}
|

/* read corresponding bitmap block before attempting to read destination block.
|

* if bitmap indicates block is free, we will not read the destination block
|

*/
|

bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
|

if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap ||
|

((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) ||
|

((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL)
|

{
|

assert(CDB_STAGNATE > t_tries);
|

return cdb_sc_badbitmap;
|

}
|

if (BLK_FREE != x_blk_lmap)
|

{ /* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block
|

* in case we later detect that the before-image needs to be written.
|

*/
|

if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr)))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return (enum cdb_sc)rdfail_detail;
|

}
|

destblkhist.blk_num = dest_blk_id;
|

destblkhist.buffaddr = dest_blk_ptr;
|

destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl;
|

}
|

if (BLK_BUSY != x_blk_lmap)
|

{ /* x_blk_lmap is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */
|

blk_was_free = TRUE;
|

break;
|

}
|

/* dest_blk_id might contain a *-record only.
|

* So follow the pointer to go to the data/index block, which has a non-* key to search.
|

*/
|

nslevel = dest_blk_level;
|

if (MAX_BT_DEPTH <= nslevel)
|

{
|

assert(CDB_STAGNATE > t_tries);
|

return cdb_sc_maxlvl;
|

}
|

rec_base = dest_blk_ptr + SIZEOF(blk_hdr);
|

GET_RSIZ(rec_size1, rec_base);
|

tblk_ptr = dest_blk_ptr;
|

/* Descend while the first record is of *-record size and the level is above 0; nslevel tracks the level reached */
while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel))
|

{
|

GET_LONG(child1, (rec_base + SIZEOF(rec_hdr)));
|

if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_rdfail;
|

}
|

if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr)))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return (enum cdb_sc)rdfail_detail;
|

}
|

/* leaf of a killed GVT can have block header only. Skip those blocks */
|

if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
|

break;
|

nslevel--;
|

rec_base = tblk_ptr + SIZEOF(blk_hdr);
|

GET_RSIZ(rec_size1, rec_base);
|

}
|

/* leaf of a killed GVT can have block header only. Skip those blocks */
|

if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
|

continue;
|

/* get length of global variable name (do not read subscript) for dest_blk_id */
|

GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr));
|

/* key_len = length of 1st key value (including subscript) for dest_blk_id */
|

GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr));
|

if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len))
|

{ /* Earlier used to restart here always. But dest_blk_id can be a block,
|

* which is just killed and still marked busy. Skip it, if we are in last retry.
|

*/
|

if (CDB_STAGNATE <= t_tries)
|

continue;
|

else
|

return cdb_sc_blkmod;
|

}
|

/* Build gv_reorgkey from the first key_len_dir bytes of the first key (the global name portion) */
memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir);
|

(TREF(gv_reorgkey))->base[key_len_dir] = 0;
|

(TREF(gv_reorgkey))->end = key_len_dir;
|

if (exclude_glist_ptr->next)
|

{ /* exclude blocks for globals in the list of EXCLUDE option */
|

if (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr))
|

continue;
|

}
|

/* Switch gv_target to reorg_gv_target for the two searches below. Every path from here up to and including the
 * second gvcst_search invokes RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ) before returning or continuing.
 */
save_targ = gv_target;
|

if (INVALID_GV_TARGET != reset_gv_target)
|

gbl_target_was_set = TRUE;
|

else
|

{
|

gbl_target_was_set = FALSE;
|

reset_gv_target = save_targ;
|

}
|

gv_target = reorg_gv_target;
|

gv_target->root = cs_addrs->dir_tree->root;
|

gv_target->clue.end = 0;
|

/* assign Directory tree path to find dest_blk_id in dest_hist_ptr */
|

status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
|

if (cdb_sc_normal != status)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
|

return status;
|

}
|

if (dest_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1)
|

{ /* may be in a kill_set of another process */
|

RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
|

continue;
|

}
|

/* Scan the directory tree history just obtained for dest_blk_id */
for (wlevel = 0; wlevel <= dest_hist_ptr->depth &&
|

dest_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++);
|

if (dest_hist_ptr->h[wlevel].blk_num == dest_blk_id)
|

{ /* do not swap a dir_tree block */
|

RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
|

continue;
|

}
|

/* gv_reorgkey will now have the first key from dest_blk_id,
|

* or, from a descendant of dest_blk_id (in case it had a *-key only).
|

*/
|

memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len);
|

(TREF(gv_reorgkey))->end = key_len - 1;
|

/* key_len_dir is reused here : it now holds the key length of the matching directory tree record */
GET_KEY_LEN(key_len_dir, dest_hist_ptr->h[0].buffaddr + dest_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr));
|

/* Get root of GVT for dest_blk_id */
|

GET_LONG(gv_target->root,
|

dest_hist_ptr->h[0].buffaddr + dest_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir);
|

if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1)))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
|

return cdb_sc_blkmod;
|

}
|

/* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */
|

gv_target->clue.end = 0;
|

status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
|

RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
|

/* NOTE(review): dest_hist_ptr is examined here before "status" of the search above is checked (the status check
 * is the next statement); confirm the history is safe to read when the search did not return cdb_sc_normal.
 */
if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */
|

dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */
|

continue;
|

if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1)))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return (cdb_sc_normal != status ? status : cdb_sc_blkmod);
|

}
|

/* Stamp the history entries from nslevel up to dest_blk_level with the transaction number noted earlier */
for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++)
|

dest_hist_ptr->h[wlevel].tn = ctn;
|

dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr;
|

dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz;
|

dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr;
|

dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz;
|

break;
|

}
|

/*===== End of infinite loop to find the destination block =====*/
|

/*-----------------------------------------------------
|

Now modify blocks for swapping. Maximum of 4 blocks.
|

-----------------------------------------------------*/
|

/* Note : when blk_was_free is TRUE, dest_blk_level, dest_blk_size, dest_parent_ptr and dest_parent_size may not have
 * been set by the loop above; every use of them below is guarded by !blk_was_free (directly or by short-circuit).
 */
if (!blk_was_free)
|

{ /* 1: dest_blk_id into work_blk_id */
|

BLK_INIT(bs_ptr, bs1);
|

BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr));
|

if (!BLK_FINI (bs_ptr,bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

assert(gv_target->hist.h[level].blk_num == work_blk_id);
|

assert(gv_target->hist.h[level].buffaddr == work_blk_ptr);
|

t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN);
|

}
|

/* 2: work_blk_id into dest_blk_id */
|

if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num)
|

{ /* work_blk_id will be swapped with its child.
|

* This is the only vertical swap. Here working block goes to its child.
|

* Working block cannot go to its parent because of traversal
|

*/
|

if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

BLK_INIT(bs_ptr, bs1);
|

BLK_ADDR(saved_blk, dest_parent_size, unsigned char);
|

memcpy(saved_blk, dest_parent_ptr, dest_parent_size);
|

first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset;
|

GET_RSIZ(rec_size1, saved_blk + first_offset);
|

if (work_blk_size < first_offset + rec_size1)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

/* New contents : copy of the parent with the block pointer at the end of the current record replaced */
piece_len1 = first_offset + rec_size1;
|

BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr));
|

BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
|

PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */
|

BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
|

BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1);
|

if (!BLK_FINI(bs_ptr, bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num);
|

assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr);
|

t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
|

} else /* free block or, when working block does not move vertically (swap with parent/child) */
|

{
|

/* New contents of dest_blk_id : a saved copy of all records of the working block */
BLK_INIT(bs_ptr, bs1);
|

BLK_ADDR(saved_blk, work_blk_size, unsigned char);
|

memcpy(saved_blk, work_blk_ptr, work_blk_size);
|

BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr));
|

if (!BLK_FINI(bs_ptr, bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

if (blk_was_free)
|

{
|

/* Note the cw_set slot at the current depth; the assert after t_create expects it to describe dest_blk_id */
tmpcse = &cw_set[cw_set_depth];
|

t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level);
|

/* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode
|

* from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here.
|

*/
|

assert(dest_blk_id == tmpcse->blk);
|

tmpcse->mode = gds_t_acquired;
|

/* If snapshots are in progress, we might want to read the before images of the FREE blocks also.
|

* Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence
|

* will not read the before images of the FREE blocks in t_end. To workaround this, set
|

* cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of
|

* the FREE blocks if needed.
|

*/
|

tmpcse->was_free = (BLK_FREE == x_blk_lmap);
|

/* No need to write before-image in case the block is FREE. In case the database had never been fully
|

* upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered
|

* FREE (i.e. no need to write before-images since backward journal recovery will never be expected
|

* to take the database to a point BEFORE the mupip upgrade).
|

*/
|

if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once)
|

tmpcse->old_block = NULL;
|

else
|

{ /* Destination is a recycled block that needs a before image */
|

tmpcse->old_block = destblkhist.buffaddr;
|

/* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */
|

tmpcse->cr = destblkhist.cr;
|

tmpcse->cycle = destblkhist.cycle;
|

jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL;
|

if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
|

{ /* Compute CHECKSUM for writing PBLK record before getting crit.
|

* It is possible that we are reading a block that is actually marked free in
|

* the bitmap (due to concurrency issues at this point). Therefore we might be
|

* actually reading uninitialized block headers and in turn a bad value of
|

* "old_block->bsiz". Restart if we ever access a buffer whose size is greater
|

* than the db block size.
|

*/
|

bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
|

if (bsiz > blk_size)
|

{
|

assert(CDB_STAGNATE > t_tries);
|

return cdb_sc_lostbmlcr;
|

}
|

JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz);
|

}
|

}
|

assert(GDSVCURR == tmpcse->ondsk_blkver); /* should have been set by t_create above */
|

} else
|

{
|

hist_ptr = &dest_hist_ptr->h[dest_blk_level];
|

assert(dest_blk_id == hist_ptr->blk_num);
|

assert(dest_blk_ptr == hist_ptr->buffaddr);
|

t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
|

}
|

}
|

if (!blk_was_free)
|

{ /* 3: Parent of destination block (may be parent of working block too) */
|

if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num)
|

{ /* dest parent == work_blk parent */
|

BLK_INIT(bs_ptr, bs1);
|

/* Interchange pointer to dest_blk_id and work_blk_id */
|

if (level != dest_blk_level ||
|

gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

/* Order the two parent record offsets so that first_offset < second_offset */
if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset)
|

{
|

first_offset = gv_target->hist.h[level+1].curr_rec.offset;
|

second_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
|

} else
|

{
|

first_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
|

second_offset = gv_target->hist.h[level+1].curr_rec.offset;
|

}
|

GET_RSIZ(rec_size1, dest_parent_ptr + first_offset);
|

GET_RSIZ(rec_size2, dest_parent_ptr + second_offset);
|

if (dest_parent_size < first_offset + rec_size1 ||
|

dest_parent_size < second_offset + rec_size2 ||
|

BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

/* piece_len1/piece_len2 : offsets in the parent of the block pointers ending the first/second record */
piece_len1 = first_offset + rec_size1 - SIZEOF(block_id);
|

piece_len2 = second_offset + rec_size2 - SIZEOF(block_id);
|

GET_LONG(child1, dest_parent_ptr + piece_len1);
|

GET_LONG(child2, dest_parent_ptr + piece_len2);
|

BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr));
|

BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
|

PUT_LONG(bn_ptr, child2);
|

BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
|

BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1,
|

second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1);
|

BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
|

PUT_LONG(bn_ptr, child1);
|

BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
|

BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2,
|

dest_parent_size - second_offset - rec_size2);
|

if (!BLK_FINI(bs_ptr,bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

assert(level == dest_blk_level);
|

assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr);
|

t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
|

} else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num)
|

{ /* Destination block moved in the position of working block.
|

* So destination block's parent's pointer should be changed to work_blk_id
|

*/
|

BLK_INIT(bs_ptr, bs1);
|

GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset);
|

if (dest_parent_size < rec_size1 + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset ||
|

BSTAR_REC_SIZE > rec_size1)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr),
|

dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
|

BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
|

PUT_LONG(bn_ptr, work_blk_id);
|

BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
|

BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1,
|

dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset - rec_size1);
|

if (!BLK_FINI(bs_ptr,bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr);
|

t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1,
|

FALSE, TRUE, GDS_WRITE_KILLTN);
|

}
|

}
|

/* 4: Parent of working block, if different than destination's parent or, destination was a free block */
|

if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num)
|

{ /* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id */
|

GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset));
|

if (work_parent_size < rec_size1 + gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1)
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

BLK_INIT(bs_ptr, bs1);
|

BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr),
|

gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
|

BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
|

PUT_LONG(bn_ptr, dest_blk_id);
|

BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
|

BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1,
|

work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1);
|

if (!BLK_FINI(bs_ptr, bs1))
|

{
|

assert(t_tries < CDB_STAGNATE);
|

return cdb_sc_blkmod;
|

}
|

assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr);
|

t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
|

}
|

/* else already taken care of, when dest_blk_id moved */
|

if (blk_was_free)
|

{ /* A free/recycled block will become busy block.
|

* So the local bitmap must be updated.
|

* Local bit map block will be added in the list of update array for concurrency check and
|

* also the cw_set element will be created to mark the free/recycled block as busy.
|

* kill_set_ptr will save the block which will become free.
|

*/
|

/* child1 is reused as scratch here : first the bitmap block number, then dest_blk_id's offset from it */
child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */
|

bmlhist.buffaddr = bmp_buff;
|

bmlhist.blk_num = child1;
|

child1 = dest_blk_id - child1;
|

assert(child1);
|

PUT_LONG(update_array_ptr, child1);
|

/* Need to put bit maps on the end of the cw set for concurrency checking.
|

* We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth".
|

* Hence the save and restore logic (for "cw_set_depth") below.
|

*/
|

save_cw_set_depth = cw_set_depth;
|

assert(!cw_map_depth);
|

t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1); /* will increment cw_set_depth */
|

cw_map_depth = cw_set_depth; /* set cw_map_depth to the latest cw_set_depth */
|

cw_set_depth = save_cw_set_depth; /* restore cw_set_depth */
|

/* t_write_map simulation end */
|

/* Step past the offset stored above and follow it with a zero block_id entry */
update_array_ptr += SIZEOF(block_id);
|

child1 = 0;
|

PUT_LONG(update_array_ptr, child1);
|

update_array_ptr += SIZEOF(block_id);
|

assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 1 free block is now becoming BLK_USED in the bitmap */
|

/* working block will be removed */
|

kill_set_ptr->blk[kill_set_ptr->used].flag = 0;
|

kill_set_ptr->blk[kill_set_ptr->used].level = 0;
|

kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id;
|

}
|

*pdest_blk_id = dest_blk_id;
|

return cdb_sc_normal;
|

}
|
|
|
|
/***************************************************************
|
|
Checks if a key is present in exclude global lists.
|
|
curr_key_ptr = Key pointer
|
|
key_len = curr_key_ptr length excluding nulls
|
|
exclude_glist_ptr = list of globals in -EXCLUDE option
|
|
Returns:
|
|
TRUE if key is also present in list of exclude_glist_ptr
|
|
FALSE Otherwise
|
|
***************************************************************/
|
|
boolean_t in_exclude_list(unsigned char *curr_key_ptr, int key_len, glist *exclude_glist_ptr)
{
	glist	*entry;

	/* The node passed in is never compared itself; the walk starts at its successor
	 * and stops at the first NULL "next" link.
	 */
	entry = exclude_glist_ptr->next;
	while (NULL != entry)
	{
		/* A hit needs the same length and identical bytes over that length */
		if ((key_len == entry->name.str.len)
			&& !memcmp(entry->name.str.addr, curr_key_ptr, entry->name.str.len))
		{
			return TRUE;
		}
		entry = entry->next;
	}
	/* No entry in the list matched the key */
	return FALSE;
}
|