336 lines
14 KiB
C
336 lines
14 KiB
C
/****************************************************************
 *								*
 *	Copyright 2001, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include <stddef.h> /* for offsetof() macro */
|
|
#include "gtm_string.h"
|
|
#include "gdsroot.h"
|
|
#include "gdskill.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "filestruct.h"
|
|
#include "cdb_sc.h"
|
|
#include "min_max.h" /* needed for gdsblkops.h */
|
|
#include "gdsblkops.h"
|
|
#include "jnl.h"
|
|
#include "gdscc.h"
|
|
#include "iosp.h"
|
|
#include <mdefsp.h>
|
|
#include "ccp.h"
|
|
#include "buddy_list.h" /* needed for tp.h */
|
|
#include "hashtab_int4.h" /* needed for tp.h */
|
|
#include "tp.h"
|
|
#include "copy.h"
|
|
#include "jnl_get_checksum.h"
|
|
#include "gdsblk.h" /* for blk_hdr usage in JNL_MAX_SET_KILL_RECLEN macro */
|
|
|
|
#ifdef GTM_CRYPT
|
|
#include "gtmcrypt.h"
|
|
#endif
|
|
|
|
#ifdef GTM_TRIGGER
/* In case of a ZTWORMHOLE, it should be immediately followed by a SET or KILL record. We do not maintain different
 * update_num values for the ZTWORMHOLE and its corresponding SET or KILL record. So we should decrement the
 * update_num before returning from this function in the hope that the next time jnl_format is called for the SET
 * or KILL, update_num will be incremented thereby using the exact same value that was used for the ZTWORMHOLE record.
 * An exception is journal recovery forward phase in which case, we dont do any increments of jgbl.tp_ztp_jnl_upd_num
 * so we should do no decrements either.
 *
 * Written as do/while(0) so that the macro behaves as exactly one statement (safe in an unbraced if/else).
 */
#define ZTWORM_DECR_UPD_NUM					\
do								\
{								\
	if (!jgbl.forw_phase_recovery)				\
		jgbl.tp_ztp_jnl_upd_num--;			\
} while (0)

/* If the record just formatted is a ZTWORMHOLE record (and we are not in the forward phase of journal recovery),
 * remember where its value lives so the next jnl_format() call can compare against it:
 *	jgbl.save_ztworm_ptr	<- previous value of jgbl.prev_ztworm_ptr
 *	jgbl.prev_ztworm_ptr	<- src_ptr (start of the length-prefixed $ZTWORMHOLE value in the formatted record)
 * and undo the update_num increment (see ZTWORM_DECR_UPD_NUM above).
 *
 * Both parameters are parenthesized in the expansion so that expression arguments (e.g. "a || b", "base + offset")
 * are evaluated as a unit before the "&&" / the cast is applied.
 */
#define SET_PREV_ZTWORM_JFB_IF_NEEDED(is_ztworm_rec, src_ptr)		\
do									\
{									\
	if ((is_ztworm_rec) && !jgbl.forw_phase_recovery)		\
	{								\
		jgbl.save_ztworm_ptr = jgbl.prev_ztworm_ptr;		\
		jgbl.prev_ztworm_ptr = (unsigned char *)(src_ptr);	\
		ZTWORM_DECR_UPD_NUM;					\
	}								\
} while (0)
#else
/* Without trigger support there are no ZTWORMHOLE records, so there is nothing to remember */
#define SET_PREV_ZTWORM_JFB_IF_NEEDED(is_ztworm_rec, src_ptr)
#endif
|
|
|
|
GBLREF gd_region *gv_cur_region;
|
|
GBLREF uint4 dollar_tlevel;
|
|
GBLREF jnl_fence_control jnl_fence_ctl;
|
|
GBLREF sgm_info *sgm_info_ptr;
|
|
GBLREF jnl_format_buffer *non_tp_jfb_ptr;
|
|
GBLREF jnl_gbls_t jgbl;
|
|
#ifdef GTM_TRIGGER
|
|
GBLREF int4 gtm_trigger_depth;
|
|
GBLREF int4 tstart_trigger_depth;
|
|
#endif
|
|
|
|
/* Do NOT define first dimension of jnl_opcode array to be JA_MAX_TYPES. Instead let compiler fill in the value according
|
|
* to the number of rows actually specified in the array. This way, if ever a new entry is added in jnl.h to jnl_action_code
|
|
* (thereby increasing JA_MAX_TYPES) but is forgotten to add a corresponding row here, an assert (in this module) will fail
|
|
* indicating the inconsistency. Defining jnl_opcode[JA_MAX_TYPES][5] causes any changes to JA_MAX_TYPES to automatically
|
|
* allocate a bigger array filled with 0s which might cause one to overlook the inconsistency.
|
|
*/
|
|
static const enum jnl_record_type jnl_opcode[][5] =
|
|
{
|
|
{ JRT_KILL, JRT_FKILL, JRT_TKILL, JRT_GKILL, JRT_UKILL }, /* KILL record types */
|
|
{ JRT_SET, JRT_FSET, JRT_TSET, JRT_GSET, JRT_USET }, /* SET record types */
|
|
{ JRT_ZKILL, JRT_FZKILL, JRT_TZKILL, JRT_GZKILL, JRT_UZKILL }, /* ZKILL record types */
|
|
# ifdef GTM_TRIGGER
|
|
{ JRT_BAD, JRT_BAD, JRT_TZTWORM, JRT_BAD, JRT_UZTWORM }, /* ZTWORM record types */
|
|
{ JRT_BAD, JRT_BAD, JRT_TZTRIG, JRT_BAD, JRT_UZTRIG }, /* ZTRIG record types */
|
|
# endif
|
|
};
|
|
|
|
/* jnl_format : Formats one logical journal record (SET/KILL/ZKILL and, with triggers, ZTWORM/ZTRIG) for the
 * current region (gv_cur_region) into a jnl_format_buffer.
 *
 * Parameters
 *	opcode		- journal action; used as the row index into jnl_opcode[].
 *	key		- global variable key to record. Asserted to be NULL for JNL_ZTWORM and non-NULL otherwise
 *			  (asserts are under GTM_TRIGGER).
 *	val		- value to record. Asserted non-NULL for JNL_SET and JNL_ZTWORM; may be NULL otherwise.
 *	nodeflags	- stored in the jfb action and in the key's jnl_string header. Overridden by
 *			  jgbl.mur_jrec_nodeflags when jgbl.forw_phase_recovery is set.
 *
 * Returns
 *	Pointer to the jnl_format_buffer holding the formatted record, or NULL if (GTM_TRIGGER only) a JNL_ZTWORM
 *	request carries the same value as the ZTWORMHOLE remembered in jgbl.prev_ztworm_ptr and hence no record
 *	is formatted.
 *
 * Side effects (all visible below)
 *	- If dollar_tlevel is non-zero, appends a new jfb to sgm_info_ptr's jnl list, sets bits in si->update_trans
 *	  and adds to si->total_jnl_rec_size. Otherwise reuses non_tp_jfb_ptr and resets jgbl.cumul_jnl_rec_len.
 *	- On the first fenced/TP update in a region, links csa into jnl_fence_ctl.fence_list.
 *	- Increments jgbl.tp_ztp_jnl_upd_num for fenced/TP records (not during forward phase of recovery).
 *	- Adds the record size to jgbl.cumul_jnl_rec_len.
 *	- If the database is encrypted (GTM_CRYPT), the variable part of the record in jfb->buff is encrypted in
 *	  place; an unencrypted copy is kept in jfb->alt_buff when REPL_ALLOWED(csa).
 *	- For ZTWORM records, updates jgbl.prev_ztworm_ptr/save_ztworm_ptr (see SET_PREV_ZTWORM_JFB_IF_NEEDED).
 */
jnl_format_buffer *jnl_format(jnl_action_code opcode, gv_key *key, mval *val, uint4 nodeflags)
{
	char			*local_buffer, *mumps_node_ptr;
	enum jnl_record_type	rectype;
	int			subcode;
	jnl_record		*rec;
	jnl_action		*ja;
	jnl_format_buffer	*jfb;
	jnl_str_len_t		keystrlen;
	mstr_len_t		valstrlen;
	sgm_info		*si;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	uint4			align_fill_size, jrec_size, tmp_jrec_size, update_length;
	boolean_t		is_ztworm_rec = FALSE;
	DEBUG_ONLY(
		static boolean_t	dbg_in_jnl_format = FALSE;
	)
	GTMCRYPT_ONLY(int	crypt_status;)
#	ifdef GTM_TRIGGER
	mstr			prev_str, *cur_str;
#	endif

	/* The below assert ensures that if ever jnl_format is interrupted by a signal, the interrupt handler never calls
	 * jnl_format again. This is because jnl_format plays with global pointers and we would possibly end up in a bad
	 * state if the interrupt handler calls jnl_format again.
	 */
	assert(!dbg_in_jnl_format);
	DEBUG_ONLY(dbg_in_jnl_format = TRUE;)
	if (jgbl.forw_phase_recovery)	/* In case of recovery, copy "nodeflags" from journal record being played */
		nodeflags = jgbl.mur_jrec_nodeflags;
	csa = &FILE_INFO(gv_cur_region)->s_addrs;
	csd = csa->hdr;
#	ifdef GTM_TRIGGER
	/* If opcode is JNL_ZTWORM then check if ztwormhole operation can be avoided altogether.
	 * This is the case if the value of $ZTWORMHOLE passed in is identical to the value of
	 * $ZTWORMHOLE written for the immediately previous update stored in (global variable) jgbl.prev_ztworm_ptr
	 * across regions in the current TP transaction. In that case, return right away.
	 * For journal recovery, we skip this part since we want the ztwormhole record to be unconditionally written
	 * (because GT.M wrote it in the first place).
	 */
	is_ztworm_rec = (JNL_ZTWORM == opcode);
	if (is_ztworm_rec && !jgbl.forw_phase_recovery)
	{
		assert(REPL_ALLOWED(csa));	/* forward phase of recovery is already excluded by the enclosing "if" */
		assert(dollar_tlevel);
		assert(tstart_trigger_depth == gtm_trigger_depth);
		assert((NULL != val) && (NULL == key));
		assert(MV_IS_STRING(val));
		assert(FIXED_UPD_RECLEN == FIXED_ZTWORM_RECLEN);
		if (NULL != jgbl.prev_ztworm_ptr)
		{	/* prev_ztworm_ptr points at a length (jnl_str_len_t) immediately followed by the string bytes */
			cur_str = &val->str;
			prev_str.len = (*(jnl_str_len_t *)jgbl.prev_ztworm_ptr);
			prev_str.addr = (char *)(jgbl.prev_ztworm_ptr + SIZEOF(jnl_str_len_t));
			if ((prev_str.len == cur_str->len) && !memcmp(prev_str.addr, cur_str->addr, prev_str.len))
			{
				DEBUG_ONLY(dbg_in_jnl_format = FALSE;)
				return NULL;
			}
		}
	}
#	endif
	/* Allocate a jfb structure */
	if (!dollar_tlevel)
	{
		jfb = non_tp_jfb_ptr;	/* already malloced in gvcst_init() */
		jgbl.cumul_jnl_rec_len = 0;
		DEBUG_ONLY(jgbl.cumul_index = jgbl.cu_jnl_index = 0;)
	} else
	{
		si = sgm_info_ptr;	/* asserted below to belong to the current region's csa */
		assert(si->tp_csa == csa);
		assert((NULL != si->jnl_head) || (NULL == csa->next_fenced));
		assert((NULL == si->jnl_head) || (NULL != csa->next_fenced));
		assert((NULL == csa->next_fenced) || (JNL_FENCE_LIST_END == csa->next_fenced)
				|| (NULL != csa->next_fenced->sgm_info_ptr->jnl_head));
		jfb = (jnl_format_buffer *)get_new_element(si->jnl_list, 1);
		jfb->next = NULL;
		assert(NULL != si->jnl_tail);
		GTMTRIG_ONLY(SET_PREV_JFB(si, jfb->prev);)
		assert(NULL == *si->jnl_tail);
		/* Append the new jfb at the tail of this region's list */
		*si->jnl_tail = jfb;
		si->jnl_tail = &jfb->next;
		si->update_trans |= UPDTRNS_JNL_LOGICAL_MASK;	/* record that we are writing a logical jnl record in this region */
		if (!(nodeflags & JS_NOT_REPLICATED_MASK))
			si->update_trans |= UPDTRNS_JNL_REPLICATED_MASK;
	}
	ja = &(jfb->ja);
	ja->operation = opcode;
	ja->nodeflags = nodeflags;
	/* Proceed with formatting the journal record in the allocated jfb.
	 * Every record starts out with the fixed-size update prefix and the suffix; key and value sizes are added later.
	 */
	tmp_jrec_size = FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE;
	if (!jnl_fence_ctl.level && !dollar_tlevel)
	{	/* Non-TP */
		subcode = 0;
		assert(0 == jgbl.tp_ztp_jnl_upd_num);
	} else
	{
		if (NULL == csa->next_fenced)
		{	/* F (or T) : first fenced update in this region, so add the region to the fence list */
			assert((NULL != jnl_fence_ctl.fence_list) || (0 == jgbl.tp_ztp_jnl_upd_num));
			subcode = 1;
			csa->next_fenced = jnl_fence_ctl.fence_list;
			jnl_fence_ctl.fence_list = csa;
		} else	/* G (or U) */
		{	/* At least one call to "jnl_format" has occurred in this TP transaction already. We therefore
			 * expect jgbl.tp_ztp_jnl_upd_num to be non-zero at this point. The only exception is if "jnl_format"
			 * had been called just once before and that was for a ZTWORM type of record in which case it would be
			 * zero (both ZTWORM and following SET/KILL record will have the same update_num value of 1).
			 */
			assert(jgbl.tp_ztp_jnl_upd_num
				GTMTRIG_ONLY(|| ((jfb->prev == si->jnl_head) && (JRT_TZTWORM == jfb->prev->rectype))));
			subcode = 3;
		}
		if (dollar_tlevel)
			++subcode;	/* TP : F becomes T, G becomes U */
		assert(FIXED_UPD_RECLEN == FIXED_ZTWORM_RECLEN);
		if (!jgbl.forw_phase_recovery)
			jgbl.tp_ztp_jnl_upd_num++;
		/* In case of forward phase of journal recovery, this would have already been set to appropriate value.
		 * It is necessary to honor the incoming jgbl value for ZTP (since recovery could be playing records
		 * from the middle of a ZTP transaction because the rest are before the EPOCH), but for TP it is not
		 * necessary since all records are guaranteed to be AFTER the EPOCH so we can generate the numbers in
		 * this function too. But since we expect recovery to play the TP records in the exact order in which
		 * GT.M wrote them no point regenerating the same set of numbers again here. So we use incoming jgbl always.
		 */
	}
	assert(ARRAYSIZE(jnl_opcode) == JA_MAX_TYPES);
	assert(JA_MAX_TYPES > opcode);
	rectype = jnl_opcode[opcode][subcode];
	assert(IS_VALID_JRECTYPE(rectype));
	assert(IS_SET_KILL_ZKILL_ZTRIG_ZTWORM(rectype));
	GTMTRIG_ONLY(assert((JNL_ZTWORM != opcode) || (NULL == key));)
	GTMTRIG_ONLY(assert((JNL_ZTWORM == opcode) || (NULL != key));)
	/* Compute actual record length */
	if (NULL != key)
	{
		keystrlen = key->end;
		tmp_jrec_size += keystrlen + SIZEOF(jnl_str_len_t);
	}
	GTMTRIG_ONLY(assert((JNL_ZTWORM != opcode) || (NULL != val));)
	assert((JNL_SET != opcode) || (NULL != val));
	if (NULL != val)
	{
		valstrlen = val->str.len;
		tmp_jrec_size += valstrlen + SIZEOF(mstr_len_t);
	}
	jrec_size = ROUND_UP2(tmp_jrec_size, JNL_REC_START_BNDRY);
	align_fill_size = jrec_size - tmp_jrec_size;	/* For JNL_REC_START_BNDRY alignment */
	if (dollar_tlevel)
	{
		assert((1 << JFB_ELE_SIZE_IN_BITS) == JNL_REC_START_BNDRY);
		assert(JFB_ELE_SIZE == JNL_REC_START_BNDRY);
		jfb->buff = (char *)get_new_element(si->format_buff_list, jrec_size >> JFB_ELE_SIZE_IN_BITS);
		GTMCRYPT_ONLY(
			if (REPL_ALLOWED(csa))
				jfb->alt_buff = (char *)get_new_element(si->format_buff_list, jrec_size >> JFB_ELE_SIZE_IN_BITS);
		)
		/* assume an align record will be written while computing maximum jnl-rec size requirements */
		si->total_jnl_rec_size += (int)(jrec_size + MIN_ALIGN_RECLEN);
	}
	/* else if (!dollar_tlevel) jfb->buff/jfb->alt_buff already malloced in gvcst_init. */
	jfb->record_size = jrec_size;
	jgbl.cumul_jnl_rec_len += jfb->record_size;
	assert(0 == jgbl.cumul_jnl_rec_len % JNL_REC_START_BNDRY);
	DEBUG_ONLY(jgbl.cumul_index++;)
	jfb->rectype = rectype;
	/* PREFIX */
	rec = (jnl_record *)jfb->buff;
	rec->prefix.jrec_type = rectype;
	assert(!IS_SET_KILL_ZKILL_ZTRIG(rectype) || (JNL_MAX_SET_KILL_RECLEN(csd) >= jrec_size));
	GTMTRIG_ONLY(assert(!IS_ZTWORM(rectype) || (MAX_ZTWORM_JREC_LEN >= jrec_size));)
	rec->prefix.forwptr = jrec_size;
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
	rec->jrec_set_kill.update_num = jgbl.tp_ztp_jnl_upd_num;
	rec->jrec_set_kill.num_participants = 0;
	/* Variable part (key and/or value) starts right after the fixed-size update prefix */
	local_buffer = (char *)rec + FIXED_UPD_RECLEN;
	mumps_node_ptr = local_buffer;
	if (NULL != key)
	{
		((jnl_string *)local_buffer)->length = keystrlen;
		((jnl_string *)local_buffer)->nodeflags = nodeflags;
		local_buffer += SIZEOF(jnl_str_len_t);
		memcpy(local_buffer, (uchar_ptr_t)key->base, keystrlen);
		local_buffer += keystrlen;
	}
	if (NULL != val)
	{
		PUT_MSTR_LEN(local_buffer, valstrlen);	/* SET command's data may not be aligned */
		/* The below assert ensures that it is okay for us to increment by jnl_str_len_t (uint4)
		 * even though valstrlen (above) is of type mstr_len_t (int). This is because PUT_MSTR_LEN
		 * casts the input to (uint4*) before storing it in the destination pointer (in this case
		 * local_buffer)
		 */
		assert(SIZEOF(uint4) == SIZEOF(jnl_str_len_t));
		local_buffer += SIZEOF(jnl_str_len_t);
		memcpy(local_buffer, (uchar_ptr_t)val->str.addr, valstrlen);
		local_buffer += valstrlen;
	}
	if (0 != align_fill_size)
	{	/* Zero-fill the padding that rounds the record up to JNL_REC_START_BNDRY */
		memset(local_buffer, 0, align_fill_size);
		local_buffer += align_fill_size;
	}
	/* SUFFIX */
	((jrec_suffix *)local_buffer)->backptr = jrec_size;
	((jrec_suffix *)local_buffer)->suffix_code = JNL_REC_SUFFIX_CODE;
	/* Length of everything between the fixed prefix and the suffix (key + value + padding) */
	update_length = (jrec_size - (JREC_SUFFIX_SIZE + FIXED_UPD_RECLEN));
#	ifdef GTM_CRYPT
	assert(REPL_ALLOWED(csa) || !is_ztworm_rec || jgbl.forw_phase_recovery);
	if (csd->is_encrypted)
	{
		/* At this place we have all the components of the *SET or *KILL or *ZTWORM records filled.
		 * Before the variable part of the journal record gets encrypted, we make sure we copy the buff
		 * into alt_buff to be used in its original form in jnl_write.
		 */
		if (REPL_ALLOWED(csa))
		{
			memcpy(jfb->alt_buff, rec, jrec_size);
			/* Remember the unencrypted copy of the ZTWORMHOLE value (in alt_buff) for later comparison */
			SET_PREV_ZTWORM_JFB_IF_NEEDED(is_ztworm_rec, (jfb->alt_buff + FIXED_UPD_RECLEN));
		}
		/* With the fixed length computed above as FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE, we encode
		 * the remaining buffer which consists of the *SET or *KILL or *ZTWORM components.
		 */
		ASSERT_ENCRYPTION_INITIALIZED;
		GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, mumps_node_ptr, update_length, NULL, crypt_status);
		if (0 != crypt_status)
			GC_RTS_ERROR(crypt_status, gv_cur_region->dyn.addr->fname);
	} else
#	endif
	{
		SET_PREV_ZTWORM_JFB_IF_NEEDED(is_ztworm_rec, mumps_node_ptr);
	}
	/* The below call to compute_checksum makes sure that checksum computation happens AFTER the encryption (if turned on) */
	jfb->checksum = compute_checksum(INIT_CHECKSUM_SEED, (uint4 *)mumps_node_ptr, (int)(local_buffer - mumps_node_ptr));
	assert(0 == ((UINTPTR_T)local_buffer % SIZEOF(jrec_suffix)));
	DEBUG_ONLY(dbg_in_jnl_format = FALSE;)
	return jfb;
}
|