fis-gtm/sr_port/muprec.h

995 lines
41 KiB
C
Raw Permalink Normal View History

/****************************************************************
* *
* Copyright 2001, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#ifndef MUPREC_H_INCLUDED
#define MUPREC_H_INCLUDED
#include "muprecsp.h" /* non-portable interface prototype */
#include "jnl_typedef.h" /* for IS_VALID_JRECTYPE macro */
#define JNL_EXTR_LABEL "GDSJEX05" /* format of the simple journal extract */
#define JNL_DET_EXTR_LABEL "GDSJDX05" /* format of the detailed journal extract */
error_def(ERR_MUINFOSTR);
error_def(ERR_MUINFOUINT4);
error_def(ERR_MUINFOUINT8);
error_def(ERR_MUJNLSTAT);
#define EXTQW(I) \
{ \
ptr = &murgbl.extr_buff[extract_len]; \
ptr = (char *)i2ascl((uchar_ptr_t)ptr, I); \
extract_len += (int)(ptr - &murgbl.extr_buff[extract_len]); \
murgbl.extr_buff[extract_len++] = '\\'; \
}
#define EXTINT(I) \
{ \
ptr = &murgbl.extr_buff[extract_len]; \
ptr = (char *)i2asc((uchar_ptr_t)ptr, I); \
extract_len += (int)(ptr - &murgbl.extr_buff[extract_len]); \
murgbl.extr_buff[extract_len++] = '\\'; \
}
#define EXT_DET_COMMON_PREFIX(JCTL) \
{ \
extract_len = SPRINTF(murgbl.extr_buff, "0x%08x [0x%04x] :: ", \
JCTL->rec_offset, JCTL->reg_ctl->mur_desc->jreclen); \
assert(extract_len == STRLEN(murgbl.extr_buff)); \
}
#define EXT_DET_PREFIX(JCTL) \
{ \
EXT_DET_COMMON_PREFIX(JCTL); \
memcpy(murgbl.extr_buff + extract_len, jrt_label[rec->prefix.jrec_type], LAB_LEN); \
extract_len += LAB_LEN; \
memcpy(murgbl.extr_buff + extract_len, LAB_TERM, LAB_TERM_SZ); \
extract_len += LAB_TERM_SZ; \
}
#define EXTTXT(T,L) \
{ \
actual = real_len(L, (uchar_ptr_t)T); \
memcpy (&murgbl.extr_buff[extract_len], T, actual); \
extract_len += actual; \
murgbl.extr_buff[extract_len++] = '\\'; \
}
#define EXT2BYTES(T) \
{ \
murgbl.extr_buff[extract_len++] = *(caddr_t)(T); \
murgbl.extr_buff[extract_len++] = *((caddr_t)(T) + 1); \
murgbl.extr_buff[extract_len++] = '\\'; \
}
/* extract jnl record "rec" using extraction routine "extract" into file "file_info" (extract/lost/broken transaction files) */
#define EXTRACT_JNLREC(jctl, rec, extract, file_info, status) \
{ \
pini_list_struct *plst; \
\
status = mur_get_pini(jctl, (rec)->prefix.pini_addr, &plst); \
if (SS_NORMAL == status) \
(*extract)(jctl, (file_info), (rec), plst); \
}
#define EXTPID(plst) \
{ \
EXTINT(plst->jpv.jpv_pid); \
EXTINT(plst->origjpv.jpv_pid); \
}
#define JNL_PUT_MSG_PROGRESS(LIT) \
{ \
now_t now; /* for GET_CUR_TIME macro */ \
char *time_ptr, time_str[CTIME_BEFORE_NL + 2]; \
\
GET_CUR_TIME; \
gtm_putmsg(VARLSTCNT(6) ERR_MUJNLSTAT, 4, LEN_AND_LIT(LIT), CTIME_BEFORE_NL, time_ptr); \
}
#define JNL_SUCCESS_MSG(mur_options) \
{ \
if (mur_options.show) \
gtm_putmsg(VARLSTCNT(4) ERR_JNLSUCCESS, 2, LEN_AND_LIT(SHOW_STR)); \
if (mur_options.extr[GOOD_TN]) \
gtm_putmsg(VARLSTCNT(4) ERR_JNLSUCCESS, 2, LEN_AND_LIT(EXTRACT_STR)); \
if (mur_options.verify) \
gtm_putmsg(VARLSTCNT(4) ERR_JNLSUCCESS, 2, LEN_AND_LIT(VERIFY_STR)); \
if (mur_options.rollback) \
gtm_putmsg(VARLSTCNT(4) ERR_JNLSUCCESS, 2, LEN_AND_LIT(ROLLBACK_STR)); \
else if (mur_options.update) \
gtm_putmsg(VARLSTCNT(4) ERR_JNLSUCCESS, 2, LEN_AND_LIT(RECOVER_STR)); \
}
#define MUR_FIX_JCTL_BACK_POINTER_TO_RCTL(JCTL, NEW_RCTL, OLD_RCTL, CHK_PREV_GEN) \
{ \
assert(!CHK_PREV_GEN || (NULL == JCTL->prev_gen)); \
do \
{ \
assert(OLD_RCTL == JCTL->reg_ctl); \
JCTL->reg_ctl = NEW_RCTL; \
JCTL = JCTL->next_gen; \
} while (NULL != JCTL); \
}
#if defined(UNIX)
#define TIME_FORMAT_STRING "YYYY/MM/DD HH:MM:SS"
#define LENGTH_OF_TIME STR_LIT_LEN(TIME_FORMAT_STRING)
#define GET_TIME_STR(input_time, time_str) \
{ \
time_t short_time; \
struct tm *tsp; \
\
short_time = (time_t)input_time; \
tsp = localtime((const time_t *)&short_time); \
SPRINTF(time_str, "%04d/%02d/%02d %02d:%02d:%02d", \
(1900 + tsp->tm_year), (1 + tsp->tm_mon), tsp->tm_mday, tsp->tm_hour, tsp->tm_min, tsp->tm_sec); \
}
#define GET_LONG_TIME_STR(long_time, time_str, time_str_len) GET_TIME_STR(long_time, time_str)
#define REL2ABSTIME(deltatime, basetime, roundup) \
{ \
deltatime += basetime; \
}
#elif defined(VMS)
#define TIME_FORMAT_STRING "DD-MON-YYYY HH:MM:SS.CC"
#define LENGTH_OF_TIME STR_LIT_LEN(TIME_FORMAT_STRING)
#define GET_TIME_STR(input_time, time_str) \
{ \
jnl_proc_time long_time; \
\
assert(LENGTH_OF_TIME < SIZEOF(time_str)); \
JNL_WHOLE_FROM_SHORT_TIME(long_time, input_time); \
GET_LONG_TIME_STR(long_time, time_str, SIZEOF(time_str)); \
}
#define GET_LONG_TIME_STR(long_time, time_str, time_str_len) \
{ \
struct dsc$descriptor_s t_desc \
= { time_str_len, DSC$K_DTYPE_T, DSC$K_CLASS_S, time_str}; \
\
sys$asctim(0, &t_desc, &long_time, 0); \
/* ascii time string is returned in time_str in the format DD-MMM-YYYY HH:MM:SS.CC */ \
time_str[20] = '\0'; /* do not need hundredths of seconds field */ \
assert(20 < time_str_len); \
}
#define REL2ABSTIME(deltatime, basetime, roundup) \
deltatime = mur_rel2abstime(deltatime, basetime, roundup);
#endif
#define MUR_GET_IMAGE_COUNT(JCTL, REC, REC_IMAGE_COUNT, STATUS) \
{ \
pini_list_struct *plst; \
\
GBLREF jnl_gbls_t jgbl; \
\
UNIX_ONLY(assert(FALSE);) \
STATUS = mur_get_pini(JCTL, REC->prefix.pini_addr, &plst); \
/* In backward processing, it is possible we encounter corrupt journal records in \
* case the journal file had a crash and we have not yet reached the first epoch \
* (mur_fread_eof on the journal file at the start might not have caught it). In this \
* case, mur_back_process knows to restart the backward processing. Assert this though. \
*/ \
assert((SS_NORMAL == STATUS) || !jgbl.forw_phase_recovery); \
if (SS_NORMAL == STATUS) \
REC_IMAGE_COUNT = plst->jpv.jpv_image_count; \
}
/* Note that JRT_TRIPLE is NOT considered a valid rectype by this macro. This is because this macro is not used
* by the update process and receiver server, the only processes which see this journal record type. Anyone
* else that sees this record type (update process reader, mupip journal etc.) should treat this as an invalid record type.
*/
#define IS_VALID_RECTYPE(JREC) \
( \
IS_VALID_JRECTYPE((JREC)->prefix.jrec_type) && (JRT_TRIPLE != (JREC)->prefix.jrec_type) \
)
#define IS_VALID_LEN_FROM_PREFIX(JREC, JFH) \
( /* length within range */ \
(ROUND_DOWN2((JREC)->prefix.forwptr, JNL_REC_START_BNDRY) == (JREC)->prefix.forwptr) && \
(JREC)->prefix.forwptr > MIN_JNLREC_SIZE && \
(JREC)->prefix.forwptr <= (JFH)->max_jrec_len \
)
#define IS_VALID_LEN_FROM_SUFFIX(SUFFIX, JFH) \
( /* length within range */ \
(ROUND_DOWN2((SUFFIX)->backptr, JNL_REC_START_BNDRY) == (SUFFIX)->backptr) && \
(SUFFIX)->backptr > MIN_JNLREC_SIZE && \
(SUFFIX)->backptr <= (JFH)->max_jrec_len \
)
#define IS_VALID_LINKS(JREC) \
( \
(JREC)->prefix.forwptr == ((jrec_suffix *)((char *)(JREC) + (JREC)->prefix.forwptr - JREC_SUFFIX_SIZE))->backptr \
)
#define IS_VALID_SUFFIX(JREC) \
( /* our terminator */ \
JNL_REC_SUFFIX_CODE == ((jrec_suffix *)((char *)(JREC) + (JREC)->prefix.forwptr - JREC_SUFFIX_SIZE))->suffix_code \
)
#define IS_VALID_PREFIX(JREC, JFH) \
( \
IS_VALID_RECTYPE(JREC) && IS_VALID_LEN_FROM_PREFIX(JREC, JFH) \
)
#define IS_VALID_JNLREC(JREC, JFH) \
( \
IS_VALID_RECTYPE(JREC) && IS_VALID_LEN_FROM_PREFIX(JREC, JFH) && IS_VALID_LINKS(JREC) && IS_VALID_SUFFIX(JREC) \
)
/* The following macro detects abnormal status during forward phase of journal recovery.
* It considers JNLREADEOF (encountered during end of journal file) as a NORMAL status return.
*/
#define CHECK_IF_EOF_REACHED(RCTL, STATUS) \
{ \
if (ERR_JNLREADEOF == STATUS) \
{ \
assert(FALSE == RCTL->forw_eof_seen); \
RCTL->forw_eof_seen = TRUE; \
STATUS = SS_NORMAL; \
} \
}
#define SHOW_NONE 0
#define SHOW_HEADER 1
#define SHOW_STATISTICS 2
#define SHOW_BROKEN 4
#define SHOW_ALL_PROCESSES 8
#define SHOW_ACTIVE_PROCESSES 16
#define SHOW_ALL 31 /* All of the above */
#define TRANS_KILLS 1
#define TRANS_SETS 2
#define DEFAULT_EXTR_BUFSIZE (64 * 1024)
#define JNLEXTR_DELIMSIZE 256
#define MUR_MULTI_LIST_INIT_ALLOC 1024 /* initial allocation for mur_multi_list */
#define MUR_MULTI_HASHTABLE_INIT_ELEMS (16 * 1024) /* initial elements in the token table */
#define MUR_PINI_LIST_INIT_ELEMS 256 /* initial no. of elements in hash table jctl->pini_list */
#define DUMMY_FILE_ID "123456" /* needed only in VMS, but included here for lack of a better generic place */
#define SHOW_STR "Show"
#define RECOVER_STR "Recover"
#define ROLLBACK_STR "Rollback"
#define EXTRACT_STR "Extract"
#define VERIFY_STR "Verify"
#define DOT '.'
#define STR_JNLEXTR "Journal extract"
#define STR_BRKNEXTR "Broken transactions extract"
#define STR_LOSTEXTR "Lost transactions extract"
#define LONG_TIME_FORMAT 0
#define SHORT_TIME_FORMAT 1
enum mur_error
{
MUR_DUPTOKEN = 1,
MUR_PREVJNLNOEOF,
MUR_JNLBADRECFMT,
MUR_CHNGTPRSLVTM,
MUR_BOVTMGTEOVTM
};
enum mur_fence_type
{
FENCE_NONE,
FENCE_PROCESS,
FENCE_ALWAYS
};
enum rec_fence_type
{
NOFENCE = 0,
TPFENCE = 1,
ZTPFENCE = 2
};
enum broken_type
{
GOOD_TN = 0,
BROKEN_TN = 1,
LOST_TN = 2,
TOT_EXTR_TYPES = 3
};
typedef struct
{
boolean_t repl_standalone; /* If standalone access was acheived for the instance file */
boolean_t clean_exit;
boolean_t ok_to_update_db; /* FALSE for LOSTTNONLY type of rollback */
boolean_t intrpt_recovery;
int reg_total; /* total number of regions involved in actual mupip journal flow */
int reg_full_total; /* total including those regions that were opened but were discarded */
int regcnt_remaining; /* number of regions yet to be processed in forward phase of recovery */
int err_cnt;
int wrn_count;
int broken_cnt; /* Number of broken entries */
int max_extr_record_length; /* maximum size of zwr-format extracted journal record */
seq_num resync_seqno; /* is 0, if consistent rollback and no interrupted recovery */
seq_num stop_rlbk_seqno; /* Where fetch_resync/resync rollback stop to apply to database */
seq_num consist_jnl_seqno; /* Simulate replication server's sequence number */
/* the following 3 variables are stored in a global so "mur_forward_play_cur_jrec" function can see it as well */
seq_num losttn_seqno; /* losttn seqno passed from mupip_recover to mur_forward */
seq_num min_broken_seqno; /* min broken seqno passed from mupip_recover to mur_forward */
jnl_tm_t min_broken_time; /* min broken time passed from mupip_recover to mur_forward */
hash_table_int8 token_table; /* hashtable created during backward & used in forward phase of recovery */
hash_table_int8 forw_token_table; /* hashtable created and used only during forward phase of recovery */
buddy_list *multi_list;
buddy_list *forw_multi_list;
buddy_list *pini_buddy_list; /* Buddy list for pini_list */
char *extr_buff;
jnl_process_vector *prc_vec; /* for recover process */
void *file_info[TOT_EXTR_TYPES];/* for a pointer to a structure described in filestruct.h */
# ifdef UNIX
boolean_t was_rootprimary; /* Whether this instance was previously a root primary. Set by
* "gtmrecv_fetchresync" */
char remote_proto_ver; /* Protocol version of the source server with which a -FETCHRESYNC
* rollback communicates. Need to be "signed char" in order to be
* able to do signed comparisons of this with the macros
* REPL_PROTO_VER_DUALSITE (0) and REPL_PROTO_VER_UNINITIALIZED (-1)
*/
char filler_align_4[3];
# endif
boolean_t extr_file_create[TOT_EXTR_TYPES];
} mur_gbls_t;
typedef struct multi_element_struct
{
token_num token;
VMS_ONLY(int4 image_count;) /* Image activations */
DEBUG_ONLY(boolean_t this_is_broken;) /* set in mur_back_process, checked in mur_forward */
jnl_tm_t time;
uint4 regnum; /* Last partner (region) seen */
uint4 partner; /* # of unmatched regions involved in TP/ZTP */
uint4 tot_partner; /* Total # of regions originally involved in TP/ZTP */
enum rec_fence_type fence; /* NOFENCE or TPFENCE or ZTPFENCE */
struct multi_element_struct *next;
} multi_struct;
/* The following is the primary structure maintained in a hashtable whose purpose is to record all currently
* unresolved multi-region TP transactions (no ZTP or non-TP) during the course of forward journal recovery.
* Elements are added as each region's participating journal records are encountered in the forward phase.
* Elements are deleted when all participating region's records have been seen and the transaction is then played.
* A lot of the elements are similar to the "multi_struct" which is maintained during backward phase of recovery.
* There is one unique forw_multi_struct structure for each of the following
* Unix : <token,time>
* VMS : <token,time,image_count>
*/
typedef struct forw_multi_element_struct
{
union {
ht_ent_int8 *tabent; /* Pointer to the hashtable entry containing this forw_multi
* structure. Valid only if this structure is currently in use.
* Having this avoids us from doing a hashtable lookup inside
* mur_forward_play_multireg_tp (performance).
*/
que_ent free_que; /* Should be the first member in the structure in order to be
* able to use "free_element" and "get_new_free_element" functions
* of the buddy list interface. Valid only if this structure was
* previously in use but has since been freed.
*/
} u;
token_num token;
struct reg_ctl_list_struct *first_tp_rctl; /* linked list of regions which had TCOM written for this TP */
struct forw_multi_element_struct *next; /* if non-NULL, points to next in a linked list of structures
* corresponding to same token but with a different time */
multi_struct *multi;
jnl_tm_t time;
VMS_ONLY(int4 image_count;) /* Image activations */
enum broken_type recstat; /* GOOD_TN or BROKEN_TN or LOST_TN */
uint4 num_reg_total;
uint4 num_reg_seen_backward;
uint4 num_reg_seen_forward;
} forw_multi_struct;
typedef struct jnl_ctl_list_struct
{
trans_num turn_around_tn; /* Turn around point transaction number of EPOCH */
seq_num turn_around_seqno; /* Turn around point jnl_seqno of EPOCH */
unsigned char jnl_fn[JNL_NAME_SIZE]; /* Journal file name */
unsigned int jnl_fn_len; /* Length of journal fine name string */
jnl_file_header *jfh; /* journal file header */
jnl_tm_t lvrec_time; /* Last Valid Journal Record's Time Stamp */
off_jnl_t lvrec_off; /* Last Valid Journal Record's Offset */
off_jnl_t rec_offset; /* Last processed record's offset */
off_jnl_t os_filesize; /* OS file size in bytes */
off_jnl_t eof_addr; /* Offset of end of last valid record of the journal */
off_jnl_t apply_pblk_stop_offset; /* Offset where last PBLK was applied. Updated by both
* mur_apply_pblk() and mur_back_process()
*/
off_jnl_t turn_around_offset; /* Turn around point journal record's offset for each region */
jnl_tm_t turn_around_time; /* Turn around time for this region */
boolean_t properly_closed; /* TRUE if journal was properly closed, having written EOF;
FALSE otherwise */
boolean_t tail_analysis; /* true for mur_fread_eof */
boolean_t after_end_of_data; /* true for record offset more than end_of_data */
boolean_t read_only; /* TRUE if read_only for extract/show/verify */
int jnlrec_cnt[JRT_RECTYPES];/* Count of each type of record found in this journal */
int4 status; /* Last status of the last operation done on this journal */
uint4 status2; /* Last secondary status of the last operation done on
this journal */
fd_type channel;
#if defined(VMS)
struct FAB *fab;
#endif
gd_id fid;
hash_table_int4 pini_list; /* hash table of pini_addr to pid list */
struct reg_ctl_list_struct *reg_ctl; /* Back pointer to this region's reg_ctl_list */
struct jnl_ctl_list_struct *next_gen; /* next generation journal file */
struct jnl_ctl_list_struct *prev_gen; /* previous generation journal file */
# ifdef GTM_CRYPT
gtmcrypt_key_t encr_key_handle;
boolean_t is_same_hash_as_db; /* to indicate whether the db and the jnl file share
* the same encryption key. */
# endif
} jnl_ctl_list;
typedef struct
{
unsigned char *base; /* Pointer to the buffer base of this mur_buff_desc */
unsigned char *top; /* Pointer to the buffer top of this mur_buff_desc */
off_jnl_t blen; /* Length of the buffer till end of valid data */
off_jnl_t dskaddr; /* disk offset from which this buffer was read */
boolean_t read_in_progress;/* Asynchronous read requested and in progress */
#if defined(UNIX)
struct aiocb *aiocbp;
int rip_channel; /* channel that has the aio read (for this mur_buff_desc_t) in progress.
* valid only if "read_in_progress" field is TRUE.
* this is a copy of the active channel "jctl->channel" while issuing the AIO.
* in case the active channel "jctl->channel" changes later (due to switching
* to a different journal file) and we want to cancel the previously issued aio
* we cannot use jctl->channel but should use "rip_channel" for the cancel.
*/
#elif defined(VMS)
io_status_block_disk iosb;
short rip_channel; /* same meaning as the Unix field */
short filler; /* to ensure 4-byte alignment for this structure */
#endif
} mur_buff_desc_t;
typedef struct
{
int4 blocksize; /* This amount it reads from disk to memory */
unsigned char *alloc_base; /* Pointer to the buffers allocated. All 5 buffers allocated at once */
int4 alloc_len; /* Size of alloc_base buffer */
mur_buff_desc_t random_buff; /* For reading pini_rec which could be at a random offset before current record */
unsigned char *aux_buff1; /* For partial records for mur_next at the end of seq_buff[1] */
mur_buff_desc_t seq_buff[2]; /* Two buffers for double buffering */
mur_buff_desc_t aux_buff2; /* For partial records for mur_prev just previous of seq_buff[0] or for overflow */
int buff_index; /* Which one of the two seq_buff is in use */
mur_buff_desc_t *cur_buff; /* pointer to active mur_buff_desc_t */
mur_buff_desc_t *sec_buff; /* pointer to second mur_buff_desc_t for the double buffering*/
/* The following fields were formerly part of a separate mur_rab_t type structure but
* are now folded into one region-specific mur_read_desc_t type structure.
*/
jnl_record *jnlrec; /* points to last jnl record read in this region */
unsigned int jreclen; /* length of the last journal record read in this region */
} mur_read_desc_t;
typedef struct reg_ctl_list_struct
{
trans_num db_tn; /* database curr_tn when region is opened first by recover */
FILL8DCL(sgmnt_data_ptr_t, csd, 0); /* cs_data of this region */
struct gd_region_struct *gd; /* region info */
sgmnt_addrs *csa; /* cs_addrs of this region */
struct sgm_info_struct *sgm_info_ptr; /* sgm_info_ptr of this region */
file_control *db_ctl; /* To do dbfilop() */
jnl_ctl_list *jctl; /* Current generation journal file control info */
jnl_ctl_list *jctl_head; /* For forward recovery starting (earliest) generation
journal file to be processed. */
jnl_ctl_list *jctl_apply_pblk; /* Journal file where PBLK application last stopped.
* Updated by mur_apply_pblk() and mur_back_process()
*/
jnl_ctl_list *jctl_turn_around; /* final pass turn around point journal file */
jnl_ctl_list *jctl_alt_head; /* For backward recovery turn around point
journal file of interrupted recovery. */
hash_table_mname gvntab; /* Used for gv_target info for globals in mur_output_record() */
jnl_tm_t lvrec_time; /* Last Valid Journal Record's Time Stamp across all generations */
int jnl_state;
int repl_state;
int4 lookback_count;
boolean_t before_image; /* True if the database has before image journaling enabled */
boolean_t standalone; /* If standalone access was acheived for the region */
boolean_t recov_interrupted; /* A copy of csd->recov_interrupted before resetting it to TRUE */
boolean_t jfh_recov_interrupted; /* Whether latest generation journal file was created by recover */
int4 blks_to_upgrd_adjust; /* Delta to adjust turn around point's blks_to_upgrd counter with.
* This will include all bitmaps created in V4 format by gdsfilext */
uint4 trnarnd_free_blocks; /* Free_blocks counter stored in the turnaround point epoch record */
uint4 trnarnd_total_blks; /* Total_blks counter stored in the turnaround point epoch record */
struct pini_list *mur_plst; /* pini_addr hash-table entry of currently simulating GT.M process
* for this region (used only if jgbl.forw_phase_recovery) */
mur_read_desc_t *mur_desc; /* Region specific structure storing last mur_read_file* context.
* It is a pointer to a structure (instead of the structure itself)
* as otherwise when we swap reg_ctl_list structures in mur_sort_files
* while asynchronous reads are active, the iosb buffer pointers could
* get mixed up amongst regions and cause hangs in mur_fread_wait.
*/
boolean_t db_updated; /* whether this region's database has been updated as part of this recovery */
boolean_t forw_eof_seen; /* whether JNLREADEOF was encountered in forward phase */
/* Below are region-specific variables used in the functions "mur_forward" and "mur_forward_next". They store
* region-specific context needed while going back and forth between "mur_forward" and "mur_forward_next".
*/
boolean_t process_losttn; /* whether this region has started losttn processing */
trans_num last_tn; /* tn of last applied record in this region (to compare with next record's tn) */
struct reg_ctl_list_struct *next_rctl; /* Next region that has records to be processed (used only in mur_forward).
* Initially, all journaled regions are in this circular linked list.
* As soon as there are no more journal records in a region to be
* processed in the forward phase, this region is removed from the list.
* Minimizes the time spent in mur_forward hopping around the remaining
* regions resolving multi-region tokens.
*/
struct reg_ctl_list_struct *prev_rctl; /* Prev region that has records to be processed (counterpart of next_rctl) */
struct reg_ctl_list_struct *next_tp_rctl; /* Next in a linked list of regions participating in this TP transaction */
struct reg_ctl_list_struct *prev_tp_rctl; /* Prev in a linked list of regions participating in this TP transaction */
forw_multi_struct *forw_multi; /* If non-NULL, this is a pointer to the structure containing all information
* related to the TP transaction that is currently being processed. */
boolean_t initialized; /* Set to TRUE only after journaling and replication state information has
* been copied over from csd into rctl. This way mur_close_files knows if
* it is safe to use the rctl values to copy them back to csd. Previously,
* an interrupt in mur_open_files before the journaling and/or replication
* fields in rctl got initialized took us to mur_close_files which
* unconditionally used those to restore the corresponding csd fields
* resulting in journaling/replication getting incorrectly turned OFF. */
# ifdef DEBUG
boolean_t deleted_from_unprocessed_list;
jnl_ctl_list *last_processed_jctl;
uint4 last_processed_rec_offset;
# endif
} reg_ctl_list;
typedef struct redirect_list_struct
{
struct redirect_list_struct
*next;
unsigned int org_name_len,
new_name_len;
char *org_name,
*new_name;
} redirect_list;
typedef struct select_list_struct
{
struct select_list_struct
*next;
char *buff;
short len;
bool exclude;
boolean_t has_wildcard;
} select_list;
typedef struct long_list_struct
{
struct long_list_struct *next;
uint4 num;
bool exclude;
} long_list;
typedef struct
{
jnl_proc_time lookback_time,
before_time,
since_time,
after_time;
enum mur_fence_type fences;
int4 error_limit,
fetchresync_port;
int show,
lookback_opers;
boolean_t forward,
update,
rollback,
rollback_losttnonly,
verify,
before_time_specified,
since_time_specified,
resync_specified,
lookback_time_specified,
lookback_opers_specified,
interactive,
selection,
apply_after_image,
chain,
notncheck,
verbose,
log,
detail,
extract_full,
show_head_only,
extr[TOT_EXTR_TYPES];
char transaction;
redirect_list *redirect;
select_list *user,
*database, /* UNIX only? */
*global,
*process;
long_list *id;
char *extr_fn[TOT_EXTR_TYPES];
int extr_fn_len[TOT_EXTR_TYPES];
} mur_opt_struct;
/* This macro is invoked whenever all records of a region have been processed. It deletes the current region
* from the list of unprocessed regions thereby removing this from the list of regions examined whenever a
* future TP journal record spanning multiple regions needs to be resolved.
*/
#define DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl) \
{ \
GBLREF reg_ctl_list *rctl_start; \
\
reg_ctl_list *p_rctl, *n_rctl; \
\
assert(0 < murgbl.regcnt_remaining); \
assert(!rctl->deleted_from_unprocessed_list); \
p_rctl = rctl->prev_rctl; \
n_rctl = rctl->next_rctl; \
assert((NULL != p_rctl) || (NULL == n_rctl)); \
assert((NULL == p_rctl) || (NULL != n_rctl)); \
assert(p_rctl != rctl); \
assert(n_rctl != rctl); \
if (NULL != n_rctl) \
{ \
if (p_rctl != n_rctl) \
{ \
p_rctl->next_rctl = n_rctl; \
n_rctl->prev_rctl = p_rctl; \
} else \
{ \
p_rctl->next_rctl = NULL; \
p_rctl->prev_rctl = NULL; \
} \
} \
DEBUG_ONLY(rctl->deleted_from_unprocessed_list = TRUE;) \
murgbl.regcnt_remaining--; \
rctl_start = n_rctl; \
}
#define MUR_CHANGE_REG(rctl) \
{ \
GBLREF gd_region *gv_cur_region; \
GBLREF sgmnt_data_ptr_t cs_data; \
GBLREF sgmnt_addrs *cs_addrs; \
GBLREF sgm_info *sgm_info_ptr; \
GBLREF gv_namehead *gv_target; \
GBLREF uint4 dollar_tlevel; \
\
sgmnt_addrs *csa; \
gd_region *reg; \
\
reg = rctl->gd; \
if (gv_cur_region != reg) \
{ \
gv_cur_region = reg; \
cs_addrs = csa = rctl->csa; \
cs_data = rctl->csd; \
sgm_info_ptr = dollar_tlevel ? rctl->sgm_info_ptr : NULL; \
/* Keep gv_target and gv_cur_region in sync always. \
* Now that region has switched, set gv_target to NULL \
* Or else asserts that check the in-syncness (e.g. op_tstart) fail. \
*/ \
gv_target = NULL; \
} \
assert(gv_cur_region == rctl->gd); \
assert(cs_addrs == rctl->csa); \
assert(cs_data == rctl->csd); \
assert(!dollar_tlevel || (sgm_info_ptr == rctl->sgm_info_ptr)); \
assert(dollar_tlevel || (NULL == sgm_info_ptr)); \
}
#define SET_THIS_TN_AS_BROKEN(multi, reg_total) \
{ \
multi->partner = reg_total; \
multi->tot_partner = reg_total + 1; \
/* Set a debug-only flag indicating this "multi" structure never be \
* treated as a GOOD_TN in forward processing. This will be checked there. \
*/ \
DEBUG_ONLY(multi->this_is_broken = TRUE;) \
}
/* This macro is used in forward processing. A record can be broken only if its time is > minimum broken time determined
* in backward processing or its seqno is > minimum broken seqno determine in backward processing.
* The below checks are done in order to avoid hash table lookup (performance), when it is not needed.
*/
#define IS_REC_POSSIBLY_BROKEN(REC_TIME, REC_TOKEN_SEQ) ((!mur_options.rollback && (REC_TIME >= murgbl.min_broken_time)) \
|| (mur_options.rollback && (REC_TOKEN_SEQ >= murgbl.min_broken_seqno)))
#define MUR_TOKEN_ADD(multi, rec_token, rec_image_count, rec_tok_time, rec_partner, rec_fence, rec_regno) \
{ \
ht_ent_int8 *tabent; \
uint4 partner_cnt; \
\
multi = (multi_struct *)get_new_element(murgbl.multi_list, 1); \
multi->token = rec_token; \
VMS_ONLY(multi->image_count = rec_image_count;) \
multi->time = rec_tok_time; \
partner_cnt = rec_partner; \
assert(0 < (int4)partner_cnt); \
multi->tot_partner = partner_cnt; \
DEBUG_ONLY(multi->this_is_broken = FALSE;) \
partner_cnt--; \
multi->partner = partner_cnt; \
multi->fence = rec_fence; \
multi->regnum = rec_regno; \
multi->next = NULL; \
if (!add_hashtab_int8(&murgbl.token_table, &multi->token, multi, &tabent)) \
{ \
assert(NULL != tabent->value); \
multi->next = (multi_struct *)tabent->value; \
tabent->value = (char *)multi; \
} \
if (partner_cnt) \
murgbl.broken_cnt = murgbl.broken_cnt + 1; \
}
/* If any one region that has started losttn processing gets added to the forw_multi region list, the entire
* transaction (including across all the other regions) should be treated as a LOST transaction even if
* other regions have not started losttn processing (i.e. have not seen any broken transaction yet). Not doing
* so will cause only a portion of the transaction to be played which breaks the atomicity ACID property of TP.
*/
#define MUR_FORW_MULTI_RECSTAT_UPDATE_IF_NEEDED(FORW_MULTI, RCTL) \
{ \
assert(RCTL->forw_multi == FORW_MULTI); \
if (RCTL->process_losttn && (GOOD_TN == FORW_MULTI->recstat)) \
FORW_MULTI->recstat = LOST_TN; \
}
#define MUR_FORW_TOKEN_ADD(FORW_MULTI, REC_TOKEN, REC_TIME, RCTL, REG_TOTAL, RECSTAT, MULTI, REC_IMAGE_COUNT) \
{ \
ht_ent_int8 *tabent; \
\
FORW_MULTI = (forw_multi_struct *)get_new_free_element(murgbl.forw_multi_list); \
FORW_MULTI->token = REC_TOKEN; \
FORW_MULTI->first_tp_rctl = RCTL; \
RCTL->next_tp_rctl = RCTL; \
RCTL->prev_tp_rctl = RCTL; \
FORW_MULTI->multi = MULTI; \
UNIX_ONLY(assert(0 == REC_IMAGE_COUNT);) \
VMS_ONLY(FORW_MULTI->image_count = REC_IMAGE_COUNT;) \
FORW_MULTI->time = REC_TIME; \
FORW_MULTI->recstat = RECSTAT; \
FORW_MULTI->num_reg_total = REG_TOTAL; \
FORW_MULTI->num_reg_seen_forward = 1; \
/* If tn is NOT broken, we would have seen all the regions in backward processing. \
* If tn is broken, then we would have a non-null multi use that to find out how many regions \
* were unresolved and accordingly determine how many regions were seen in backward processing. \
*/ \
assert((GOOD_TN == RECSTAT) || (BROKEN_TN == RECSTAT)); \
if (GOOD_TN == RECSTAT) \
FORW_MULTI->num_reg_seen_backward = REG_TOTAL; \
else \
{ \
assert(NULL != MULTI); \
assert(0 < MULTI->partner); \
FORW_MULTI->num_reg_seen_backward = MULTI->tot_partner - MULTI->partner; \
} \
if (!add_hashtab_int8(&murgbl.forw_token_table, &FORW_MULTI->token, FORW_MULTI, &tabent)) \
{ /* More than one TP transaction has the same token. This is possible in case of \
* non-replication but we expect the rec_time to be different between the colliding \
* transactions. In replication, we use jnl_seqno which should be unique. Assert that. \
*/ \
assert(!mur_options.rollback); \
assert(NULL != tabent->value); \
FORW_MULTI->next = (forw_multi_struct *)tabent->value; \
tabent->value = (char *)FORW_MULTI; \
} else \
FORW_MULTI->next = NULL; \
assert(NULL != tabent); \
FORW_MULTI->u.tabent = tabent; \
RCTL->forw_multi = FORW_MULTI; \
MUR_FORW_MULTI_RECSTAT_UPDATE_IF_NEEDED(FORW_MULTI, RCTL); \
}
#define MUR_FORW_TOKEN_ONE_MORE_REG(FORW_MULTI, RCTL) \
{ \
reg_ctl_list *start_rctl, *tmp_rctl; \
\
start_rctl = FORW_MULTI->first_tp_rctl; \
assert(NULL != start_rctl); \
assert(RCTL != start_rctl); \
tmp_rctl = start_rctl->prev_tp_rctl; \
assert(NULL != tmp_rctl); \
assert(tmp_rctl->next_tp_rctl == start_rctl); \
start_rctl->prev_tp_rctl = RCTL; \
RCTL->next_tp_rctl = start_rctl; \
tmp_rctl->next_tp_rctl = RCTL; \
RCTL->prev_tp_rctl = tmp_rctl; \
assert(FORW_MULTI->num_reg_seen_forward < FORW_MULTI->num_reg_seen_backward); \
FORW_MULTI->num_reg_seen_forward++; \
RCTL->forw_multi = FORW_MULTI; \
MUR_FORW_MULTI_RECSTAT_UPDATE_IF_NEEDED(FORW_MULTI, RCTL); \
}
#define MUR_FORW_TOKEN_LOOKUP(FORW_MULTI, REC_TOKEN, REC_TIME, REC_IMAGE_COUNT) \
{ \
ht_ent_int8 *tabent; \
\
if (NULL != (tabent = lookup_hashtab_int8(&murgbl.forw_token_table, (gtm_uint64_t *)&REC_TOKEN))) \
{ \
if (mur_options.rollback) \
{ \
assert(0 == REC_IMAGE_COUNT); \
assert(NULL != ((forw_multi_struct *)tabent->value)); \
assert(NULL == ((forw_multi_struct *)tabent->value)->next); \
FORW_MULTI = (forw_multi_struct *)tabent->value; \
FORW_MULTI->u.tabent = tabent; \
} else \
{ \
for (FORW_MULTI = (forw_multi_struct *)tabent->value; NULL != FORW_MULTI; \
FORW_MULTI = (forw_multi_struct *)FORW_MULTI->next) \
{ \
if ((FORW_MULTI->time == REC_TIME) VMS_ONLY(&& (FORW_MULTI->image_count == REC_IMAGE_COUNT))) \
{ \
FORW_MULTI->u.tabent = tabent; \
break; \
} \
} \
} \
assert((NULL == FORW_MULTI) || (FORW_MULTI->time == REC_TIME) && (FORW_MULTI->token == REC_TOKEN)); \
} else \
FORW_MULTI = NULL; \
}
#define MUR_FORW_TOKEN_REMOVE(RCTL) \
{ \
reg_ctl_list *n_rctl, *p_rctl; \
\
n_rctl = RCTL->next_tp_rctl; \
p_rctl = RCTL->prev_tp_rctl; \
assert(NULL != p_rctl); \
assert(NULL != n_rctl); \
assert((RCTL != p_rctl) || (RCTL == n_rctl)); \
if (RCTL != p_rctl) \
{ \
assert(RCTL == p_rctl->next_tp_rctl); \
p_rctl->next_tp_rctl = n_rctl; \
assert(RCTL == n_rctl->prev_tp_rctl); \
n_rctl->prev_tp_rctl = p_rctl; \
} \
RCTL->forw_multi = NULL; \
}
#define MUR_INCTN_BLKS_TO_UPGRD_ADJUST(rctl) \
{ \
inctn_opcode_t opcode; \
struct_jrec_inctn *inctn_rec; \
\
inctn_rec = &rctl->mur_desc->jnlrec->jrec_inctn; \
opcode = (inctn_opcode_t)inctn_rec->detail.blks2upgrd_struct.opcode; \
if ((inctn_gdsfilext_gtm == opcode) || (inctn_gdsfilext_mu_reorg == opcode)) \
{ /* Note down the number of bitmaps that were created during this file extension \
* in V4 format. At the turn around point, blks_to_upgrd counter has to be \
* increased by this amount to reflect the current state of the new bitmaps. \
*/ \
(rctl)->blks_to_upgrd_adjust += (inctn_rec)->detail.blks2upgrd_struct.blks_to_upgrd_delta; \
} \
}
#define MUR_WITHIN_ERROR_LIMIT(err_cnt, error_limit) ((++err_cnt <= error_limit) || (mur_options.interactive && mur_interactive()))
#if defined(UNIX)
#define MUR_TOKEN_LOOKUP(token, image_count, rec_time, fence) mur_token_lookup(token, rec_time, fence)
#elif defined(VMS)
#define MUR_TOKEN_LOOKUP(token, image_count, rec_time, fence) mur_token_lookup(token, image_count, rec_time, fence)
#endif
#define PRINT_VERBOSE_STAT(JCTL, MODULE) \
{ \
GBLREF jnl_gbls_t jgbl; \
\
if (mur_options.verbose) \
{ \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOSTR, 4, LEN_AND_LIT("Module"), \
LEN_AND_LIT(MODULE)); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOSTR, 4, LEN_AND_LIT(" Journal file"), \
JCTL->jnl_fn_len, JCTL->jnl_fn); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Record Offset"), \
JCTL->rec_offset, JCTL->rec_offset); \
if (!jgbl.forw_phase_recovery) \
{ \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Turn around Offset"), \
JCTL->turn_around_offset, JCTL->turn_around_offset); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Turn around timestamp"), \
JCTL->turn_around_time, JCTL->turn_around_time); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT8, 4, LEN_AND_LIT(" Turn around transaction"), \
&JCTL->turn_around_tn, &JCTL->turn_around_tn); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT8, 4, LEN_AND_LIT(" Turn around seqno"), \
&JCTL->turn_around_seqno, &JCTL->turn_around_seqno); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Tp_resolve_time"), \
jgbl.mur_tp_resolve_time, jgbl.mur_tp_resolve_time); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Token total"), \
murgbl.token_table.count, murgbl.token_table.count); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT(" Token broken"), \
murgbl.broken_cnt, murgbl.broken_cnt); \
} \
} \
}
#define PRINT_VERBOSE_TAIL_BAD(JCTL) \
{ \
if (mur_options.verbose) \
{ \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOSTR, 4, \
LEN_AND_LIT("Tail analysis found bad record for journal file"), \
JCTL->jnl_fn_len, JCTL->jnl_fn); \
gtm_putmsg(VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT("Record Offset"), \
JCTL->rec_offset, JCTL->rec_offset); \
} \
}
/* Prototypes */
void jnlext_write(fi_type *file_info, char *buffer, int length);
uint4 mur_apply_pblk(boolean_t apply_intrpt_pblk);
boolean_t mur_back_process(boolean_t apply_pblk, seq_num *pre_resolve_seqno);
uint4 mur_back_processing(jnl_ctl_list **jjctl, boolean_t apply_pblk, seq_num *pre_resolve_seqno,
jnl_tm_t alt_tp_resolve_time);
uint4 mur_block_count_correct(reg_ctl_list *rctl);
int4 mur_blocks_free(reg_ctl_list *rctl);
void mur_close_files(void);
void mur_close_file_extfmt(void);
int4 mur_cre_file_extfmt(jnl_ctl_list *jctl, int recstat);
boolean_t mur_do_wildcard(char *jnl_str, char *pat_str, int jnl_len, int pat_len);
uint4 mur_forward(jnl_tm_t min_broken_time, seq_num min_broken_seqno, seq_num losttn_seqno);
uint4 mur_forward_play_cur_jrec(reg_ctl_list *rctl);
#ifdef GTM_TRIGGER
uint4 mur_forward_play_multireg_tp(forw_multi_struct *forw_multi, reg_ctl_list *rctl);
#endif
boolean_t mur_fopen_sp(jnl_ctl_list *jctl);
boolean_t mur_fopen(jnl_ctl_list *jctl);
boolean_t mur_fclose(jnl_ctl_list *jctl);
void mur_get_options(void);
uint4 mur_get_pini(jnl_ctl_list *jctl, off_jnl_t pini_addr, pini_list_struct **pplst);
void mur_init(void);
void mur_free(void);
void mur_rctl_desc_alloc(reg_ctl_list *rctl);
void mur_rctl_desc_free(reg_ctl_list *rctl);
boolean_t mur_insert_prev(jnl_ctl_list **jjctl);
boolean_t mur_interactive(void);
boolean_t mur_jctl_from_next_gen(void);
void mur_multi_rehash(void);
uint4 mur_next(jnl_ctl_list *jctl, off_jnl_t dskaddr);
uint4 mur_next_rec(jnl_ctl_list **jjctl);
boolean_t mur_open_files(void);
uint4 mur_output_pblk(reg_ctl_list *rctl);
uint4 mur_output_record(reg_ctl_list *rctl);
void mur_output_show(void);
void mur_pini_addr_reset(sgmnt_addrs *csa);
uint4 mur_pini_state(jnl_ctl_list *jctl, uint4 pini_addr, int state);
uint4 mur_prev(jnl_ctl_list *jctl, off_jnl_t dskaddr);
uint4 mur_prev_rec(jnl_ctl_list **jjctl);
uint4 mur_process_intrpt_recov(void);
void mur_process_seqno_table(seq_num *min_broken_seqno, seq_num *losttn_seqno);
void mur_process_timequal(jnl_tm_t max_lvrec_time, jnl_tm_t min_bov_time);
jnl_tm_t mur_process_token_table(boolean_t *ztp_broken);
void mur_put_aimg_rec(jnl_record *rec);
uint4 mur_read(jnl_ctl_list *jctl);
void mur_rem_jctls(reg_ctl_list *rctl);
boolean_t mur_report_error(jnl_ctl_list *jctl, enum mur_error code);
#if defined(UNIX)
multi_struct *mur_token_lookup(token_num token, off_jnl_t rec_time, enum rec_fence_type fence);
int gtmrecv_fetchresync(int port, seq_num *resync_seqno, seq_num max_reg_seqno);
#elif defined(VMS)
multi_struct *mur_token_lookup(token_num token, int4 image_count, off_jnl_t rec_time, enum rec_fence_type fence);
int gtmrecv_fetchresync(int port, seq_num *resync_seqno);
#endif
void mur_tp_resolve_time(jnl_tm_t max_lvrec_time);
void mur_show_header(jnl_ctl_list *jctl);
boolean_t mur_select_rec(jnl_ctl_list *jctl);
void mur_sort_files(void);
boolean_t mur_ztp_lookback(void);
int format_time(jnl_proc_time proc_time, char *string, int string_len, int time_format);
#endif /* MUPREC_H_INCLUDED */