/****************************************************************
 *								*
 *	Copyright 2009 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#ifdef GTM_FD_TRACE
|
|
|
|
#include "gtm_stat.h"
|
|
#include "gtm_string.h"
|
|
|
|
#include "gdsroot.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "filestruct.h"
|
|
#include "gdscc.h"
|
|
#include "jnl.h"
|
|
#include "eintr_wrappers.h"
|
|
#include "dpgbldir.h"
|
|
#include "gtm_dbjnl_dupfd_check.h"
|
|
#include "error.h"
|
|
#include "send_msg.h"
|
|
#include "is_file_identical.h"
|
|
|
|
#define MAX_FD_FOR_FASTCHECK 256
|
|
|
|
GBLDEF gd_region *dupfd_check_reg; /* for debugging purposes */
|
|
GBLDEF int dupfd_check_fd; /* for debugging purposes */
|
|
GBLDEF fdinfo_t *dupfd_check_openfdarray; /* for debugging purposes */
|
|
|
|
/* Reset the journal file descriptor state of region REG after it has been found to be corrupt.
 * Before fixing anything, take a core dump (gtm_fork_n_core) and send a GVFAILCORE syslog message
 * to ensure the incident gets analyzed. Debug builds stop at the assert(FALSE).
 *
 * Note that the descriptor itself is NOT closed here; only the journal bookkeeping is reset.
 *
 * The body is wrapped in do { } while (0) so that "FIX_CORRUPT_JNLFD(reg);" behaves as one single
 * statement wherever it is used (including an unbraced if/else).
 */
#define FIX_CORRUPT_JNLFD(REG)										\
do													\
{													\
	jnl_private_control	*jpc;									\
													\
	error_def(ERR_GVFAILCORE);									\
													\
	assert(FALSE);											\
	gtm_fork_n_core();										\
	send_msg(VARLSTCNT(1) ERR_GVFAILCORE);								\
	jpc = FILE_INFO(REG)->s_addrs.jnl;								\
	jpc->channel = NOJNL;		/* forget the fd; it is not closed */				\
	jpc->cycle--;			/* NOTE(review): presumably forces a journal reopen - confirm */	\
	jpc->pini_addr = 0;										\
} while (0)
|
|
|
|
/* Verify (using fstat) that "fd" still refers to the file that region "reg" expects it to.
 *
 *	reg	- region owning the descriptor; asserted to be open
 *	is_db	- TRUE if "fd" is the region's database fd, FALSE if it is the region's journal fd
 *	fd	- the file descriptor to verify
 *
 * Returns TRUE if the descriptor checks out. For a journal fd, also returns TRUE when the mismatch
 * coincides with JNL_FILE_SWITCHED being true (i.e. could be a concurrent journal switch).
 * Returns FALSE after the region's journal fd state has been reset through FIX_CORRUPT_JNLFD.
 * GTMASSERTs if the fstat fails or if a database fd does not correspond back to the database file.
 */
boolean_t gtm_check_fd_is_valid(gd_region *reg, boolean_t is_db, int fd)
{
	int		status;
	sgmnt_addrs	*jnl_csa;
	struct stat	fd_stat;

	FSTAT_FILE(fd, &fd_stat, status);
	if (-1 == status)
		GTMASSERT;
	assert(reg->open);
	if (is_db)
	{	/* A db fd that does not correspond back to its own database file cannot be recovered from */
		if (!is_gdid_stat_identical(&FILE_ID(reg), &fd_stat))
			GTMASSERT;
		return TRUE;
	}
	jnl_csa = &FILE_INFO(reg)->s_addrs;
	/* The fd is fine if it points back to the journal file. If it does not, that could still be due to a
	 * concurrent journal switch, so give it the benefit of the doubt in that case.
	 */
	if (is_gdid_stat_identical(JNL_GDID_PTR(jnl_csa), &fd_stat) || JNL_FILE_SWITCHED(jnl_csa->jnl))
		return TRUE;
	/* Neither explanation holds: the journal file fd is corrupt. Fix it. */
	FIX_CORRUPT_JNLFD(reg);
	return FALSE;
}
|
|
|
|
/* Check one db or jnl file descriptor of a region against the table of descriptors seen so far.
 *
 *	reg		- region that owns "fd"
 *	open_fdarray	- table indexed by fd (valid for fds below MAX_FD_FOR_FASTCHECK) recording which
 *			  region, and which kind of file, has already claimed each descriptor
 *	fd		- the file descriptor to check; a negative value results in a GTMASSERT
 *	is_db		- TRUE if "fd" is the region's database fd, FALSE if it is its journal fd
 *
 * For fds inside the fast-check range, the first claimant of a slot is recorded without an fstat
 * (debug builds verify it anyway). A second claimant of the same slot is a duplicate: two databases
 * sharing a fd is fatal, while a db/jnl collision is resolved in favor of the database provided the fd
 * really does correspond to the database file. Fds outside the range are verified with fstat directly.
 */
void gtm_dupfd_check_specific(gd_region *reg, fdinfo_t *open_fdarray, int fd, boolean_t is_db)
{
	fdinfo_t	*slot;
	gd_region	*owner_of_db, *owner_of_jnl;
	int		status;
	struct stat	fd_stat;

	/* Save the arguments in globals so they are easy to find in the core should we GTMASSERT below */
	dupfd_check_fd = fd;
	dupfd_check_reg = reg;
	dupfd_check_openfdarray = open_fdarray;
	if (0 > fd)
		GTMASSERT;
	if (MAX_FD_FOR_FASTCHECK <= fd)
	{	/* Outside the fast-check range there is no table slot, so the only option is the heavyweight
		 * fstat-based validation of the fd.
		 */
		gtm_check_fd_is_valid(reg, is_db, fd);
		return;
	}
	slot = &open_fdarray[fd];
	/* The first fd to fill a slot is assumed valid and the fstat is skipped. Debug builds run the
	 * validation regardless so that the code gets exercised.
	 */
	assert((NULL != slot->reg) || gtm_check_fd_is_valid(reg, is_db, fd));
	if (NULL != slot->reg)
	{	/* Slot already taken: this fd is a duplicate */
		if (is_db && slot->is_db)
			GTMASSERT;	/* Two databases have the SAME fd. Nothing can be done to recover. */
		/* One region's database fd collides with another region's journal fd. If the fd really is the
		 * database, the journal side is what got corrupted and its fd state can be reset. If the fd is
		 * not the database, there is no way to recover.
		 */
		FSTAT_FILE(fd, &fd_stat, status);
		if (-1 == status)
			GTMASSERT;
		owner_of_db = is_db ? reg : slot->reg;
		owner_of_jnl = is_db ? slot->reg : reg;
		if (!is_gdid_stat_identical(&FILE_ID(owner_of_db), &fd_stat))
			GTMASSERT;	/* fd does not correspond back to the db file, so the db fd is corrupt */
		/* fd does belong to the database, hence it is the jnl structure that is corrupt; fix that */
		FIX_CORRUPT_JNLFD(owner_of_jnl);
		if (!is_db)
			return;		/* The slot already describes the database correctly; leave it untouched */
	}
	slot->reg = reg;
	slot->is_db = is_db;
}
|
|
|
|
/* This routine is a debugging tool written to detect the symptom of D9I11-002714 before any damage to the database occurs.
|
|
* It checks all open db and jnl file descriptors and identifies any duplicates and if so creates a core file for analysis.
|
|
*/
|
|
void gtm_dbjnl_dupfd_check(void)
|
|
{
|
|
fdinfo_t open_fdarray[MAX_FD_FOR_FASTCHECK];
|
|
gd_addr *addr_ptr;
|
|
gd_region *r_top, *reg;
|
|
gd_segment *seg;
|
|
int fd;
|
|
sgmnt_addrs *csa;
|
|
unix_db_info *udi;
|
|
|
|
memset(open_fdarray, 0, SIZEOF(open_fdarray));
|
|
for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
|
|
{
|
|
for (reg = addr_ptr->regions, r_top = reg + addr_ptr->n_regions; reg < r_top; reg++)
|
|
{
|
|
seg = reg->dyn.addr;
|
|
if ((dba_bg != seg->acc_meth) && (dba_mm != seg->acc_meth))
|
|
continue;
|
|
if (!reg->open || reg->was_open)
|
|
continue;
|
|
udi = FILE_INFO(reg);
|
|
/* Check DB first */
|
|
fd = udi->fd;
|
|
gtm_dupfd_check_specific(reg, open_fdarray, fd, TRUE);
|
|
/* Check JNL next */
|
|
csa = &udi->s_addrs;
|
|
if (JNL_ALLOWED(csa))
|
|
{
|
|
fd = csa->jnl->channel;
|
|
if (NOJNL != fd)
|
|
gtm_dupfd_check_specific(reg, open_fdarray, fd, FALSE);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|