fis-gtm/sr_port/jnl_write_attempt.c

/****************************************************************
 *								*
 *	Copyright 2001, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "gdsbgtr.h"
#include "filestruct.h"
#include "iosp.h"
#include "jnl.h"
#include "lockconst.h"
#include "interlock.h"
#include "sleep_cnt.h"
#include "send_msg.h"
#include "wcs_sleep.h"
#include "is_proc_alive.h"
#include "compswap.h"
#include "is_file_identical.h"
#include "have_crit.h"
#include "wbox_test_init.h"
#include "anticipatory_freeze.h"

#ifdef UNIX
#include "repl_msg.h"			/* needed for gtmsource.h */
#include "gtmsource.h"			/* needed for jnlpool_addrs typedef */
#include "gtmmsg.h"
#endif
#include "gtm_c_stack_trace.h"

#ifdef UNIX
GBLREF	jnlpool_addrs	jnlpool;
#endif
GBLREF	pid_t		process_id;
GBLREF	uint4		image_count;

error_def(ERR_JNLCNTRL);
error_def(ERR_JNLFLUSH);
error_def(ERR_JNLFLUSHNOPROG);
error_def(ERR_JNLPROCSTUCK);
error_def(ERR_JNLWRTDEFER);
error_def(ERR_JNLWRTNOWWRTR);
error_def(ERR_TEXT);
error_def(ERR_JNLWRTDEFER);
error_def(ERR_JNLWRTNOWWRTR);

#ifdef VMS
#  define CURRENT_WRITER jb->now_writer
#else
#  define CURRENT_WRITER jb->io_in_prog_latch.u.parts.latch_pid
#endif

static uint4 jnl_sub_write_attempt(jnl_private_control *jpc, unsigned int *lcnt, uint4 threshold)
{
	sgmnt_addrs		*csa;
	jnl_buffer_ptr_t	jb;
	unsigned int		status;
	boolean_t		was_crit, exact_check;
	/**** Note static/local */
	static uint4		loop_image_count, writer;	/* assumes calls from one loop at a time */
	uint4			new_dskaddr, new_dsk;
	static uint4		stuck_cnt = 0;

	/* Some callers of jnl_sub_write_attempt (jnl_flush->jnl_write_attempt, jnl_write->jnl_write_attempt) are in
	 * crit, and some other (jnl_wait->jnl_write_attempt) are not. Callers in crit do not need worry about journal
	 * buffer fields (dskaddr, freeaddr) changing underneath them, but for those not in crit, jnl_sub_write_attempt
	 * might incorrectly return an error status when journal file is switched. Such callers should check for
	 * journal file switched condition and terminate any loops they are in.
	 */
	jb = jpc->jnl_buff;
	status = ERR_JNLWRTDEFER;
	csa = &FILE_INFO(jpc->region)->s_addrs;
	was_crit = csa->now_crit;
	exact_check = was_crit && (threshold == jb->freeaddr);	/* see comment in jnl_write_attempt() for why this is needed */
	while (exact_check ? (jb->dskaddr != threshold) : (jb->dskaddr < threshold))
	{
#ifdef UNIX
		if (jb->io_in_prog_latch.u.parts.latch_pid == process_id)
		{
			/* if error condition occurred while doing jnl_qio_start(), then release the lock before waiting */
			/* note that this is done only in UNIX because Unix does synchronous I/O */
			jb->image_count = 0;
			RELEASE_SWAPLOCK(&jb->io_in_prog_latch);
		}
		if (!jb->io_in_prog_latch.u.parts.latch_pid)
			status = jnl_qio_start(jpc);
#elif defined VMS
		if (lib$ast_in_prog())
		{
			if (!jb->io_in_prog)
			{
				assert(jb->blocked == process_id);
				jnl_start_ast(jpc);
				if (jb->now_writer == process_id)
					status = jb->iosb.cond;
			}
			break;		/* no fancy stuff within an AST */
		} else if (!jb->io_in_prog)
		{	/* Note down jpc->new_dskaddr/new_dsk into local variables so we get a consistent copy of these two
			 * variables for checking them later.
			 */
			new_dskaddr = jpc->new_dskaddr;
			new_dsk = jpc->new_dsk;
			status = jnl_qio_start(jpc);
		}
#else
#error UNSUPPORTED PLATFORM
#endif
		if (SS_NORMAL == status)
		{
#			if defined VMS
			/* Check if JNLCNTRL error was signalled by jnl_qio_start(). Note that it does not explicitly
			 * return this error since it in turn calls an AST routine jnl_start_ast that actually has the
			 * qio lock (and hence can look at dskaddr/dsk without any concurrency issues). But jpc will
			 * have two fields new_dskaddr/new_dsk set to what dskaddr/dsk were right after obtaining the
			 * qio lock but before releasing it in case of a JNLCNTRL error. We use those two values to
			 * recheck if this is a JNLCNTRL error situation and if so return that error from here.
			 * Note that we cannot use fields from jpc since they could be set by an AST that pops right
			 * after we check new_dskaddr below but before we fetch the value of new_dsk. So it is important
			 * to use the local variables which we know are a consistent snapshot of jpc->new_dskaddr/new_dsk.
			 * The only consequence of this approach is that in case there is a dskaddr/dsk inconsistency,
			 * it will be detected by the local variables in the next iteration (not the first time around).
			 */
			if ((new_dskaddr % jb->size) != new_dsk)
			{
				assert(gtm_white_box_test_case_enabled
					&& (WBTEST_JNL_FILE_LOST_DSKADDR == gtm_white_box_test_case_number));
				status = ERR_JNLCNTRL;
			}
#			endif
			break;
		}
		UNIX_ONLY(assert(ERR_JNLWRTNOWWRTR != status);)	/* dont have asynchronous jnl writes in Unix */
		if ((ERR_JNLWRTNOWWRTR != status) && (ERR_JNLWRTDEFER != status))
			return status;
		if ((writer != CURRENT_WRITER) || (1 == *lcnt))
		{
			writer = CURRENT_WRITER;
			loop_image_count = jb->image_count;
			*lcnt = 1;	/* !!! this should be detected and limited by the caller !!! */
			break;
		}
		if (*lcnt <= JNL_MAX_FLUSH_TRIES)
		{
			wcs_sleep(*lcnt);
			break;
		}
		VMS_ONLY(
			if ((CURRENT_WRITER == process_id) && (jpc->qio_active == TRUE) && (jb->iosb.cond == -2))
		        {	/* this an "impossible" condition where the private flag and the io have lost sync */
				GTMASSERT;	/* this should only occur in VMS; secshr_db_clnup should clear the problem */
			}
		)
		if (writer == CURRENT_WRITER)
		{
			if (!was_crit)
				grab_crit(jpc->region);	/* jnl_write_attempt has an assert about have_crit that this relies on */
			if (VMS_ONLY(0 == writer ||) FALSE == is_proc_alive(writer, jb->image_count))
			{	/* no one home, clear the semaphore; */
				BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_lost);
				jnl_send_oper(jpc, ERR_JNLFLUSH);
				send_msg(VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, CURRENT_WRITER);
				send_msg(VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("Journal IO writer changed during wait"));
				VMS_ONLY(jb->io_in_prog = 0);
				UNIX_ONLY(COMPSWAP_UNLOCK(&jb->io_in_prog_latch, writer, jb->image_count, LOCK_AVAILABLE, 0));
				if (!was_crit)
					rel_crit(jpc->region);
				*lcnt = 1;
				continue;
			}
			if (!was_crit)
				rel_crit(jpc->region);
			/* this is the interesting case: a process is stuck */
			BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_stuck);
			jpc->status = status;
			jnl_send_oper(jpc, ERR_JNLFLUSH);
			send_msg(VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, CURRENT_WRITER);
			stuck_cnt++;
			GET_C_STACK_FROM_SCRIPT("JNLPROCSTUCK", process_id, CURRENT_WRITER, stuck_cnt);
			*lcnt = 1;	/* ??? is it necessary to limit this, and if so, how ??? */
			status = ERR_JNLPROCSTUCK;
			break;
		}
		break;
	}
	if ((threshold > jb->freeaddr)
		|| (csa->now_crit && ((jb->dskaddr > jb->freeaddr) || (jb->free != (jb->freeaddr % jb->size)))))
	{	/* threshold > jb->freeaddr => somebody decremented jb->freeaddr after we computed threshold, or jnl was switched
		 * jb->dsk != jb->freeaddr % jb->size => out of design condition
		 * jb->dskaddr > jb->freeaddr => out of design condition, or jnl was switched
		 */
		status = ERR_JNLCNTRL;
	}
	return status;
}

uint4 jnl_write_attempt(jnl_private_control *jpc, uint4 threshold)
{
	jnl_buffer_ptr_t	jb;
	unsigned int		lcnt, prev_lcnt, cnt, proc_stuck_cnt;
	sgmnt_addrs		*csa;
	unsigned int		status;
	boolean_t		was_crit, jnlfile_lost, exact_check;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	jb = jpc->jnl_buff;
	csa = &FILE_INFO(jpc->region)->s_addrs;
	was_crit = csa->now_crit;

	/* If holding crit and input threshold matches jb->freeaddr, then we need to wait in the loop as long as dskaddr
	 * is not EQUAL to threshold. This is because if dskaddr is lesser than threshold we need to wait. If ever it
	 * becomes greater than threshold, it is an out-of-design situation (since dskaddr has effectively become > freeaddr)
	 * and so we need to trigger "jnl_file_lost" which is done in "jnl_sub_write_attempt" so it is important to invoke
	 * that routine (in the for loop below). Hence the need to do an exact match instead of a < match. If not holding
	 * crit or input threshold does not match jb->freeaddr, then dskaddr becoming GREATER than threshold is a valid
	 * condition so we should do a (dskaddr < threshold), not a (dskaddr != threshold) check in that case.
	 */
	exact_check = was_crit && (threshold == jb->freeaddr);
	assert(!was_crit || threshold <= jb->freeaddr);
	/* Check that we either own crit on the current region or we DONT own crit on ANY region. This is relied upon by
	 * the grab_crit calls (done in jnl_write_attempt and jnl_sub_write_attempt) to ensure no deadlocks are possible.
	 */
	assert(was_crit || (0 == have_crit(CRIT_HAVE_ANY_REG)));
	for (prev_lcnt = lcnt = cnt = 1, proc_stuck_cnt = 0;
		(was_crit || (NOJNL != jpc->channel)) && (exact_check ? jb->dskaddr != threshold : jb->dskaddr < threshold);
		lcnt++, prev_lcnt = lcnt, cnt++)
	{
		status = jnl_sub_write_attempt(jpc, &lcnt, threshold);
		if (JNL_FILE_SWITCHED(jpc))
		{	/* If we are holding crit, the journal file switch could happen in the form of journaling getting
			 * turned OFF (due to disk space issues etc.)
			 */
			jpc->status = SS_NORMAL;
			return SS_NORMAL;
		}
		if (SS_NORMAL == status)
		{
			if (JNL_FLUSH_PROG_TRIES > lcnt)
			{
				proc_stuck_cnt = 0;
				/* In VMS, jnl writes are asynchronous. The above call to "jnl_sub_write_attempt" has returned
				 * SS_NORMAL status. This means the jnl qio lock is not in use by anyone else and is up for grabs.
				 * We would have scheduled a jnl qio write through a sys$dclast call. We have no control of when
				 * the AST routine "jnl_start_ast" will actually get control and start the write. Until then
				 * we dont want to keep reinvoking "jnl_sub_write_attempt" in a hard spin loop. So sleep.
				 * In Unix, writes are synchronous so SS_NORMAL status return implies we have completed a jnl
				 * write and "jb->dskaddr" is closer to "threshold" than it was in the previous iteration.
				 * A sleep at this point will only slow things down unnecessarily. Hence no sleep if Unix.
				 */
				VMS_ONLY(wcs_sleep(lcnt);)
				continue;
			}
			jpc->status = SS_NORMAL;
			jnl_send_oper(jpc, ERR_JNLFLUSH);
			send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),
				 ERR_TEXT, 2, LEN_AND_LIT("Could not flush all the buffered journal data"));
			GTMASSERT; /* too many attempts to flush journal data */
		}
		if ((ERR_JNLCNTRL == status)
			|| (csa->now_crit
				&& (ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status) && (ERR_JNLPROCSTUCK != status)))
		{	/* If JNLCNTRL or if holding crit and not waiting for some other writer (or self in VMS)
			 * better turn off journaling and proceed with database update to avoid a database hang.
			 */
			if (was_crit)
				jb->blocked = 0;
			else
				grab_crit(jpc->region);	/* jnl_write_attempt has an assert about have_crit that this relies on */
			jnlfile_lost = FALSE;
			if (jb->free_update_pid)
			{
				FIX_NONZERO_FREE_UPDATE_PID(csa, jb);
			} else
			{
				assert(gtm_white_box_test_case_enabled
					&& (WBTEST_JNL_FILE_LOST_DSKADDR == gtm_white_box_test_case_number));
				if (JNL_ENABLED(csa->hdr))
				{	/* We ignore the return value of jnl_file_lost() since we always want to report the journal
					 * error, whatever its error handling method is.  Also, an operator log will be sent by some
					 * callers (t_end()) only if an error is returned here, and the operator log is wanted in
					 * those cases.
					 */
					jnl_file_lost(jpc, status);
					jnlfile_lost = TRUE;
				}
				/* Else journaling got closed concurrently by another process by invoking "jnl_file_lost"
				 * just before we got crit. Do not invoke "jnl_file_lost" again on the same journal file.
				 * Instead continue and next iteration will detect the journal file has switched and terminate.
				 */
			}
			if (!was_crit)
				rel_crit(jpc->region);
			if (!jnlfile_lost)
				continue;
			else
				return status;
		}
#		ifdef UNIX
		if ((ERR_JNLWRTDEFER == status) && IS_REPL_INST_FROZEN)
		{	/* Check if instance freeze is in effect and this db has instance freeze activation enabled.
			 * In that case, we do not want to keep retrying the jnl_qio as that might cause lcnt to increase
			 * and eventually GTMASSERT implying this is an IO issue whereas it is possible some other process
			 * is holding the jnl_qio lock on this region and is not able to write to the journal file for a
			 * long time because the instance is frozen. To avoid false GTMASSERTs, wait for freeze to be
			 * lifted before continuing with normal flow (which is to increment lcnt and keep retrying the
			 * attempt at the jnl_qio lock). Note that this process is guaranteed not to have set the instance
			 * freeze due to a ENOSPC situation as in that case we would never have allowed any interrupt to occur
			 * until we succeed with that write and will clear the freeze before moving on.
			 * Note that the below macro takes care of the "db has instance freeze activation enabled" check too.
			 */
			 WAIT_FOR_REPL_INST_UNFREEZE(csa);
		}
#		endif
		if ((ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status) && (ERR_JNLPROCSTUCK != status))
		{	/* If holding crit, then jnl_sub_write_attempt would have invoked jnl_file_lost which would have
			 * caused the JNL_FILE_SWITCHED check at the beginning of this for loop to succeed and return from
			 * this function so we should never have gotten here. Assert accordingly. If not holding crit,
			 * wait for some crit holder to invoke jnl_file_lost. Until then keep sleep looping indefinitely.
			 * The sleep in this case is not time-limited because the callers of jnl_write_attempt (particularly
			 * jnl_wait) do not check its return value so they assume success returns from this function. It is
			 * non-trivial to change the interface and code of all callers to handle the error situation so we
			 * instead choose to sleep indefinitely here until some crit process encounters the same error and
			 * triggers jnl_file_lost processing which will terminate the loop due to the JNL_FILE_SWITCHED check.
			 */
			assert(!csa->now_crit);
			wcs_sleep(lcnt);
		} else if (prev_lcnt != lcnt)
		{
			assert(1 == lcnt);
			if (ERR_JNLWRTDEFER == status)
			{	/* Change of writer */
				if (JNL_FLUSH_PROG_TRIES <= cnt)
				{
					send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),
						ERR_TEXT, 2, LEN_AND_LIT("No progress even with multiple writers"));
					GTMASSERT;
				}
				proc_stuck_cnt = 0;
			} else if (ERR_JNLPROCSTUCK == status && (JNL_FLUSH_PROG_FACTOR <= ++proc_stuck_cnt))
			{
				send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr), ERR_TEXT, 2,
					LEN_AND_LIT("Progress prevented by a process stuck flushing journal data"));
				VMS_ONLY(
					if (TREF(gtm_environment_init))
					{
						proc_stuck_cnt = 0;
						continue;
					}
				)

				GTMASSERT;
			}
		}
	}
	return SS_NORMAL;
}
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00			`/****************************************************************`
			`* *`
ENH: Version 5.5000 from sourceforge. Source code taken from here: http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.5-000/ 2012-03-24 14:06:46 -04:00			`* Copyright 2001, 2012 Fidelity Information Services, Inc *`
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00			`* *`
			`* This source code contains the intellectual property *`
			`* of its copyright holder(s), and is made available *`
			`* under a license. If you do not know the terms of *`
			`* the license, please stop and do not read further. *`
			`* *`
			`****************************************************************/`

			`#include "mdef.h"`

			`#include "gdsroot.h"`
			`#include "gtm_facility.h"`
			`#include "fileinfo.h"`
			`#include "gdsbt.h"`
			`#include "gdsblk.h"`
			`#include "gdsfhead.h"`
			`#include "gdsbgtr.h"`
			`#include "filestruct.h"`
			`#include "iosp.h"`
			`#include "jnl.h"`
			`#include "lockconst.h"`
			`#include "interlock.h"`
			`#include "sleep_cnt.h"`
			`#include "send_msg.h"`
			`#include "wcs_sleep.h"`
			`#include "is_proc_alive.h"`
			`#include "compswap.h"`
			`#include "is_file_identical.h"`
			`#include "have_crit.h"`
			`#include "wbox_test_init.h"`
Synchronize with upstream GT.M V6.0-000 Please refer to the release notes for more information about this version. http://tinco.pair.com/bhaskar/gtm/doc/articles/GTM_V6.0-000_Release_Notes.html This commit includes all files from the x86 and x86_64 source tarballs, including generated files. 2012-10-29 18:54:31 -04:00			`#include "anticipatory_freeze.h"`
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00
			`#ifdef UNIX`
Synchronize with upstream GT.M V6.0-000 Please refer to the release notes for more information about this version. http://tinco.pair.com/bhaskar/gtm/doc/articles/GTM_V6.0-000_Release_Notes.html This commit includes all files from the x86 and x86_64 source tarballs, including generated files. 2012-10-29 18:54:31 -04:00			`#include "repl_msg.h" /* needed for gtmsource.h */`
			`#include "gtmsource.h" /* needed for jnlpool_addrs typedef */`
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00			`#include "gtmmsg.h"`
			`#endif`
			`#include "gtm_c_stack_trace.h"`

Synchronize with upstream GT.M V6.0-000 Please refer to the release notes for more information about this version. http://tinco.pair.com/bhaskar/gtm/doc/articles/GTM_V6.0-000_Release_Notes.html This commit includes all files from the x86 and x86_64 source tarballs, including generated files. 2012-10-29 18:54:31 -04:00			`#ifdef UNIX`
			`GBLREF jnlpool_addrs jnlpool;`
			`#endif`
			`GBLREF pid_t process_id;`
			`GBLREF uint4 image_count;`
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00
			`error_def(ERR_JNLCNTRL);`
			`error_def(ERR_JNLFLUSH);`
			`error_def(ERR_JNLFLUSHNOPROG);`
			`error_def(ERR_JNLPROCSTUCK);`
			`error_def(ERR_JNLWRTDEFER);`
			`error_def(ERR_JNLWRTNOWWRTR);`
			`error_def(ERR_TEXT);`
			`error_def(ERR_JNLWRTDEFER);`
			`error_def(ERR_JNLWRTNOWWRTR);`

			`#ifdef VMS`
			`# define CURRENT_WRITER jb->now_writer`
			`#else`
			`# define CURRENT_WRITER jb->io_in_prog_latch.u.parts.latch_pid`
			`#endif`

			`static uint4 jnl_sub_write_attempt(jnl_private_control jpc, unsigned int lcnt, uint4 threshold)`
			`{`
			`sgmnt_addrs *csa;`
			`jnl_buffer_ptr_t jb;`
			`unsigned int status;`
			`boolean_t was_crit, exact_check;`
			`/**** Note static/local */`
			`static uint4 loop_image_count, writer; /* assumes calls from one loop at a time */`
			`uint4 new_dskaddr, new_dsk;`
			`static uint4 stuck_cnt = 0;`

			`/* Some callers of jnl_sub_write_attempt (jnl_flush->jnl_write_attempt, jnl_write->jnl_write_attempt) are in`
			`* crit, and some other (jnl_wait->jnl_write_attempt) are not. Callers in crit do not need worry about journal`
			`* buffer fields (dskaddr, freeaddr) changing underneath them, but for those not in crit, jnl_sub_write_attempt`
			`* might incorrectly return an error status when journal file is switched. Such callers should check for`
			`* journal file switched condition and terminate any loops they are in.`
			`*/`
			`jb = jpc->jnl_buff;`
			`status = ERR_JNLWRTDEFER;`
			`csa = &FILE_INFO(jpc->region)->s_addrs;`
			`was_crit = csa->now_crit;`
			`exact_check = was_crit && (threshold == jb->freeaddr); /* see comment in jnl_write_attempt() for why this is needed */`
			`while (exact_check ? (jb->dskaddr != threshold) : (jb->dskaddr < threshold))`
			`{`
			`#ifdef UNIX`
			`if (jb->io_in_prog_latch.u.parts.latch_pid == process_id)`
			`{`
			`/* if error condition occurred while doing jnl_qio_start(), then release the lock before waiting */`
			`/* note that this is done only in UNIX because Unix does synchronous I/O */`
			`jb->image_count = 0;`
			`RELEASE_SWAPLOCK(&jb->io_in_prog_latch);`
			`}`
			`if (!jb->io_in_prog_latch.u.parts.latch_pid)`
			`status = jnl_qio_start(jpc);`
			`#elif defined VMS`
			`if (lib$ast_in_prog())`
			`{`
			`if (!jb->io_in_prog)`
			`{`
			`assert(jb->blocked == process_id);`
			`jnl_start_ast(jpc);`
			`if (jb->now_writer == process_id)`
			`status = jb->iosb.cond;`
			`}`
			`break; /* no fancy stuff within an AST */`
			`} else if (!jb->io_in_prog)`
			`{ /* Note down jpc->new_dskaddr/new_dsk into local variables so we get a consistent copy of these two`
			`* variables for checking them later.`
			`*/`
			`new_dskaddr = jpc->new_dskaddr;`
			`new_dsk = jpc->new_dsk;`
			`status = jnl_qio_start(jpc);`
			`}`
			`#else`
			`#error UNSUPPORTED PLATFORM`
			`#endif`
			`if (SS_NORMAL == status)`
			`{`
			`# if defined VMS`
			`/* Check if JNLCNTRL error was signalled by jnl_qio_start(). Note that it does not explicitly`
			`* return this error since it in turn calls an AST routine jnl_start_ast that actually has the`
			`* qio lock (and hence can look at dskaddr/dsk without any concurrency issues). But jpc will`
			`* have two fields new_dskaddr/new_dsk set to what dskaddr/dsk were right after obtaining the`
			`* qio lock but before releasing it in case of a JNLCNTRL error. We use those two values to`
			`* recheck if this is a JNLCNTRL error situation and if so return that error from here.`
			`* Note that we cannot use fields from jpc since they could be set by an AST that pops right`
			`* after we check new_dskaddr below but before we fetch the value of new_dsk. So it is important`
			`* to use the local variables which we know are a consistent snapshot of jpc->new_dskaddr/new_dsk.`
			`* The only consequence of this approach is that in case there is a dskaddr/dsk inconsistency,`
			`* it will be detected by the local variables in the next iteration (not the first time around).`
			`*/`
			`if ((new_dskaddr % jb->size) != new_dsk)`
			`{`
			`assert(gtm_white_box_test_case_enabled`
			`&& (WBTEST_JNL_FILE_LOST_DSKADDR == gtm_white_box_test_case_number));`
			`status = ERR_JNLCNTRL;`
			`}`
			`# endif`
			`break;`
			`}`
			`UNIX_ONLY(assert(ERR_JNLWRTNOWWRTR != status);) /* dont have asynchronous jnl writes in Unix */`
			`if ((ERR_JNLWRTNOWWRTR != status) && (ERR_JNLWRTDEFER != status))`
			`return status;`
			`if ((writer != CURRENT_WRITER) \|\| (1 == *lcnt))`
			`{`
			`writer = CURRENT_WRITER;`
			`loop_image_count = jb->image_count;`
			`lcnt = 1; / !!! this should be detected and limited by the caller !!! */`
			`break;`
			`}`
			`if (*lcnt <= JNL_MAX_FLUSH_TRIES)`
			`{`
			`wcs_sleep(*lcnt);`
			`break;`
			`}`
			`VMS_ONLY(`
			`if ((CURRENT_WRITER == process_id) && (jpc->qio_active == TRUE) && (jb->iosb.cond == -2))`
			`{ /* this an "impossible" condition where the private flag and the io have lost sync */`
			`GTMASSERT; /* this should only occur in VMS; secshr_db_clnup should clear the problem */`
			`}`
			`)`
			`if (writer == CURRENT_WRITER)`
			`{`
			`if (!was_crit)`
			`grab_crit(jpc->region); /* jnl_write_attempt has an assert about have_crit that this relies on */`
			`if (VMS_ONLY(0 == writer \|\|) FALSE == is_proc_alive(writer, jb->image_count))`
			`{ /* no one home, clear the semaphore; */`
			`BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_lost);`
			`jnl_send_oper(jpc, ERR_JNLFLUSH);`
			`send_msg(VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, CURRENT_WRITER);`
			`send_msg(VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("Journal IO writer changed during wait"));`
			`VMS_ONLY(jb->io_in_prog = 0);`
			`UNIX_ONLY(COMPSWAP_UNLOCK(&jb->io_in_prog_latch, writer, jb->image_count, LOCK_AVAILABLE, 0));`
			`if (!was_crit)`
			`rel_crit(jpc->region);`
			`*lcnt = 1;`
			`continue;`
			`}`
			`if (!was_crit)`
			`rel_crit(jpc->region);`
			`/* this is the interesting case: a process is stuck */`
			`BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_stuck);`
			`jpc->status = status;`
			`jnl_send_oper(jpc, ERR_JNLFLUSH);`
			`send_msg(VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, CURRENT_WRITER);`
			`stuck_cnt++;`
			`GET_C_STACK_FROM_SCRIPT("JNLPROCSTUCK", process_id, CURRENT_WRITER, stuck_cnt);`
			`lcnt = 1; / ??? is it necessary to limit this, and if so, how ??? */`
			`status = ERR_JNLPROCSTUCK;`
			`break;`
			`}`
			`break;`
			`}`
			`if ((threshold > jb->freeaddr)`
			`\|\| (csa->now_crit && ((jb->dskaddr > jb->freeaddr) \|\| (jb->free != (jb->freeaddr % jb->size)))))`
			`{ /* threshold > jb->freeaddr => somebody decremented jb->freeaddr after we computed threshold, or jnl was switched`
			`* jb->dsk != jb->freeaddr % jb->size => out of design condition`
			`* jb->dskaddr > jb->freeaddr => out of design condition, or jnl was switched`
			`*/`
			`status = ERR_JNLCNTRL;`
			`}`
			`return status;`
			`}`

			`uint4 jnl_write_attempt(jnl_private_control *jpc, uint4 threshold)`
			`{`
			`jnl_buffer_ptr_t jb;`
			`unsigned int lcnt, prev_lcnt, cnt, proc_stuck_cnt;`
			`sgmnt_addrs *csa;`
			`unsigned int status;`
			`boolean_t was_crit, jnlfile_lost, exact_check;`
			`DCL_THREADGBL_ACCESS;`

			`SETUP_THREADGBL_ACCESS;`
			`jb = jpc->jnl_buff;`
			`csa = &FILE_INFO(jpc->region)->s_addrs;`
			`was_crit = csa->now_crit;`

			`/* If holding crit and input threshold matches jb->freeaddr, then we need to wait in the loop as long as dskaddr`
			`* is not EQUAL to threshold. This is because if dskaddr is lesser than threshold we need to wait. If ever it`
			`* becomes greater than threshold, it is an out-of-design situation (since dskaddr has effectively become > freeaddr)`
			`* and so we need to trigger "jnl_file_lost" which is done in "jnl_sub_write_attempt" so it is important to invoke`
			`* that routine (in the for loop below). Hence the need to do an exact match instead of a < match. If not holding`
			`* crit or input threshold does not match jb->freeaddr, then dskaddr becoming GREATER than threshold is a valid`
			`* condition so we should do a (dskaddr < threshold), not a (dskaddr != threshold) check in that case.`
			`*/`
			`exact_check = was_crit && (threshold == jb->freeaddr);`
			`assert(!was_crit \|\| threshold <= jb->freeaddr);`
			`/* Check that we either own crit on the current region or we DONT own crit on ANY region. This is relied upon by`
			`* the grab_crit calls (done in jnl_write_attempt and jnl_sub_write_attempt) to ensure no deadlocks are possible.`
			`*/`
			`assert(was_crit \|\| (0 == have_crit(CRIT_HAVE_ANY_REG)));`
			`for (prev_lcnt = lcnt = cnt = 1, proc_stuck_cnt = 0;`
			`(was_crit \|\| (NOJNL != jpc->channel)) && (exact_check ? jb->dskaddr != threshold : jb->dskaddr < threshold);`
			`lcnt++, prev_lcnt = lcnt, cnt++)`
			`{`
			`status = jnl_sub_write_attempt(jpc, &lcnt, threshold);`
			`if (JNL_FILE_SWITCHED(jpc))`
			`{ /* If we are holding crit, the journal file switch could happen in the form of journaling getting`
			`* turned OFF (due to disk space issues etc.)`
			`*/`
			`jpc->status = SS_NORMAL;`
			`return SS_NORMAL;`
			`}`
			`if (SS_NORMAL == status)`
			`{`
			`if (JNL_FLUSH_PROG_TRIES > lcnt)`
			`{`
			`proc_stuck_cnt = 0;`
			`/* In VMS, jnl writes are asynchronous. The above call to "jnl_sub_write_attempt" has returned`
			`* SS_NORMAL status. This means the jnl qio lock is not in use by anyone else and is up for grabs.`
			`* We would have scheduled a jnl qio write through a sys$dclast call. We have no control of when`
			`* the AST routine "jnl_start_ast" will actually get control and start the write. Until then`
			`* we dont want to keep reinvoking "jnl_sub_write_attempt" in a hard spin loop. So sleep.`
			`* In Unix, writes are synchronous so SS_NORMAL status return implies we have completed a jnl`
			`* write and "jb->dskaddr" is closer to "threshold" than it was in the previous iteration.`
			`* A sleep at this point will only slow things down unnecessarily. Hence no sleep if Unix.`
			`*/`
			`VMS_ONLY(wcs_sleep(lcnt);)`
			`continue;`
			`}`
			`jpc->status = SS_NORMAL;`
			`jnl_send_oper(jpc, ERR_JNLFLUSH);`
			`send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),`
			`ERR_TEXT, 2, LEN_AND_LIT("Could not flush all the buffered journal data"));`
			`GTMASSERT; /* too many attempts to flush journal data */`
			`}`
			`if ((ERR_JNLCNTRL == status)`
			`\|\| (csa->now_crit`
			`&& (ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status) && (ERR_JNLPROCSTUCK != status)))`
			`{ /* If JNLCNTRL or if holding crit and not waiting for some other writer (or self in VMS)`
			`* better turn off journaling and proceed with database update to avoid a database hang.`
			`*/`
			`if (was_crit)`
			`jb->blocked = 0;`
			`else`
			`grab_crit(jpc->region); /* jnl_write_attempt has an assert about have_crit that this relies on */`
			`jnlfile_lost = FALSE;`
			`if (jb->free_update_pid)`
			`{`
			`FIX_NONZERO_FREE_UPDATE_PID(csa, jb);`
			`} else`
			`{`
			`assert(gtm_white_box_test_case_enabled`
			`&& (WBTEST_JNL_FILE_LOST_DSKADDR == gtm_white_box_test_case_number));`
			`if (JNL_ENABLED(csa->hdr))`
			`{ /* We ignore the return value of jnl_file_lost() since we always want to report the journal`
			`* error, whatever its error handling method is. Also, an operator log will be sent by some`
			`* callers (t_end()) only if an error is returned here, and the operator log is wanted in`
			`* those cases.`
			`*/`
			`jnl_file_lost(jpc, status);`
			`jnlfile_lost = TRUE;`
			`}`
			`/* Else journaling got closed concurrently by another process by invoking "jnl_file_lost"`
			`* just before we got crit. Do not invoke "jnl_file_lost" again on the same journal file.`
			`* Instead continue and next iteration will detect the journal file has switched and terminate.`
			`*/`
			`}`
			`if (!was_crit)`
			`rel_crit(jpc->region);`
			`if (!jnlfile_lost)`
			`continue;`
			`else`
			`return status;`
			`}`
Synchronize with upstream GT.M V6.0-000 Please refer to the release notes for more information about this version. http://tinco.pair.com/bhaskar/gtm/doc/articles/GTM_V6.0-000_Release_Notes.html This commit includes all files from the x86 and x86_64 source tarballs, including generated files. 2012-10-29 18:54:31 -04:00			`# ifdef UNIX`
			`if ((ERR_JNLWRTDEFER == status) && IS_REPL_INST_FROZEN)`
			`{ /* Check if instance freeze is in effect and this db has instance freeze activation enabled.`
			`* In that case, we do not want to keep retrying the jnl_qio as that might cause lcnt to increase`
			`* and eventually GTMASSERT implying this is an IO issue whereas it is possible some other process`
			`* is holding the jnl_qio lock on this region and is not able to write to the journal file for a`
			`* long time because the instance is frozen. To avoid false GTMASSERTs, wait for freeze to be`
			`* lifted before continuing with normal flow (which is to increment lcnt and keep retrying the`
			`* attempt at the jnl_qio lock). Note that this process is guaranteed not to have set the instance`
			`* freeze due to a ENOSPC situation as in that case we would never have allowed any interrupt to occur`
			`* until we succeed with that write and will clear the freeze before moving on.`
			`* Note that the below macro takes care of the "db has instance freeze activation enabled" check too.`
			`*/`
			`WAIT_FOR_REPL_INST_UNFREEZE(csa);`
			`}`
			`# endif`
ENH: Initial import from sourceforge. These source tree was directly imported from http://sourceforge.net/projects/fis-gtm/files/GT.M-x86-Linux-src/V5.4-002B/ by extracting the file: gtm_V54002B_linux_i686_src.tar.gz 2012-02-05 11:35:58 -05:00			`if ((ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status) && (ERR_JNLPROCSTUCK != status))`
			`{ /* If holding crit, then jnl_sub_write_attempt would have invoked jnl_file_lost which would have`
			`* caused the JNL_FILE_SWITCHED check at the beginning of this for loop to succeed and return from`
			`* this function so we should never have gotten here. Assert accordingly. If not holding crit,`
			`* wait for some crit holder to invoke jnl_file_lost. Until then keep sleep looping indefinitely.`
			`* The sleep in this case is not time-limited because the callers of jnl_write_attempt (particularly`
			`* jnl_wait) do not check its return value so they assume success returns from this function. It is`
			`* non-trivial to change the interface and code of all callers to handle the error situation so we`
			`* instead choose to sleep indefinitely here until some crit process encounters the same error and`
			`* triggers jnl_file_lost processing which will terminate the loop due to the JNL_FILE_SWITCHED check.`
			`*/`
			`assert(!csa->now_crit);`
			`wcs_sleep(lcnt);`
			`} else if (prev_lcnt != lcnt)`
			`{`
			`assert(1 == lcnt);`
			`if (ERR_JNLWRTDEFER == status)`
			`{ /* Change of writer */`
			`if (JNL_FLUSH_PROG_TRIES <= cnt)`
			`{`
			`send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),`
			`ERR_TEXT, 2, LEN_AND_LIT("No progress even with multiple writers"));`
			`GTMASSERT;`
			`}`
			`proc_stuck_cnt = 0;`
			`} else if (ERR_JNLPROCSTUCK == status && (JNL_FLUSH_PROG_FACTOR <= ++proc_stuck_cnt))`
			`{`
			`send_msg(VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr), ERR_TEXT, 2,`
			`LEN_AND_LIT("Progress prevented by a process stuck flushing journal data"));`
			`VMS_ONLY(`
			`if (TREF(gtm_environment_init))`
			`{`
			`proc_stuck_cnt = 0;`
			`continue;`
			`}`
			`)`

			`GTMASSERT;`
			`}`
			`}`
			`}`
			`return SS_NORMAL;`
			`}`