/****************************************************************
 *								*
 *	Copyright 2007, 2011 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gtm_facility.h"
#include "gdsroot.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "interlock.h"
#include "jnl.h"
#include "sleep_cnt.h"
#include "gdsbgtr.h"
#include "wbox_test_init.h"

/* Include prototypes */
#include "send_msg.h"
#include "wcs_get_space.h"
#include "gtmmsg.h"
#include "gt_timer.h"
#include "wcs_sleep.h"
#include "relqop.h"
#include "error.h"	/* for gtm_fork_n_core() prototype */
#include "rel_quant.h"
#include "performcaslatchcheck.h"
#include "wcs_phase2_commit_wait.h"
#include "wcs_recover.h"
#include "gtm_c_stack_trace.h"

GBLDEF	cache_rec_ptr_t		get_space_fail_cr;	/* gbldefed to be accessible in a pro core */
GBLDEF	wcs_conflict_trace_t	*get_space_fail_array;	/* gbldefed to be accessible in a pro core */
GBLDEF	int4			get_space_fail_arridx;	/* gbldefed to be accessible in a pro core */

GBLREF	sgmnt_addrs		*cs_addrs;
GBLREF	sgmnt_data_ptr_t	cs_data;
GBLREF	gd_region		*gv_cur_region;	/* needed for the JNL_ENSURE_OPEN_WCS_WTSTART macro */
GBLREF	int			num_additional_processors;
GBLREF	uint4			process_id;
GBLREF	volatile int4		fast_lock_count;

error_def(ERR_DBFILERR);
error_def(ERR_WAITDSKSPACE);
error_def(ERR_GBLOFLOW);

#define WCS_CONFLICT_TRACE_ARRAYSIZE	64
#define LCNT_INTERVAL			DIVIDE_ROUND_UP(UNIX_GETSPACEWAIT, WCS_CONFLICT_TRACE_ARRAYSIZE)

#define WCS_GET_SPACE_RETURN_FAIL(TRACEARRAY, CR)					\
{											\
	assert(FALSE);	/* We have failed */						\
	get_space_fail_cr = CR;								\
	get_space_fail_array = TRACEARRAY;						\
	if (TREF(gtm_environment_init))							\
		gtm_fork_n_core();	/* take a snapshot in case running in-house */	\
	return FALSE;									\
}

#define GET_IO_LATCH_PID(CSA)		(CSA->jnl ? CSA->jnl->jnl_buff->io_in_prog_latch.u.parts.latch_pid : -1)
#define GET_FSYNC_LATCH_PID(CSA)	(CSA->jnl ? CSA->jnl->jnl_buff->fsync_in_prog_latch.u.parts.latch_pid : -1)
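/* Before giving up, capture C-stack traces of whoever appears to be holding things up: the writer
 * recorded in the cache-record (CR->epid), the journal io_in_prog latch holder and the fsync_in_prog
 * latch holder (if journaling is active; the GET_*_LATCH_PID macros above return -1 otherwise).
 * GET_C_STACK_FROM_SCRIPT tags each trace with the reason it was taken so the resulting traces can
 * be matched back to this failure path.
 */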
#define INVOKE_C_STACK_APPROPRIATE(CR, CSA, STUCK_CNT)									\
{															\
	int4	io_latch_pid, fsync_latch_pid;										\
															\
	if (CR->epid)													\
	{														\
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_CR", process_id, CR->epid, STUCK_CNT);		\
	}														\
	if (0 < (io_latch_pid = GET_IO_LATCH_PID(CSA)))									\
	{														\
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_IO_PROG", process_id, io_latch_pid, STUCK_CNT);	\
	}														\
	if (0 < (fsync_latch_pid = GET_FSYNC_LATCH_PID(CSA)))								\
	{														\
		GET_C_STACK_FROM_SCRIPT("WCS_GET_SPACE_RETURN_FAIL_FSYNC_PROG", process_id, fsync_latch_pid,		\
			STUCK_CNT);											\
	}														\
}

/* Go after a specific number of buffers or a particular buffer.
 * Not called if UNTARGETED_MSYNC and MM mode.
 */
bool	wcs_get_space(gd_region *reg, int needed, cache_rec_ptr_t cr)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	cache_que_head_ptr_t	q0, base;
	int4			n, save_errno = 0, k, i, dummy_errno, max_count, count;
	int			maxspins, retries, spins;
	uint4			lcnt, size, to_wait, to_msg, this_idx;
	wcs_conflict_trace_t	wcs_conflict_trace[WCS_CONFLICT_TRACE_ARRAYSIZE];
	boolean_t		is_mm;
	cache_rec		cr_contents;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert((0 != needed) || (NULL != cr));
	get_space_fail_arridx = 0;
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	cnl = csa->nl;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	if (FALSE == csa->now_crit)
	{
		assert(0 != needed);	/* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu); 0 < lcnt; lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				/* a macro that ensures jnl is open, invokes wcs_wtstart() and checks for errors etc. */
		return TRUE;
	}
	UNTARGETED_MSYNC_ONLY(assert(!is_mm);)
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger / STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	/* Routine actually serves two purposes:
	 *	1 - Free up a required number of buffers, or
	 *	2 - Free up a specific buffer.
	 * Do a different kind of loop depending on which is our current calling.
	 */
	if (0 != needed)
	{
		BG_TRACE_ANY(csa, bufct_buffer_flush);
		for (lcnt = 1; (cnl->wc_in_free < needed) && (BUF_OWNER_STUCK > lcnt); ++lcnt)
		{
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, needed, save_errno);
			if (is_mm && (ERR_GBLOFLOW == save_errno))
				wcs_recover(reg);
			if (cnl->wc_in_free < needed)
			{
				if ((ENOSPC == save_errno) && (csa->hdr->wait_disk_space > 0))
				{	/* Not enough disk space to flush the buffers and regain them, so wait for space
					 * to become available; if it takes too long, just quit. Unfortunately, quitting
					 * would invoke the recovery logic, which would be of no help in this situation.
					 * Then what?
					 */
					lcnt = BUF_OWNER_STUCK;
					to_wait = cs_data->wait_disk_space;
					to_msg = (to_wait / 8) ? (to_wait / 8) : 1;	/* output error message around 8 times */
					while ((0 < to_wait) && (ENOSPC == save_errno))
					{
						if ((to_wait == cs_data->wait_disk_space) || (0 == to_wait % to_msg))
						{
							send_msg(VARLSTCNT(7) ERR_WAITDSKSPACE, 4, process_id, to_wait,
								DB_LEN_STR(reg), save_errno);
							gtm_putmsg(VARLSTCNT(7) ERR_WAITDSKSPACE, 4, process_id, to_wait,
								DB_LEN_STR(reg), save_errno);
						}
						hiber_start(1000);
						to_wait--;
						JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, needed, save_errno);
						if (is_mm && (ERR_GBLOFLOW == save_errno))
							wcs_recover(reg);
						if (cnl->wc_in_free >= needed)
							break;
					}
				}
				wcs_sleep(lcnt);
			} else
				return TRUE;
			BG_TRACE_ANY(csa, bufct_buffer_flush_loop);
		}
		if (cnl->wc_in_free >= needed)
			return TRUE;
	} else
	{	/* Wait for a specific buffer to be flushed. We attempt to speed this along by shuffling the entry
		 * we want to the front of the queue before we call routines to do some writing.
		 * Formerly we used to wait for this buffer to be flushed irrespective of its position in the active queue.
		 * We keep this code commented just in case it needs to be resurrected in the future.
		 */
#		ifdef old_code
		BG_TRACE_ANY(csa, spcfc_buffer_flush);
		for (lcnt = 1; (0 != cr->dirty) && (BUF_OWNER_STUCK > lcnt); ++lcnt)
		{
			for (; 0 != cr->dirty && 0 != csa->acc_meth.bg.cache_state->cacheq_active.fl;)
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, save_errno);
			if (0 != cr->dirty)
				wcs_sleep(lcnt);
			else
				return TRUE;
			BG_TRACE_ANY(csa, spcfc_buffer_flush_loop);
		}
		if (0 == cr->dirty)
			return TRUE;
#		endif
		assert(csa->now_crit);	/* must be crit to play with queues when not the writer */
		BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush);
		++fast_lock_count;	/* Disable wcs_stale for duration */
		if (!is_mm)
		{	/* Determine queue base to use */
			base = &csa->acc_meth.bg.cache_state->cacheq_active;
			/* If another process is concurrently finishing up phase2 of its commit, wait for that to complete
			 * first.
			 */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
				return FALSE;	/* assumption is that caller will set wc_blocked and trigger cache recovery */
		} else
			base = &csa->acc_meth.mm.mmblk_state->mmblkq_active;
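		/* Acquire the active queue header latch using the standard spin/retry pattern: spin up to maxspins
		 * times (spinning is only worthwhile on multi-processor boxes), then back off with rel_quant(),
		 * sleeping on every 4th retry, and near the end of the retries check whether the latch holder is
		 * still alive (performCASLatchCheck below).
		 */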
		maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
		for (retries = LOCK_TRIES - 1; retries > 0; retries--)
		{
			for (spins = maxspins; spins > 0; spins--)
			{
				if (GET_SWAPLOCK(&base->latch))	/* Lock queue to prevent interference */
				{
					if (0 != cr->state_que.fl)
					{	/* If it is still in the active queue, then insert it at the head of the queue */
						csa->wbuf_dqd++;
						q0 = (cache_que_head_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
						csa->wbuf_dqd--;
						VERIFY_QUEUE(base);
					}
					/* Release the queue header lock so that the writers can proceed */
					RELEASE_SWAPLOCK(&base->latch);
					--fast_lock_count;
					assert(0 <= fast_lock_count);
					/* Fire off a writer to write it out. Another writer may grab our cache
					 * record, so we have to be willing to wait for him to flush it.
					 * Flush this one buffer the first time through.
					 * If that didn't work, flush the normal amount next time in the loop.
					 */
					JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 1, save_errno);
					if (is_mm && (ERR_GBLOFLOW == save_errno))
						wcs_recover(reg);
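					/* Wait for the buffer to go non-dirty, for up to UNIX_GETSPACEWAIT iterations.
					 * Every LCNT_INTERVAL iterations, snapshot the active-queue level and the
					 * current io/fsync latch holders into wcs_conflict_trace[]; if we ultimately
					 * fail, that history is reachable from a pro core via get_space_fail_array.
					 */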
					for (lcnt = 1; (0 != cr->dirty) && (UNIX_GETSPACEWAIT > lcnt); ++lcnt)
					{
						if (0 == (lcnt % LCNT_INTERVAL))
						{
							this_idx = (lcnt / LCNT_INTERVAL);
							assert(this_idx < WCS_CONFLICT_TRACE_ARRAYSIZE);
							wcs_conflict_trace[this_idx].wcs_active_lvl = cnl->wcs_active_lvl;
							wcs_conflict_trace[this_idx].io_in_prog_pid = GET_IO_LATCH_PID(csa);
							wcs_conflict_trace[this_idx].fsync_in_prog_pid = GET_FSYNC_LATCH_PID(csa);
						}
						get_space_fail_arridx = lcnt;
						max_count = ROUND_UP(cnl->wcs_active_lvl, csd->n_wrt_per_flu);
						/* Check if cache recovery is needed (could be set by another process in
						 * secshr_db_clnup finishing off a phase2 commit). If so, there is no point
						 * invoking wcs_wtstart as it will return right away. Instead return FALSE
						 * so cache-recovery can be triggered by the caller.
						 */
						if (csd->wc_blocked)
						{
							assert(gtm_white_box_test_case_enabled);
							return FALSE;
						}
						/* Loop till the active queue is exhausted */
						for (count = 0; 0 != cr->dirty && 0 != cnl->wcs_active_lvl
							&& max_count > count; count++)
						{
							BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush_retries);
							JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, save_errno);
							if (is_mm && (ERR_GBLOFLOW == save_errno))
								wcs_recover(reg);
						}
						/* Usually we want to sleep only if we need to wait on someone else, i.e.
						 *   (i) if we are waiting for another process' fsync to complete
						 *	 (we have seen jnl_fsync() take more than a minute, hence we wait
						 *	 for a max. of 2 mins, UNIX_GETSPACEWAIT);
						 *  (ii) if some concurrent writer has taken this cache-record out;
						 * (iii) if someone else is holding the io_in_prog lock.
						 * Right now we know of only one case where there is no point in waiting,
						 * which is if the cache-record is out of the active queue and is dirty.
						 * But since that is quite rare and we don't lose much in that case by
						 * sleeping, we do an unconditional sleep (only if cr is dirty).
						 */
						if (!cr->dirty)
							return TRUE;
						else
						{
							DEBUG_ONLY(cr_contents = *cr;)
							/* Assert that if the cache-record is dirty, it had better be in
							 * the active queue, or in the process of being flushed by a
							 * concurrent writer, or phase2 of the commit is in progress.
							 * If none of these holds, it should have become non-dirty by now
							 * even though we found it dirty a few lines above. Note that the
							 * cache-record could be in the process of being released by a
							 * concurrent writer; that is done by resetting 3 fields:
							 * cr->epid, cr->dirty, cr->interlock. Since the write interlock
							 * is the last field to be released, check it BEFORE dirty.
							 */
							assert(cr_contents.state_que.fl || cr_contents.epid
								|| cnl->in_wtstart || cr_contents.in_tend
								|| (LATCH_CLEAR != WRITE_LATCH_VAL(&cr_contents))
								|| !cr_contents.dirty);
							wcs_sleep(lcnt);
						}
						BG_TRACE_PRO_ANY(csa, spcfc_buffer_flush_loop);
					}
					if (0 == cr->dirty)
						return TRUE;
					INVOKE_C_STACK_APPROPRIATE(cr, csa, 1);
					WCS_GET_SPACE_RETURN_FAIL(wcs_conflict_trace, cr);
				} else
				{	/* buffer was locked */
					if (0 == cr->dirty)
					{
						BG_TRACE_ANY(csa, spcfc_buffer_flushed_during_lockwait);
						--fast_lock_count;
						assert(0 <= fast_lock_count);
						return TRUE;
					}
				}
			}
			if (retries & 0x3)	/* On all but every 4th pass, do a simple rel_quant */
				rel_quant();	/* Release processor to holder of lock (hopefully) */
			else
			{	/* On every 4th pass, we bide awhile */
				wcs_sleep(LOCK_SLEEP);
				/* If near the end of the loop, see if the target is dead and/or wake it up */
				if (RETRY_CASLATCH_CUTOFF == retries)
					performCASLatchCheck(&base->latch, TRUE);
			}
		}
		--fast_lock_count;
		assert(0 <= fast_lock_count);
		if (0 == cr->dirty)
			return TRUE;
	}
	if (ENOSPC == save_errno)
		rts_error(VARLSTCNT(7) ERR_WAITDSKSPACE, 4, process_id, to_wait, DB_LEN_STR(reg), save_errno);
	else
		assert(FALSE);
	INVOKE_C_STACK_APPROPRIATE(cr, csa, 2);
	WCS_GET_SPACE_RETURN_FAIL(wcs_conflict_trace, cr);
}
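/* Illustrative, hypothetical caller sketch (not part of this module): per the comments above, a caller
 * that gets FALSE back is expected to mark the cache blocked so cache recovery gets triggered, e.g.
 *
 *	if (!wcs_get_space(reg, 1, NULL))
 *	{
 *		assert(csa->now_crit);
 *		SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
 *	}
 *
 * after which wcs_recover() would run on a subsequent grab of crit.
 */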