282 lines
8.7 KiB
C
282 lines
8.7 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2012 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
|
|
#include "cdb_sc.h"
|
|
#include "gdsroot.h"
|
|
#include "gdsblk.h"
|
|
#include "gtm_facility.h"
|
|
#include "fileinfo.h"
|
|
#include "gdsbt.h"
|
|
#include "gdsfhead.h"
|
|
#include "filestruct.h"
|
|
#include "jnl.h"
|
|
#include "gdsblkops.h"
|
|
#include "gdskill.h"
|
|
#include "gdscc.h"
|
|
#include "copy.h"
|
|
#include "interlock.h"
|
|
#include "muextr.h"
|
|
#include "mu_reorg.h"
|
|
|
|
/* Include prototypes */
|
|
#include "t_end.h"
|
|
#include "t_retry.h"
|
|
#include "mupip_size.h"
|
|
#include "util.h"
|
|
#include "t_begin.h"
|
|
#include "op.h"
|
|
#include "gvcst_protos.h" /* for gvcst_rtsib,gvcst_search prototype */
|
|
#include "gvcst_bmp_mark_free.h"
|
|
#include "gvcst_kill_sort.h"
|
|
#include "gtmmsg.h"
|
|
#include "add_inter.h"
|
|
#include "t_abort.h"
|
|
#include "sleep_cnt.h"
|
|
#include "wcs_sleep.h"
|
|
#include "memcoherency.h"
|
|
|
|
#include "gtm_time.h"
|
|
#include "mvalconv.h"
|
|
#include "t_qread.h"
|
|
#include "longset.h" /* needed for cws_insert.h */
|
|
#include "hashtab_int4.h"
|
|
#include "cws_insert.h"
|
|
#include <math.h>
|
|
|
|
error_def(ERR_GBLNOEXIST);
|
|
error_def(ERR_MUSIZEFAIL);
|
|
|
|
GBLREF bool mu_ctrlc_occurred;
|
|
GBLREF bool mu_ctrly_occurred;
|
|
GBLREF gv_namehead *gv_target;
|
|
GBLREF unsigned int t_tries;
|
|
GBLREF int4 process_id;
|
|
GBLREF inctn_opcode_t inctn_opcode;
|
|
|
|
#define MAX_RECS_PER_BLK 65535
|
|
#define MAX_RELIABLE 10000 /* Used to tweak the error estimates */
|
|
#define EPS 1e-6
|
|
#define SQR(X) ((double)(X) * (double)(X))
|
|
#define ROUND(X) ((int)((X) + 0.5)) /* c89 does not have round() and some Solaris machines uses that compiler */
|
|
|
|
typedef struct
|
|
{ /* cumulative stats */
|
|
int4 n; /* number of samples */
|
|
double W[MAX_BT_DEPTH + 1]; /* Sum of the importance values of samples for each depth level */
|
|
double w_mu[MAX_BT_DEPTH + 1]; /* The mean of importance values. It is used to calculate w_variance */
|
|
double w_variance[MAX_BT_DEPTH + 1];/* The variance of importance values. It is used to calculate effective sample size */
|
|
double mu[MAX_BT_DEPTH + 1]; /* mu[j] := mean of weighted r[j]'s over previous n traversals.
|
|
It is the expected number of records at depth j
|
|
* Note: mu_n = mu_{n-1} + w_n/W_n*(r_n - M_{n-1})
|
|
*/
|
|
double S[MAX_BT_DEPTH + 1]; /* S[j] := sum of w_i*(r_i[j] - M[j])^2 over previous traversals.
|
|
* Note: S_n = S_{n-1} + w_n*(r_n - M_n)*(r_n - M_{n-1})
|
|
* Later, S values are divided by W values to give plugin estimate of the variance.
|
|
* Subsequently they are divided by the effective sample size to give the variance
|
|
* of the mean
|
|
*/
|
|
/* Final estimates */
|
|
double blktot[MAX_BT_DEPTH + 1]; /* estimated #blocks at each level */
|
|
double blkerr[MAX_BT_DEPTH + 1]; /* approximate variance of blktot */
|
|
double rectot[MAX_BT_DEPTH + 1]; /* estimated #records at each level */
|
|
double B; /* estimated total blocks */
|
|
double error; /* approximate error in estimate B */
|
|
double R; /* estimated total records */
|
|
} stat_t;
|
|
|
|
STATICFNDCL void clear_vector_impsmpl(double *v);
|
|
STATICFNDCL void init_stats_impsmpl(stat_t *stat);
|
|
STATICFNDCL void finalize_stats_impsmpl(stat_t *stat);
|
|
STATICFNDCL void accum_stats_impsmpl(stat_t *stat, double *r);
|
|
|
|
STATICFNDEF void clear_vector_impsmpl(double *v)
|
|
{
|
|
int j;
|
|
for (j = 0; j <= MAX_BT_DEPTH; j++)
|
|
v[j] = 0;
|
|
}
|
|
|
|
|
|
STATICFNDEF void init_stats_impsmpl(stat_t *stat)
|
|
{
|
|
stat->n = 0;
|
|
clear_vector_impsmpl(stat->W);
|
|
clear_vector_impsmpl(stat->w_mu);
|
|
clear_vector_impsmpl(stat->w_variance);
|
|
clear_vector_impsmpl(stat->mu);
|
|
clear_vector_impsmpl(stat->S);
|
|
clear_vector_impsmpl(stat->blktot);
|
|
clear_vector_impsmpl(stat->blkerr);
|
|
clear_vector_impsmpl(stat->rectot);
|
|
}
|
|
|
|
/*
|
|
* Importance Sampling
|
|
*/
|
|
int4 mu_size_impsample(mval *gn, int4 M, int4 seed)
|
|
{
|
|
enum cdb_sc status;
|
|
trans_num ret_tn;
|
|
int k, h;
|
|
boolean_t verify_reads;
|
|
boolean_t tn_aborted;
|
|
unsigned int lcl_t_tries;
|
|
double r[MAX_BT_DEPTH + 1]; /* r[j] is #records in level j block of current traversal */
|
|
stat_t rstat, ustat;
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
inctn_opcode = inctn_invalid_op;
|
|
op_gvname(VARLSTCNT(1) gn);
|
|
if (0 == gv_target->root)
|
|
{ /* Global does not exist (online rollback). Not an error. */
|
|
gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
|
|
return EXIT_NRM;
|
|
}
|
|
if (!seed)
|
|
seed = (int4)(time(0) * process_id);
|
|
srand48(seed);
|
|
|
|
/* do M random traversals */
|
|
init_stats_impsmpl(&rstat);
|
|
for (k = 1; k <= M; k++)
|
|
{
|
|
if (mu_ctrlc_occurred || mu_ctrly_occurred)
|
|
return EXIT_ERR;
|
|
t_begin(ERR_MUSIZEFAIL, 0);
|
|
for (;;)
|
|
{
|
|
clear_vector_impsmpl(r);
|
|
if (cdb_sc_normal != (status = rand_traverse(r)))
|
|
{
|
|
assert(CDB_STAGNATE > t_tries);
|
|
t_retry(status);
|
|
continue;
|
|
}
|
|
gv_target->clue.end = 0;
|
|
gv_target->hist.h[0] = gv_target->hist.h[1]; /* No level 0 block to validate */
|
|
DEBUG_ONLY(lcl_t_tries = t_tries);
|
|
if ((trans_num)0 == (ret_tn = t_end(&gv_target->hist, NULL, TN_NOT_SPECIFIED)))
|
|
{
|
|
ABORT_TRANS_IF_GBL_EXIST_NOMORE(lcl_t_tries, tn_aborted);
|
|
if (tn_aborted)
|
|
{ /* Global does not exist (online rollback). Not an error. */
|
|
gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
|
|
return EXIT_NRM;
|
|
}
|
|
continue;
|
|
}
|
|
accum_stats_impsmpl(&rstat, r);
|
|
break;
|
|
}
|
|
}
|
|
finalize_stats_impsmpl(&rstat);
|
|
|
|
/* display rstat data
|
|
* Showing the error as 2 standard deviations which is a 95% confidence interval for the
|
|
* mean number of blocks at each level
|
|
*/
|
|
util_out_print("Number of generated samples = !UL", FLUSH, rstat.n);
|
|
util_out_print("Level Blocks 2 sigma(+/-)", FLUSH);
|
|
for (h = MAX_BT_DEPTH; (h >= 0) && (rstat.blktot[h] < EPS); h--);
|
|
for ( ; h > 0; h--)
|
|
util_out_print("!5UL !15UL !15UL ~ !3UL%", FLUSH, h, (int)ROUND(rstat.blktot[h]),
|
|
(int)ROUND(sqrt(rstat.blkerr[h])*2),
|
|
(int)ROUND(sqrt(rstat.blkerr[h])*2/rstat.blktot[h]*100.0)
|
|
);
|
|
util_out_print("!5UL !15UL !15UL ~ !3UL%", FLUSH, h, (int)ROUND(rstat.blktot[h]),
|
|
(int)ROUND(sqrt(rstat.blkerr[h])*2),
|
|
(int)ROUND(sqrt(rstat.blkerr[h])*2/rstat.blktot[h]*100.0)
|
|
);
|
|
util_out_print("Total !15UL !15UL ~ !3UL%", FLUSH, (int)ROUND(rstat.B),
|
|
(int)ROUND(sqrt(rstat.error)*2),
|
|
(int)ROUND(sqrt(rstat.error)*2/rstat.B*100.0)
|
|
);
|
|
|
|
return EXIT_NRM;
|
|
}
|
|
|
|
|
|
STATICFNDEF void accum_stats_impsmpl(stat_t *stat, double *r)
|
|
{
|
|
int l, root_level, n;
|
|
double mu0, w_mu0, w[MAX_BT_DEPTH + 1] /* importance */;
|
|
|
|
++stat->n;
|
|
for (l = MAX_BT_DEPTH; (l >= 0) && (r[l] < EPS); l--)
|
|
w[l] = 0;
|
|
root_level = l;
|
|
assert(root_level >= 0);
|
|
w[root_level] = 1;
|
|
for (l = root_level - 1; l >= 1; l--)
|
|
w[l] = w[l + 1] * r[l + 1]; /* TODO consider using log to avoid overflow if it becomes an issue */
|
|
w[0] = 0; /* computing #blks (e.g #recs in lvl 1+), not #recs in lvl 0+ */
|
|
|
|
for (l = 1; l <= root_level; l++)
|
|
{
|
|
stat->W[l] += w[l];
|
|
w_mu0 = stat->w_mu[l];
|
|
stat->w_mu[l] += (w[l] - stat->w_mu[l])/stat->n;
|
|
stat->w_variance[l] += (w[l] - stat->w_mu[l])*(w[l] - w_mu0);
|
|
mu0 = stat->mu[l];
|
|
stat->mu[l] += w[l]/stat->W[l]*(r[l] - stat->mu[l]);
|
|
stat->S[l] += w[l]*(r[l] - stat->mu[l])*(r[l] - mu0);
|
|
}
|
|
}
|
|
|
|
|
|
STATICFNDEF void finalize_stats_impsmpl(stat_t *stat)
|
|
{
|
|
int h;
|
|
double ess; /* effective sample size */
|
|
|
|
for (h = 1; h <= MAX_BT_DEPTH; h++)
|
|
if (stat->W[h] > 0)
|
|
{
|
|
/* ess = n / ( 1 + Var( w/mu(w) ) ).
|
|
* This comes from effective sample size for importance sampling in the literature*/
|
|
ess = stat->n / ( 1 + (stat->w_variance[h]/stat->n)/SQR(stat->w_mu[h]) );
|
|
/* Variance of the mean (mean referes to avg number of records per block) is
|
|
* Var(R)/N where N is effective sample size */
|
|
stat->S[h] /= stat->W[h];
|
|
stat->S[h] /= (ess + 1);
|
|
}
|
|
stat->W[0] = stat->n; /* for arithmetic below */
|
|
for (h = MAX_BT_DEPTH; (h >= 0) && (stat->mu[h] < EPS); h--);
|
|
assert(h >= 0); /* stat->mu[0] should remain zero */
|
|
stat->blktot[h] = 1;
|
|
stat->blkerr[h] = 0;
|
|
for (h-- ; h >= 0; h--)
|
|
{
|
|
stat->blktot[h] = stat->blktot[h + 1] * stat->mu[h + 1];
|
|
/* Var(XY) assuming X and Y are independent = E[X]^2*Var(Y) + E[Y]^2*Var(X) + Var(X)*Var(Y) */
|
|
stat->blkerr[h] = SQR(stat->mu[h + 1])*stat->blkerr[h + 1] + SQR(stat->blktot[h + 1])*stat->S[h + 1]
|
|
+ stat->blkerr[h + 1]*stat->S[h + 1];
|
|
}
|
|
stat->B = 0;
|
|
stat->error = 0;
|
|
for (h = 0; h <= MAX_BT_DEPTH; h++)
|
|
{
|
|
stat->B += stat->blktot[h];
|
|
stat->error += stat->blkerr[h];
|
|
}
|
|
stat->R = 0;
|
|
for (h = 0; h <= MAX_BT_DEPTH; h++)
|
|
{
|
|
stat->rectot[h] = stat->blktot[h] * stat->mu[h];
|
|
stat->R += stat->rectot[h];
|
|
}
|
|
}
|