Category: fix Problem Ref: Organization: Contact: Ake Sandgren Description: # Enter a detailed description including how the issue was resolved, why it was necessary, customer impact, etc. git-svn-id: svn://opensvn.adaptivecomputing.com/maui/trunk@85 3f5042e3-fb1d-0410-be18-d6ca2573e517
2691 lines
57 KiB
C
2691 lines
57 KiB
C
/*
|
|
*/
|
|
|
|
#include "moab.h"
|
|
#include "msched-proto.h"
|
|
#include "mprof-proto.h"
|
|
|
|
#define MAX_TRACES 8192
|
|
#define MAX_HOUR_SLOTS 15000
|
|
#define PMAX_SCALE 128
|
|
#define MAX_MEMORY_SIZE 20
|
|
|
|
#define DEFAULT_BEGINTIME 0
|
|
#define DEFAULT_ENDTIME MAX_MTIME
|
|
|
|
#define PROF_MAX_JOBSIZE 512
|
|
#define PROF_TIMEINT_COUNT 1345
|
|
#define PROF_TIMEINT_LENGTH 450
|
|
|
|
#define DEFAULT_TRACE_FLAGS 0
|
|
|
|
/* Standard Deviation Equation:
|
|
|
|
(Sum((xi - X)^2)/(n - 1))^.5
|
|
|
|
*/
|
|
|
|
|
|
typedef struct {
|
|
mulong FirstQueueTime;
|
|
mulong EarliestQueueTime;
|
|
mulong EarliestStartTime;
|
|
mulong LatestQueueTime;
|
|
mulong LatestStartTime;
|
|
mulong LatestCompletionTime;
|
|
|
|
int QueueTimeDistProfile[MAX_HOUR_SLOTS];
|
|
double QueueJobDepthProfile[MAX_HOUR_SLOTS];
|
|
double QueuePSDepthProfile[MAX_HOUR_SLOTS];
|
|
double ActiveJobDepthProfile[MAX_HOUR_SLOTS];
|
|
double ActiveNodeDepthProfile[MAX_HOUR_SLOTS];
|
|
double ActivePSDepthProfile[MAX_HOUR_SLOTS];
|
|
int CompletionTimeDistProfile[MAX_HOUR_SLOTS];
|
|
int TimeQueuedDistProfile[MAX_HOUR_SLOTS];
|
|
int XFactorDistProfile[1001];
|
|
int JobAccuracy[101];
|
|
int TotalAccSum;
|
|
int TotalJobCount;
|
|
double TotalPS;
|
|
|
|
int Acc5_99Sum;
|
|
int Acc5_99Count;
|
|
|
|
int JobSizeDist[PROF_MAX_JOBSIZE + 1];
|
|
double JobEfficDist[PROF_MAX_JOBSIZE + 1];
|
|
int JobLengthDist[PROF_TIMEINT_COUNT + 1];
|
|
int JobPHDist[1024];
|
|
|
|
double PSSizeDist[PROF_MAX_JOBSIZE + 1];
|
|
double PSLengthDist[PROF_TIMEINT_COUNT + 1];
|
|
double PSPHDist[1024];
|
|
} mprof_t;
|
|
|
|
mprof_t SystemProfile;
|
|
|
|
extern mclass_t MClass[];
|
|
extern mrm_t MRM[MAX_MRM];
|
|
extern mam_t MAM[MAX_MAM];
|
|
extern mre_t MRE[MAX_MRES << 2];
|
|
extern mres_t *MRes[MAX_MRES];
|
|
extern mrange_t MRange[MAX_MRANGE];
|
|
extern mckpt_t MCP;
|
|
extern sres_t SRes[];
|
|
extern sres_t OSRes[];
|
|
|
|
mrmfunc_t MRMFunc[MAX_MRMTYPE];
|
|
|
|
char TraceFile[MAX_MNAME];
|
|
char PlotFile[MAX_MNAME];
|
|
|
|
int PJobCountDistribution();
|
|
int PPSRequestDistribution();
|
|
int PPSRunDistribution();
|
|
int PQueueTimeDistribution();
|
|
int PJobLengthAccuracy();
|
|
int PXFactor();
|
|
int PMaxXFactor();
|
|
int ProfileMemoryUsage();
|
|
int PAccuracyDistribution();
|
|
int PJobEfficiency();
|
|
int PQOSSuccessRate();
|
|
|
|
int (*Function[])() = {
|
|
PJobCountDistribution,
|
|
PPSRequestDistribution,
|
|
PPSRunDistribution,
|
|
PQueueTimeDistribution,
|
|
PJobLengthAccuracy,
|
|
PXFactor,
|
|
PMaxXFactor,
|
|
ProfileMemoryUsage,
|
|
PAccuracyDistribution,
|
|
PJobEfficiency,
|
|
PQOSSuccessRate,
|
|
NULL };
|
|
|
|
char *ProfileType[] = {
|
|
"JobCount",
|
|
"PSRequest",
|
|
"PSRun",
|
|
"QueueTime",
|
|
"WCAccuracy",
|
|
"XFactor",
|
|
"MemUsage",
|
|
"WCAccuracyD",
|
|
"JobEfficiency",
|
|
"QOSSuccessRate",
|
|
0 };
|
|
|
|
enum {
|
|
prJobCount = 0,
|
|
prPSRequest,
|
|
prPSRun,
|
|
prQueueTime,
|
|
prWCAccuracy,
|
|
prXFactor,
|
|
prMaxXFactor,
|
|
prMemUsage,
|
|
prWCAccuracyD,
|
|
prJobEfficiency,
|
|
prQOSSuccessRate
|
|
};
|
|
|
|
FILE *plotfp;
|
|
|
|
extern mlog_t mlog;
|
|
extern mcfg_t MCfg[];
|
|
|
|
typedef struct {
|
|
char UserNameList[64][MAX_MNAME];
|
|
char GroupNameList[64][MAX_MNAME];
|
|
char AccountNameList[64][MAX_MNAME];
|
|
char QOSList[MAX_MLINE];
|
|
long StartTime;
|
|
long EndTime;
|
|
char Host[MAX_MNAME];
|
|
} mpcons_t;
|
|
|
|
int ProfileFunction;
|
|
|
|
extern mattrlist_t MAList;
|
|
|
|
mpcons_t Profile;
|
|
|
|
extern mgcred_t *MUser[MAX_MUSER + MAX_MHBUF];
|
|
extern mgcred_t MGroup[MAX_MGROUP + MAX_MHBUF];
|
|
extern mgcred_t MAcct[MAX_MACCT + MAX_MHBUF];
|
|
extern mnode_t *MNode[MAX_MNODE];
|
|
|
|
extern mstat_t MStat;
|
|
extern mqos_t MQOS[MAX_MQOS];
|
|
|
|
extern msched_t MSched;
|
|
msim_t MSim;
|
|
|
|
char DistFile[MAX_MNAME];
|
|
|
|
int ProfileMode;
|
|
|
|
unsigned long JobsatMemorySize[MAX_MEMORY_SIZE];
|
|
unsigned long NodesatMemorySize[MAX_MEMORY_SIZE];
|
|
double PSRequestatMemorySize[MAX_MEMORY_SIZE];
|
|
double PSRunatMemorySize[MAX_MEMORY_SIZE];
|
|
|
|
int TotalJobs;
|
|
unsigned long TotalNodes;
|
|
double TotalPSRequest;
|
|
double TotalPSRun;
|
|
|
|
extern mjob_t *MJob[];
|
|
extern mframe_t MFrame[MAX_MFRAME];
|
|
extern mpar_t MPar[];
|
|
extern mjobl_t MJobName[MAX_MJOB + MAX_MHBUF];
|
|
|
|
extern mx_t X;
|
|
|
|
extern const char *MNodeState[];
|
|
|
|
#include "mprof-stub.c"
|
|
|
|
|
|
|
|
|
|
|
|
int main(
|
|
|
|
int argc,
|
|
char *argv[])
|
|
|
|
{
|
|
mlog.logfp = stderr;
|
|
|
|
DBG(2,fCORE) DPrint("main()\n");
|
|
|
|
MPInitialize();
|
|
|
|
MPReadArgs(argc,argv);
|
|
|
|
MSched.Mode = msmProfile;
|
|
|
|
MStatProfInitialize(&MStat.P);
|
|
|
|
if (TraceFile[0] == '\0')
|
|
{
|
|
fprintf(stderr,"USAGE ERROR: (tracefile not specified)\n");
|
|
|
|
exit(1);
|
|
}
|
|
|
|
MPLoadTrace(TraceFile);
|
|
|
|
if (ProfileFunction == -1)
|
|
{
|
|
if (DistFile[0] != '\0')
|
|
{
|
|
PQueueDistribution();
|
|
}
|
|
|
|
PJobCountDistribution();
|
|
|
|
PBFCountDistribution();
|
|
|
|
PPSRequestDistribution();
|
|
|
|
PPSRunDistribution();
|
|
|
|
PQueueTimeDistribution();
|
|
|
|
PJobLengthAccuracy();
|
|
|
|
PXFactor();
|
|
|
|
PMaxXFactor();
|
|
|
|
ProfileMemoryUsage();
|
|
|
|
PAccuracyDistribution(ProfileMode);
|
|
|
|
PJobEfficiency();
|
|
|
|
PQOSSuccessRate();
|
|
}
|
|
else
|
|
{
|
|
switch (ProfileFunction)
|
|
{
|
|
case prJobCount:
|
|
|
|
PJobCountDistribution();
|
|
|
|
break;
|
|
|
|
case prPSRequest:
|
|
|
|
PPSRequestDistribution();
|
|
|
|
break;
|
|
|
|
case prPSRun:
|
|
|
|
PPSRunDistribution();
|
|
|
|
break;
|
|
|
|
case prQueueTime:
|
|
|
|
PQueueTimeDistribution();
|
|
|
|
break;
|
|
|
|
case prWCAccuracy:
|
|
|
|
PJobLengthAccuracy();
|
|
|
|
break;
|
|
|
|
case prXFactor:
|
|
|
|
PXFactor();
|
|
|
|
break;
|
|
|
|
case prMaxXFactor:
|
|
|
|
PMaxXFactor();
|
|
|
|
break;
|
|
|
|
case prMemUsage:
|
|
|
|
ProfileMemoryUsage();
|
|
|
|
break;
|
|
|
|
case prWCAccuracyD:
|
|
|
|
PAccuracyDistribution(ProfileMode);
|
|
|
|
break;
|
|
|
|
case prJobEfficiency:
|
|
|
|
PJobEfficiency();
|
|
|
|
break;
|
|
|
|
case prQOSSuccessRate:
|
|
|
|
PQOSSuccessRate();
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
} /* END switch (ProfileFunction) */
|
|
} /* END else (ProfileFunction == -1) */
|
|
|
|
/* profile user and group */
|
|
|
|
if (ProfileFunction == -1)
|
|
{
|
|
ProfileUser();
|
|
|
|
ProfileGroup();
|
|
|
|
ProfileAccount();
|
|
}
|
|
else
|
|
{
|
|
switch(ProfileFunction)
|
|
{
|
|
case prPSRequest:
|
|
case prPSRun:
|
|
case prJobCount:
|
|
case prJobEfficiency:
|
|
|
|
ProfileUser();
|
|
|
|
ProfileGroup();
|
|
|
|
ProfileAccount();
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return(SUCCESS);
|
|
} /* END main() */
|
|
|
|
|
|
|
|
|
|
int MPInitialize()
|
|
|
|
{
|
|
int index;
|
|
|
|
time_t tmpTime;
|
|
|
|
const char *FName = "MPInitialize";
|
|
|
|
DBG(2,fCORE) DPrint("%s()\n",
|
|
FName);
|
|
|
|
time(&tmpTime);
|
|
MSched.Time = (long)tmpTime;
|
|
|
|
MUBuildPList(MCfg,MParam);
|
|
|
|
mlog.Threshold = 1;
|
|
|
|
/* load state attribute values */
|
|
|
|
for (index = 0;MNodeState[index] != NULL;index++)
|
|
strcpy(MAList[eNodeState][index],MNodeState[index]);
|
|
|
|
DistFile[0] = '\0';
|
|
|
|
/* initialize plot values */
|
|
|
|
MStat.P.MaxTime = DEFAULT_MAXTIME;
|
|
MStat.P.MinTime = DEFAULT_MINTIME;
|
|
MStat.P.TimeStepCount = DEFAULT_TIMESCALE;
|
|
|
|
MStat.P.MaxNode = PROF_MAX_JOBSIZE;
|
|
MStat.P.MinNode = DEFAULT_MINNODE;
|
|
MStat.P.NodeStepCount = MDEF_NODESCALE;
|
|
|
|
MStat.P.AccuracyScale = DEFAULT_ACCURACYSCALE;
|
|
|
|
MStat.P.BeginTime = DEFAULT_BEGINTIME;
|
|
MStat.P.EndTime = DEFAULT_ENDTIME;
|
|
|
|
ProfileFunction = -1;
|
|
|
|
Profile.QOSList[0] = '\0';
|
|
ProfileMode = 0;
|
|
|
|
/* set initial profile */
|
|
|
|
Profile.UserNameList[0][0] = '\0';
|
|
Profile.GroupNameList[0][0] = '\0';
|
|
Profile.AccountNameList[0][0] = '\0';
|
|
|
|
Profile.StartTime = 0;
|
|
Profile.EndTime = MAX_MTIME;
|
|
|
|
memset(MUser,0,sizeof(MUser));
|
|
memset(MGroup,0,sizeof(MGroup));
|
|
memset(MAcct,0,sizeof(MAcct));
|
|
|
|
memset(JobsatMemorySize,0,sizeof(JobsatMemorySize));
|
|
memset(NodesatMemorySize,0,sizeof(NodesatMemorySize));
|
|
memset(PSRequestatMemorySize,0,sizeof(PSRequestatMemorySize));
|
|
memset(PSRunatMemorySize,0,sizeof(PSRunatMemorySize));
|
|
|
|
memset(&SystemProfile,0,sizeof(SystemProfile));
|
|
|
|
SystemProfile.FirstQueueTime = MAX_MTIME;
|
|
SystemProfile.EarliestQueueTime = MAX_MTIME;
|
|
SystemProfile.EarliestStartTime = MAX_MTIME;
|
|
|
|
TotalJobs = 0;
|
|
TotalNodes = 0;
|
|
TotalPSRequest = 0.0;
|
|
TotalPSRun = 0.0;
|
|
|
|
MPar[0].FSC.FSPolicy = FALSE;
|
|
|
|
return(SUCCESS);
|
|
} /* END Initialize() */
|
|
|
|
|
|
|
|
|
|
|
|
int OpenPlotFile(
|
|
|
|
char *FileName)
|
|
|
|
{
|
|
const char *FName = "OpenPlotFile";
|
|
|
|
DBG(3,fCORE) DPrint("%s(%s)\n",
|
|
FName,
|
|
FileName);
|
|
|
|
if ((plotfp = fopen(FileName,"w+")) == NULL)
|
|
{
|
|
DBG(0,fCORE) DPrint("ERROR: cannot open plotfile '%s', errno: %d\n",
|
|
FileName,
|
|
errno);
|
|
|
|
perror("cannot open file");
|
|
|
|
/* dump plot to stderr */
|
|
|
|
plotfp = stderr;
|
|
|
|
return(FAILURE);
|
|
}
|
|
|
|
return(SUCCESS);
|
|
} /* END OpenPlotFile() */
|
|
|
|
|
|
|
|
|
|
|
|
int MPReadArgs(
|
|
|
|
int argc,
|
|
char *argv[])
|
|
|
|
{
|
|
int Flag;
|
|
int index;
|
|
|
|
extern int opterr;
|
|
extern int optind;
|
|
extern int optopt;
|
|
extern char *optarg;
|
|
|
|
char *ptr;
|
|
|
|
const char *FName = "MPReadArgs";
|
|
|
|
DBG(2,fCONFIG) DPrint("%s(%d,argv)\n",
|
|
FName,
|
|
argc);
|
|
|
|
while ((Flag = getopt(argc,argv,"a:A:b:c:d:D:e:fg:hH:mn:N:p:P:qQ:rRs:S:t:T:u:x?")) != -1)
|
|
{
|
|
switch (Flag)
|
|
{
|
|
case 'a':
|
|
|
|
MStat.P.AccuracyScale = (int)strtol(optarg,NULL,0);
|
|
|
|
if (MStat.P.AccuracyScale > MAX_ACCURACY)
|
|
{
|
|
MStat.P.AccuracyScale = MAX_ACCURACY;
|
|
|
|
DBG(2,fCONFIG) DPrint("ALERT: accuracy scale reduced to MAXACCURACY (%d)\n",
|
|
MAX_ACCURACY);
|
|
}
|
|
else if (MStat.P.AccuracyScale == 0)
|
|
{
|
|
DBG(2,fCONFIG) DPrint("ERROR: invalid accuracy scale specified (%s)\n",
|
|
optarg);
|
|
|
|
exit(1);
|
|
}
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: accuracy scale set to %ld\n",
|
|
MStat.P.AccuracyScale);
|
|
|
|
break;
|
|
|
|
case 'A':
|
|
|
|
ptr = strtok(optarg,":");
|
|
|
|
index = 0;
|
|
|
|
while (ptr != NULL)
|
|
{
|
|
strcpy(Profile.AccountNameList[index],ptr);
|
|
|
|
fprintf(stdout,"profiling account '%s'\n",
|
|
ptr);
|
|
|
|
index++;
|
|
|
|
ptr = strtok(NULL,":");
|
|
}
|
|
|
|
Profile.AccountNameList[index][0] = '\0';
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
/* begin time */
|
|
|
|
if (MUStringToE(optarg,&Profile.StartTime) != SUCCESS)
|
|
{
|
|
fprintf(stderr,"ERROR: invalid BeginTime specified, '%s'\n",
|
|
optarg);
|
|
|
|
exit(1);
|
|
}
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: BeginTime set to %ld\n",
|
|
Profile.StartTime);
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
MStat.P.TraceCount = atoi(optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: TraceCount set to %ld\n",
|
|
MStat.P.TraceCount);
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
strcpy(DistFile,optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: DistFile set to %s\n",
|
|
DistFile);
|
|
|
|
break;
|
|
|
|
case 'D':
|
|
|
|
mlog.Threshold = atoi(optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: LOGLEVEL set to %d\n",
|
|
mlog.Threshold);
|
|
|
|
break;
|
|
|
|
case 'e':
|
|
|
|
/* end time */
|
|
|
|
if (MUStringToE(optarg,&Profile.EndTime) != SUCCESS)
|
|
{
|
|
fprintf(stderr,"ERROR: invalid EndTime specified, '%s'\n",
|
|
optarg);
|
|
|
|
exit(1);
|
|
}
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: EndTime set to %ld\n",
|
|
Profile.EndTime);
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
MSched.TraceFlags |= (1 << tfFixCorruption);
|
|
|
|
break;
|
|
|
|
case 'g':
|
|
|
|
ptr = strtok(optarg,":");
|
|
|
|
index = 0;
|
|
|
|
while (ptr != NULL)
|
|
{
|
|
strcpy(Profile.GroupNameList[index],ptr);
|
|
|
|
fprintf(stdout,"profiling group '%s'\n",
|
|
ptr);
|
|
|
|
index++;
|
|
|
|
ptr = strtok(NULL,":");
|
|
}
|
|
|
|
Profile.GroupNameList[index][0] = '\0';
|
|
|
|
break;
|
|
|
|
case 'H':
|
|
|
|
strcpy(Profile.Host,optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: profile host set to %s\n",
|
|
Profile.Host);
|
|
|
|
break;
|
|
|
|
case 'm':
|
|
|
|
ProfileMode |= (1 << mMatrix);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: matrix mode enabled\n");
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
MStat.P.NodeStepCount = atoi(optarg);
|
|
|
|
if (MStat.P.NodeStepCount > PMAX_SCALE)
|
|
{
|
|
MStat.P.NodeStepCount = PMAX_SCALE;
|
|
|
|
fprintf(stderr,"WARNING: node step count reduced to %d\n",
|
|
PMAX_SCALE);
|
|
}
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: node step count set to %ld\n",
|
|
MStat.P.NodeStepCount);
|
|
|
|
break;
|
|
|
|
case 'N':
|
|
|
|
MStat.P.MaxNode = atoi(optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: max node set to %ld\n",
|
|
MStat.P.MaxNode);
|
|
|
|
break;
|
|
|
|
case 'o':
|
|
|
|
MStat.P.NodeStepSize = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
strcpy(PlotFile,optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: plotfile set to %s\n",
|
|
PlotFile);
|
|
|
|
OpenPlotFile(PlotFile);
|
|
|
|
break;
|
|
|
|
case 'P':
|
|
|
|
for (index = 0;ProfileType[index] != NULL;index++)
|
|
{
|
|
if (!strcmp(ProfileType[index],optarg))
|
|
break;
|
|
}
|
|
|
|
if (ProfileType[index] == NULL)
|
|
{
|
|
fprintf(stderr,"ERROR: invalid profile specified (%s)\n",
|
|
optarg);
|
|
|
|
fprintf(stderr,"valid profiles: ");
|
|
|
|
for (index = 0;ProfileType[index] != NULL;index++)
|
|
{
|
|
fprintf(stderr,"%s ",
|
|
ProfileType[index]);
|
|
}
|
|
|
|
fprintf(stderr,"\n\n");
|
|
|
|
exit(1);
|
|
}
|
|
else
|
|
{
|
|
ProfileFunction = index;
|
|
}
|
|
|
|
break;
|
|
|
|
case 'q':
|
|
|
|
ProfileMode |= (1 << mSystemQueue);
|
|
|
|
break;
|
|
|
|
case 'Q':
|
|
|
|
MUStrCpy(Profile.QOSList,optarg,sizeof(Profile.QOSList));
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
ProfileMode |= (1 << mUseRunTime);
|
|
|
|
break;
|
|
|
|
case 'R':
|
|
|
|
ProfileMode |= (1 << mUseRemoved);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: using removed jobs\n");
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
MStat.P.TimeStepCount = atoi(optarg);
|
|
|
|
if (MStat.P.TimeStepCount > PMAX_SCALE)
|
|
{
|
|
MStat.P.TimeStepCount = PMAX_SCALE;
|
|
|
|
fprintf(stderr,"WARNING: time step count reduced to %d\n",
|
|
PMAX_SCALE);
|
|
}
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: time step count set to %ld\n",
|
|
MStat.P.TimeStepCount);
|
|
|
|
break;
|
|
|
|
case 'S':
|
|
|
|
MStat.P.TimeStepSize = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
strcpy(TraceFile,optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: tracefile set to %s\n",
|
|
TraceFile);
|
|
|
|
break;
|
|
|
|
case 'T':
|
|
|
|
MStat.P.MaxTime = atoi(optarg);
|
|
|
|
DBG(2,fCONFIG) DPrint("INFO: max time set to %ld\n",
|
|
MStat.P.MaxTime);
|
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
ptr = strtok(optarg,":");
|
|
|
|
index = 0;
|
|
|
|
while (ptr != NULL)
|
|
{
|
|
strcpy(Profile.UserNameList[index],ptr);
|
|
|
|
fprintf(stdout,"profiling user '%s'\n",
|
|
ptr);
|
|
|
|
index++;
|
|
|
|
ptr = strtok(NULL,":");
|
|
}
|
|
|
|
Profile.UserNameList[index][0] = '\0';
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
ProfileMode |= (1 << mTrueXFactor);
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
case '?':
|
|
|
|
fprintf(stderr,"Usage: %s [FLAGS]\n",
|
|
argv[0]);
|
|
|
|
fprintf(stderr," [ -a <ACCURACY_SCALE> ]\n");
|
|
fprintf(stderr," [ -A <ACCOUNT>[:<ACCOUNT>]...]\n");
|
|
fprintf(stderr," [ -b <BEGIN TIME> ]\n");
|
|
fprintf(stderr," [ -c <TRACECOUNT> ]\n");
|
|
fprintf(stderr," [ -d <DISTFILE> ]\n");
|
|
fprintf(stderr," [ -D <LOGLEVEL> ]\n");
|
|
fprintf(stderr," [ -e <END TIME> ]\n");
|
|
fprintf(stderr," [ -f ] // FIX CORRUPT TRACE INFO\n");
|
|
fprintf(stderr," [ -g <GROUP>[:<GROUP>]...]\n");
|
|
fprintf(stderr," [ -h ] // HELP\n");
|
|
fprintf(stderr," [ -H <PROFILE_HOST> ]\n");
|
|
fprintf(stderr," [ -m ] // ENABLE MATRIX MODE\n");
|
|
fprintf(stderr," [ -n <PROC_STEP_COUNT> ]\n");
|
|
fprintf(stderr," [ -N <MAX_PROC> ]\n");
|
|
fprintf(stderr," [ -o <PROC_STEP_SIZE> ]\n");
|
|
fprintf(stderr," [ -p <PLOTFILE> ]\n");
|
|
fprintf(stderr," [ -P <PROFILE_CHART_LIST> ]\n");
|
|
fprintf(stderr," [ -q ] // USE 'SYSTEMQUEUETIME' FOR XFACTOR CALCULATION\n");
|
|
fprintf(stderr," [ -Q <QOS> ]\n");
|
|
fprintf(stderr," [ -r ] // USE 'RUN' TIME (NOT 'REQUEST' TIME)\n");
|
|
fprintf(stderr," [ -R ] // INCLUDE REMOVED JOB\n");
|
|
fprintf(stderr," [ -s ] <TIME_STEP_COUNT>\n");
|
|
fprintf(stderr," [ -S ] <TIME_STEP_SIZE>\n");
|
|
fprintf(stderr," [ -t <TRACEFILE> ]\n");
|
|
fprintf(stderr," [ -T <MAX_TIME> ]\n");
|
|
fprintf(stderr," [ -u <USER>[:<USER>]...]\n");
|
|
fprintf(stderr," [ -x ] // USE CANONICAL XFACTOR CALCULATION\n");
|
|
fprintf(stderr," [ -? ] \n");
|
|
|
|
exit(1);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
fprintf(stderr,"WARNING: unknown flag '%c'\n",
|
|
Flag);
|
|
|
|
exit(1);
|
|
|
|
break;
|
|
} /* END switch(Flag) */
|
|
} /* END while (Flag) */
|
|
|
|
return(SUCCESS);
|
|
} /* END MPReadArgs() */
|
|
|
|
|
|
|
|
|
|
|
|
int PJobCountDistribution()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(3,fUI) DPrint("PJobCountDistribution()\n");
|
|
|
|
MStatBuildGrid(stJobCount,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
} /* END PJobCountDistribution() */
|
|
|
|
|
|
|
|
|
|
|
|
int PBFCountDistribution()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(3,fUI) DPrint("PBFCountDistribution()\n");
|
|
|
|
MStatBuildGrid(stBFCount,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
int PQueueDistribution()
|
|
|
|
{
|
|
int start;
|
|
int end;
|
|
|
|
int hindex;
|
|
|
|
int count;
|
|
|
|
double AvgSubmit;
|
|
double AvgQueuedJobs;
|
|
double AvgQueuedPS;
|
|
|
|
double AvgActiveJobs;
|
|
double AvgActiveNodes;
|
|
double AvgActivePS;
|
|
|
|
double AvgComplJobs;
|
|
|
|
double TotalQueueTime;
|
|
|
|
double TotalXFactor;
|
|
|
|
int ActiveSlots;
|
|
int BackLogSlots;
|
|
int TotalSlots;
|
|
|
|
int MaxSubmit;
|
|
int MaxQueuedJobs;
|
|
int MaxQueuedPS;
|
|
int MaxActiveJobs;
|
|
int MaxActiveNodes;
|
|
int MaxActivePS;
|
|
int MaxComplJobs;
|
|
|
|
int tindex;
|
|
int nindex;
|
|
|
|
int pindex;
|
|
int powerjobs;
|
|
|
|
int jcmode;
|
|
int nsmode;
|
|
|
|
int jobcount;
|
|
|
|
int totaltime;
|
|
|
|
int cumjobs;
|
|
double cumns;
|
|
int cumnodes;
|
|
|
|
char Time[MAX_MNAME];
|
|
long ETime;
|
|
|
|
mprof_t *S;
|
|
|
|
FILE *dfp;
|
|
|
|
time_t tmpTime1;
|
|
time_t tmpTime2;
|
|
|
|
const char *FName = "PQueueDistribution";
|
|
|
|
DBG(2,fUI) DPrint("%s()\n",
|
|
FName);
|
|
|
|
AvgSubmit = 0.0;
|
|
AvgQueuedJobs = 0.0;
|
|
AvgQueuedPS = 0.0;
|
|
|
|
AvgActiveJobs = 0.0;
|
|
AvgActiveNodes = 0.0;
|
|
AvgActivePS = 0.0;
|
|
|
|
AvgComplJobs = 0.0;
|
|
|
|
MaxSubmit = 0;
|
|
MaxQueuedJobs = 0;
|
|
MaxQueuedPS = 0;
|
|
|
|
MaxActiveJobs = 0;
|
|
MaxActiveNodes = 0;
|
|
MaxActivePS = 0;
|
|
|
|
MaxComplJobs = 0;
|
|
|
|
count = 0;
|
|
|
|
S = &SystemProfile;
|
|
|
|
if ((dfp = fopen(DistFile,"w+")) == NULL)
|
|
{
|
|
DBG(0,fCORE) DPrint("ERROR: cannot open distfile '%s', errno: %d\n",
|
|
DistFile,
|
|
errno);
|
|
|
|
perror("cannot open file");
|
|
|
|
/* dump plot to stderr */
|
|
|
|
dfp = stderr;
|
|
|
|
return(FAILURE);
|
|
}
|
|
|
|
/* display header */
|
|
|
|
fprintf(dfp,"Total Jobs: %d Period Profiled: %6.2f days\n",
|
|
S->TotalJobCount,
|
|
(double)(S->LatestStartTime - S->EarliestStartTime) / 86400);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
tmpTime1 = (time_t)Profile.StartTime;
|
|
tmpTime2 = (time_t)S->EarliestStartTime;
|
|
|
|
fprintf(dfp,"First Job Start: %s",
|
|
(Profile.StartTime > 0) ?
|
|
ctime(&tmpTime1) : ctime(&tmpTime2));
|
|
|
|
tmpTime1 = (time_t)Profile.EndTime;
|
|
tmpTime2 = (time_t)S->LatestStartTime;
|
|
|
|
fprintf(dfp,"Last Job Start: %s",
|
|
(Profile.EndTime < MAX_MTIME) ?
|
|
ctime(&tmpTime1) : ctime(&tmpTime2));
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
/* display workload distribution */
|
|
|
|
fprintf(dfp,"Job Size Distribution\n");
|
|
|
|
jcmode = 0;
|
|
nsmode = 0;
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%6s %6s %6s %6s %6s %6s %6s\n",
|
|
"Procs",
|
|
"Jobs",
|
|
"Effic",
|
|
"PctJob",
|
|
"PctPS",
|
|
"CumJob",
|
|
"CumPS");
|
|
|
|
jcmode = 0;
|
|
nsmode = 0;
|
|
|
|
cumnodes = 0;
|
|
cumjobs = 0;
|
|
cumns = 0;
|
|
|
|
for (nindex = 1;nindex <= MStat.P.MaxNode;nindex++)
|
|
{
|
|
cumjobs += S->JobSizeDist[nindex];
|
|
cumns += S->PSSizeDist[nindex];
|
|
cumnodes += nindex * S->JobSizeDist[nindex];
|
|
|
|
fprintf(dfp,"%06d %6d %6.3f %6.3f %6.3f %6.3f %6.3f\n",
|
|
nindex,
|
|
S->JobSizeDist[nindex],
|
|
(S->JobSizeDist[nindex] > 0) ?
|
|
S->JobEfficDist[nindex] / S->JobSizeDist[nindex] * 100.0 :
|
|
0.0,
|
|
(double)S->JobSizeDist[nindex] / S->TotalJobCount * 100.0,
|
|
S->PSSizeDist[nindex] / S->TotalPS * 100.0,
|
|
(double)cumjobs / S->TotalJobCount * 100.0,
|
|
cumns / S->TotalPS * 100.0);
|
|
|
|
if ((jcmode == 0) && (cumjobs >= S->TotalJobCount / 2))
|
|
jcmode = (nindex + 1);
|
|
|
|
if ((nsmode == 0) && (cumns >= S->TotalPS / 2))
|
|
nsmode = (nindex + 1);
|
|
} /* END for (nindex) */
|
|
|
|
powerjobs = 0;
|
|
|
|
for (pindex = 1;(1 << pindex) <= MStat.P.MaxNode;pindex++)
|
|
{
|
|
powerjobs += S->JobSizeDist[1 << pindex];
|
|
}
|
|
|
|
fprintf(dfp,"============\n");
|
|
|
|
fprintf(dfp,"Total Jobs: %d Serial: %6.2f%c Power: %6.2f%c Size Mean/Median (Job Count): %6.2f/%d (PH): %6.2f PH/%d nodes\n",
|
|
S->TotalJobCount,
|
|
(double)S->JobSizeDist[1] / S->TotalJobCount * 100.0,
|
|
'%',
|
|
(double)powerjobs / S->TotalJobCount * 100.0,
|
|
'%',
|
|
(double)cumnodes / S->TotalJobCount,
|
|
jcmode,
|
|
S->TotalPS / 3600.0 / S->TotalJobCount,
|
|
nsmode);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"Job Length Distribution\n");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%6s - %6s %6s %6s %6s %6s\n",
|
|
"Start",
|
|
"End",
|
|
"Jobs",
|
|
"PctJob",
|
|
"PctPS",
|
|
"CumJob");
|
|
|
|
jcmode = 0;
|
|
totaltime = 0;
|
|
|
|
cumjobs = 0;
|
|
|
|
for (tindex = 0;tindex <= MStat.P.MaxTime / PROF_TIMEINT_LENGTH;tindex++)
|
|
{
|
|
if (tindex > PROF_TIMEINT_COUNT)
|
|
break;
|
|
|
|
cumjobs += S->JobLengthDist[tindex];
|
|
|
|
fprintf(dfp,"%06.3f - %06.3f %6d %6.3f %6.3f %6.3f\n",
|
|
(double)tindex / 8.0,
|
|
(double)(tindex + 1) / 8.0 - .01,
|
|
S->JobLengthDist[tindex],
|
|
(double)S->JobLengthDist[tindex] / S->TotalJobCount * 100.0,
|
|
S->PSLengthDist[tindex] / S->TotalPS * 100.0,
|
|
(double)cumjobs / S->TotalJobCount * 100.0);
|
|
|
|
totaltime += S->JobLengthDist[tindex] * (tindex + 1) / 8.0;
|
|
|
|
if ((jcmode == 0) && (cumjobs >= S->TotalJobCount / 2))
|
|
jcmode = (tindex + 1);
|
|
} /* END for (tindex) */
|
|
|
|
fprintf(dfp,"============\n");
|
|
|
|
fprintf(dfp,"Total Jobs: %d Length Mean/Median: %6.2f/%.2f hours\n",
|
|
S->TotalJobCount,
|
|
(double)totaltime / S->TotalJobCount,
|
|
(double)jcmode / 8.0);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
/* display workload accuracy summary */
|
|
|
|
start = 1000 - (S->FirstQueueTime - S->EarliestQueueTime) / 3600;
|
|
|
|
end = (S->LatestQueueTime - S->FirstQueueTime) / 3600 + 1000;
|
|
|
|
fprintf(dfp,"Job Accuracy Distribution\n");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%8s %6s %6s %6s\n",
|
|
"Accuracy",
|
|
"Count",
|
|
"PctJob",
|
|
"CumPct");
|
|
|
|
jobcount = 0;
|
|
|
|
jcmode = 0;
|
|
|
|
for (hindex = 0;hindex <= 100;hindex++)
|
|
{
|
|
jobcount += S->JobAccuracy[hindex];
|
|
|
|
fprintf(dfp,"%8d %6d %6.2f %6.2f\n",
|
|
hindex,
|
|
S->JobAccuracy[hindex],
|
|
(double)S->JobAccuracy[hindex] / S->TotalJobCount * 100.0,
|
|
(double)jobcount / S->TotalJobCount * 100.0);
|
|
|
|
if ((jcmode == 0) && (jobcount >= S->TotalJobCount / 2))
|
|
jcmode = hindex;
|
|
}
|
|
|
|
fprintf(dfp,"============\n");
|
|
|
|
fprintf(dfp,"Total Jobs: %d Average WCA: %6.2f Acc5-99 Jobs: %d Average WCA: %6.2f Median: %d\n",
|
|
S->TotalJobCount,
|
|
(double)S->TotalAccSum / S->TotalJobCount,
|
|
S->Acc5_99Count,
|
|
(double)S->Acc5_99Sum / S->Acc5_99Count,
|
|
jcmode);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
/* display QTime distribution */
|
|
|
|
fprintf(dfp,"QueueTime Distribution\n");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%8s %6s %6s %6s\n",
|
|
"QTime",
|
|
"Count",
|
|
"PctJob",
|
|
"CumPct");
|
|
|
|
TotalQueueTime = 0.0;
|
|
|
|
jobcount = 0;
|
|
jcmode = 0;
|
|
|
|
for (hindex = 0;hindex <= 1000;hindex++)
|
|
{
|
|
jobcount += S->TimeQueuedDistProfile[hindex];
|
|
|
|
TotalQueueTime += ((hindex / 5) + .1) * S->TimeQueuedDistProfile[hindex];
|
|
|
|
fprintf(dfp,"%8.3f %6d %6.2f %6.2f\n",
|
|
(double)hindex / 5.0,
|
|
S->TimeQueuedDistProfile[hindex],
|
|
(double)S->TimeQueuedDistProfile[hindex] / S->TotalJobCount * 100.0,
|
|
(double)jobcount / S->TotalJobCount * 100.0);
|
|
|
|
if ((jcmode == 0) && (jobcount >= S->TotalJobCount / 2))
|
|
jcmode = hindex;
|
|
}
|
|
|
|
fprintf(dfp,"============\n");
|
|
|
|
fprintf(dfp,"Total Jobs: %d Average QTime (hours): %6.2f Median: %6.2f\n",
|
|
S->TotalJobCount,
|
|
(double)TotalQueueTime / S->TotalJobCount,
|
|
(double)jcmode / 5.0);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
/* display XFactor distribution */
|
|
|
|
fprintf(dfp,"XFactor Distribution\n");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%8s %6s %6s %6s\n\n",
|
|
"XFactor",
|
|
"Count",
|
|
"PctJob",
|
|
"CumPct");
|
|
|
|
TotalXFactor = 0.0;
|
|
|
|
jobcount = 0;
|
|
jcmode = 0;
|
|
|
|
for (hindex = 0;hindex <= 1000;hindex++)
|
|
{
|
|
jobcount += S->XFactorDistProfile[hindex];
|
|
|
|
TotalXFactor += ((double)(hindex / 10.0) + .05) * S->XFactorDistProfile[hindex];
|
|
|
|
fprintf(dfp,"%8.3f %6d %6.2f %6.2f\n",
|
|
(double)hindex / 10.0,
|
|
S->XFactorDistProfile[hindex],
|
|
(double)S->XFactorDistProfile[hindex] / S->TotalJobCount * 100.0,
|
|
(double)jobcount / S->TotalJobCount * 100.0);
|
|
|
|
if ((jcmode == 0) && (jobcount >= S->TotalJobCount / 2))
|
|
jcmode = hindex;
|
|
}
|
|
|
|
fprintf(dfp,"============\n");
|
|
|
|
fprintf(dfp,"Total Jobs: %d Average XFactor: %6.2f Median: %6.2f\n",
|
|
S->TotalJobCount,
|
|
(double)TotalXFactor / S->TotalJobCount,
|
|
(double)jcmode / 10.0);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
/* display time distribution */
|
|
|
|
fprintf(dfp,"Time Distribution\n\n");
|
|
|
|
fprintf(dfp,"%-19s %6s %6s %15s %6s %6s %15s %6s\n\n",
|
|
"Time",
|
|
"Submit",
|
|
"QJobs",
|
|
"QPS",
|
|
"RJobs",
|
|
"RProcs",
|
|
"RPS",
|
|
"CmplCt");
|
|
|
|
ActiveSlots = 0;
|
|
BackLogSlots = 0;
|
|
TotalSlots = 0;
|
|
|
|
for (hindex = start;hindex < end;hindex++)
|
|
{
|
|
TotalSlots++;
|
|
|
|
if (S->ActiveJobDepthProfile[hindex] > 0.0)
|
|
{
|
|
ActiveSlots++;
|
|
|
|
if (S->QueueJobDepthProfile[hindex] > 0.0)
|
|
BackLogSlots++;
|
|
}
|
|
|
|
if ((S->QueueJobDepthProfile[hindex] >= 1.0) || (S->ActiveJobDepthProfile[hindex] >= 1.0))
|
|
{
|
|
AvgSubmit += (double)S->QueueTimeDistProfile[hindex];
|
|
AvgQueuedJobs += S->QueueJobDepthProfile[hindex];
|
|
AvgQueuedPS += S->QueuePSDepthProfile[hindex];
|
|
|
|
AvgActiveJobs += S->ActiveJobDepthProfile[hindex];
|
|
AvgActiveNodes += S->ActiveNodeDepthProfile[hindex];
|
|
AvgActivePS += S->ActivePSDepthProfile[hindex];
|
|
|
|
AvgComplJobs += S->CompletionTimeDistProfile[hindex];
|
|
|
|
MaxSubmit = MAX(MaxSubmit,S->QueueTimeDistProfile[hindex]);
|
|
MaxQueuedJobs = MAX(MaxQueuedJobs,S->QueueJobDepthProfile[hindex]);
|
|
MaxQueuedPS = MAX(MaxQueuedPS,S->QueuePSDepthProfile[hindex]);
|
|
MaxActiveJobs = MAX(MaxActiveJobs,S->ActiveJobDepthProfile[hindex]);
|
|
MaxActiveNodes = MAX(MaxActiveNodes,S->ActiveNodeDepthProfile[hindex]);
|
|
MaxActivePS = MAX(MaxActivePS,S->ActivePSDepthProfile[hindex]);
|
|
MaxComplJobs = MAX(MaxComplJobs,S->CompletionTimeDistProfile[hindex]);
|
|
|
|
count++;
|
|
|
|
ETime = S->FirstQueueTime + ((hindex -1000) * 3600);
|
|
|
|
strcpy(Time,MULToDString((mulong *)&ETime));
|
|
|
|
Time[19] = '\0';
|
|
|
|
fprintf(dfp,"%19s %6d %6lu %15.2f %6lu %6.2f %15.2f %6d\n",
|
|
Time,
|
|
S->QueueTimeDistProfile[hindex],
|
|
(unsigned long)S->QueueJobDepthProfile[hindex],
|
|
S->QueuePSDepthProfile[hindex],
|
|
(unsigned long)S->ActiveJobDepthProfile[hindex],
|
|
S->ActiveNodeDepthProfile[hindex],
|
|
S->ActivePSDepthProfile[hindex],
|
|
S->CompletionTimeDistProfile[hindex]);
|
|
}
|
|
} /* END for (hindex) */
|
|
|
|
fprintf(dfp,"==================================================================\n");
|
|
|
|
fprintf(dfp,"%6s %6s %6s %15s %6s %6s %15s %6s\n",
|
|
"",
|
|
"Submit",
|
|
"QJobs",
|
|
"QPS",
|
|
"RCnt",
|
|
"RProcs",
|
|
"RPS",
|
|
"CmplCt");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"%6s %6.2f %6.2f %15.2f %6.2f %6.2f %15.2f %6.2f\n",
|
|
"Avg",
|
|
AvgSubmit / count,
|
|
AvgQueuedJobs / count,
|
|
AvgQueuedPS / count,
|
|
AvgActiveJobs / count,
|
|
AvgActiveNodes / count,
|
|
AvgActivePS / count,
|
|
AvgComplJobs / count);
|
|
|
|
fprintf(dfp,"%6s %6.2f %6.2f %15.2f %6.2f %6.2f %15.2f %6.2f\n",
|
|
"Max",
|
|
(double)MaxSubmit,
|
|
(double)MaxQueuedJobs,
|
|
(double)MaxQueuedPS,
|
|
(double)MaxActiveJobs,
|
|
(double)MaxActiveNodes,
|
|
(double)MaxActivePS,
|
|
(double)MaxComplJobs);
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
fprintf(dfp,"Total Slots: %d Active: %6.2f%s BackLog: %6.2f%s\n",
|
|
TotalSlots,
|
|
(double)ActiveSlots / TotalSlots * 100.0,
|
|
"%",
|
|
(double)BackLogSlots / ActiveSlots * 100.0,
|
|
"%");
|
|
|
|
fprintf(dfp,"\n");
|
|
|
|
return(SUCCESS);
|
|
} /* END PQueueDistribution() */
|
|
|
|
|
|
|
|
|
|
int PAccuracyDistribution(
|
|
|
|
int Mode)
|
|
|
|
{
|
|
int timeindex;
|
|
int aindex;
|
|
|
|
must_t *C;
|
|
must_t *T;
|
|
|
|
char *THeader = "[ %8s ]";
|
|
char *MTHeader = " %12s ";
|
|
|
|
char *TTotal = "[ %5s ]";
|
|
char *MTTotal = " %5s ";
|
|
|
|
char *AVal = "[ %4d ]";
|
|
char *MAVal = " %4d ";
|
|
|
|
char *ATitle = "[ %5s ]";
|
|
char *MATitle = " %5s ";
|
|
|
|
char *Val = "[%7.2f %4d]";
|
|
char *MVal = " %7.2f ";
|
|
|
|
char *Null = "[ ---------- ]";
|
|
char *MNull = " 0 ";
|
|
|
|
char *CTotal = "[%7.2f %4d]";
|
|
char *MCTotal = " %7.2f ";
|
|
|
|
const char *FName = "PAccuracyDistribution";
|
|
|
|
DBG(2,fUI) DPrint("%s(%d)\n",
|
|
FName,
|
|
Mode);
|
|
|
|
fprintf(stdout,"\n\nAccuracy Distribution (in percent)\n\n");
|
|
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MATitle : ATitle,
|
|
"ACCUR");
|
|
|
|
for (timeindex = 0;timeindex < MStat.P.TimeStepCount;timeindex++)
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MTHeader : THeader,
|
|
MUBStringTime(MStat.P.TimeStep[timeindex]));
|
|
} /* END for (timeindex) */
|
|
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MTTotal : TTotal,
|
|
"TOTAL");
|
|
|
|
fprintf(stdout,"\n");
|
|
|
|
T = &MPar[0].S;
|
|
|
|
for (aindex = 0;aindex < MStat.P.AccuracyScale;aindex++)
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MAVal : AVal,
|
|
MStat.P.AccuracyStep[aindex]);
|
|
|
|
for (timeindex = 0;timeindex < MStat.P.TimeStepCount;timeindex++)
|
|
{
|
|
C = &MStat.CTotal[timeindex];
|
|
|
|
if (C->Accuracy[aindex] != 0)
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MVal : Val,
|
|
(double)C->Accuracy[aindex] / C->Count * 100,
|
|
C->Accuracy[aindex]);
|
|
}
|
|
else
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MNull : Null);
|
|
}
|
|
}
|
|
|
|
if (T->Accuracy[aindex] != 0)
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MVal : Val,
|
|
(double)T->Accuracy[aindex] / T->Count * 100,
|
|
T->Accuracy[aindex]);
|
|
|
|
fprintf(stdout,"\n");
|
|
}
|
|
else
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MNull : Null);
|
|
|
|
fprintf(stdout,"\n");
|
|
}
|
|
}
|
|
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MATitle : ATitle,
|
|
"TOTAL");
|
|
|
|
for (timeindex = 0;timeindex < MStat.P.TimeStepCount;timeindex++)
|
|
{
|
|
C = &MStat.CTotal[timeindex];
|
|
|
|
if (C->Count != 0)
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MCTotal : CTotal,
|
|
(double)C->TotalRunTime / C->TotalRequestTime * 100,
|
|
C->Count);
|
|
}
|
|
else
|
|
{
|
|
fprintf(stdout,
|
|
(Mode & (1 << mMatrix)) ? MNull : Null);
|
|
}
|
|
}
|
|
|
|
fprintf(stdout,"\n");
|
|
|
|
return(SUCCESS);
|
|
} /* END PAccuracyDistribution() */
|
|
|
|
|
|
|
|
|
|
|
|
int PPSRequestDistribution()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(3,fUI) DPrint("PPSRequestDistribution()\n");
|
|
|
|
MStatBuildGrid(stPSRequest,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int PPSRunDistribution()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(3,fUI) DPrint("PPSRunDistribution()\n");
|
|
|
|
MStatBuildGrid(stPSRun,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int PQueueTimeDistribution()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(3,fUI) DPrint("PQueueTimeDistribution()\n");
|
|
|
|
MStatBuildGrid(stAvgQTime,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int PJobLengthAccuracy()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(2,fUI) DPrint("PJobLengthAccuracy()\n");
|
|
|
|
MStatBuildGrid(stWCAccuracy,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int PXFactor()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(2,fUI) DPrint("PXFactor()\n");
|
|
|
|
MStatBuildGrid(stAvgXFactor,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int PMaxXFactor()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(2,fUI) DPrint("PMaxXFactor()\n");
|
|
|
|
MStatBuildGrid(stMaxXFactor,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int PJobEfficiency()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(2,fUI) DPrint("PJobEfficiency()\n");
|
|
|
|
MStatBuildGrid(stJobEfficiency,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int PQOSSuccessRate()
|
|
|
|
{
|
|
char Buffer[MAX_MBUFFER];
|
|
|
|
DBG(2,fUI) DPrint("PQOSSuccessRate()\n");
|
|
|
|
MStatBuildGrid(stQOSDelivered,Buffer,ProfileMode);
|
|
|
|
fprintf(stdout,"%s\n",
|
|
Buffer);
|
|
|
|
return(SUCCESS);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int ProfileMemoryUsage()
|
|
|
|
{
|
|
int MaxMemory = 2048;
|
|
|
|
int mindex;
|
|
|
|
DBG(2,fUI) DPrint("ProfileMemoryUsage()\n");
|
|
|
|
fprintf(stdout,"\n\nMemory Request Distribution\n\n");
|
|
|
|
for (mindex = 0;MaxMemory > (32 << (mindex - 1));mindex++)
|
|
{
|
|
fprintf(stdout,"%4d MB: %4lu Jobs (%5.2f) %6lu Procs (%5.2f) %13.1f PSRequest (%5.2f) %13.1f PSRun (%5.2f)\n",
|
|
(32 << mindex),
|
|
JobsatMemorySize[mindex],
|
|
(double)JobsatMemorySize[mindex] / TotalJobs * 100.0,
|
|
NodesatMemorySize[mindex],
|
|
(double)NodesatMemorySize[mindex] / TotalNodes * 100.0,
|
|
PSRequestatMemorySize[mindex],
|
|
PSRequestatMemorySize[mindex] / TotalPSRequest * 100.0,
|
|
PSRunatMemorySize[mindex],
|
|
PSRunatMemorySize[mindex] / TotalPSRun * 100.0);
|
|
}
|
|
|
|
return(SUCCESS);
|
|
} /* ENDProfileMemoryUsage() */
|
|
|
|
|
|
|
|
|
|
|
|
int ProfileUser()
|
|
|
|
{
|
|
int uindex;
|
|
mgcred_t *U;
|
|
must_t *S;
|
|
must_t *T;
|
|
|
|
int dindex;
|
|
int aindex;
|
|
|
|
double var;
|
|
double *val;
|
|
double mean;
|
|
|
|
DBG(2,fUI) DPrint("ProfileUser()\n");
|
|
|
|
fprintf(stdout,"\n\nuser job distribution\n");
|
|
|
|
fprintf(stdout,"\n%10s %10s (PCT) %13s (PCT) %13s (PCT) %10s %10s %10s %10s %10s %10s %10s\n\n",
|
|
"Name",
|
|
"JobCount",
|
|
"PHRequest",
|
|
"PHRun",
|
|
"QueueTime",
|
|
"AvgRunTime",
|
|
"AvgReqTime",
|
|
"WCAccuracy",
|
|
"WCAStdDev",
|
|
"XFactor",
|
|
"NodeXF");
|
|
|
|
/* NOTE: must determine number of users in table */
|
|
|
|
qsort(
|
|
(void *)&MUser[0],
|
|
MAX_MUSER + MAX_MHBUF,
|
|
sizeof(mgcred_t *),
|
|
(int(*)())UPSComp);
|
|
|
|
T = &MPar[0].S;
|
|
|
|
for (uindex = 0;uindex < MAX_MUSER + MAX_MHBUF;uindex++)
|
|
{
|
|
U = MUser[uindex];
|
|
|
|
if ((U == NULL) || (U->Name[0] == '\0') || (U->Name[0] == '\1'))
|
|
continue;
|
|
|
|
if (U->Stat.PSRequest <= 0)
|
|
continue;
|
|
|
|
if (Profile.UserNameList[0][0] != '\0')
|
|
{
|
|
for (aindex = 0;Profile.UserNameList[aindex][0] != '\0';aindex++)
|
|
{
|
|
if (!strcmp(U->Name,Profile.UserNameList[aindex]))
|
|
break;
|
|
}
|
|
|
|
if (Profile.UserNameList[aindex][0] == '\0')
|
|
continue;
|
|
}
|
|
|
|
S = &(U->Stat);
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&S->DStat[dstatWCA]) == SUCCESS) &&
|
|
(S->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = S->JobAcc / S->Count;
|
|
|
|
for (dindex = 0;dindex < S->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&S->DStat[dstatWCA].Data[dindex * S->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(S->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.2f) %11.2f (%05.2f) %11.2f (%05.2f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
U->Name,
|
|
S->Count,
|
|
(double)S->Count / T->Count * 100.0,
|
|
S->PSRequest / 3600.0,
|
|
(double)S->PSRequest / T->PSRequest * 100.0,
|
|
S->PSRun / 3600.0,
|
|
(double)S->PSRun / T->PSRun * 100.0,
|
|
(double)S->TotalQTS / S->Count / 3600.0,
|
|
(double)S->TotalRunTime / S->Count / 3600.0,
|
|
(double)S->TotalRequestTime / S->Count / 3600.0,
|
|
S->JobAcc / S->Count * 100.0,
|
|
var * 100.0,
|
|
S->XFactor / S->Count,
|
|
S->NXFactor / S->NCount);
|
|
} /* END for (uindex) */
|
|
|
|
fprintf(stdout,"\n");
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&T->DStat[dstatWCA]) == SUCCESS) &&
|
|
(T->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = T->JobAcc / T->Count;
|
|
|
|
for (dindex = 0;dindex < T->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&T->DStat[dstatWCA].Data[dindex * T->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(T->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.1f) %11.2f (%05.1f) %11.2f (%05.1f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
"Total",
|
|
T->Count,
|
|
(double)T->Count / T->Count * 100.0,
|
|
T->PSRequest / 3600.0,
|
|
(double)T->PSRequest / T->PSRequest * 100.0,
|
|
T->PSRun / 3600.0,
|
|
(double)T->PSRun / T->PSRun * 100.0,
|
|
(double)T->TotalQTS / T->Count / 3600.0,
|
|
(double)T->TotalRunTime / T->Count / 3600.0,
|
|
(double)T->TotalRequestTime / T->Count / 3600.0,
|
|
T->JobAcc / T->Count * 100.0,
|
|
var * 100.0,
|
|
T->XFactor / T->Count,
|
|
T->NXFactor / T->NCount);
|
|
|
|
return(SUCCESS);
|
|
} /* END ProfileUser() */
|
|
|
|
|
|
|
|
|
|
|
|
int ProfileGroup()
|
|
|
|
{
|
|
int gindex;
|
|
|
|
mgcred_t *G;
|
|
must_t *S;
|
|
must_t *T;
|
|
|
|
int dindex;
|
|
int aindex;
|
|
|
|
double var;
|
|
double *val;
|
|
double mean;
|
|
|
|
DBG(2,fUI) DPrint("ProfileGroup()\n");
|
|
|
|
fprintf(stdout,"\n\ngroup job distribution\n");
|
|
|
|
fprintf(stdout,"\n%10s %10s (PCT) %13s (PCT) %13s (PCT) %10s %10s %10s %10s %10s %10s %10s\n\n",
|
|
"Name",
|
|
"JobCount",
|
|
"PHRequest",
|
|
"PHRun",
|
|
"QueueTime",
|
|
"AvgRunTime",
|
|
"AvgReqTime",
|
|
"WCAccuracy",
|
|
"WCAStdDev",
|
|
"XFactor",
|
|
"NodeXF");
|
|
|
|
/* NOTE: must determine number of groups in table */
|
|
|
|
qsort(
|
|
(void *)&MGroup[0],
|
|
MAX_MGROUP + MAX_MHBUF,
|
|
sizeof(mgcred_t),
|
|
(int(*)())GPSComp);
|
|
|
|
T = &MPar[0].S;
|
|
|
|
for (gindex = 0;MGroup[gindex].Stat.PSRequest > 0;gindex++)
|
|
{
|
|
G = &MGroup[gindex];
|
|
|
|
if (Profile.GroupNameList[0][0] != '\0')
|
|
{
|
|
for (aindex = 0;Profile.GroupNameList[aindex][0] != '\0';aindex++)
|
|
{
|
|
if (!strcmp(G->Name,Profile.GroupNameList[aindex]))
|
|
break;
|
|
}
|
|
|
|
if (Profile.GroupNameList[aindex][0] == '\0')
|
|
continue;
|
|
}
|
|
|
|
S = &(G->Stat);
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&S->DStat[dstatWCA]) == SUCCESS) &&
|
|
(S->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = S->JobAcc / S->Count;
|
|
|
|
for (dindex = 0;dindex < S->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&S->DStat[dstatWCA].Data[dindex * S->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(S->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.2f) %11.2f (%05.2f) %11.2f (%05.2f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
G->Name,
|
|
S->Count,
|
|
(double)S->Count / T->Count * 100.0,
|
|
S->PSRequest / 3600.0,
|
|
(double)S->PSRequest / T->PSRequest * 100.0,
|
|
S->PSRun / 3600.0,
|
|
(double)S->PSRun / T->PSRun * 100.0,
|
|
(double)S->TotalQTS / S->Count / 3600.0,
|
|
(double)S->TotalRunTime / S->Count / 3600.0,
|
|
(double)S->TotalRequestTime / S->Count / 3600.0,
|
|
S->JobAcc / S->Count * 100.0,
|
|
var * 100.0,
|
|
S->XFactor / S->Count,
|
|
S->NXFactor / S->NCount);
|
|
} /* END for (gindex) */
|
|
|
|
fprintf(stdout,"\n");
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&T->DStat[dstatWCA]) == SUCCESS) &&
|
|
(T->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = T->JobAcc / T->Count;
|
|
|
|
for (dindex = 0;dindex < T->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&T->DStat[dstatWCA].Data[dindex * T->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(T->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.1f) %11.2f (%05.1f) %11.2f (%05.1f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
"Total",
|
|
T->Count,
|
|
(double)T->Count / T->Count * 100.0,
|
|
T->PSRequest / 3600.0,
|
|
(double)T->PSRequest / T->PSRequest * 100.0,
|
|
T->PSRun / 3600.0,
|
|
(double)T->PSRun / T->PSRun * 100.0,
|
|
(double)T->TotalQTS / T->Count / 3600.0,
|
|
(double)T->TotalRunTime / T->Count / 3600.0,
|
|
(double)T->TotalRequestTime / T->Count / 3600.0,
|
|
T->JobAcc / T->Count * 100.0,
|
|
var * 100.0,
|
|
T->XFactor / T->Count,
|
|
T->NXFactor / T->NCount);
|
|
|
|
return(SUCCESS);
|
|
} /* END ProfileGroup() */
|
|
|
|
|
|
|
|
|
|
|
|
int ProfileAccount()
|
|
|
|
{
|
|
int aindex;
|
|
mgcred_t *A;
|
|
must_t *S;
|
|
must_t *T;
|
|
|
|
int dindex;
|
|
|
|
double var;
|
|
double *val;
|
|
double mean;
|
|
|
|
DBG(2,fUI) DPrint("ProfileAccount()\n");
|
|
|
|
fprintf(stdout,"\n\naccount job distribution\n");
|
|
|
|
fprintf(stdout,"\n%10s %10s (PCT) %13s (PCT) %13s (PCT) %10s %10s %10s %10s %10s %10s %10s\n\n",
|
|
"Name",
|
|
"JobCount",
|
|
"PHRequest",
|
|
"PHRun",
|
|
"QueueTime",
|
|
"AvgRunTime",
|
|
"AvgReqTime",
|
|
"WCAccuracy",
|
|
"WCAStdDev",
|
|
"XFactor",
|
|
"NodeXF");
|
|
|
|
/* NOTE: must determine number of accounts in table */
|
|
|
|
qsort(
|
|
(void *)&MAcct[0],
|
|
MAX_MACCT + MAX_MHBUF,
|
|
sizeof(mgcred_t),
|
|
(int(*)())APSComp);
|
|
|
|
T = &MPar[0].S;
|
|
|
|
for (aindex = 0;MAcct[aindex].Stat.PSRequest > 0;aindex++)
|
|
{
|
|
A = &MAcct[aindex];
|
|
|
|
S = &(A->Stat);
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&S->DStat[dstatWCA]) == SUCCESS) &&
|
|
(S->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = S->JobAcc / S->Count;
|
|
|
|
for (dindex = 0;dindex < S->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&S->DStat[dstatWCA].Data[dindex * S->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(S->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.2f) %11.2f (%05.2f) %11.2f (%05.2f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
A->Name,
|
|
S->Count,
|
|
(double)S->Count / T->Count * 100.0,
|
|
S->PSRequest / 3600.0,
|
|
(double)S->PSRequest / T->PSRequest * 100.0,
|
|
S->PSRun / 3600.0,
|
|
(double)S->PSRun / T->PSRun * 100.0,
|
|
(double)S->TotalQTS / S->Count / 3600.0,
|
|
(double)S->TotalRunTime / S->Count / 3600.0,
|
|
(double)S->TotalRequestTime / S->Count / 3600.0,
|
|
S->JobAcc / S->Count * 100.0,
|
|
var * 100.0,
|
|
S->XFactor / S->Count,
|
|
S->NXFactor / S->NCount);
|
|
} /* END for (uindex) */
|
|
|
|
fprintf(stdout,"\n");
|
|
|
|
var = 0.0;
|
|
|
|
if ((MUDStatIsEnabled(&T->DStat[dstatWCA]) == SUCCESS) &&
|
|
(T->DStat[dstatWCA].Count > 1))
|
|
{
|
|
mean = T->JobAcc / T->Count;
|
|
|
|
for (dindex = 0;dindex < T->DStat[dstatWCA].Count;dindex++)
|
|
{
|
|
val = (double *)&T->DStat[dstatWCA].Data[dindex * T->DStat[dstatWCA].DSize];
|
|
|
|
var += pow((*val - mean),2.0);
|
|
}
|
|
|
|
var /= (double)(T->DStat[dstatWCA].Count - 1);
|
|
|
|
var = pow(var,0.5);
|
|
}
|
|
|
|
fprintf(stdout,"%10s %10d (%05.1f) %11.2f (%05.1f) %11.2f (%05.1f) %10.2f %10.2f %10.2f %10.3f %10.3f %10.3f %10.3f\n",
|
|
"Total",
|
|
T->Count,
|
|
(double)T->Count / T->Count * 100.0,
|
|
T->PSRequest / 3600.0,
|
|
(double)T->PSRequest / T->PSRequest * 100.0,
|
|
T->PSRun / 3600.0,
|
|
(double)T->PSRun / T->PSRun * 100.0,
|
|
(double)T->TotalQTS / T->Count / 3600.0,
|
|
(double)T->TotalRunTime / T->Count / 3600.0,
|
|
(double)T->TotalRequestTime / T->Count / 3600.0,
|
|
T->JobAcc / T->Count * 100.0,
|
|
var * 100.0,
|
|
T->XFactor / T->Count,
|
|
T->NXFactor / T->NCount);
|
|
|
|
return(SUCCESS);
|
|
} /* END ProfileAccount() */
|
|
|
|
|
|
|
|
|
|
|
|
/* order high to low */
|
|
|
|
int UPSComp(mgcred_t **a,mgcred_t **b)
|
|
|
|
{
|
|
static int tmp;
|
|
|
|
if ((*a != NULL) && (*b != NULL))
|
|
tmp = (*b)->Stat.PSRun - (*a)->Stat.PSRun;
|
|
else
|
|
tmp = 0;
|
|
|
|
return (tmp);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* order high to low */
|
|
|
|
int GPSComp(
|
|
|
|
mgcred_t *A,
|
|
mgcred_t *B)
|
|
|
|
{
|
|
static int tmp;
|
|
|
|
tmp = B->Stat.PSRun - A->Stat.PSRun;
|
|
|
|
return(tmp);
|
|
} /* END GPBSComp() */
|
|
|
|
|
|
|
|
|
|
|
|
/* order high to low */
|
|
|
|
int APSComp(mgcred_t *a,mgcred_t *b)
|
|
|
|
{
|
|
static int tmp;
|
|
|
|
tmp = b->Stat.PSRun - a->Stat.PSRun;
|
|
|
|
return (tmp);
|
|
}
|
|
|
|
|
|
|
|
|
|
int MPLoadTrace(
|
|
|
|
char *TraceFile)
|
|
|
|
{
|
|
int count;
|
|
|
|
int aindex;
|
|
|
|
int queueindex;
|
|
int startindex;
|
|
int endindex;
|
|
int hindex;
|
|
|
|
int rindex;
|
|
int windex;
|
|
int pindex;
|
|
|
|
int nhindex;
|
|
|
|
char *buf;
|
|
char *ptr;
|
|
char *head;
|
|
char *tail;
|
|
|
|
int Version;
|
|
|
|
int Offset;
|
|
|
|
int Accuracy;
|
|
int RunTime;
|
|
int RunPS;
|
|
|
|
mjob_t tmpJ;
|
|
mjob_t *J;
|
|
|
|
int mem;
|
|
int mindex;
|
|
|
|
int QTime;
|
|
double XFactor;
|
|
double HFraction;
|
|
|
|
mprof_t *S;
|
|
|
|
mreq_t *RQ;
|
|
|
|
int SC;
|
|
|
|
int LineCount;
|
|
|
|
DBG(2,fSIM) DPrint("LoadTrace(%s)\n",
|
|
TraceFile);
|
|
|
|
S = &SystemProfile;
|
|
|
|
if ((buf = MFULoad(TraceFile,1,macmRead,&count,&SC)) == NULL)
|
|
{
|
|
DBG(0,fSIM) DPrint("ERROR: cannot open tracefile '%s'\n",
|
|
TraceFile);
|
|
|
|
exit(1);
|
|
}
|
|
|
|
Version = DEFAULT_WORKLOAD_TRACE_VERSION;
|
|
|
|
/* set head to first line after marker */
|
|
|
|
/* load workload traces */
|
|
|
|
DBG(3,fSIM) DPrint("INFO: loading workload traces from TraceFile '%s'\n",
|
|
TraceFile);
|
|
|
|
count = 0;
|
|
|
|
head = buf;
|
|
|
|
ptr = head;
|
|
|
|
tail = head + strlen(head);
|
|
|
|
if (MUDStatIsEnabled(&MPar[0].S.DStat[dstatWCA]) == FAILURE)
|
|
{
|
|
MUDStatInitialize(&MPar[0].S.DStat[dstatWCA],sizeof(double));
|
|
}
|
|
|
|
LineCount = 0;
|
|
|
|
while (ptr < tail)
|
|
{
|
|
LineCount++;
|
|
Offset = 0;
|
|
|
|
if (MTraceLoadWorkload(ptr,&Offset,&tmpJ,msmProfile,&Version) == SUCCESS)
|
|
{
|
|
ptr += Offset;
|
|
|
|
J = &tmpJ;
|
|
|
|
RQ = J->Req[0]; /* FIXME */
|
|
|
|
if ((J->Cred.U->Name[0] == '\0') || (J->Cred.G->Name[0] == '\0'))
|
|
{
|
|
DBG(1,fSIM) DPrint("ALERT: cannot determine UName/GName for job '%s' (ignoring job)\n",
|
|
J->Name);
|
|
|
|
continue;
|
|
}
|
|
|
|
/* ignore record if job not profiled */
|
|
|
|
if ((Profile.Host[0] != '\0') && (strcmp(Profile.Host,J->MasterHostName)))
|
|
continue;
|
|
|
|
if ((J->Cred.Q != NULL) && (Profile.QOSList[0] != '\0') && !strstr(Profile.QOSList,J->Cred.Q->Name))
|
|
continue;
|
|
|
|
if (Profile.UserNameList[0][0] != '\0')
|
|
{
|
|
for (aindex = 0;Profile.UserNameList[aindex][0] != '\0';aindex++)
|
|
{
|
|
if (!strcmp(J->Cred.U->Name,Profile.UserNameList[aindex]))
|
|
break;
|
|
}
|
|
|
|
if (Profile.UserNameList[aindex][0] == '\0')
|
|
continue;
|
|
}
|
|
|
|
if (Profile.GroupNameList[0][0] != '\0')
|
|
{
|
|
for (aindex = 0;Profile.GroupNameList[aindex][0] != '\0';aindex++)
|
|
{
|
|
if (!strcmp(J->Cred.G->Name,Profile.GroupNameList[aindex]))
|
|
break;
|
|
}
|
|
|
|
if (Profile.GroupNameList[aindex][0] == '\0')
|
|
continue;
|
|
}
|
|
|
|
if ((Profile.AccountNameList[0][0] != '\0') && (J->Cred.A != NULL))
|
|
{
|
|
for (aindex = 0;Profile.AccountNameList[aindex][0] != '\0';aindex++)
|
|
{
|
|
if (!strcmp(J->Cred.A->Name,Profile.AccountNameList[aindex]))
|
|
break;
|
|
}
|
|
|
|
if (Profile.AccountNameList[aindex][0] == '\0')
|
|
continue;
|
|
}
|
|
|
|
if (Profile.StartTime > J->StartTime)
|
|
continue;
|
|
|
|
if (Profile.EndTime < J->StartTime)
|
|
continue;
|
|
|
|
/* enable detailed wca collection */
|
|
|
|
if (MUDStatIsEnabled(&J->Cred.U->Stat.DStat[dstatWCA]) == FAILURE)
|
|
{
|
|
MUDStatInitialize(&J->Cred.U->Stat.DStat[dstatWCA],sizeof(double));
|
|
}
|
|
|
|
if (MUDStatIsEnabled(&J->Cred.G->Stat.DStat[dstatWCA]) == FAILURE)
|
|
{
|
|
MUDStatInitialize(&J->Cred.G->Stat.DStat[dstatWCA],sizeof(double));
|
|
}
|
|
|
|
if (J->Cred.A != NULL)
|
|
{
|
|
if (MUDStatIsEnabled(&J->Cred.A->Stat.DStat[dstatWCA]) == FAILURE)
|
|
{
|
|
MUDStatInitialize(&J->Cred.A->Stat.DStat[dstatWCA],sizeof(double));
|
|
}
|
|
}
|
|
|
|
J->StartTime = J->DispatchTime;
|
|
|
|
J->PSDedicated = (double)(J->CompletionTime - J->StartTime) * MJobGetProcCount(J);
|
|
|
|
if (((J->State == mjsRemoved) || (J->State == mjsNotRun)) &&
|
|
!(ProfileMode & (1 << mUseRemoved)))
|
|
{
|
|
MStatUpdateRejectedJobUsage(&tmpJ,ProfileMode);
|
|
|
|
continue;
|
|
}
|
|
|
|
DBG(6,fSIM) DPrint("INFO: job '%s' loaded\n",
|
|
J->Name);
|
|
|
|
/* add user/group/account record */
|
|
|
|
if (S->FirstQueueTime == MAX_MTIME)
|
|
{
|
|
S->FirstQueueTime = J->SubmitTime;
|
|
MStat.InitTime = J->SubmitTime;
|
|
}
|
|
|
|
if (((long)S->FirstQueueTime - (long)J->SubmitTime) / 3600 > 1000)
|
|
{
|
|
DBG(0,fSIM) DPrint("ALERT: traces are not in queuetime order (job '%s' queued before first job trace)\n",
|
|
J->Name);
|
|
}
|
|
|
|
RunTime = (J->CompletionTime > J->StartTime) ?
|
|
J->CompletionTime - J->StartTime : 0;
|
|
|
|
RunTime = MIN(RunTime,MStat.P.MaxTime);
|
|
|
|
if ((RunTime > J->WCLimit) && (RunTime - J->WCLimit < 600))
|
|
RunTime = J->WCLimit;
|
|
|
|
Accuracy = (100 * RunTime) / J->WCLimit;
|
|
|
|
if ((Accuracy > 100) || (Accuracy < 0))
|
|
{
|
|
DBG(4,fSIM) DPrint("WARNING: job '%s' exceeded wallclock limit (%d > %ld)\n",
|
|
J->Name,
|
|
RunTime,
|
|
J->WCLimit);
|
|
|
|
Accuracy = MAX(Accuracy,0);
|
|
Accuracy = MIN(Accuracy,100);
|
|
}
|
|
else
|
|
{
|
|
if ((Accuracy >= 5) && (Accuracy < 100))
|
|
{
|
|
S->Acc5_99Sum += Accuracy;
|
|
S->Acc5_99Count++;
|
|
}
|
|
}
|
|
|
|
S->JobAccuracy[Accuracy]++;
|
|
|
|
S->TotalAccSum += Accuracy;
|
|
S->TotalJobCount++;
|
|
|
|
/* calculate QTime/XFactor distributions */
|
|
|
|
QTime = (J->StartTime - J->SubmitTime);
|
|
|
|
QTime = MIN(720000,QTime);
|
|
QTime = MAX(0,QTime);
|
|
|
|
S->TimeQueuedDistProfile[QTime / 720]++;
|
|
|
|
XFactor = (double)(QTime + J->WCLimit) / J->WCLimit;
|
|
|
|
XFactor = MIN(100,XFactor);
|
|
|
|
S->XFactorDistProfile[(int)(XFactor * 10)]++;
|
|
|
|
S->EarliestQueueTime = MIN(S->EarliestQueueTime,J->SubmitTime);
|
|
S->EarliestStartTime = MIN(S->EarliestStartTime,J->StartTime);
|
|
|
|
S->LatestQueueTime = MAX(S->LatestQueueTime,J->SubmitTime);
|
|
S->LatestStartTime = MAX(S->LatestStartTime,J->StartTime);
|
|
S->LatestCompletionTime = MAX(S->LatestCompletionTime,J->CompletionTime);
|
|
|
|
queueindex = ((long)J->SubmitTime - (long)S->FirstQueueTime) / 3600 + 1000;
|
|
startindex = ((long)J->StartTime - (long)S->FirstQueueTime) / 3600 + 1000;
|
|
endindex = ((long)J->CompletionTime - (long)S->FirstQueueTime) / 3600 + 1000;
|
|
|
|
DBG(5,fSIM) DPrint("INFO: job '%s' (%d:%d:%d) FQT: %ld QT: %ld ST: %ld\n",
|
|
J->Name,
|
|
queueindex,
|
|
startindex,
|
|
endindex,
|
|
S->FirstQueueTime,
|
|
J->SubmitTime,
|
|
J->StartTime);
|
|
|
|
S->QueueTimeDistProfile[queueindex]++;
|
|
S->CompletionTimeDistProfile[endindex]++;
|
|
|
|
/* locate/update start of queued job */
|
|
|
|
if (startindex > queueindex)
|
|
HFraction = (double)(1.0 - (double)((J->SubmitTime - S->FirstQueueTime) % 3600) / 3600.0);
|
|
else
|
|
HFraction = (double)(J->StartTime - J->SubmitTime) / 3600.0;
|
|
|
|
S->QueueJobDepthProfile[queueindex] += HFraction;
|
|
S->QueuePSDepthProfile[queueindex] += HFraction * (J->WCLimit * MJobGetProcCount(J));
|
|
|
|
/* update middle of queued job */
|
|
|
|
for (hindex = queueindex + 1;hindex < startindex;hindex++)
|
|
{
|
|
S->QueueJobDepthProfile[hindex]++;
|
|
S->QueuePSDepthProfile[hindex] += (double)(J->WCLimit * MJobGetProcCount(J));
|
|
}
|
|
|
|
/* locate/update end of queued job */
|
|
|
|
if (startindex > queueindex)
|
|
HFraction = (double)((J->StartTime - S->FirstQueueTime) % 3600) / 3600.0;
|
|
else
|
|
HFraction = 0.0;
|
|
|
|
S->QueueJobDepthProfile[startindex] += HFraction;
|
|
S->QueuePSDepthProfile[startindex] += HFraction * (J->WCLimit * MJobGetProcCount(J));
|
|
|
|
/* locate/update start of active job */
|
|
|
|
if (endindex > startindex)
|
|
HFraction = (double)(1.0 - (double)((J->StartTime - S->FirstQueueTime) % 3600) / 3600.0);
|
|
else
|
|
HFraction = (double)(J->CompletionTime - J->StartTime) / 3600.0;
|
|
|
|
S->ActiveJobDepthProfile[startindex] += HFraction;
|
|
S->ActiveNodeDepthProfile[startindex] += HFraction * MJobGetProcCount(J);
|
|
S->ActivePSDepthProfile[startindex] += HFraction * (J->WCLimit * MJobGetProcCount(J));
|
|
|
|
DBG(5,fSIM) DPrint("INFO: job '%s'(%3d/%6ld) dist %4d : %6.2f (%15.2f)\n",
|
|
J->Name,
|
|
MJobGetProcCount(J),
|
|
J->WCLimit,
|
|
startindex,
|
|
HFraction * MJobGetProcCount(J),
|
|
S->ActiveNodeDepthProfile[startindex]);
|
|
|
|
/* update middle of active job */
|
|
|
|
for (hindex = startindex + 1;hindex < endindex;hindex++)
|
|
{
|
|
S->ActiveJobDepthProfile[hindex] ++;
|
|
S->ActiveNodeDepthProfile[hindex] += (double)MJobGetProcCount(J);
|
|
S->ActivePSDepthProfile[hindex] += (double)(J->WCLimit * MJobGetProcCount(J));
|
|
|
|
DBG(5,fSIM) DPrint("INFO: job '%s'(%3d/%6ld) dist %4d : %6.2f (%15.2f)\n",
|
|
J->Name,
|
|
MJobGetProcCount(J),
|
|
J->WCLimit,
|
|
hindex,
|
|
(double)MJobGetProcCount(J),
|
|
S->ActiveNodeDepthProfile[hindex]);
|
|
}
|
|
|
|
/* locate/update end of active job */
|
|
|
|
if (endindex > startindex)
|
|
HFraction = (double)((J->CompletionTime - S->FirstQueueTime) % 3600) / 3600.0;
|
|
else
|
|
HFraction = 0.0;
|
|
|
|
S->ActiveJobDepthProfile[endindex] += HFraction;
|
|
S->ActiveNodeDepthProfile[endindex] += HFraction * MJobGetProcCount(J);
|
|
S->ActivePSDepthProfile[endindex] += HFraction * (J->WCLimit * MJobGetProcCount(J));
|
|
|
|
DBG(5,fSIM) DPrint("INFO: job '%s'(%3d/%6ld) dist %4d : %6.2f (%15.2f)\n",
|
|
J->Name,
|
|
MJobGetProcCount(J),
|
|
J->WCLimit,
|
|
endindex,
|
|
HFraction * MJobGetProcCount(J),
|
|
S->ActiveNodeDepthProfile[endindex]);
|
|
|
|
MStatUpdateCompletedJobUsage(J,msmProfile,ProfileMode);
|
|
|
|
/* update distribution data */
|
|
|
|
windex = MIN(MStat.P.MaxTime,J->WCLimit / PROF_TIMEINT_LENGTH);
|
|
rindex = MIN(MStat.P.MaxTime,RunTime / PROF_TIMEINT_LENGTH);
|
|
pindex = MIN(MStat.P.MaxNode,MJobGetProcCount(J));
|
|
|
|
if (ProfileMode & (1 << mUseRunTime))
|
|
{
|
|
RunPS = MJobGetProcCount(J) * J->WCLimit;
|
|
|
|
S->JobLengthDist[windex]++;
|
|
S->PSLengthDist[windex] += RunPS;
|
|
}
|
|
else
|
|
{
|
|
RunPS = MJobGetProcCount(J) * RunTime;
|
|
|
|
S->JobLengthDist[rindex]++;
|
|
S->PSLengthDist[rindex] += RunPS;
|
|
}
|
|
|
|
S->TotalPS += RunPS;
|
|
|
|
nhindex = MIN(1000,RunPS / 36000);
|
|
|
|
S->JobPHDist[nhindex]++;
|
|
|
|
S->PSSizeDist[pindex] += RunPS;
|
|
S->JobSizeDist[pindex]++;
|
|
|
|
S->JobEfficDist[pindex] += (RunPS > 0.0) ?
|
|
J->PSUtilized / RunPS :
|
|
0.0;
|
|
|
|
/* profile memory usage */
|
|
|
|
mem = RQ->RequiredMemory;
|
|
|
|
switch(RQ->MemCmp)
|
|
{
|
|
case mcmpGT:
|
|
|
|
mem++;
|
|
|
|
break;
|
|
|
|
case mcmpLT:
|
|
case mcmpLE:
|
|
case mcmpNONE:
|
|
default:
|
|
|
|
mem = 0;
|
|
|
|
break;
|
|
} /* END (RQ->MemCmp) */
|
|
|
|
for (mindex = 0;mem > (32 << mindex);mindex++);
|
|
|
|
JobsatMemorySize[mindex]++;
|
|
|
|
NodesatMemorySize[mindex] += MJobGetProcCount(J);
|
|
|
|
PSRequestatMemorySize[mindex] += (double)(MJobGetProcCount(J) * J->WCLimit);
|
|
|
|
PSRunatMemorySize[mindex] += (double)(MJobGetProcCount(J) * (J->CompletionTime - J->StartTime));
|
|
|
|
TotalJobs ++;
|
|
|
|
TotalNodes += MJobGetProcCount(J);
|
|
|
|
TotalPSRequest += (double)(MJobGetProcCount(J) * J->WCLimit);
|
|
|
|
TotalPSRun += (double)MJobGetProcCount(J) * (J->CompletionTime - J->StartTime);
|
|
|
|
MDEBUG(4)
|
|
MJobShow(J,0,NULL);
|
|
}
|
|
else
|
|
{
|
|
if (Offset == 0)
|
|
{
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
DBG(2,fSIM) DPrint("INFO: no job on line %20.20s...\n",
|
|
ptr);
|
|
|
|
ptr += Offset;
|
|
}
|
|
} /* END else (LoadWorkloadTrace() == SUCCESS) */
|
|
|
|
if ((MStat.P.TraceCount > 0) && (TotalJobs == MStat.P.TraceCount))
|
|
{
|
|
DBG(2,fSIM) DPrint("INFO: TraceCount limit (%d) reached\n",
|
|
TotalJobs);
|
|
|
|
break;
|
|
}
|
|
} /* END while (ptr < tail) */
|
|
|
|
DBG(3,fSIM) DPrint("INFO: %d job traces loaded from tracefile '%s' (%d lines)\n",
|
|
TotalJobs,
|
|
TraceFile,
|
|
LineCount);
|
|
|
|
return(SUCCESS);
|
|
} /* END MPLoadTrace() */
|
|
|
|
|
|
/* END mprof.c */
|
|
|