代码拉取完成,页面将自动刷新
#!/bin/sh
# Name: pestat
# Usage: pestat [-f] [nodeA nodeB ...]
# Torque resource manager utility script:
# Print a 1-line summary of jobs on each node.
# The printout may be customized as needed.
# Author: zhangtao@sugon.com
# Version: 3.0
# Update Notes:
# - Support specifying nodes; if no node list is given, show all nodes
# - Support getting the job list from "jobs" (jobs_from_status=0), NOT "status->jobs"
# - Repair numtasks[] error for Torque v4 (the task list is not split with spaces)
# - Repair misaligned columns of different widths by formatting them
# Date: 08 November 2014
# Author: Ole.H.Nielsen@fysik.dtu.dk
# Version: 2.0
# Date: 27 September 2007
# Option state consumed by the awk program further down.
#   nodelist         - node names given on the command line, joined by "#"
#                      (empty means: show all nodes)
#   listflagged      - 1 when -f was given (print only flagged nodes)
#   jobs_from_status - 1: take the job list from the "status" line,
#                      0: take it from the "jobs" line
# Plain assignments are used instead of "declare", which is a bash builtin
# and is not available in a POSIX /bin/sh.
nodelist=""
listflagged=0
jobs_from_status=1

# Parse the command line: "-f" sets listflagged, every other argument is
# appended to nodelist with "#" as the separator.
# Globals:   nodelist (appended to), listflagged (written)
# Arguments: the script arguments
# Outputs:   a notice on stdout when -f is seen
pestat_parse_args() {
  while [ $# -gt 0 ]; do
    # "$1" is quoted so that empty arguments or arguments containing
    # whitespace do not break the comparison.
    case "$1" in
      -f)
        # Argument -f: Print only those nodes that are flagged
        printf '%s\n' 'Listing only nodes that are flagged by \*'
        listflagged=1
        ;;
      *)
        if [ -z "$nodelist" ]; then
          nodelist=$1
        else
          nodelist="${nodelist}#$1"
        fi
        ;;
    esac
    shift
  done
}

pestat_parse_args "$@"
# Locations of commands used
# Each location may be overridden from the environment (for example
# PBSNODES=/usr/bin/pbsnodes pestat); when a variable is unset or empty the
# original default path is used.
PBSNODES=${PBSNODES:-/opt/torque/bin/pbsnodes}
QSTAT=${QSTAT:-/opt/torque/bin/qstat}
AWK=${AWK:-/bin/awk}
# Heading for printout showing:
# node: Node hostname
# state: Torque state
# load: CPU load average
# pmem: Physical memory
# ncpu: Number of CPUs
# mem: Physical+virtual memory
# resi: Resident (used) memory
# usrs: Number of sessions / Number of users
# jobs: Number of jobs
# jobids/users: Jobids and corresponding usernames of Torque jobs on this node
# initial title of pestat
#echo " node state load pmem ncpu mem resi usrs tasks jobids/users"
#
# Show the Torque node status and parse the results
#
#$PBSNODES -a | $AWK -v listflagged=$listflagged -v QSTAT=$QSTAT '
# Feed the output of "pbsnodes -a" to awk, which prints a header and then one
# summary line per node. The shell variables are quoted so that values with
# whitespace are passed to awk as single arguments.
# NOTE: the awk program is a single-quoted shell string, so it must not
# contain any single-quote character (not even in comments).
"$PBSNODES" -a | "$AWK" -v listflagged="$listflagged" -v QSTAT="$QSTAT" \
  -v nodelist="$nodelist" -v jobs_from_status="$jobs_from_status" '
BEGIN {
  #
  # First get the list of jobids versus usernames from qstat
  #
  QSTAT = QSTAT " -r"                 # Append -r flag (running jobs) to qstat.
  while ((QSTAT | getline) > 0) {     # Parse lines from qstat -r
    if (++line > 5) {                 # Skip first 5 header lines
      split($1, b, ".")               # Jobid is b[1]
      username[b[1]] = $2             # Username of this jobid
    }
  }
  close(QSTAT)

  # Node names requested on the command line arrive joined by "#".
  # inode_number stays 0 when no node was requested.
  if (length(nodelist) != 0) {
    inode_number = split(nodelist, nodeArray, "#")
  } else {
    inode_number = 0
  }

  # Column header. The "-" flag is written before the width ("%-12s"), which
  # is the standard printf form; the flag-after-width form is not portable
  # across awk implementations.
  printf (" %-12s %-6s %-8s %-8s %-5s %8s %-7s %-8s %-6s %s\n",
    "node", "state", "load",
    "phymem", "ncpus", "allmem", "resi",
    "usrs", "tasks", "jobidlist")
  #printf (" ------------ ------ -------- -------- ----- -------- ------- -------- ------ -------------\n");
}
#
# Parse the output of pbsnodes
#
# A line with exactly one field is taken to be a node name; the following
# lines with at least 3 fields ("key = value ...") are that node attributes.
NF == 1 {
  node = $1                           # 1st line is nodename
  nodename[node] = node               # Node name
  numjobs[node] = 0                   # Torque jobs on the node
  numtasks[node] = 0                  # Number of tasks started by Torque on the node
  listnode = 0                        # Set to > 0 if this node gets flagged
  carenode = 1                        # Set to > 0 if this node name specified in nodelist

  # When a node list was given, only nodes found in it are of interest.
  if (inode_number != 0) {
    carenode = 0
    for (inode in nodeArray) {
      if (nodeArray[inode] == node) {
        carenode = 1
        break
      }
    }
  }

  # Get the next input line. The getline result is checked because NF keeps
  # its previous value at end of input; without the check a final attribute
  # line with no blank line after it would be processed forever.
  have_line = ((getline) > 0)
  while (have_line && NF >= 3) {      # Read a number of non-blank lines
    if ($1 == "state") {
      # Map the Torque state string to a 4-character label. States not
      # listed here leave state[node] unset.
      if ($3 == "job-exclusive") state[node] = "excl"
      else if ($3 == "job-exclusive,busy") state[node] = "busy"
      else if ($3 == "busy") state[node] = "busy"
      else if ($3 == "free") state[node] = "free"
      else if ($3 == "offline") state[node] = "offl"
      else if ($3 == "offline,job-exclusive") state[node] = "offl"
      else if ($3 == "offline,job-exclusive,busy") state[node] = "offl"
      else if ($3 == "down") state[node] = "down"
      else if ($3 == "down,offline") state[node] = "down"
      else if ($3 == "down,job-exclusive") state[node] = "down"
      else if ($3 == "down,offline,job-exclusive") state[node] = "down"
      else if ($3 == "down,offline,busy") state[node] = "down"
      else if ($3 == "down,offline,job-exclusive,busy") state[node] = "down"
      else if ($3 == "UNKN") state[node] = "UNKN"
    } else if ($1 == "np") {
      np[node] = $3
    } else if ($1 == "properties") {
      properties[node] = $3
    } else if ($1 == "ntype") {
      ntype[node] = $3
    } else if ($1 == "jobs") {
      # Get Task Number
      # for torque4.x, NF = 3 (tasks joined by "," in one field);
      # torque 2.x, NF >= 3 (one field per task)
      if (NF != 3) {
        numtasks[node] = NF - 2
      } else {
        numtasks[node] = split($3, tarr, ",")
      }
      # Optional source of the job list, because the "jobs=*" item of the
      # "status = " line may be wrong
      if (jobs_from_status == 0) {
        loop = 0
        for (i = 1; i <= numtasks[node]; i++) {
          if (NF != 3) {
            fd = i + 2
            tasks = $fd
            # The last field has no trailing ","; add one so that all
            # entries compare equal in the duplicate check below.
            if (i == numtasks[node]) {
              tasks = sprintf("%s,", tasks)
            }
          } else {
            tasks = tarr[i]
          }
          split(tasks, e, "/")
          # Get full jobid (the part after the "/")
          fjobid = e[2]
          # Record a jobid only when it differs from the previous one
          if (jobidarray[loop] != fjobid) {
            loop++
            jobidarray[loop] = fjobid
          }
        }
        numjobs[node] = loop
        # Get the list of jobids/users for this node
        for (i = 1; i <= numjobs[node]; i++) {
          split(jobidarray[i], d, ".")
          # Get jobid and username
          jobid = d[1]
          user = username[jobid]
          # Case where the node pbs_mom has a (dead job) jobid unknown to pbs_server:
          if (length(user) == 0) {    # Flag non-existent username
            user = "NONE*"
            listnode++
          }
          # Append jobid and username to the job list
          jobidlist[node] = jobidlist[node] " " jobid " " user
        }
      }
    } else if ($1 == "status") {
      # Get the node status subfields
      split(substr($0, 15), a, ",")   # Remove leading "status =", split subfields separated by ","
      for (field in a) {              # Process individual status subfields
        split(a[field], b, "=")       # Split var=value fields
        if (b[1] == "arch") arch[node] = b[2]
        else if (b[1] == "opsys") opsys[node] = b[2]
        else if (b[1] == "sessions") sessions[node] = b[2]
        else if (b[1] == "nsessions") nsessions[node] = int(b[2])
        else if (b[1] == "nusers") nusers[node] = b[2]
        else if (b[1] == "idletime") idletime[node] = b[2]
        else if (b[1] == "totmem") totmem[node] = b[2]
        else if (b[1] == "availmem") availmem[node] = b[2]
        else if (b[1] == "physmem") physmem[node] = b[2]
        else if (b[1] == "ncpus") ncpus[node] = b[2]
        else if (b[1] == "loadave") loadave[node] = b[2]
        else if (b[1] == "netload") netload[node] = b[2]
        else if (b[1] == "size") size[node] = b[2]
        else if (b[1] == "jobs") {
          if (jobs_from_status == 1) {
            # Get the list of jobids/users for this node
            if (b[2] == "? 0") b[2] = ""   # Fix for a bug in pbsnodes ?
            numjobs[node] = split(b[2], c)
            for (i = 1; i <= numjobs[node]; i++) {
              split(c[i], d, ".")
              # Get jobid and username
              jobid = d[1]
              user = username[jobid]
              # Case where the node pbs_mom has a (dead job) jobid unknown to pbs_server:
              if (length(user) == 0) {     # Flag non-existent username
                user = "NONE*"
                listnode++
              }
              # Append jobid and username to the job list
              jobidlist[node] = jobidlist[node] " " jobid " " user
            }
          }
        } else if (b[1] == "rectime") rectime[node] = b[2]
      }
    }
    have_line = ((getline) > 0)       # Get the next input line, stop at end of input
  }

  # Print out values that we are interested in. Flag unexpected values with a "*".
  # Flag nodes with status busy, down, offline or unknown
  if (state[node] == "busy" || state[node] == "down" || state[node] == "offl" || state[node] == "UNKN") {
    stateflag = "*"
    listnode++
  } else {
    stateflag = " "
  }
  # Flag unexpected CPU load average (more than 0.5 away from the task count)
  loaddiff = loadave[node] - numtasks[node]
  if (loaddiff > 0.5 || loaddiff < -0.5) {
    loadflag = "*"
    listnode++
  } else {
    loadflag = " "
  }
  # Resident memory
  resi = (totmem[node] - availmem[node]) / 1024
  if (resi > 50 && resi > physmem[node]/1024 - 50) {   # High memory usage
    resiflag = "*"
    listnode++
  } else {
    resiflag = " "
  }
  # Flag unexpected number of processes or users
  if (nsessions[node] > 2*ncpus[node] + 1) {    # More than 2 sessions per job
    sessflag = "*"
    listnode++
  } else if (nusers[node] > ncpus[node]) {      # More users than nCPUs is bad
    sessflag = "*"
    listnode++
  } else {
    sessflag = " "
  }
  # Flag unexpected number of jobs
  if (numjobs[node] > numtasks[node]) {         # Should be at least 1 task per job
    jobflag = "*"
    listnode++
  } else {
    jobflag = " "
  }
  # CONFIGURE: Comment out the line below
  # Omit down nodes from the flagged list because we do not bother to see them
  # (Use "pbsnodes -l" to list down nodes).
  if (state[node] == "down") listnode = 0

  # Combine each value with its flag so that the pair is padded as one column
  statstr[node] = sprintf("%-4s%-2s", state[node], stateflag)
  loadstr[node] = sprintf("%4.2f%1s", loadave[node], loadflag)
  resistr[node] = sprintf("%d%s", resi, resiflag)
  sesnstr[node] = sprintf("%d/%d%1s", nsessions[node], nusers[node], sessflag)
  ntaskstr[node] = sprintf("%d%1s", numtasks[node], jobflag)

  # Print the node when it was requested (or no node list was given) and,
  # with -f, only when it has been flagged.
  #if (!listflagged || listnode > 0) {
  if ((length(nodelist) == 0 || carenode == 1) && (!listflagged || listnode > 0)) {
    printf (" %-12s %-6s %-8s %-8d %5d %8d %-7s %-8s %-6s %s\n",
      node, statstr[node], loadstr[node],
      physmem[node]/1024, np[node], totmem[node]/1024, resistr[node],
      sesnstr[node], ntaskstr[node], jobidlist[node])
    # initial formation of pestat
    #printf (" %s %s%1s %4.2f%1s %6d %3d %6d %6d%1s %1d/%1d%1s %3d%1s %s\n",
    # node, state[node], stateflag, loadave[node], loadflag,
    # physmem[node]/1024, np[node], totmem[node]/1024, resi, resiflag,
    # nsessions[node], nusers[node], sessflag, numtasks[node], jobflag, jobidlist[node])
  }
}'
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。