Updates for condor on CMS Connect in singularity #3845

Open
wants to merge 5 commits into base: mg265UL
54 changes: 30 additions & 24 deletions bin/MadGraph5_aMCatNLO/PLUGIN/CMS_CLUSTER/__init__.py
@@ -39,9 +39,9 @@
try:
from madgraph import MadGraph5Error
import madgraph.various.misc as misc
except Exception, error:
except Exception as error:
if __debug__:
print str(error)
print(str(error))
from internal import MadGraph5Error
import internal.misc as misc

@@ -52,6 +52,14 @@
multiple_try = misc.multiple_try
pjoin = os.path.join

import socket
hostname = socket.gethostname()

def singularityWraper():
    # OS selection via containers, following
    # https://batchdocs.web.cern.ch/local/submit.html#os-selection-via-containers
    # and simply using the hostname.
    if "lxplus" in hostname:
        return '\nMY.WantOS = "%s"\n' % hostname.split(".")[0].replace("lxplus", "el")
    elif "uscms" in hostname:
        return '\nMY.WantOS = "scl7"\n'
    return ''  # unknown host: append nothing to the submit description

def cleansubproc(subproc):
subproc.terminate()
@@ -72,8 +80,8 @@ def __init__(self, *args, **opt):
import htcondor
self.schedd = htcondor.Schedd()
self._action = htcondor.JobAction
except Exception, error:
raise ClusterManagmentError, 'could not import htcondor python API: \n%s' % error
except Exception as error:
raise ClusterManagmentError('could not import htcondor python API: \n%s' % error)
self.hold_list = os.environ.get("CONDOR_RELEASE_HOLDCODES", "")
self.max_shadows = os.environ.get("CONDOR_RELEASE_HOLDCODES_SHADOW_LIM", "")
self.walltimes = os.environ.get("CONDOR_SET_MAXWALLTIMES", "")
@@ -225,12 +233,13 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
%(requirement)s
getenv=True
%(leave_in_queue)s
+REQUIRED_OS = "rhel7"

+JobFlavour = "%(job_flavour)s"

queue 1
"""

text += singularityWraper()
if self.cluster_queue not in ['None', None]:
requirement = 'Requirements = %s=?=True' % self.cluster_queue
else:
@@ -248,6 +257,7 @@
prog = os.path.join(cwd, prog)
if argument:
argument = 'Arguments = %s' % ' '.join([str(a) for a in argument])
#argument = '%s' % ' '.join([str(a) for a in argument])
else:
argument = ''
# input/output file treatment
@@ -283,19 +293,20 @@
cmd.append("-spool")
a = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stdin=subprocess.PIPE)
output, _ = a.communicate(text % dico)
output, _ = a.communicate((text % dico).encode())
#output = a.stdout.read()
#Submitting job(s).
#Logging submit event(s).
#1 job(s) submitted to cluster 2253622.
if self.debug_print : logger.info( output )

pat = re.compile("submitted to cluster (\d*)",re.MULTILINE)
output = output.decode()
try:
id = pat.search(output).groups()[0]
except:
raise ClusterManagmentError, 'fail to submit to the cluster: \n%s' \
% output
raise ClusterManagmentError('fail to submit to the cluster: \n%s' \
% output)
self.submitted += 1
self.submitted_ids.append(id)
return id
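
For reference, a minimal standalone sketch of the bytes handling that the Python 3 port of submit2 adopts just above: the submit description is encoded before being piped to condor_submit, and the captured output is decoded before the cluster-id regex is applied. The submit text and the assumption that condor_submit is on PATH are placeholders for illustration, not part of this PR.

import re
import subprocess

submit_text = 'executable = /bin/true\nqueue 1\n'   # stand-in submit description
proc = subprocess.Popen(["condor_submit", "-spool"],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
out, _ = proc.communicate(submit_text.encode())   # str -> bytes for the pipe
out = out.decode()                                # bytes -> str for regex matching
match = re.search(r"submitted to cluster (\d+)", out)
cluster_id = match.group(1) if match else None
print(cluster_id)
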
@@ -310,8 +321,8 @@ def control_one_job(self, id):
q = self.query([str(id)], ["JobStatus", "HoldReason"], lim=1)
try:
status = q[0]["JobStatus"]
except Exception, error:
raise ClusterManagmentError, 'could not retrieve job query:\n%s' % error
except Exception as error:
raise ClusterManagmentError('could not retrieve job query:\n%s' % error)

s = self.status_map(status)

@@ -419,7 +430,6 @@ def __init__(self,*args, **opts):

if self.temp_dir!=None:
self.dorsync = True
#print "starting rsync"

cwd = os.getcwd()

@@ -434,7 +444,6 @@
sock.close()

self.rsyncport = port
#print self.rsyncport

rsynclog = os.path.join(cwd, 'rsyncd_%i.log' % self.rsyncport)
rsynclock = os.path.join(cwd, 'rsyncd_%i.lock' % self.rsyncport)
@@ -447,11 +456,10 @@
rsyncsecrets = "%s:%s" % (self.rsyncuser,rsyncpasswd)
rsyncsecretsfile = os.path.join(cwd, 'rsyncsecrets_%i' % self.rsyncport)
secretsh = open(rsyncsecretsfile,'w')
os.chmod(rsyncsecretsfile, 0600)
os.chmod(rsyncsecretsfile, 0o600)
secretsh.write(rsyncsecrets)

os.environ["MADGRAPHRSYNCPASSWD_%i" % self.rsyncport] = rsyncpasswd
#print rsyncpasswd

rsyncconf = """
port = %(rsyncport)s
@@ -528,11 +536,11 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
try:
id = output.split('>',1)[0].split('<')[1]
except:
raise ClusterManagmentError, 'fail to submit to the cluster: \n%s' \
% output
raise ClusterManagmentError('fail to submit to the cluster: \n%s' \
% output)
if not id.isdigit():
raise ClusterManagmentError, 'fail to submit to the cluster: \n%s' \
% output
raise ClusterManagmentError('fail to submit to the cluster: \n%s' \
% output)
self.submitted += 1
self.submitted_ids.append(id)
return id
@@ -545,8 +553,6 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
"""How to make one submission. Return status id on the cluster.
NO SHARE DISK"""

#print "running lsf submit2"

if cwd is None:
cwd = os.getcwd()
if not os.path.exists(prog):
@@ -660,11 +666,11 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
try:
id = output.split('>',1)[0].split('<')[1]
except:
raise ClusterManagmentError, 'fail to submit to the cluster: \n%s' \
% output
raise ClusterManagmentError('fail to submit to the cluster: \n%s' \
% output)
if not id.isdigit():
raise ClusterManagmentError, 'fail to submit to the cluster: \n%s' \
% output
raise ClusterManagmentError('fail to submit to the cluster: \n%s' \
% output)
self.submitted += 1
self.submitted_ids.append(id)
return id
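
As an illustration of what the new singularityWraper() helper emits, here is a sketch of the same hostname-to-ClassAd mapping as a pure function. The hostnames and the helper name want_os_snippet are hypothetical and only for illustration; the real helper reads socket.gethostname() and appends the returned line to the HTCondor submit description.

def want_os_snippet(hostname):
    # Map a login-node hostname to a MY.WantOS submit-file line
    # (OS selection via containers, per the CERN batchdocs recipe).
    short = hostname.split(".")[0]
    if "lxplus" in short:
        return '\nMY.WantOS = "%s"\n' % short.replace("lxplus", "el")
    elif "uscms" in short:
        return '\nMY.WantOS = "scl7"\n'
    return ""

print(want_os_snippet("lxplus8.cern.ch"))       # MY.WantOS = "el8"
print(want_os_snippet("uscms-login.fnal.gov"))  # MY.WantOS = "scl7"
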
27 changes: 9 additions & 18 deletions bin/MadGraph5_aMCatNLO/Utilities/source_condor.sh
@@ -1,23 +1,14 @@
#! /bin/bash
#!/bin/bash

# HTCondor python bindings are lost after cmsenv/scram
# unless PYTHONPATH is set including its location
# Workaround: Include original location in the path
# if not there already.
PYTHON_BINDINGS="$(python -c 'import htcondor; import os; print os.path.dirname(htcondor.__path__[0])' 2>/dev/null)"
PYTHON_BINDINGS="$(python -c 'import htcondor; import os; print(os.path.dirname(htcondor.__path__[0]))' 2>/dev/null)"
if [ -z "$PYTHON_BINDINGS" ]; then
echo "Error: Could not find htcondor python binding (htcondor.so), please include the directory in PYTHONPATH."
exit 1
if [ -d $CMSSW_BASE/venv ];
then
echo "venv exists"
else
if [ -n "$PYTHONPATH" ]; then
case ":$PYTHONPATH:" in
*:$PYTHON_BINDINGS:*) :
;;
*) export PYTHONPATH="$PYTHON_BINDINGS:$PYTHONPATH"
;;
esac
else
export PYTHONPATH="$PYTHON_BINDINGS"
fi
export PYTHON_BINDINGS="$PYTHON_BINDINGS"
scram-venv
cmsenv
pip3 install htcondor --upgrade #FIXME need better way to interface HTCondor Python API for Python3.9
fi
fi
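
The reworked source_condor.sh first probes whether the htcondor Python bindings are importable, and only falls back to scram-venv, cmsenv and pip3 when they are not and no CMSSW virtualenv exists yet. A rough Python equivalent of that probe (a sketch only; the fallback commands themselves stay in the shell script):

import os

try:
    import htcondor
    # Bindings found: this directory is what the script exports as PYTHON_BINDINGS.
    print(os.path.dirname(htcondor.__path__[0]))
except ImportError:
    # Missing bindings: the script then runs scram-venv, cmsenv and
    # `pip3 install htcondor --upgrade` inside the CMSSW virtualenv.
    raise SystemExit(1)
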
10 changes: 6 additions & 4 deletions bin/MadGraph5_aMCatNLO/gridpack_generation.sh
@@ -42,7 +42,7 @@ make_tarball () {
if [ -e merge.pl ]; then
EXTRA_TAR_ARGS+="merge.pl "
fi
XZ_OPT="$XZ_OPT" tar -cJpsf ${PRODHOME}/${name}_${scram_arch}_${cmssw_version}_tarball.tar.xz mgbasedir process runcmsgrid.sh gridpack_generation*.log InputCards $EXTRA_TAR_ARGS
XZ_OPT="$XZ_OPT" tar -cJpf ${PRODHOME}/${name}_${scram_arch}_${cmssw_version}_tarball.tar.xz mgbasedir process runcmsgrid.sh gridpack_generation*.log InputCards $EXTRA_TAR_ARGS

echo "Gridpack created successfully at ${PRODHOME}/${name}_${scram_arch}_${cmssw_version}_tarball.tar.xz"
echo "End of job"
@@ -176,14 +176,15 @@ make_gridpack () {
wget --no-check-certificate ${MGSOURCE}
tar xzf ${MG}
rm "$MG"

#############################################
#Apply any necessary patches on top of official release
#############################################

cd $MGBASEDIRORIG
cat $PRODHOME/patches/*.patch | patch -p1
cp -r $PRODHOME/PLUGIN/CMS_CLUSTER/ PLUGIN/
cp $PRODHOME/PLUGIN/*.sh PLUGIN/
# Intended for expert use only!
if ls $CARDSDIR/${name}*.patch; then
echo " WARNING: Applying custom user patch. I hope you know what you're doing!"
@@ -238,7 +239,6 @@ make_gridpack () {
echo "set cluster_status_update $long_wait $short_wait" >> mgconfigscript
echo "set cluster_nb_retry $n_retries" >> mgconfigscript
echo "set cluster_retry_wait 300" >> mgconfigscript
#echo "set cluster_local_path `${LHAPDFCONFIG} --datadir`" >> mgconfigscript
if [[ ! "$RUNHOME" =~ ^/afs/.* ]]; then
echo "local path is not an afs path, batch jobs will use worker node scratch space instead of afs"
#*FIXME* broken in mg_amc 2.4.0
@@ -317,6 +317,7 @@ make_gridpack () {
cp -r $PRODHOME/PLUGIN/MadSTR $MGBASEDIRORIG/PLUGIN/ # copy plugin
./$MGBASEDIRORIG/bin/mg5_aMC --mode=MadSTR ${name}_proc_card.dat # run invoking MadSTR plugin
fi
cat ${name}_proc_card.dat

is5FlavorScheme=0
if tail -n 20 $LOGFILE | grep -q -e "^p *=.*b\~.*b" -e "^p *=.*b.*b\~"; then
@@ -728,6 +729,7 @@ fi

#catch unset variables
set -u
unset PERL5LIB

if [ -z ${carddir} ]; then
echo "Card directory not provided"
@@ -772,7 +774,7 @@ if [[ `uname -a` == *"lxplus"* ]]; then
fi

LOGFILE=${RUNHOME}/${name}.log
LOGFILE_NAME=${LOGFILE/.log/}
LOGFILE_NAME=${RUNHOME}/${name}

# where to search for datacards, that have to follow a naming code:
# ${name}_proc_card_mg5.dat