Skip to content

Commit bc604ec

Browse files
committed
fix: get batch system info from the pilot
1 parent d7da5b0 commit bc604ec

File tree

2 files changed

+101
-28
lines changed

2 files changed

+101
-28
lines changed

Diff for: Pilot/pilotCommands.py

+14-4
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def __init__(self, pilotParams):
4747
try:
4848
from Pilot.pilotTools import (
4949
CommandBase,
50-
getFlavour,
50+
getSubmitterInfo,
5151
retrieveUrlTimeout,
5252
safe_listdir,
5353
sendMessage,
@@ -56,7 +56,7 @@ def __init__(self, pilotParams):
5656
except ImportError:
5757
from pilotTools import (
5858
CommandBase,
59-
getFlavour,
59+
getSubmitterInfo,
6060
retrieveUrlTimeout,
6161
safe_listdir,
6262
sendMessage,
@@ -550,8 +550,7 @@ def execute(self):
550550
551551
VOs may want to replace/extend the _getBasicsCFG and _getSecurityCFG functions
552552
"""
553-
554-
self.pp.flavour, self.pp.pilotReference = getFlavour(self.pp.ceName)
553+
self.pp.flavour, self.pp.pilotReference, self.pp.batchSystemInfo = getSubmitterInfo(self.pp.ceName)
555554

556555
self._getBasicsCFG()
557556
self._getSecurityCFG()
@@ -846,6 +845,17 @@ def execute(self):
846845
"""Setup configuration parameters"""
847846
self.cfg.append("-o /LocalSite/GridMiddleware=%s" % self.pp.flavour)
848847

848+
# Add batch system details to the configuration
849+
# Can be used by the pilot/job later on, to interact with the batch system
850+
self.cfg.append("-o /LocalSite/BatchSystem/Type=%s" % self.pp.batchSystemInfo.get("Type", "Unknown"))
851+
self.cfg.append("-o /LocalSite/BatchSystem/JobID=%s" % self.pp.batchSystemInfo.get("JobID", "Unknown"))
852+
853+
batchSystemParams = self.pp.batchSystemInfo.get("Parameters", {})
854+
self.cfg.append("-o /LocalSite/BatchSystem/Parameters/Queue=%s" % batchSystemParams.get("Queue", "Unknown"))
855+
self.cfg.append("-o /LocalSite/BatchSystem/Parameters/BinaryPath=%s" % batchSystemParams.get("BinaryPath", "Unknown"))
856+
self.cfg.append("-o /LocalSite/BatchSystem/Parameters/Host=%s" % batchSystemParams.get("Host", "Unknown"))
857+
self.cfg.append("-o /LocalSite/BatchSystem/Parameters/InfoPath=%s" % batchSystemParams.get("InfoPath", "Unknown"))
858+
849859
self.cfg.append('-n "%s"' % self.pp.site)
850860
self.cfg.append('-S "%s"' % self.pp.setup)
851861

Diff for: Pilot/pilotTools.py

+87-24
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from datetime import datetime
1919
from functools import partial, wraps
2020
from threading import RLock
21+
import warnings
2122

2223
############################
2324
# python 2 -> 3 "hacks"
@@ -214,53 +215,103 @@ def listdir(directory):
214215
return contents
215216

216217

217-
def getFlavour(ceName):
218+
def getSubmitterInfo(ceName):
219+
"""Get information about the submitter of the pilot.
220+
221+
Check the environment variables to determine the type of batch system and CE used
222+
to submit the pilot being used and return this information in a tuple.
223+
"""
218224

219225
pilotReference = os.environ.get("DIRAC_PILOT_STAMP", "")
226+
# Batch system taking care of the pilot
227+
# Might be useful to extract the info to interact with it later on
228+
batchSystemType = "Unknown"
229+
batchSystemJobID = "Unknown"
230+
batchSystemParameters = {
231+
"BinaryPath": "Unknown",
232+
"Host": "Unknown",
233+
"InfoPath": "Unknown",
234+
"Queue": "Unknown",
235+
}
236+
# Flavour of the pilot
237+
# Inform whether the pilot was sent through SSH+batch system or a CE
220238
flavour = "DIRAC"
221239

222240
# # Batch systems
223241

224-
# Take the reference from the Torque batch system
242+
# Torque
225243
if "PBS_JOBID" in os.environ:
226-
flavour = "SSHTorque"
227-
pilotReference = "sshtorque://" + ceName + "/" + os.environ["PBS_JOBID"].split(".")[0]
244+
batchSystemType = "PBS"
245+
batchSystemJobID = os.environ["PBS_JOBID"]
246+
batchSystemParameters["BinaryPath"] = os.environ.get("PBS_O_PATH", "Unknown")
247+
batchSystemParameters["Queue"] = os.environ.get("PBS_O_QUEUE", "Unknown")
228248

229-
# Take the reference from the OAR batch system
249+
flavour = "SSH%s" % batchSystemType
250+
pilotReference = "sshpbs://" + ceName + "/" + batchSystemJobID.split(".")[0]
251+
252+
# OAR
230253
if "OAR_JOBID" in os.environ:
231-
flavour = "SSHOAR"
232-
pilotReference = "sshoar://" + ceName + "/" + os.environ["OAR_JOBID"]
254+
batchSystemType = "OAR"
255+
batchSystemJobID = os.environ["OAR_JOBID"]
256+
257+
flavour = "SSH%s" % batchSystemType
258+
pilotReference = "sshoar://" + ceName + "/" + batchSystemJobID
233259

234260
# Grid Engine
235-
if "JOB_ID" in os.environ and "SGE_TASK_ID" in os.environ:
236-
flavour = "SSHGE"
237-
pilotReference = "sshge://" + ceName + "/" + os.environ["JOB_ID"]
238-
# Generic JOB_ID
239-
elif "JOB_ID" in os.environ:
240-
flavour = "Generic"
241-
pilotReference = "generic://" + ceName + "/" + os.environ["JOB_ID"]
261+
if "SGE_TASK_ID" in os.environ:
262+
batchSystemType = "SGE"
263+
batchSystemJobID = os.environ["JOB_ID"]
264+
batchSystemParameters["BinaryPath"] = os.environ.get("SGE_BINARY_PATH", "Unknown")
265+
batchSystemParameters["Queue"] = os.environ.get("QUEUE", "Unknown")
266+
267+
flavour = "SSH%s" % batchSystemType
268+
pilotReference = "sshge://" + ceName + "/" + batchSystemJobID
242269

243270
# LSF
244271
if "LSB_BATCH_JID" in os.environ:
245-
flavour = "SSHLSF"
246-
pilotReference = "sshlsf://" + ceName + "/" + os.environ["LSB_BATCH_JID"]
272+
batchSystemType = "LSF"
273+
batchSystemJobID = os.environ["LSB_BATCH_JID"]
274+
batchSystemParameters["BinaryPath"] = os.environ.get("LSF_BINDIR", "Unknown")
275+
batchSystemParameters["Host"] = os.environ.get("LSB_HOSTS", "Unknown")
276+
batchSystemParameters["InfoPath"] = os.environ.get("LSF_ENVDIR", "Unknown")
277+
batchSystemParameters["Queue"] = os.environ.get("LSB_QUEUE", "Unknown")
247278

248-
# SLURM batch system
279+
flavour = "SSH%s" % batchSystemType
280+
pilotReference = "sshlsf://" + ceName + "/" + batchSystemJobID
281+
282+
# SLURM
249283
if "SLURM_JOBID" in os.environ:
250-
flavour = "SSHSLURM"
251-
pilotReference = "sshslurm://" + ceName + "/" + os.environ["SLURM_JOBID"]
284+
batchSystemType = "SLURM"
285+
batchSystemJobID = os.environ["SLURM_JOBID"]
286+
287+
flavour = "SSH%s" % batchSystemType
288+
pilotReference = "sshslurm://" + ceName + "/" + batchSystemJobID
252289

253290
# Condor
254291
if "CONDOR_JOBID" in os.environ:
255-
flavour = "SSHCondor"
256-
pilotReference = "sshcondor://" + ceName + "/" + os.environ["CONDOR_JOBID"]
292+
batchSystemType = "HTCondor"
293+
batchSystemJobID = os.environ["CONDOR_JOBID"]
294+
batchSystemParameters["InfoPath"] = os.environ.get("_CONDOR_JOB_AD", "Unknown")
257295

258-
# # CEs
296+
flavour = "SSH%s" % batchSystemType
297+
pilotReference = "sshcondor://" + ceName + "/" + batchSystemJobID
298+
299+
# # CEs/Batch Systems
259300

260301
# HTCondor
261302
if "HTCONDOR_JOBID" in os.environ:
303+
batchSystemType = "HTCondor"
304+
batchSystemJobID = os.environ["HTCONDOR_JOBID"]
305+
262306
flavour = "HTCondorCE"
263-
pilotReference = "htcondorce://" + ceName + "/" + os.environ["HTCONDOR_JOBID"]
307+
pilotReference = "htcondorce://" + ceName + "/" + batchSystemJobID
308+
309+
# # Local/SSH
310+
311+
# Local submission to the host
312+
if "LOCAL_JOBID" in os.environ:
313+
flavour = "Local"
314+
pilotReference = "local://" + ceName + "/" + os.environ["LOCAL_JOBID"]
264315

265316
# Direct SSH tunnel submission
266317
if "SSHCE_JOBID" in os.environ:
@@ -274,6 +325,8 @@ def getFlavour(ceName):
274325
"sshbatchhost://" + ceName + "/" + os.environ["SSH_NODE_HOST"] + "/" + os.environ["SSHBATCH_JOBID"]
275326
)
276327

328+
# # CEs
329+
277330
# ARC
278331
if "GRID_GLOBAL_JOBURL" in os.environ:
279332
flavour = "ARC"
@@ -284,9 +337,18 @@ def getFlavour(ceName):
284337
flavour = "VMDIRAC"
285338
pilotReference = "vm://" + ceName + "/" + os.environ["JOB_ID"]
286339

287-
return flavour, pilotReference
340+
return flavour, pilotReference, {"Type": batchSystemType, "JobID": batchSystemJobID, "Parameters": batchSystemParameters}
288341

289342

343+
def getFlavour(ceName):
    """Return the pilot flavour and pilot reference for a CE. Deprecated.

    Backward-compatibility wrapper around getSubmitterInfo(): it emits a
    DeprecationWarning and then forwards the call, keeping only the first
    two returned values and discarding the batch system information.

    :param ceName: name of the CE, passed unchanged to getSubmitterInfo()
    :return: tuple (flavour, pilotReference)
    """
    # stacklevel=2 attributes the warning to the caller of getFlavour()
    # rather than to this wrapper.
    warnings.warn(
        "getFlavour() is deprecated. Please use getSubmitterInfo() instead.",
        category=DeprecationWarning,
        stacklevel=2,
    )
    pilotFlavour, reference, _batchSystemInfo = getSubmitterInfo(ceName)
    return pilotFlavour, reference
351+
290352
class ObjectLoader(object):
291353
"""Simplified class for loading objects from a DIRAC installation.
292354
@@ -834,6 +896,7 @@ def __init__(self):
834896
self.stopOnApplicationFailure = True
835897
self.stopAfterFailedMatches = 10
836898
self.flavour = "DIRAC"
899+
self.batchSystemInfo = {}
837900
self.pilotReference = ""
838901
self.releaseVersion = ""
839902
self.releaseProject = ""

0 commit comments

Comments
 (0)