Package Ganga :: Package Lib :: Package MonitoringServices :: Package Dashboard :: Module LCGMS
[hide private]
[frames] | no frames]

Source Code for Module Ganga.Lib.MonitoringServices.Dashboard.LCGMS

  1  """Dashboard LCG Monitoring Service plugin 
  2   
  3  N.B. This code is under development and should not generally be used or relied upon. 
  4   
  5  """ 
  6   
  7  #TODO: disable if backend is not LCG 
  8  #TODO: report LB state change times 
  9   
 10   
 11  from Ganga.Lib.MonitoringServices.Dashboard import CommonUtil 
 12  from Ganga.Lib.MonitoringServices.Dashboard import LCGUtil 
 13   
 14   
 15  from Ganga.Lib.MonitoringServices.Dashboard.DashboardMS import DashboardMS 
16 -class LCGMS(DashboardMS):
17 """Dashboard LCG Monitoring Service based on MSG.""" 18
def __init__(self, job_info, config_info):
    """Set up the Dashboard LCG Monitoring Service.

    Both arguments are passed unchanged to ``DashboardMS.__init__``;
    no additional state is initialised here.
    """
    # Explicit base-class call: construction is delegated to DashboardMS.
    DashboardMS.__init__(self, job_info, config_info)
22
23 - def getSandboxModules(self):
31
def getJobInfo(self):
    """Build the job_info dictionary from the Job object.

    Called on the client, where ``self.job_info`` is the Job object.
    Returns a dictionary holding the job's ``fqid`` plus the
    EXECUTION_BACKEND, OWNERDN, JOB_ID_INSIDE_THE_TASK, TASKNAME and
    UNIQUEJOBID values obtained from the ``LCGUtil.cl_*`` helpers.
    """
    job = self.job_info
    info = {}
    info['fqid'] = job.fqid
    info['EXECUTION_BACKEND'] = LCGUtil.cl_execution_backend(job)
    info['OWNERDN'] = LCGUtil.cl_ownerdn()
    info['JOB_ID_INSIDE_THE_TASK'] = LCGUtil.cl_job_id_inside_the_task(job)
    info['TASKNAME'] = LCGUtil.cl_task_name(job)
    info['UNIQUEJOBID'] = LCGUtil.cl_unique_job_id(job)
    return info
#----- event call-backs -----
def submitting(self, **opts):
    """Log the submitting event on the client.

    Only a debug line is written; no message is sent. Extra keyword
    options are accepted and ignored.
    """
    job = self.job_info  # client side: job_info is the Job object
    text = 'submitting %s' % job.fqid
    self._log('debug', text)
50
def prepare(self, **opts):
    """Log the prepare event on the client.

    Only a debug line is written; no message is sent. Extra keyword
    options are accepted and ignored.
    """
    job = self.job_info  # client side: job_info is the Job object
    text = 'prepare %s' % job.fqid
    self._log('debug', text)
54
def submit(self, **opts):
    """Log submit event on client and report the Ganga 'submitted' state.

    Nothing is sent for master wrapper jobs (jobs that have subjobs), nor
    when the built message carries no GRIDJOBID.
    """
    job = self.job_info  # client side: job_info is the Job object
    self._log('debug', 'submit %s' % job.fqid)

    # Master wrapper jobs are skipped.
    if job.subjobs:
        self._log('debug', 'Not sending unwanted message on submit for master wrapper job %s.' % job.fqid)
        return

    # Ganga 'submitted' job-status message, stamped with the current UTC time.
    message = self._cl_job_status_message('submitted', 'Ganga', CommonUtil.utcnow())
    if message['GRIDJOBID'] is not None:
        self._send(self.config_info['destination_job_status'], message)
        return

    # The temporary workaround in
    # LCG.master_bulk_updateMonitoringInformation() results in two submit
    # messages, one of them without a grid_job_id; that one is dropped here.
    self._log('debug', 'Not sending redundant message on submit without grid_job_id for job %s.' % job.fqid)
72
def start(self, **opts):
    """Log start event on worker node and send the Ganga 'running' state."""
    info = self.job_info  # worker node: job_info is a dictionary
    self._log('debug', 'start %s' % info['fqid'])

    # Ganga 'running' job-status message, stamped with the current UTC time.
    running = self._wn_job_status_message('running', 'Ganga', CommonUtil.utcnow())
    destination = self.config_info['destination_job_status']
    self._send(destination, running)
80
def stop(self, exitcode, **opts):
    """Log stop event on worker node and send the final Ganga state.

    An ``exitcode`` equal to 0 is reported as 'completed'; any other value
    is reported as 'failed'. The exit code is included in the message as
    JOBEXITCODE.
    """
    info = self.job_info  # worker node: job_info is a dictionary
    self._log('debug', 'stop %s' % info['fqid'])

    # Assume failure unless the exit code says otherwise.
    status = 'failed'
    if exitcode == 0:
        status = 'completed'

    # Ganga completed-or-failed job-status message.
    message = self._wn_job_status_message(status, 'Ganga', CommonUtil.utcnow())
    message['JOBEXITCODE'] = exitcode
    message['JOBEXITREASON'] = None  # TODO: how can we know this?
    destination = self.config_info['destination_job_status']
    self._send(destination, message)
94
def complete(self, **opts):
    """Log complete event on client and send the LB job-status message.

    Nothing is sent for master wrapper jobs (jobs that have subjobs).
    """
    job = self.job_info  # client side: job_info is the Job object
    self._log('debug', 'complete %s' % job.fqid)

    # Master wrapper jobs are skipped.
    if job.subjobs:
        self._log('debug', 'Not sending unwanted message on complete for master wrapper job %s.' % job.fqid)
        return

    # LB Done job-status message: the state name comes from the grid
    # status, and no state start time is supplied.
    grid_status = LCGUtil.cl_grid_status(job)
    message = self._cl_job_status_message(grid_status, 'LB', None)
    message['GRIDEXITCODE'] = LCGUtil.cl_grid_exit_code(job)
    message['GRIDEXITREASON'] = LCGUtil.cl_grid_exit_reason(job)
    destination = self.config_info['destination_job_status']
    self._send(destination, message)
108
def fail(self, **opts):
    """Log fail event on client and send the LB job-status message.

    Nothing is sent for master wrapper jobs (jobs that have subjobs).
    """
    job = self.job_info  # client side: job_info is the Job object
    self._log('debug', 'fail %s' % job.fqid)

    # Master wrapper jobs are skipped.
    if job.subjobs:
        self._log('debug', 'Not sending unwanted message on fail for master wrapper job %s.' % job.fqid)
        return

    # LB Done or Aborted job-status message: the state name comes from the
    # grid status, and no state start time is supplied.
    grid_status = LCGUtil.cl_grid_status(job)
    message = self._cl_job_status_message(grid_status, 'LB', None)
    message['GRIDEXITCODE'] = LCGUtil.cl_grid_exit_code(job)
    message['GRIDEXITREASON'] = LCGUtil.cl_grid_exit_reason(job)
    destination = self.config_info['destination_job_status']
    self._send(destination, message)
122
def kill(self, **opts):
    """Log kill event on client and send the LB 'Cancelled' state.

    Nothing is sent for master wrapper jobs (jobs that have subjobs).
    """
    job = self.job_info  # client side: job_info is the Job object
    self._log('debug', 'kill %s' % job.fqid)

    # Master wrapper jobs are skipped.
    if job.subjobs:
        self._log('debug', 'Not sending unwanted message on kill for master wrapper job %s.' % job.fqid)
        return

    # LB Cancelled job-status message, with no state start time.
    cancelled = self._cl_job_status_message('Cancelled', 'LB', None)
    destination = self.config_info['destination_job_status']
    self._send(destination, cancelled)
134
def rollback(self, **opts):
    """Log the rollback event on the client.

    Only a debug line is written; no message is sent. Extra keyword
    options are accepted and ignored.
    """
    job = self.job_info  # client side: job_info is the Job object
    text = 'rollback %s' % job.fqid
    self._log('debug', text)
#----- message builders -----
def _cl_job_status_message(self, status, status_source, status_start_time=None):
    """Build a job-status message on the client.

    Called on the client, where ``self.job_info`` is the Job object.
    ``status`` becomes STATENAME, ``status_source`` becomes STATESOURCE and
    ``status_start_time`` becomes STATESTARTTIME. The site, worker node and
    exit code/reason fields are set to None here.

    Fields that must not be null: EXECUTION_BACKEND, GRIDJOBID,
    JOB_ID_INSIDE_THE_TASK, TASKNAME, UNIQUEJOBID.
    """
    job = self.job_info
    message = {}
    # Actual CE, e.g. ce-3-fzk.gridka.de:2119/jobmanager-pbspro-atlasXS
    message['DESTCE'] = LCGUtil.cl_dest_ce(job)
    # Actual site, e.g. FZK-LCG2
    message['DESTSITE'] = None
    # Actual worker node hostname, e.g. c01-102-103.gridka.de
    message['DESTWN'] = None
    # Backend, e.g. LCG
    message['EXECUTION_BACKEND'] = LCGUtil.cl_execution_backend(job)
    # e.g. 0
    message['GRIDEXITCODE'] = None
    # e.g. Job terminated successfully
    message['GRIDEXITREASON'] = None
    # e.g. https://grid-lb0.desy.de:9000/moqY5njFGurEuoDkkJmtBA
    message['GRIDJOBID'] = LCGUtil.cl_grid_job_id(job)
    # e.g. 0
    message['JOBEXITCODE'] = None
    message['JOBEXITREASON'] = None
    # Subjob id, e.g. 0
    message['JOB_ID_INSIDE_THE_TASK'] = LCGUtil.cl_job_id_inside_the_task(job)
    # Grid certificate subject (DN) of the owner
    message['OWNERDN'] = LCGUtil.cl_ownerdn()
    # e.g. ToolUI, JobWN
    message['REPORTER'] = 'ToolUI'
    # e.g. 2009-11-25T14:59:24.754249Z
    message['REPORTTIME'] = CommonUtil.utcnow()
    # e.g. submitted, Done (Success)
    message['STATENAME'] = status
    # e.g. Ganga, LB
    message['STATESOURCE'] = status_source
    # e.g. 2009-11-25T14:32:51.428988Z
    message['STATESTARTTIME'] = status_start_time
    # e.g. ganga:6702b50a-8a31-4476-8189-62ea5b8e00b3:TrigStudy
    message['TASKNAME'] = LCGUtil.cl_task_name(job)
    # Ganga uuid, e.g. 1c08ff3b-904f-4f77-a481-d6fa765813cb
    message['UNIQUEJOBID'] = LCGUtil.cl_unique_job_id(job)
    message['___fqid'] = job.fqid
    return message
166
def _wn_job_status_message(self, status, status_source, status_start_time):
    """Build a job-status message on the worker node.

    Called on the worker node, where ``self.job_info`` is a dictionary.
    ``status`` becomes STATENAME, ``status_source`` becomes STATESOURCE and
    ``status_start_time`` becomes STATESTARTTIME. Destination and grid job
    id come from the ``LCGUtil.wn_*`` helpers; the remaining job
    identification fields are copied from the job_info dictionary. Exit
    code/reason fields are set to None here.

    Fields that must not be null: EXECUTION_BACKEND, GRIDJOBID,
    JOB_ID_INSIDE_THE_TASK, TASKNAME, UNIQUEJOBID.
    """
    info = self.job_info
    message = {}
    message['DESTCE'] = LCGUtil.wn_dest_ce(info)
    message['DESTSITE'] = LCGUtil.wn_dest_site(info)
    message['DESTWN'] = LCGUtil.wn_dest_wn()
    message['EXECUTION_BACKEND'] = info['EXECUTION_BACKEND']
    message['GRIDEXITCODE'] = None
    message['GRIDEXITREASON'] = None
    message['GRIDJOBID'] = LCGUtil.wn_grid_job_id(info)
    message['JOBEXITCODE'] = None
    message['JOBEXITREASON'] = None
    message['JOB_ID_INSIDE_THE_TASK'] = info['JOB_ID_INSIDE_THE_TASK']
    message['OWNERDN'] = info['OWNERDN']
    message['REPORTER'] = 'JobWN'
    message['REPORTTIME'] = CommonUtil.utcnow()
    message['STATENAME'] = status
    message['STATESOURCE'] = status_source
    message['STATESTARTTIME'] = status_start_time
    message['TASKNAME'] = info['TASKNAME']
    message['UNIQUEJOBID'] = info['UNIQUEJOBID']
    message['___fqid'] = info['fqid']
    return message
192