Home | Trees | Indices | Help |
---|
|
"""Dashboard LCG Monitoring Service plugin

N.B. This code is under development and should not generally be used or relied upon.

"""

#TODO: disable if backend is not LCG
#TODO: report LB state change times


from Ganga.Lib.MonitoringServices.Dashboard import CommonUtil
from Ganga.Lib.MonitoringServices.Dashboard import LCGUtil


from Ganga.Lib.MonitoringServices.Dashboard.DashboardMS import DashboardMS


# NOTE(review): this module was recovered from a listing in which the ``class``
# line and every ``def`` line were missing. The class name is taken from the
# module path listed in getSandboxModules(); the event call-back names are
# taken from their own log messages. The ``**opts`` catch-all on the event
# call-backs is an assumption (it only widens what callers may pass) --
# confirm all headers against the monitoring-service base interface.
class LCGMS(DashboardMS):
    """Dashboard LCG Monitoring Service based on MSG."""

    def __init__(self, job_info, config_info):
        """Construct the Dashboard LCG Monitoring Service."""
        DashboardMS.__init__(self, job_info, config_info)

    def getSandboxModules(self):
        """Return list of module dependencies."""
        import Ganga.Lib.MonitoringServices.Dashboard
        return DashboardMS.getSandboxModules(self) + [
            Ganga.Lib.MonitoringServices.Dashboard.CommonUtil,
            Ganga.Lib.MonitoringServices.Dashboard.LCGMS,
            Ganga.Lib.MonitoringServices.Dashboard.LCGUtil,
            ]

    # NOTE(review): method name reconstructed, not visible in the listing.
    def getJobInfo(self):
        """Create job_info from Job object.

        Returns a dictionary holding the job's fqid plus the
        EXECUTION_BACKEND, OWNERDN, JOB_ID_INSIDE_THE_TASK, TASKNAME and
        UNIQUEJOBID values obtained from the LCGUtil client-side helpers.
        The worker-node methods of this class read these same keys from
        self.job_info.
        """
        j = self.job_info  # called on client, so job_info is Job object
        ji = {
            'fqid': j.fqid,
            'EXECUTION_BACKEND': LCGUtil.cl_execution_backend(j),
            'OWNERDN': LCGUtil.cl_ownerdn(),
            'JOB_ID_INSIDE_THE_TASK': LCGUtil.cl_job_id_inside_the_task(j),
            'TASKNAME': LCGUtil.cl_task_name(j),
            'UNIQUEJOBID': LCGUtil.cl_unique_job_id(j),
            }
        return ji

    #----- event call-backs -----

    def submitting(self, **opts):
        """Log submitting event on client. No message is sent."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'submitting %s' % j.fqid)

    def prepare(self, **opts):
        """Log prepare event on client. No message is sent."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'prepare %s' % j.fqid)

    def submit(self, **opts):
        """Log submit event on client."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'submit %s' % j.fqid)
        # ignore master wrapper jobs
        if j.subjobs:
            self._log('debug', 'Not sending unwanted message on submit for master wrapper job %s.' % j.fqid)
            return
        # send Ganga submitted job-status message
        message = self._cl_job_status_message('submitted', 'Ganga', CommonUtil.utcnow())
        if message['GRIDJOBID'] is None:
            # This is to handle the temporary workaround in
            # LCG.master_bulk_updateMonitoringInformation() which results in two
            # submit messages being sent, one without a grid_job_id.
            self._log('debug', 'Not sending redundant message on submit without grid_job_id for job %s.' % j.fqid)
        else:
            self._send(self.config_info['destination_job_status'], message)

    def start(self, **opts):
        """Log start event on worker node."""
        ji = self.job_info  # called on worker node, so job_info is dictionary
        self._log('debug', 'start %s' % ji['fqid'])
        # send Ganga running job-status message
        message = self._wn_job_status_message('running', 'Ganga', CommonUtil.utcnow())
        self._send(self.config_info['destination_job_status'], message)

    def stop(self, exitcode, **opts):
        """Log stop event on worker node.

        An exitcode of 0 is reported as 'completed'; any other value is
        reported as 'failed'. The exit code itself is sent as JOBEXITCODE.
        """
        ji = self.job_info  # called on worker node, so job_info is dictionary
        self._log('debug', 'stop %s' % ji['fqid'])
        if exitcode == 0:
            status = 'completed'
        else:
            status = 'failed'
        # send Ganga completed or failed job-status message
        message = self._wn_job_status_message(status, 'Ganga', CommonUtil.utcnow())
        message['JOBEXITCODE'] = exitcode
        message['JOBEXITREASON'] = None  #TODO: how can we know this?
        self._send(self.config_info['destination_job_status'], message)

    def complete(self, **opts):
        """Log complete event on client."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'complete %s' % j.fqid)
        # ignore master wrapper jobs
        if j.subjobs:
            self._log('debug', 'Not sending unwanted message on complete for master wrapper job %s.' % j.fqid)
            return
        # send LB Done job-status message
        message = self._cl_job_status_message(LCGUtil.cl_grid_status(j), 'LB', None)
        message['GRIDEXITCODE'] = LCGUtil.cl_grid_exit_code(j)
        message['GRIDEXITREASON'] = LCGUtil.cl_grid_exit_reason(j)
        self._send(self.config_info['destination_job_status'], message)

    def fail(self, **opts):
        """Log fail event on client."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'fail %s' % j.fqid)
        # ignore master wrapper jobs
        if j.subjobs:
            self._log('debug', 'Not sending unwanted message on fail for master wrapper job %s.' % j.fqid)
            return
        # send LB Done or Aborted job-status message
        message = self._cl_job_status_message(LCGUtil.cl_grid_status(j), 'LB', None)
        message['GRIDEXITCODE'] = LCGUtil.cl_grid_exit_code(j)
        message['GRIDEXITREASON'] = LCGUtil.cl_grid_exit_reason(j)
        self._send(self.config_info['destination_job_status'], message)

    def kill(self, **opts):
        """Log kill event on client."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'kill %s' % j.fqid)
        # ignore master wrapper jobs
        if j.subjobs:
            self._log('debug', 'Not sending unwanted message on kill for master wrapper job %s.' % j.fqid)
            return
        # send LB Cancelled job-status message
        message = self._cl_job_status_message('Cancelled', 'LB', None)
        self._send(self.config_info['destination_job_status'], message)

    def rollback(self, **opts):
        """Log rollback event on client. No message is sent."""
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'rollback %s' % j.fqid)

    #----- message builders -----

    def _cl_job_status_message(self, status, status_source, status_start_time):
        """Build a job-status message dictionary on the client.

        status is stored as STATENAME, status_source as STATESOURCE and
        status_start_time as STATESTARTTIME. REPORTER is 'ToolUI'.
        """
        # Not null: EXECUTION_BACKEND, GRIDJOBID, JOB_ID_INSIDE_THE_TASK, TASKNAME, UNIQUEJOBID
        j = self.job_info  # called on client, so job_info is Job object
        msg = {
            'DESTCE': LCGUtil.cl_dest_ce(j),  # Actual CE. e.g. ce-3-fzk.gridka.de:2119/jobmanager-pbspro-atlasXS
            'DESTSITE': None,  # Actual site. e.g. FZK-LCG2
            'DESTWN': None,  # Actual worker node hostname. e.g. c01-102-103.gridka.de
            'EXECUTION_BACKEND': LCGUtil.cl_execution_backend(j),  # Backend. e.g. LCG
            'GRIDEXITCODE': None,  # e.g. 0
            'GRIDEXITREASON': None,  # e.g. Job terminated successfully
            'GRIDJOBID': LCGUtil.cl_grid_job_id(j),  # e.g. https://grid-lb0.desy.de:9000/moqY5njFGurEuoDkkJmtBA
            'JOBEXITCODE': None,  # e.g. 0
            'JOBEXITREASON': None,
            'JOB_ID_INSIDE_THE_TASK': LCGUtil.cl_job_id_inside_the_task(j),  # subjob id e.g. 0
            'OWNERDN': LCGUtil.cl_ownerdn(),  # Grid certificate. e.g. /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=dtuckett/CN=671431/CN=David Tuckett/CN=proxy
            'REPORTER': 'ToolUI',  # e.g. ToolUI, JobWN
            'REPORTTIME': CommonUtil.utcnow(),  # e.g. 2009-11-25T14:59:24.754249Z
            'STATENAME': status,  # e.g. submitted, Done (Success)
            'STATESOURCE': status_source,  # e.g. Ganga, LB
            'STATESTARTTIME': status_start_time,  # e.g. 2009-11-25T14:32:51.428988Z
            'TASKNAME': LCGUtil.cl_task_name(j),  # e.g. ganga:6702b50a-8a31-4476-8189-62ea5b8e00b3:TrigStudy
            'UNIQUEJOBID': LCGUtil.cl_unique_job_id(j),  # Ganga uuid e.g. 1c08ff3b-904f-4f77-a481-d6fa765813cb
            '___fqid' : j.fqid,
            }
        return msg

    def _wn_job_status_message(self, status, status_source, status_start_time):
        """Build a job-status message dictionary on the worker node.

        status is stored as STATENAME, status_source as STATESOURCE and
        status_start_time as STATESTARTTIME. REPORTER is 'JobWN'. Identity
        fields are read from the job_info dictionary.
        """
        # Not null: EXECUTION_BACKEND, GRIDJOBID, JOB_ID_INSIDE_THE_TASK, TASKNAME, UNIQUEJOBID
        ji = self.job_info  # called on worker node, so job_info is dictionary
        msg = {
            'DESTCE': LCGUtil.wn_dest_ce(ji),
            'DESTSITE': LCGUtil.wn_dest_site(ji),
            'DESTWN': LCGUtil.wn_dest_wn(),
            'EXECUTION_BACKEND': ji['EXECUTION_BACKEND'],
            'GRIDEXITCODE': None,
            'GRIDEXITREASON': None,
            'GRIDJOBID': LCGUtil.wn_grid_job_id(ji),
            'JOBEXITCODE': None,
            'JOBEXITREASON': None,
            'JOB_ID_INSIDE_THE_TASK': ji['JOB_ID_INSIDE_THE_TASK'],
            'OWNERDN': ji['OWNERDN'],
            'REPORTER': 'JobWN',
            'REPORTTIME': CommonUtil.utcnow(),
            'STATENAME': status,
            'STATESOURCE': status_source,
            'STATESTARTTIME': status_start_time,
            'TASKNAME': ji['TASKNAME'],
            'UNIQUEJOBID': ji['UNIQUEJOBID'],
            '___fqid' : ji['fqid'],
            }
        return msg
Home | Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Mon Jun 25 10:35:34 2012 | http://epydoc.sourceforge.net |