Coverage for python/lsst/verify/bin/dispatchverify.py: 11%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

180 statements  

1# This file is part of verify. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21"""Upload LSST Science Pipelines Verification `~lsst.verify.Job` datasets to 

22the SQUASH dashboard. 

23 

24Job JSON files can be created by `lsst.verify.Job.write` or 

25`lsst.verify.output_quantities`. A `~lsst.verify.Job` dataset consists of 

26metric measurements, associated blobs, and pipeline execution metadata. 

27Individual LSST Science Pipelines tasks typically write separate JSON datasets. 

28This command can collect and combine multiple Job JSON datasets into a single 

29Job upload. 

30 

31**Configuration** 

32 

33dispatch_verify.py is configurable from both the command line and environment 

34variables. See the argument documentation for environment variable equivalents. 

35Command line settings override environment variable configuration. 

36 

37**Metadata and environment** 

38 

39dispatch_verify.py can enrich Verification Job metadata with information 

40from the environment. Currently dispatch_verify.py supports the Jenkins CI 

41and the LSST Data Facility (LDF) execution environments. 

42 

43In the Jenkins CI execution environment (``--env=jenkins``) the 

44following environment variables are consumed: 

45 

46- ``BUILD_ID``: ID in the CI system 

47- ``BUILD_URL``: CI page with information about the build 

48- ``PRODUCT``: the name of the product built, e.g. 'validate_drp' 

49- ``dataset``: the name of the dataset processed, e.g. 'validation_data_cfht' 

50- ``label``: the name of the platform where it runs 

51- ``refs``: the branches run by Jenkins, e.g. 'tickets/DM-12345 master' 

52 

53If ``--lsstsw`` is used, additional Git branch information is included with 

54Science Pipelines package metadata. 

55 

56In the LSST Data Facility execution environment (``--env=ldf``) the following 

57environment variables are consumed: 

58 

59- ``DATASET``: the name of the dataset processed, e.g. 'HSC RC2' 

60- ``DATASET_REPO_URL``: a reference URL with information about the dataset 

61- ``RUN_ID``: ID of the run in the LDF environment 

62- ``RUN_ID_URL``: a reference URL with information about the run 

63- ``VERSION_TAG``: the version tag of the LSST software used, e.g. 'w_2018_18' 

64 

65Note: currently it is not possible to gather Science Pipelines package metadata 

66in the LDF environment, thus if ``--env=ldf`` is used ``--ignore-lsstsw`` is 

67also used by default in this environment. 

68""" 

69# For determining what is documented in Sphinx 

70__all__ = ['build_argparser', 'main', 'insert_lsstsw_metadata', 

71 'insert_extra_package_metadata', 'insert_env_metadata', 

72 'validate_date_created', 'Configuration'] 

73 

74import argparse 

75import os 

76import json 

77import getpass 

78from dateutil import parser as date_parser 

79from datetime import datetime, timezone 

80import logging 

81 

82try: 

83 import git 

84except ImportError: 

85 # GitPython is not a standard Stack package; skip gracefully if unavailable 

86 git = None 

87 

88from lsst.verify import Job 

89from lsst.verify.metadata.lsstsw import LsstswRepos 

90from lsst.verify.metadata.eupsmanifest import Manifest 

91from lsst.verify.metadata.jenkinsci import get_jenkins_env 

92from lsst.verify.metadata.ldf import get_ldf_env 

93 

94_LOG = logging.getLogger(__name__) 

95 

96 

def build_argparser():
    """Build the command line argument parser for ``dispatch_verify.py``.

    The module docstring is used as the parser description. Arguments are
    organized as general options, an "Environment arguments" group and a
    "SQUASH API arguments" group.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Parser whose ``parse_args`` result can be passed to `Configuration`.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.')

    parser.add_argument(
        'json_paths',
        nargs='+',
        metavar='json',
        help='Verification job JSON file, or files. When multiple JSON '
             'files are present, their measurements, blobs, and metadata '
             'are merged.')
    parser.add_argument(
        '--test',
        default=False,
        action='store_true',
        help='Run this command without uploading to the SQUASH service. '
             'The JSON payload is printed to standard out.')
    parser.add_argument(
        '--write',
        metavar='PATH',
        dest='output_filepath',
        help='Write the merged and enriched Job JSON dataset to the given '
             'path.')
    parser.add_argument(
        '--show',
        dest='show_json',
        action='store_true',
        default=False,
        help='Print the assembled Job JSON to standard output.')
    parser.add_argument(
        '--ignore-blobs',
        dest='ignore_blobs',
        action='store_true',
        default=False,
        # The trailing space keeps the two fragments from fusing into
        # "verificationjob." when the literals are concatenated.
        help='Ignore data blobs even if they are available in the '
             'verification job.')

    env_group = parser.add_argument_group('Environment arguments')
    env_group.add_argument(
        '--env',
        dest='env_name',
        choices=Configuration.allowed_env,
        help='Name of the environment where the verification job is being '
             'run. In some environments dispatch_verify.py will gather '
             'additional metadata automatically:\n'
             '\n'
             'jenkins\n'
             ' For the Jenkins CI (https://ci.lsst.codes)'
             ' environment.\n'
             'ldf\n'
             ' For the LSST Data Facility environment. \n'
             '\n'
             'Equivalent to the $VERIFY_ENV environment variable.')
    env_group.add_argument(
        '--lsstsw',
        dest='lsstsw',
        metavar='PATH',
        help='lsstsw directory path. If available, Stack package versions are '
             'read from lsstsw. Equivalent to the ``$LSSTSW`` environment '
             'variable. Disabled with ``--ignore-lsstsw``.')
    env_group.add_argument(
        '--package-repos',
        dest='extra_package_paths',
        nargs='*',
        metavar='PATH',
        help='Paths to additional Stack package Git repositories. These '
             'packages are tracked in Job metadata, like lsstsw-based '
             'packages.')
    env_group.add_argument(
        '--ignore-lsstsw',
        dest='ignore_lsstsw',
        action='store_true',
        default=False,
        help='Ignore lsstsw metadata even if it is available (for example, '
             'the ``$LSSTSW`` variable is set).')

    api_group = parser.add_argument_group('SQUASH API arguments')
    api_group.add_argument(
        '--url',
        dest='api_url',
        metavar='URL',
        help='Root URL of the SQUASH API. Equivalent to the ``$SQUASH_URL`` '
             'environment variable.')
    api_group.add_argument(
        '--user',
        dest='api_user',
        metavar='USER',
        help='Username for SQUASH API. Equivalent to the $SQUASH_USER '
             'environment variable.')
    api_group.add_argument(
        '--password',
        dest='api_password',
        metavar='PASSWORD',
        help='Password for SQUASH API. Equivalent to the ``$SQUASH_PASSWORD`` '
             'environment variable. If neither is set, you will be prompted.')
    api_group.add_argument(
        '--date-created',
        dest='date_created',
        help='ISO8601 formatted datetime in UTC for the Job creation date, '
             'e.g. 2021-06-30T22:28:25Z. If not provided the current '
             'datetime is used.')
    return parser

201 

202 

def main():
    """Entrypoint for the ``dispatch_verify.py`` command line executable.

    Steps, in order: parse the command line into a `Configuration`; load
    every Job JSON file (optionally dropping blobs); merge the jobs into
    one; reload metric definitions from ``verify_metrics``; add lsstsw,
    extra-package and environment metadata as configured; upload the job
    unless running in test mode; optionally print it and/or write it to
    disk.
    """
    log = _LOG.getChild('main')

    config = Configuration(build_argparser().parse_args())
    log.debug(str(config))

    # Deserialize each Job JSON document named on the command line.
    loaded_jobs = []
    for path in config.json_paths:
        log.info('Loading {0}'.format(path))
        with open(path) as stream:
            document = json.load(stream)
        if config.ignore_blobs:
            log.info('Ignoring blobs from Job JSON {0}'.format(path))
            document = delete_blobs(document)
        loaded_jobs.append(Job.deserialize(**document))

    # The first job absorbs all the others.
    job = loaded_jobs.pop(0)
    if loaded_jobs:
        log.info('Merging verification Job JSON.')
    for extra_job in loaded_jobs:
        job += extra_job

    # Ensure all measurements have a metric so that units are normalized.
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Package metadata read from an lsstsw installation.
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Package metadata for repositories given with --package-repos.
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Environment metadata: log message and reader for each known
    # environment name.
    env_sources = {
        'jenkins': ('Inserting Jenkins CI environment metadata.',
                    get_jenkins_env),
        'ldf': ('Inserting LSST Data Facility environment metadata.',
                get_ldf_env),
    }
    if config.env_name in env_sources:
        announcement, read_env = env_sources[config.env_name]
        log.info(announcement)
        job = insert_env_metadata(job, config.env_name, read_env(),
                                  config.date_created)

    # Upload, unless this is a dry run.
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        response = job.dispatch(api_user=config.api_user,
                                api_password=config.api_password,
                                api_url=config.api_url)
        log.info(response.json()['message'])

    if config.show_json:
        rendered = json.dumps(job.json,
                              sort_keys=True, indent=4,
                              separators=(',', ': '))
        print(rendered)

    # Persist the assembled job when an output path was requested.
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)

275 

276 

def delete_blobs(json_data):
    """Remove the ``'blobs'`` entry from a Job JSON mapping, if present.

    The mapping is modified in place and the same object is returned.
    """
    json_data.pop('blobs', None)
    return json_data

283 

284 

def insert_lsstsw_metadata(job, config):
    """Insert metadata for lsstsw-based packages into ``Job.meta['packages']``.

    One entry is built for every item of the manifest found at the
    `LsstswRepos` manifest path for ``config.lsstsw``. Entries are merged
    into an existing ``packages`` mapping, or stored as a new one when the
    job has none. The same ``job`` object is returned.
    """
    repos = LsstswRepos(config.lsstsw)

    with open(repos.manifest_path) as manifest_file:
        manifest = Manifest(manifest_file)

    package_docs = {
        name: {
            'name': name,
            'git_branch': repos.get_package_branch(name),
            'git_url': repos.get_package_repo_url(name),
            'git_sha': item.git_sha,
            'eups_version': item.version
        }
        for name, item in manifest.items()
    }

    if 'packages' not in job.meta:
        # Create new packages entry
        job.meta['packages'] = package_docs
    else:
        # Extend packages entry
        job.meta['packages'].update(package_docs)
    return job

311 

312 

def insert_extra_package_metadata(job, config):
    """Insert metadata for extra packages ('--package-repos') into
    ``Job.meta['packages']``.

    For each path in ``config.extra_package_paths`` a package entry named
    after the final path component is created. When GitPython is available
    the entry also records the commit SHA and name of the active branch and
    the URL of the ``origin`` remote. Entries for packages already present
    in ``job.meta['packages']`` are updated in place; others are added.

    Parameters
    ----------
    job
        Object exposing a ``meta`` mapping; modified in place.
    config
        Object exposing ``extra_package_paths``, an iterable of repository
        directory paths.

    Returns
    -------
    job
        The same ``job`` object that was passed in.
    """
    log = _LOG.getChild("insert_extra_package_metadata")

    if 'packages' not in job.meta:
        job.meta['packages'] = dict()

    for package_path in config.extra_package_paths:
        log.info('Inserting extra package metadata: {0}'.format(package_path))
        # normpath drops any trailing separator, so a path such as
        # "repos/pkg/" is named "pkg" rather than the empty string.
        package_name = os.path.basename(os.path.normpath(package_path))

        package = {'name': package_name}

        if git is not None:
            git_repo = git.Repo(package_path)
            # NOTE(review): GitPython documents that ``active_branch``
            # raises TypeError on a detached HEAD -- confirm whether that
            # case needs handling here.
            package['git_sha'] = git_repo.active_branch.commit.hexsha
            package['git_branch'] = git_repo.active_branch.name
            package['git_url'] = git_repo.remotes.origin.url

        if package_name in job.meta['packages']:
            # Update pre-existing package metadata
            job.meta['packages'][package_name].update(package)
        else:
            # Create new package metadata
            job.meta['packages'][package_name] = package

    return job

342 

343 

def insert_env_metadata(job, env_name, metadata, date_created):
    """Insert environment metadata into the Job.

    ``metadata`` is extended in place with ``'env_name'`` and ``'date'``
    keys and stored as ``job.meta['env']``. The date is ``date_created``
    when given, otherwise the current UTC time in ISO 8601 format. The
    same ``job`` object is returned.
    """
    metadata['env_name'] = env_name
    metadata['date'] = (datetime.now(timezone.utc).isoformat()
                        if date_created is None else date_created)
    job.meta['env'] = metadata
    return job

359 

360 

def validate_date_created(date_created):
    """Ensure date_created is a valid datetime string in UTC.

    Parameters
    ----------
    date_created : `str`
        Datetime string to check.

    Returns
    -------
    valid : `bool`
        `True` if the string parses and the parsed datetime's time zone
        name is ``'UTC'``; `False` if it cannot be parsed, has no time
        zone, or has a different one.
    """
    try:
        date = date_parser.parse(date_created)
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for out-of-range dates in
        # addition to ValueError for unparseable strings; both mean the
        # input is not a usable datetime.
        return False

    return date.tzname() == 'UTC'

373 

374 

class Configuration(object):
    """Configuration for dispatch_verify.py that reconciles command line and
    environment variable arguments.

    Configuration is validated for completeness and certain errors. Command
    line values take precedence over environment variables.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed command line arguments, produced by `parse_args`.

    Raises
    ------
    RuntimeError
        Raised if the environment name is not in `allowed_env`, if lsstsw
        metadata is wanted but no lsstsw path is configured, if an extra
        package path is not a directory, if no API user is configured
        outside test mode, or if ``date_created`` is not a valid UTC
        datetime string.
    """

    # Environment names accepted by --env / $VERIFY_ENV.
    allowed_env = ('jenkins', 'ldf')

    def __init__(self, args):
        self.json_paths = args.json_paths

        self.test = args.test

        self.output_filepath = args.output_filepath

        self.show_json = args.show_json

        self.env_name = args.env_name or os.getenv('VERIFY_ENV')
        if self.env_name is not None and self.env_name not in self.allowed_env:
            message = '$VERIFY_ENV not one of {0!s}'.format(self.allowed_env)
            raise RuntimeError(message)

        self.ignore_blobs = args.ignore_blobs

        self.ignore_lsstsw = args.ignore_lsstsw

        # Make sure --ignore-lsstsw is used in the LDF environment
        if self.env_name == 'ldf':
            self.ignore_lsstsw = True

        self.lsstsw = args.lsstsw or os.getenv('LSSTSW')
        if self.lsstsw is not None:
            self.lsstsw = os.path.abspath(self.lsstsw)
        # Only checks that a path was configured; the directory itself is
        # not inspected here.
        if not self.ignore_lsstsw and not self.lsstsw:
            message = 'lsstsw directory not found at {0}'.format(self.lsstsw)
            raise RuntimeError(message)

        if args.extra_package_paths is not None:
            self.extra_package_paths = [os.path.abspath(p)
                                        for p in args.extra_package_paths]
        else:
            self.extra_package_paths = []
        for path in self.extra_package_paths:
            if not os.path.isdir(path):
                message = 'Package directory not found: {0}'.format(path)
                raise RuntimeError(message)

        default_url = 'https://squash.lsst.codes/dashboard/api'
        self.api_url = args.api_url or os.getenv('SQUASH_URL', default_url)

        self.api_user = args.api_user or os.getenv('SQUASH_USER')
        if not self.test and self.api_user is None:
            message = '--user or $SQUASH_USER configuration required'
            raise RuntimeError(message)

        # $SQUASH_PASSWORD is the documented variable name. The mixed-case
        # spelling is still consulted afterwards so that environments
        # relying on the previous lookup keep working.
        self.api_password = (args.api_password
                             or os.getenv('SQUASH_PASSWORD')
                             or os.getenv('SQUASH_password'))
        if not self.test and self.api_password is None:
            # If password hasn't been set, prompt for it.
            self.api_password = getpass.getpass(prompt="SQuaSH password: ")

        self.date_created = args.date_created

        if self.date_created is not None:
            if not validate_date_created(self.date_created):
                message = 'Invalid datetime string, use a ISO8601 formatted ' \
                          'datetime in UTC, e.g. 2021-06-30T22:28:25Z.'
                raise RuntimeError(message)
            else:
                # Store the datetime in normalized ISO 8601 form.
                self.date_created = \
                    date_parser.parse(self.date_created).isoformat()

    def __str__(self):
        """Return the configuration as indented JSON with the password
        replaced by asterisks.
        """
        configs = {
            'json_paths': self.json_paths,
            'test': self.test,
            'output_filepath': self.output_filepath,
            'show_json': self.show_json,
            'ignore_blobs': self.ignore_blobs,
            'env': self.env_name,
            'ignore_lsstsw': self.ignore_lsstsw,
            'lsstsw': self.lsstsw,
            'extra_package_paths': self.extra_package_paths,
            'api_url': self.api_url,
            'api_user': self.api_user,
            'date_created': self.date_created,
        }
        if self.api_password is None:
            configs['api_password'] = None
        else:
            configs['api_password'] = '*' * len(self.api_password)

        return json.dumps(configs,
                          sort_keys=True, indent=4, separators=(',', ': '))