Coverage for python/lsst/verify/bin/dispatchverify.py: 12%

182 statements  

coverage.py v6.5.0, created at 2022-12-23 01:45 -0800

# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

21"""Upload LSST Science Pipelines Verification `~lsst.verify.Job` datasets to 

22the SQUASH dashboard. 

23 

24Job JSON files can be created by `lsst.verify.Job.write` or 

25`lsst.verify.output_quantities`. A `~lsst.verify.Job` dataset consists of 

26metric measurements, associated blobs, and pipeline execution metadata. 

27Individual LSST Science Pipelines tasks typically write separate JSON datasets. 

28This command can collect and combine multiple Job JSON datasets into a single 

29Job upload. 

30 

31**Configuration** 

32 

33dispatch_verify.py is configurable from both the command line and environment 

34variables. See the argument documenation for environment variable equivalents. 

35Command line settings override environment variable configuration. 

36 

37**Metadata and environment** 

38 

39dispatch_verify.py can enrich Verification Job metadata with information 

40from the environment. Currently dispatch_verify.py supports the Jenkins CI 

41and the LSST Data Facility (LDF) execution environments. 

42 

43In the Jenkins CI execution environment (``--env=jenkins``) the 

44following environment variables are consumed: 

45 

46- ``BUILD_ID``: ID in the CI system 

47- ``BUILD_URL``: CI page with information about the build 

48- ``PRODUCT``: the name of the product built, e.g. 'validate_drp' 

49- ``dataset``: the name of the dataset processed, e.g. 'validation_data_cfht' 

50- ``label``: the name of the platform where it runs 

51- ``refs``: the branches run by Jenkins, e.g. 'tickets/DM-12345 main' 

52 

53If ``--lsstsw`` is used, additional Git branch information is included with 

54Science Pipelines package metadata. 

55 

56In the LSST Data Facility execution environment (``--env=ldf``) the following 

57environment variables are consumed: 

58 

59- ``DATASET``: the name of the dataset processed, e.g 'HSC RC2' 

60- ``DATASET_REPO_URL``: a reference URL with information about the dataset 

61- ``RUN_ID``: ID of the run in the LDF environment 

62- ``RUN_ID_URL``: a reference URL with information about the run 

63- ``VERSION_TAG``: the version tag of the LSST software used, e.g. 'w_2018_18' 

64 

65Note: currently it is not possible to gather Science Pipelines package metadata 

66in the LDF environment, thus if ``--env=ldf`` is used ``--ignore-lsstsw`` is 

67aslo used by default in this environment. 

68""" 

# For determining what is documented in Sphinx
__all__ = ['build_argparser', 'main', 'insert_lsstsw_metadata',
           'insert_extra_package_metadata', 'insert_env_metadata',
           'validate_date_created', 'Configuration']

import argparse
import os
import json
import getpass
from dateutil import parser as date_parser
from datetime import datetime, timezone
import logging
import sys

try:
    import git
except ImportError:
    # GitPython is not a standard Stack package; skip gracefully if unavailable
    git = None

from lsst.verify import Job
from lsst.verify.metadata.lsstsw import LsstswRepos
from lsst.verify.metadata.eupsmanifest import Manifest
from lsst.verify.metadata.jenkinsci import get_jenkins_env
from lsst.verify.metadata.ldf import get_ldf_env

_LOG = logging.getLogger(__name__)

def build_argparser():
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.')

    parser.add_argument(
        'json_paths',
        nargs='+',
        metavar='json',
        help='Verification job JSON file, or files. When multiple JSON '
             'files are present, their measurements, blobs, and metadata '
             'are merged.')
    parser.add_argument(
        '--test',
        default=False,
        action='store_true',
        help='Run this command without uploading to the SQUASH service. '
             'The JSON payload is printed to standard out.')
    parser.add_argument(
        '--write',
        metavar='PATH',
        dest='output_filepath',
        help='Write the merged and enriched Job JSON dataset to the given '
             'path.')
    parser.add_argument(
        '--show',
        dest='show_json',
        action='store_true',
        default=False,
        help='Print the assembled Job JSON to standard output.')
    parser.add_argument(
        '--ignore-blobs',
        dest='ignore_blobs',
        action='store_true',
        default=False,
        help='Ignore data blobs even if they are available in the '
             'verification job.')

    env_group = parser.add_argument_group('Environment arguments')
    env_group.add_argument(
        '--env',
        dest='env_name',
        choices=Configuration.allowed_env,
        help='Name of the environment where the verification job is being '
             'run. In some environments dispatch_verify.py will gather '
             'additional metadata automatically:\n'
             '\n'
             'jenkins\n'
             '    For the Jenkins CI (https://ci.lsst.codes) '
             'environment.\n'
             'ldf\n'
             '    For the LSST Data Facility environment.\n'
             '\n'
             'Equivalent to the $VERIFY_ENV environment variable.')
    env_group.add_argument(
        '--lsstsw',
        dest='lsstsw',
        metavar='PATH',
        help='lsstsw directory path. If available, Stack package versions '
             'are read from lsstsw. Equivalent to the ``$LSSTSW`` '
             'environment variable. Disabled with ``--ignore-lsstsw``.')
    env_group.add_argument(
        '--package-repos',
        dest='extra_package_paths',
        nargs='*',
        metavar='PATH',
        help='Paths to additional Stack package Git repositories. These '
             'packages are tracked in Job metadata, like lsstsw-based '
             'packages.')
    env_group.add_argument(
        '--ignore-lsstsw',
        dest='ignore_lsstsw',
        action='store_true',
        default=False,
        help='Ignore lsstsw metadata even if it is available (for example, '
             'the ``$LSSTSW`` variable is set).')

    api_group = parser.add_argument_group('SQUASH API arguments')
    api_group.add_argument(
        '--url',
        dest='api_url',
        metavar='URL',
        help='Root URL of the SQUASH API. Equivalent to the ``$SQUASH_URL`` '
             'environment variable.')
    api_group.add_argument(
        '--user',
        dest='api_user',
        metavar='USER',
        help='Username for SQUASH API. Equivalent to the ``$SQUASH_USER`` '
             'environment variable.')
    api_group.add_argument(
        '--password',
        dest='api_password',
        metavar='PASSWORD',
        help='Password for SQUASH API. Equivalent to the ``$SQUASH_PASSWORD`` '
             'environment variable. If neither is set, you will be prompted.')
    api_group.add_argument(
        '--date-created',
        dest='date_created',
        help='ISO8601 formatted datetime in UTC for the Job creation date, '
             'e.g. 2021-06-30T22:28:25Z. If not provided the current '
             'datetime is used.')
    return parser

def main():
    """Entrypoint for the ``dispatch_verify.py`` command line executable.
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format="{name} {levelname}: {message}", style="{")
    log = _LOG.getChild('main')

    parser = build_argparser()
    args = parser.parse_args()
    config = Configuration(args)
    log.debug(str(config))

    # Parse all Job JSON
    jobs = []
    for json_path in config.json_paths:
        log.info('Loading {0}'.format(json_path))
        with open(json_path) as fp:
            json_data = json.load(fp)
        # Ignore blobs from the verification jobs
        if config.ignore_blobs:
            log.info('Ignoring blobs from Job JSON {0}'.format(json_path))
            json_data = delete_blobs(json_data)
        job = Job.deserialize(**json_data)
        jobs.append(job)

    # Merge all Jobs into one
    job = jobs.pop(0)
    if len(jobs) > 0:
        log.info('Merging verification Job JSON.')
        for other_job in jobs:
            job += other_job

    # Ensure all measurements have a metric so that units are normalized
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Insert package metadata from lsstsw
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Insert metadata from additional specified packages
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Add environment variable metadata from the Jenkins CI environment
    if config.env_name == 'jenkins':
        log.info('Inserting Jenkins CI environment metadata.')
        jenkins_metadata = get_jenkins_env()
        job = insert_env_metadata(job, 'jenkins', jenkins_metadata,
                                  config.date_created)
    elif config.env_name == 'ldf':
        log.info('Inserting LSST Data Facility environment metadata.')
        ldf_metadata = get_ldf_env()
        job = insert_env_metadata(job, 'ldf', ldf_metadata,
                                  config.date_created)

    # Upload job
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        response = job.dispatch(api_user=config.api_user,
                                api_password=config.api_password,
                                api_url=config.api_url)
        log.info(response.json()['message'])

    if config.show_json:
        print(json.dumps(job.json,
                         sort_keys=True, indent=4, separators=(',', ': ')))

    # Write a JSON file
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)
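# Note on the merge step above (illustrative): each input file is turned into
# a Job with Job.deserialize, and ``job += other_job`` folds the other Job's
# measurements, blobs, and metadata into the first one, as described in the
# module docstring.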

def delete_blobs(json_data):
    """Delete data blobs from the Job JSON.
    """
    if 'blobs' in json_data:
        del json_data['blobs']
    return json_data

def insert_lsstsw_metadata(job, config):
    """Insert metadata for lsstsw-based packages into ``Job.meta['packages']``.
    """
    lsstsw_repos = LsstswRepos(config.lsstsw)

    with open(lsstsw_repos.manifest_path) as fp:
        manifest = Manifest(fp)

    packages = {}
    for package_name, manifest_item in manifest.items():
        package_doc = {
            'name': package_name,
            'git_branch': lsstsw_repos.get_package_branch(package_name),
            'git_url': lsstsw_repos.get_package_repo_url(package_name),
            'git_sha': manifest_item.git_sha,
            'eups_version': manifest_item.version
        }
        packages[package_name] = package_doc

    if 'packages' in job.meta:
        # Extend packages entry
        job.meta['packages'].update(packages)
    else:
        # Create new packages entry
        job.meta['packages'] = packages
    return job
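# Shape of a single ``Job.meta['packages']`` entry produced above (the keys
# come from the code; the values here are hypothetical):
#
#   {'name': 'afw',
#    'git_branch': 'main',
#    'git_url': 'https://github.com/lsst/afw.git',
#    'git_sha': 'abc123...',
#    'eups_version': 'g0123456789+abcdef'}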

def insert_extra_package_metadata(job, config):
    """Insert metadata for extra packages ('--package-repos') into
    ``Job.meta['packages']``.
    """
    log = _LOG.getChild("insert_extra_package_metadata")

    if 'packages' not in job.meta:
        job.meta['packages'] = dict()

    for package_path in config.extra_package_paths:
        log.info('Inserting extra package metadata: {0}'.format(package_path))
        package_name = package_path.split(os.sep)[-1]

        package = {'name': package_name}

        if git is not None:
            git_repo = git.Repo(package_path)
            package['git_sha'] = git_repo.active_branch.commit.hexsha
            package['git_branch'] = git_repo.active_branch.name
            package['git_url'] = git_repo.remotes.origin.url

        if package_name in job.meta['packages']:
            # Update pre-existing package metadata
            job.meta['packages'][package_name].update(package)
        else:
            # Create new package metadata
            job.meta['packages'][package_name] = package

    return job

def insert_env_metadata(job, env_name, metadata, date_created):
    """Insert environment metadata into the Job.
    """
    metadata.update({'env_name': env_name})

    if date_created is not None:
        date = date_created
    else:
        date = datetime.now(timezone.utc).isoformat()

    metadata.update({'date': date})

    job.meta['env'] = metadata

    return job

def validate_date_created(date_created):
    """Ensure date_created is a valid datetime string in UTC.
    """
    try:
        date = date_parser.parse(date_created)
    except ValueError:
        return False

    if date.tzname() == 'UTC':
        return True
    else:
        return False
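# Illustrative behaviour of validate_date_created: a string such as
# '2021-06-30T22:28:25Z' parses with tzname 'UTC' and returns True, whereas a
# naive datetime ('2021-06-30T22:28:25') or a non-UTC offset
# ('2021-06-30T22:28:25-07:00') returns False.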

class Configuration(object):
    """Configuration for dispatch_verify.py that reconciles command line and
    environment variable arguments.

    Configuration is validated for completeness and certain errors.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed command line arguments, produced by `parse_args`.
    """

    allowed_env = ('jenkins', 'ldf')

    def __init__(self, args):
        self.json_paths = args.json_paths

        self.test = args.test

        self.output_filepath = args.output_filepath

        self.show_json = args.show_json

        self.env_name = args.env_name or os.getenv('VERIFY_ENV')
        if self.env_name is not None and self.env_name not in self.allowed_env:
            message = '$VERIFY_ENV not one of {0!s}'.format(self.allowed_env)
            raise RuntimeError(message)

        self.ignore_blobs = args.ignore_blobs

        self.ignore_lsstsw = args.ignore_lsstsw

        # Make sure --ignore-lsstsw is used in the LDF environment
        if self.env_name == 'ldf':
            self.ignore_lsstsw = True

        self.lsstsw = args.lsstsw or os.getenv('LSSTSW')
        if self.lsstsw is not None:
            self.lsstsw = os.path.abspath(self.lsstsw)
        if not self.ignore_lsstsw and not self.lsstsw:
            message = 'lsstsw directory not found at {0}'.format(self.lsstsw)
            raise RuntimeError(message)

        if args.extra_package_paths is not None:
            self.extra_package_paths = [os.path.abspath(p)
                                        for p in args.extra_package_paths]
        else:
            self.extra_package_paths = []
        for path in self.extra_package_paths:
            if not os.path.isdir(path):
                message = 'Package directory not found: {0}'.format(path)
                raise RuntimeError(message)

        default_url = 'https://squash.lsst.codes/dashboard/api'
        self.api_url = args.api_url or os.getenv('SQUASH_URL', default_url)

        self.api_user = args.api_user or os.getenv('SQUASH_USER')
        if not self.test and self.api_user is None:
            message = '--user or $SQUASH_USER configuration required'
            raise RuntimeError(message)

        self.api_password = (args.api_password
                             or os.getenv('SQUASH_PASSWORD'))
        if not self.test and self.api_password is None:
            # If the password hasn't been set, prompt for it.
            self.api_password = getpass.getpass(prompt="SQuaSH password: ")

        self.date_created = args.date_created

        if self.date_created is not None:
            if not validate_date_created(self.date_created):
                message = 'Invalid datetime string, use an ISO8601 formatted ' \
                          'datetime in UTC, e.g. 2021-06-30T22:28:25Z.'
                raise RuntimeError(message)
            else:
                self.date_created = \
                    date_parser.parse(self.date_created).isoformat()

    def __str__(self):
        configs = {
            'json_paths': self.json_paths,
            'test': self.test,
            'output_filepath': self.output_filepath,
            'show_json': self.show_json,
            'ignore_blobs': self.ignore_blobs,
            'env': self.env_name,
            'ignore_lsstsw': self.ignore_lsstsw,
            'lsstsw': self.lsstsw,
            'extra_package_paths': self.extra_package_paths,
            'api_url': self.api_url,
            'api_user': self.api_user,
            'date_created': self.date_created,
        }
        if self.api_password is None:
            configs['api_password'] = None
        else:
            configs['api_password'] = '*' * len(self.api_password)

        return json.dumps(configs,
                          sort_keys=True, indent=4, separators=(',', ': '))
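# Configuration precedence (illustrative): each setting prefers the parsed
# command-line value and falls back to its environment variable, e.g.
# ``args.api_url or os.getenv('SQUASH_URL', default_url)`` means --url beats
# $SQUASH_URL, which beats the built-in SQUASH dashboard URL.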