Coverage for python/lsst/verify/bin/dispatchverify.py: 11%

179 statements  

« prev     ^ index     » next       coverage.py v7.2.1, created at 2023-03-12 01:54 -0800

1# This file is part of verify. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21"""Upload LSST Science Pipelines Verification `~lsst.verify.Job` datasets to 

22the SQUASH dashboard. 

23 

24Job JSON files can be created by `lsst.verify.Job.write` or 

25`lsst.verify.output_quantities`. A `~lsst.verify.Job` dataset consists of 

26metric measurements, associated blobs, and pipeline execution metadata. 

27Individual LSST Science Pipelines tasks typically write separate JSON datasets. 

28This command can collect and combine multiple Job JSON datasets into a single 

29Job upload. 

30 

31**Configuration** 

32 

33dispatch_verify.py is configurable from both the command line and environment 

34variables. See the argument documentation for environment variable equivalents.

35Command line settings override environment variable configuration. 

36 

37**Metadata and environment** 

38 

39dispatch_verify.py can enrich Verification Job metadata with information 

40from the environment. Currently dispatch_verify.py supports the Jenkins CI 

41and the LSST Data Facility (LDF) execution environments. 

42 

43In the Jenkins CI execution environment (``--env=jenkins``) the 

44following environment variables are consumed: 

45 

46- ``BUILD_ID``: ID in the CI system 

47- ``BUILD_URL``: CI page with information about the build 

48- ``PRODUCT``: the name of the product built, e.g. 'validate_drp' 

49- ``dataset``: the name of the dataset processed, e.g. 'validation_data_cfht' 

50- ``label``: the name of the platform where it runs 

51- ``refs``: the branches run by Jenkins, e.g. 'tickets/DM-12345 master' 

52 

53If ``--lsstsw`` is used, additional Git branch information is included with 

54Science Pipelines package metadata. 

55 

56In the LSST Data Facility execution environment (``--env=ldf``) the following 

57environment variables are consumed: 

58 

59- ``DATASET``: the name of the dataset processed, e.g. 'HSC RC2'

60- ``DATASET_REPO_URL``: a reference URL with information about the dataset 

61- ``RUN_ID``: ID of the run in the LDF environment 

62- ``RUN_ID_URL``: a reference URL with information about the run 

63- ``VERSION_TAG``: the version tag of the LSST software used, e.g. 'w_2018_18' 

64 

65Note: currently it is not possible to gather Science Pipelines package metadata 

66in the LDF environment, thus if ``--env=ldf`` is used ``--ignore-lsstsw`` is 

67also used by default in this environment.

68""" 

69# For determining what is documented in Sphinx 

70__all__ = ['build_argparser', 'main', 'insert_lsstsw_metadata', 

71 'insert_extra_package_metadata', 'insert_env_metadata', 

72 'validate_date_created', 'Configuration'] 

73 

74import argparse 

75import os 

76import json 

77import getpass 

78from dateutil import parser as date_parser 

79from datetime import datetime, timezone 

80 

81try: 

82 import git 

83except ImportError: 

84 # GitPython is not a standard Stack package; skip gracefully if unavailable 

85 git = None 

86 

87import lsst.log 

88from lsst.verify import Job 

89from lsst.verify.metadata.lsstsw import LsstswRepos 

90from lsst.verify.metadata.eupsmanifest import Manifest 

91from lsst.verify.metadata.jenkinsci import get_jenkins_env 

92from lsst.verify.metadata.ldf import get_ldf_env 

93 

94 

def build_argparser():
    """Build the command line argument parser for ``dispatch_verify.py``.

    The module docstring is used verbatim as the parser description, which
    is why a raw description formatter is selected.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Parser with the positional Job JSON paths, the general options,
        and the "Environment" and "SQUASH API" argument groups.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.')

    parser.add_argument(
        'json_paths',
        nargs='+',
        metavar='json',
        help='Verification job JSON file, or files. When multiple JSON '
             'files are present, their measurements, blobs, and metadata '
             'are merged.')
    parser.add_argument(
        '--test',
        default=False,
        action='store_true',
        help='Run this command without uploading to the SQUASH service. '
             'The JSON payload is printed to standard out.')
    parser.add_argument(
        '--write',
        metavar='PATH',
        dest='output_filepath',
        help='Write the merged and enriched Job JSON dataset to the given '
             'path.')
    parser.add_argument(
        '--show',
        dest='show_json',
        action='store_true',
        default=False,
        help='Print the assembled Job JSON to standard output.')
    parser.add_argument(
        '--ignore-blobs',
        dest='ignore_blobs',
        action='store_true',
        default=False,
        # Adjacent literals are concatenated, so the first one needs the
        # trailing space to keep "verification job" as two words.
        help='Ignore data blobs even if they are available in the '
             'verification job.')

    env_group = parser.add_argument_group('Environment arguments')
    env_group.add_argument(
        '--env',
        dest='env_name',
        choices=Configuration.allowed_env,
        help='Name of the environment where the verification job is being '
             'run. In some environments dispatch_verify.py will gather '
             'additional metadata automatically:\n'
             '\n'
             'jenkins\n'
             ' For the Jenkins CI (https://ci.lsst.codes)'
             ' environment.\n'
             'ldf\n'
             ' For the LSST Data Facility environment. \n'
             '\n'
             'Equivalent to the $VERIFY_ENV environment variable.')
    env_group.add_argument(
        '--lsstsw',
        dest='lsstsw',
        metavar='PATH',
        help='lsstsw directory path. If available, Stack package versions are '
             'read from lsstsw. Equivalent to the ``$LSSTSW`` environment '
             'variable. Disabled with ``--ignore-lsstsw``.')
    env_group.add_argument(
        '--package-repos',
        dest='extra_package_paths',
        nargs='*',
        metavar='PATH',
        help='Paths to additional Stack package Git repositories. These '
             'packages are tracked in Job metadata, like lsstsw-based '
             'packages.')
    env_group.add_argument(
        '--ignore-lsstsw',
        dest='ignore_lsstsw',
        action='store_true',
        default=False,
        help='Ignore lsstsw metadata even if it is available (for example, '
             'the ``$LSSTSW`` variable is set).')

    api_group = parser.add_argument_group('SQUASH API arguments')
    api_group.add_argument(
        '--url',
        dest='api_url',
        metavar='URL',
        help='Root URL of the SQUASH API. Equivalent to the ``$SQUASH_URL`` '
             'environment variable.')
    api_group.add_argument(
        '--user',
        dest='api_user',
        metavar='USER',
        help='Username for SQUASH API. Equivalent to the $SQUASH_USER '
             'environment variable.')
    api_group.add_argument(
        '--password',
        dest='api_password',
        metavar='PASSWORD',
        help='Password for SQUASH API. Equivalent to the ``$SQUASH_PASSWORD`` '
             'environment variable. If neither is set, you will be prompted.')
    api_group.add_argument(
        '--date-created',
        dest='date_created',
        help='ISO8601 formatted datetime in UTC for the Job creation date, '
             'e.g. 2021-06-30T22:28:25Z. If not provided the current '
             'datetime is used.')
    return parser

199 

200 

def main():
    """Run the ``dispatch_verify.py`` command line workflow.

    Loads every Job JSON file named on the command line, merges them into
    a single Job, enriches its metadata according to the resolved
    `Configuration`, and then uploads, prints, and/or writes the result
    depending on the selected options.
    """
    log = lsst.log.Log.getLogger('verify.bin.dispatchverify.main')

    config = Configuration(build_argparser().parse_args())
    log.debug(str(config))

    # Deserialize each input file into its own Job.
    loaded_jobs = []
    for json_path in config.json_paths:
        log.info('Loading {0}'.format(json_path))
        with open(json_path) as fp:
            json_data = json.load(fp)
        if config.ignore_blobs:
            # Blobs are dropped from the raw document before deserializing.
            log.info('Ignoring blobs from Job JSON {0}'.format(json_path))
            json_data = delete_blobs(json_data)
        loaded_jobs.append(Job.deserialize(**json_data))

    # Fold every subsequent Job into the first one.
    job, remaining_jobs = loaded_jobs[0], loaded_jobs[1:]
    if remaining_jobs:
        log.info('Merging verification Job JSON.')
    for other_job in remaining_jobs:
        job += other_job

    # Reload metric definitions so every measurement has a metric and
    # units are normalized.
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Package metadata sourced from lsstsw, unless disabled.
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Package metadata for repositories given with --package-repos.
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Execution-environment metadata (Jenkins CI or LDF).
    env_name = config.env_name
    if env_name == 'jenkins':
        log.info('Inserting Jenkins CI environment metadata.')
        job = insert_env_metadata(job, 'jenkins', get_jenkins_env(),
                                  config.date_created)
    elif env_name == 'ldf':
        log.info('Inserting LSST Data Facility environment metadata.')
        job = insert_env_metadata(job, 'ldf', get_ldf_env(),
                                  config.date_created)

    # Upload, unless this is a dry run.
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        response = job.dispatch(api_user=config.api_user,
                                api_password=config.api_password,
                                api_url=config.api_url)
        log.info(response.json()['message'])

    if config.show_json:
        rendered = json.dumps(job.json,
                              sort_keys=True, indent=4,
                              separators=(',', ': '))
        print(rendered)

    # Optionally persist the merged Job to disk.
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)

273 

274 

def delete_blobs(json_data):
    """Remove the ``'blobs'`` entry from a Job JSON document, if present.

    Parameters
    ----------
    json_data : `dict`
        Parsed Job JSON document. It is modified in place.

    Returns
    -------
    json_data : `dict`
        The same object that was passed in, without a ``'blobs'`` key.
    """
    # pop() with a default is a no-op when the key is absent.
    json_data.pop('blobs', None)
    return json_data

281 

282 

def insert_lsstsw_metadata(job, config):
    """Add lsstsw-derived package metadata to ``job.meta['packages']``.

    One entry per package in the lsstsw manifest is built, recording its
    name, Git branch, Git URL, Git SHA, and EUPS version. Entries are
    merged into an existing ``'packages'`` mapping, or installed as a new
    one if the Job has none.

    Parameters
    ----------
    job : `lsst.verify.Job`
        Job whose metadata is updated in place.
    config : `Configuration`
        Configuration; only its ``lsstsw`` path is read.

    Returns
    -------
    job : `lsst.verify.Job`
        The same Job that was passed in.
    """
    repos = LsstswRepos(config.lsstsw)

    with open(repos.manifest_path) as manifest_file:
        manifest = Manifest(manifest_file)

    package_docs = {
        name: {
            'name': name,
            'git_branch': repos.get_package_branch(name),
            'git_url': repos.get_package_repo_url(name),
            'git_sha': item.git_sha,
            'eups_version': item.version
        }
        for name, item in manifest.items()
    }

    if 'packages' not in job.meta:
        # No package metadata yet: install the new mapping as-is.
        job.meta['packages'] = package_docs
    else:
        # Merge into the existing mapping.
        job.meta['packages'].update(package_docs)
    return job

309 

310 

def insert_extra_package_metadata(job, config):
    """Insert metadata for extra packages ('--package-repos') into
    ``Job.meta['packages']``.

    The package name is the final component of each repository path. When
    GitPython is importable, the checked-out commit SHA, the active branch
    name, and the ``origin`` remote URL are recorded as well.

    Parameters
    ----------
    job : `lsst.verify.Job`
        Job whose metadata is updated in place.
    config : `Configuration`
        Configuration; only ``extra_package_paths`` is read.

    Returns
    -------
    job : `lsst.verify.Job`
        The same Job that was passed in.
    """
    log = lsst.log.Log.getLogger(
        'verify.bin.dispatchverify.insert_extra_package_metadata')

    if 'packages' not in job.meta:
        job.meta['packages'] = dict()

    for package_path in config.extra_package_paths:
        log.info('Inserting extra package metadata: {0}'.format(package_path))
        # normpath strips a trailing separator, which would otherwise make
        # the last path component (and so the package name) empty.
        package_name = os.path.basename(os.path.normpath(package_path))

        package = {'name': package_name}

        if git is not None:
            git_repo = git.Repo(package_path)
            head = git_repo.head
            # Read the SHA from HEAD itself so that it is available even
            # when no branch is checked out.
            package['git_sha'] = head.commit.hexsha
            if head.is_detached:
                # active_branch raises TypeError on a detached HEAD. Leave
                # 'git_branch' unset so any branch already recorded for
                # this package is not overwritten.
                log.info('{0} has a detached HEAD; git_branch not '
                         'recorded.'.format(package_path))
            else:
                package['git_branch'] = git_repo.active_branch.name
            package['git_url'] = git_repo.remotes.origin.url

        if package_name in job.meta['packages']:
            # Update pre-existing package metadata
            job.meta['packages'][package_name].update(package)
        else:
            # Create new package metadata
            job.meta['packages'][package_name] = package

    return job

341 

342 

def insert_env_metadata(job, env_name, metadata, date_created):
    """Attach execution-environment metadata to a Job as ``meta['env']``.

    Parameters
    ----------
    job : `lsst.verify.Job`
        Job whose ``meta['env']`` entry is set.
    env_name : `str`
        Environment name, stored under the ``'env_name'`` key.
    metadata : `dict`
        Environment metadata. It is updated in place with ``'env_name'``
        and ``'date'`` and then stored on the Job.
    date_created : `str` or `None`
        Value for the ``'date'`` key. When `None`, the current UTC time in
        ISO 8601 format is used instead.

    Returns
    -------
    job : `lsst.verify.Job`
        The same Job that was passed in.
    """
    if date_created is None:
        timestamp = datetime.now(timezone.utc).isoformat()
    else:
        timestamp = date_created

    metadata.update({'env_name': env_name, 'date': timestamp})
    job.meta['env'] = metadata
    return job

358 

359 

def validate_date_created(date_created):
    """Check that ``date_created`` is a parseable datetime string in UTC.

    Parameters
    ----------
    date_created : `str`
        Datetime string, e.g. ``2021-06-30T22:28:25Z``.

    Returns
    -------
    is_valid : `bool`
        `True` if the string parses and its time zone name is ``'UTC'``;
        `False` if it cannot be parsed or carries any other (or no) time
        zone.
    """
    try:
        date = date_parser.parse(date_created)
    except (ValueError, OverflowError):
        # dateutil raises OverflowError, rather than ValueError, when the
        # parsed date is too large for the platform; treat that as invalid
        # input as well instead of letting it escape as a traceback.
        return False

    return date.tzname() == 'UTC'

372 

373 

class Configuration(object):
    """Configuration for dispatch_verify.py that reconciles command line and
    environment variable arguments.

    Configuration is validated for completeness and certain errors. Command
    line values take precedence over environment variables.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed command line arguments, produced by `parse_args`.

    Raises
    ------
    RuntimeError
        Raised if the environment name is not one of ``allowed_env``, if
        lsstsw metadata is wanted but no lsstsw path is configured, if an
        extra package path is not a directory, if no API user is configured
        for an upload, or if ``--date-created`` is not a valid UTC datetime.
    """

    # Environment names accepted by --env / $VERIFY_ENV.
    allowed_env = ('jenkins', 'ldf')

    def __init__(self, args):
        self.json_paths = args.json_paths

        self.test = args.test

        self.output_filepath = args.output_filepath

        self.show_json = args.show_json

        self.env_name = args.env_name or os.getenv('VERIFY_ENV')
        if self.env_name is not None and self.env_name not in self.allowed_env:
            message = '$VERIFY_ENV not one of {0!s}'.format(self.allowed_env)
            raise RuntimeError(message)

        self.ignore_blobs = args.ignore_blobs

        self.ignore_lsstsw = args.ignore_lsstsw

        # Make sure --ignore-lsstsw is used in the LDF environment
        if self.env_name == 'ldf':
            self.ignore_lsstsw = True

        self.lsstsw = args.lsstsw or os.getenv('LSSTSW')
        if self.lsstsw is not None:
            self.lsstsw = os.path.abspath(self.lsstsw)
        if not self.ignore_lsstsw and not self.lsstsw:
            # Reached only when no path was configured at all, so report
            # the missing setting rather than a "not found at None" path.
            message = ('--lsstsw or $LSSTSW configuration required '
                       '(or use --ignore-lsstsw)')
            raise RuntimeError(message)

        if args.extra_package_paths is not None:
            self.extra_package_paths = [os.path.abspath(p)
                                        for p in args.extra_package_paths]
        else:
            self.extra_package_paths = []
        for path in self.extra_package_paths:
            if not os.path.isdir(path):
                message = 'Package directory not found: {0}'.format(path)
                raise RuntimeError(message)

        default_url = 'https://squash.lsst.codes/dashboard/api'
        self.api_url = args.api_url or os.getenv('SQUASH_URL', default_url)

        self.api_user = args.api_user or os.getenv('SQUASH_USER')
        if not self.test and self.api_user is None:
            message = '--user or $SQUASH_USER configuration required'
            raise RuntimeError(message)

        # $SQUASH_PASSWORD is the documented variable. The mixed-case
        # spelling is still honoured as a fallback for existing setups.
        self.api_password = (args.api_password
                             or os.getenv('SQUASH_PASSWORD')
                             or os.getenv('SQUASH_password'))
        if not self.test and self.api_password is None:
            # If password hasn't been set, prompt for it.
            self.api_password = getpass.getpass(prompt="SQuaSH password: ")

        self.date_created = args.date_created

        if self.date_created is not None:
            if not validate_date_created(self.date_created):
                message = 'Invalid datetime string, use a ISO8601 formatted ' \
                          'datetime in UTC, e.g. 2021-06-30T22:28:25Z.'
                raise RuntimeError(message)
            else:
                # Store the normalized ISO 8601 form of the input.
                self.date_created = \
                    date_parser.parse(self.date_created).isoformat()

    def __str__(self):
        """Render the configuration as sorted, indented JSON.

        The API password is masked with one ``*`` per character.
        """
        configs = {
            'json_paths': self.json_paths,
            'test': self.test,
            'output_filepath': self.output_filepath,
            'show_json': self.show_json,
            'ignore_blobs': self.ignore_blobs,
            'env': self.env_name,
            'ignore_lsstsw': self.ignore_lsstsw,
            'lsstsw': self.lsstsw,
            'extra_package_paths': self.extra_package_paths,
            'api_url': self.api_url,
            'api_user': self.api_user,
            'date_created': self.date_created,
        }
        if self.api_password is None:
            configs['api_password'] = None
        else:
            configs['api_password'] = '*' * len(self.api_password)

        return json.dumps(configs,
                          sort_keys=True, indent=4, separators=(',', ': '))