Coverage for python/lsst/verify/bin/dispatchverify.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of verify.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21"""Upload LSST Science Pipelines Verification `~lsst.verify.Job` datasets to
22the SQUASH dashboard.
24Job JSON files can be created by `lsst.verify.Job.write` or
25`lsst.verify.output_quantities`. A `~lsst.verify.Job` dataset consists of
26metric measurements, associated blobs, and pipeline execution metadata.
27Individual LSST Science Pipelines tasks typically write separate JSON datasets.
28This command can collect and combine multiple Job JSON datasets into a single
29Job upload.
31**Configuration**
33dispatch_verify.py is configurable from both the command line and environment
34variables. See the argument documentation for environment variable equivalents.
35Command line settings override environment variable configuration.
37**Metadata and environment**
39dispatch_verify.py can enrich Verification Job metadata with information
40from the environment. Currently dispatch_verify.py supports the Jenkins CI
41and the LSST Data Facility (LDF) execution environments.
43In the Jenkins CI execution environment (``--env=jenkins``) the
44following environment variables are consumed:
46- ``BUILD_ID``: ID in the CI system
47- ``BUILD_URL``: CI page with information about the build
48- ``PRODUCT``: the name of the product built, e.g. 'validate_drp'
49- ``dataset``: the name of the dataset processed, e.g. 'validation_data_cfht'
50- ``label``: the name of the platform where it runs
51- ``refs``: the branches run by Jenkins, e.g. 'tickets/DM-12345 master'
53If ``--lsstsw`` is used, additional Git branch information is included with
54Science Pipelines package metadata.
56In the LSST Data Facility execution environment (``--env=ldf``) the following
57environment variables are consumed:
59- ``DATASET``: the name of the dataset processed, e.g. 'HSC RC2'
60- ``DATASET_REPO_URL``: a reference URL with information about the dataset
61- ``RUN_ID``: ID of the run in the LDF environment
62- ``RUN_ID_URL``: a reference URL with information about the run
63- ``VERSION_TAG``: the version tag of the LSST software used, e.g. 'w_2018_18'
65Note: currently it is not possible to gather Science Pipelines package metadata
66in the LDF environment, thus if ``--env=ldf`` is used ``--ignore-lsstsw`` is
67 also used by default in this environment.
68"""
69# For determining what is documented in Sphinx
70__all__ = ['build_argparser', 'main', 'insert_lsstsw_metadata',
71 'insert_extra_package_metadata', 'insert_env_metadata',
72 'validate_date_created', 'Configuration']
74import argparse
75import os
76import json
77import getpass
78from dateutil import parser as date_parser
79from datetime import datetime, timezone
81try:
82 import git
83except ImportError:
84 # GitPython is not a standard Stack package; skip gracefully if unavailable
85 git = None
87import lsst.log
88from lsst.verify import Job
89from lsst.verify.metadata.lsstsw import LsstswRepos
90from lsst.verify.metadata.eupsmanifest import Manifest
91from lsst.verify.metadata.jenkinsci import get_jenkins_env
92from lsst.verify.metadata.ldf import get_ldf_env
def build_argparser():
    """Construct the command-line argument parser for dispatch_verify.py.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Argument parser covering input JSON paths, output/display options,
        execution-environment metadata options, and SQUASH API credentials.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.')

    parser.add_argument(
        'json_paths',
        nargs='+',
        metavar='json',
        help='Verification job JSON file, or files. When multiple JSON '
             'files are present, their measurements, blobs, and metadata '
             'are merged.')
    parser.add_argument(
        '--test',
        default=False,
        action='store_true',
        help='Run this command without uploading to the SQUASH service. '
             'The JSON payload is printed to standard out.')
    parser.add_argument(
        '--write',
        metavar='PATH',
        dest='output_filepath',
        help='Write the merged and enriched Job JSON dataset to the given '
             'path.')
    parser.add_argument(
        '--show',
        dest='show_json',
        action='store_true',
        default=False,
        help='Print the assembled Job JSON to standard output.')
    parser.add_argument(
        '--ignore-blobs',
        dest='ignore_blobs',
        action='store_true',
        default=False,
        # Fixed: the two implicitly-concatenated string fragments previously
        # rendered as "verificationjob." (missing separating space).
        help='Ignore data blobs even if they are available in the '
             'verification job.')

    env_group = parser.add_argument_group('Environment arguments')
    env_group.add_argument(
        '--env',
        dest='env_name',
        choices=Configuration.allowed_env,
        # Fixed: help text referred to "display_verify.py"; the executable
        # is dispatch_verify.py.
        help='Name of the environment where the verification job is being '
             'run. In some environments dispatch_verify.py will gather '
             'additional metadata automatically:\n'
             '\n'
             'jenkins\n'
             '    For the Jenkins CI (https://ci.lsst.codes)'
             ' environment.\n'
             'ldf\n'
             '    For the LSST Data Facility environment. \n'
             '\n'
             'Equivalent to the $VERIFY_ENV environment variable.')
    env_group.add_argument(
        '--lsstsw',
        dest='lsstsw',
        metavar='PATH',
        help='lsstsw directory path. If available, Stack package versions are '
             'read from lsstsw. Equivalent to the ``$LSSTSW`` environment '
             'variable. Disabled with ``--ignore-lsstsw.``')
    env_group.add_argument(
        '--package-repos',
        dest='extra_package_paths',
        nargs='*',
        metavar='PATH',
        help='Paths to additional Stack package Git repositories. These '
             'packages are tracked in Job metadata, like lsstsw-based '
             'packages.')
    env_group.add_argument(
        '--ignore-lsstsw',
        dest='ignore_lsstsw',
        action='store_true',
        default=False,
        help='Ignore lsstsw metadata even if it is available (for example, '
             'the ``$LSSTSW`` variable is set).')

    api_group = parser.add_argument_group('SQUASH API arguments')
    api_group.add_argument(
        '--url',
        dest='api_url',
        metavar='URL',
        help='Root URL of the SQUASH API. Equivalent to the ``$SQUASH_URL`` '
             'environment variable.')
    api_group.add_argument(
        '--user',
        dest='api_user',
        metavar='USER',
        help='Username for SQUASH API. Equivalent to the $SQUASH_USER '
             'environment variable.')
    api_group.add_argument(
        '--password',
        dest='api_password',
        metavar='PASSWORD',
        help='Password for SQUASH API. Equivalent to the ``$SQUASH_PASSWORD`` '
             'environment variable. If neither is set, you will be prompted.')
    api_group.add_argument(
        '--date-created',
        dest='date_created',
        help='ISO8601 formatted datetime in UTC for the Job creation date, '
             'e.g. 2021-06-30T22:28:25Z. If not provided the current '
             'datetime is used.')
    return parser
def main():
    """Entrypoint for the ``dispatch_verify.py`` command line executable.
    """
    log = lsst.log.Log.getLogger('verify.bin.dispatchverify.main')

    parser = build_argparser()
    config = Configuration(parser.parse_args())
    log.debug(str(config))

    # Load every Job JSON dataset named on the command line.
    loaded_jobs = []
    for json_path in config.json_paths:
        log.info('Loading {0}'.format(json_path))
        with open(json_path) as fp:
            json_data = json.load(fp)
        # Optionally strip data blobs before deserialization.
        if config.ignore_blobs:
            log.info('Ignoring blobs from Job JSON {0}'.format(json_path))
            json_data = delete_blobs(json_data)
        loaded_jobs.append(Job.deserialize(**json_data))

    # Fold all loaded Jobs into the first one.
    job = loaded_jobs.pop(0)
    if loaded_jobs:
        log.info('Merging verification Job JSON.')
        for other_job in loaded_jobs:
            job += other_job

    # Ensure all measurements have a metric so that units are normalized.
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Attach lsstsw package metadata unless explicitly disabled.
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Attach metadata for any extra package repositories.
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Attach execution-environment metadata (Jenkins CI or LDF).
    if config.env_name == 'jenkins':
        log.info('Inserting Jenkins CI environment metadata.')
        job = insert_env_metadata(job, 'jenkins', get_jenkins_env(),
                                  config.date_created)
    elif config.env_name == 'ldf':
        log.info('Inserting LSST Data Facility environment metadata.')
        job = insert_env_metadata(job, 'ldf', get_ldf_env(),
                                  config.date_created)

    # Upload the assembled Job unless running in --test mode.
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        job.dispatch(api_user=config.api_user,
                     api_password=config.api_password,
                     api_url=config.api_url)

    if config.show_json:
        print(json.dumps(job.json,
                         sort_keys=True, indent=4, separators=(',', ': ')))

    # Optionally persist the merged Job JSON to disk.
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)
def delete_blobs(json_data):
    """Delete data blobs from the Job JSON, returning the mutated mapping.
    """
    # pop with a default is a no-op when the key is absent.
    json_data.pop('blobs', None)
    return json_data
def insert_lsstsw_metadata(job, config):
    """Insert metadata for lsstsw-based packages into ``Job.meta['packages']``.
    """
    repos = LsstswRepos(config.lsstsw)

    with open(repos.manifest_path) as fp:
        manifest = Manifest(fp)

    # Build one metadata record per package listed in the eups manifest.
    packages = {
        name: {
            'name': name,
            'git_branch': repos.get_package_branch(name),
            'git_url': repos.get_package_repo_url(name),
            'git_sha': item.git_sha,
            'eups_version': item.version
        }
        for name, item in manifest.items()
    }

    if 'packages' in job.meta:
        # Merge into the pre-existing packages entry.
        job.meta['packages'].update(packages)
    else:
        # No packages entry yet; create it wholesale.
        job.meta['packages'] = packages
    return job
def insert_extra_package_metadata(job, config):
    """Insert metadata for extra packages ('--package-repos') into
    ``Job.meta['packages']``.
    """
    logger = lsst.log.Log.getLogger(
        'verify.bin.dispatchverify.insert_extra_package_metadata')

    # Guarantee a packages mapping exists before merging entries into it.
    if 'packages' not in job.meta:
        job.meta['packages'] = dict()

    for repo_path in config.extra_package_paths:
        logger.info('Inserting extra package metadata: {0}'.format(repo_path))
        # The package name is the final path component.
        repo_name = repo_path.split(os.sep)[-1]

        entry = {'name': repo_name}

        # Git details are best-effort: GitPython may not be installed.
        if git is not None:
            repo = git.Repo(repo_path)
            entry['git_sha'] = repo.active_branch.commit.hexsha
            entry['git_branch'] = repo.active_branch.name
            entry['git_url'] = repo.remotes.origin.url

        known = job.meta['packages']
        if repo_name in known:
            # Merge into the pre-existing record for this package.
            known[repo_name].update(entry)
        else:
            # First sighting of this package; record it whole.
            known[repo_name] = entry

    return job
def insert_env_metadata(job, env_name, metadata, date_created):
    """Insert environment metadata into the Job under ``meta['env']``.
    """
    # Record which environment produced this metadata.
    metadata['env_name'] = env_name

    # Fall back to the current UTC time when no creation date was supplied.
    if date_created is None:
        stamp = datetime.now(timezone.utc).isoformat()
    else:
        stamp = date_created
    metadata['date'] = stamp

    job.meta['env'] = metadata
    return job
def validate_date_created(date_created):
    """Ensure ``date_created`` is a valid datetime string in UTC.

    Parameters
    ----------
    date_created : `str`
        ISO8601-formatted datetime string, e.g. ``2021-06-30T22:28:25Z``.

    Returns
    -------
    valid : `bool`
        `True` if the string parses and carries an explicit UTC timezone,
        `False` otherwise.
    """
    try:
        date = date_parser.parse(date_created)
    except (ValueError, OverflowError):
        # dateutil raises ParserError (a ValueError subclass) for
        # unparseable strings and OverflowError for out-of-range dates;
        # both mean "not a valid datetime" here.
        return False

    # Naive datetimes report tzname() of None; non-UTC zones are rejected.
    if date.tzname() == 'UTC':
        return True
    else:
        return False
class Configuration(object):
    """Configuration for dispatch_verify.py that reconciles command line and
    environment variable arguments.

    Configuration is validated for completeness and certain errors.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed command line arguments, produced by `parse_args`.

    Raises
    ------
    RuntimeError
        If ``$VERIFY_ENV`` names an unknown environment, lsstsw metadata is
        requested but no lsstsw directory is configured, an extra package
        path does not exist, the SQUASH user is missing for a real upload,
        or ``--date-created`` is not a valid UTC datetime string.
    """

    # Execution environments that dispatch_verify.py knows how to enrich.
    allowed_env = ('jenkins', 'ldf')

    def __init__(self, args):
        self.json_paths = args.json_paths

        self.test = args.test

        self.output_filepath = args.output_filepath

        self.show_json = args.show_json

        # Command-line value wins over the $VERIFY_ENV variable.
        self.env_name = args.env_name or os.getenv('VERIFY_ENV')
        if self.env_name is not None and self.env_name not in self.allowed_env:
            message = '$VERIFY_ENV not one of {0!s}'.format(self.allowed_env)
            raise RuntimeError(message)

        self.ignore_blobs = args.ignore_blobs

        self.ignore_lsstsw = args.ignore_lsstsw

        # Make sure --ignore-lsstsw is used in the LDF environment, where
        # Science Pipelines package metadata cannot be gathered.
        if self.env_name == 'ldf':
            self.ignore_lsstsw = True

        self.lsstsw = args.lsstsw or os.getenv('LSSTSW')
        if self.lsstsw is not None:
            self.lsstsw = os.path.abspath(self.lsstsw)
        if not self.ignore_lsstsw and not self.lsstsw:
            message = 'lsstsw directory not found at {0}'.format(self.lsstsw)
            raise RuntimeError(message)

        if args.extra_package_paths is not None:
            self.extra_package_paths = [os.path.abspath(p)
                                        for p in args.extra_package_paths]
        else:
            self.extra_package_paths = []
        for path in self.extra_package_paths:
            if not os.path.isdir(path):
                message = 'Package directory not found: {0}'.format(path)
                raise RuntimeError(message)

        default_url = 'https://squash.lsst.codes/dashboard/api'
        self.api_url = args.api_url or os.getenv('SQUASH_URL', default_url)

        self.api_user = args.api_user or os.getenv('SQUASH_USER')
        if not self.test and self.api_user is None:
            message = '--user or $SQUASH_USER configuration required'
            raise RuntimeError(message)

        # Fixed: the documented variable is $SQUASH_PASSWORD; the original
        # code only read the miscased $SQUASH_password, which is a different
        # variable on case-sensitive platforms. Keep the old name as a
        # fallback so existing deployments continue to work.
        self.api_password = (args.api_password
                             or os.getenv('SQUASH_PASSWORD')
                             or os.getenv('SQUASH_password'))
        if not self.test and self.api_password is None:
            # If password hasn't been set, prompt for it.
            self.api_password = getpass.getpass(prompt="SQuaSH password: ")

        self.date_created = args.date_created

        if self.date_created is not None:
            if not validate_date_created(self.date_created):
                message = 'Invalid datetime string, use a ISO8601 formatted ' \
                          'datetime in UTC, e.g. 2021-06-30T22:28:25Z.'
                raise RuntimeError(message)
            else:
                self.date_created = \
                    date_parser.parse(self.date_created).isoformat()

    def __str__(self):
        """Render the configuration as pretty-printed JSON, masking the
        password.
        """
        configs = {
            'json_paths': self.json_paths,
            'test': self.test,
            'output_filepath': self.output_filepath,
            'show_json': self.show_json,
            'ignore_blobs': self.ignore_blobs,
            'env': self.env_name,
            'ignore_lsstsw': self.ignore_lsstsw,
            'lsstsw': self.lsstsw,
            'extra_package_paths': self.extra_package_paths,
            'api_url': self.api_url,
            'api_user': self.api_user,
            'date_created': self.date_created,
        }
        if self.api_password is None:
            configs['api_password'] = None
        else:
            # Never print the real password.
            configs['api_password'] = '*' * len(self.api_password)

        return json.dumps(configs,
                          sort_keys=True, indent=4, separators=(',', ': '))