Coverage for python/lsst/pipe/base/_instrument.py: 52%
125 statements
« prev ^ index » next — coverage.py v7.3.2, created at 2023-12-06 10:56 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("Instrument",)
32import contextlib
33import datetime
34import os.path
35from abc import ABCMeta, abstractmethod
36from collections.abc import Sequence
37from typing import TYPE_CHECKING, Any, cast, final
39from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
40from lsst.daf.butler.registry import DataIdError
41from lsst.pex.config import Config, RegistryField
42from lsst.utils import doImportType
44from ._observation_dimension_packer import observation_packer_registry
46if TYPE_CHECKING:
47 from lsst.daf.butler import Registry
class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name.  This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature
    as the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: str | None = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets.  This is a tuple of
    the dataset type name, a tuple of dimension names, and the storage class
    name.  If `None` the ingest system will use its default definition."""
83 def __init__(self, collection_prefix: str | None = None):
84 if collection_prefix is None:
85 collection_prefix = self.getName()
86 self.collection_prefix = collection_prefix
88 @classmethod
89 @abstractmethod
90 def getName(cls) -> str:
91 """Return the short (dimension) name for this instrument.
93 This is not (in general) the same as the class name - it's what is used
94 as the value of the "instrument" field in data IDs, and is usually an
95 abbreviation of the full name.
96 """
97 raise NotImplementedError()
99 @abstractmethod
100 def register(self, registry: Registry, *, update: bool = False) -> None:
101 """Insert instrument, and other relevant records into `Registry`.
103 Parameters
104 ----------
105 registry : `lsst.daf.butler.Registry`
106 Registry client for the data repository to modify.
107 update : `bool`, optional
108 If `True` (`False` is default), update existing records if they
109 differ from the new ones.
111 Raises
112 ------
113 lsst.daf.butler.registry.ConflictingDefinitionError
114 Raised if any existing record has the same key but a different
115 definition as one being registered.
117 Notes
118 -----
119 New records can always be added by calling this method multiple times,
120 as long as no existing records have changed (if existing records have
121 changed, ``update=True`` must be used). Old records can never be
122 removed by this method.
124 Implementations should guarantee that registration is atomic (the
125 registry should not be modified if any error occurs) and idempotent at
126 the level of individual dimension entries; new detectors and filters
127 should be added, but changes to any existing record should not be.
128 This can generally be achieved via a block like
130 .. code-block:: python
132 with registry.transaction():
133 registry.syncDimensionData("instrument", ...)
134 registry.syncDimensionData("detector", ...)
135 self.registerFilters(registry)
137 """
138 raise NotImplementedError()
140 @staticmethod
141 def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument:
142 """Given an instrument name and a butler registry, retrieve a
143 corresponding instantiated instrument object.
145 Parameters
146 ----------
147 name : `str`
148 Name of the instrument (must match the return value of `getName`).
149 registry : `lsst.daf.butler.Registry`
150 Butler registry to query to find the information.
151 collection_prefix : `str`, optional
152 Prefix for collection names to use instead of the instrument's own
153 name. This is primarily for use in simulated-data repositories,
154 where the instrument name may not be necessary and/or sufficient to
155 distinguish between collections.
157 Returns
158 -------
159 instrument : `Instrument`
160 An instance of the relevant `Instrument`.
162 Notes
163 -----
164 The instrument must be registered in the corresponding butler.
166 Raises
167 ------
168 LookupError
169 Raised if the instrument is not known to the supplied registry.
170 ModuleNotFoundError
171 Raised if the class could not be imported. This could mean
172 that the relevant obs package has not been setup.
173 TypeError
174 Raised if the class name retrieved is not a string or the imported
175 symbol is not an `Instrument` subclass.
176 """
177 try:
178 records = list(registry.queryDimensionRecords("instrument", instrument=name))
179 except DataIdError:
180 records = None
181 if not records:
182 raise LookupError(f"No registered instrument with name '{name}'.")
183 cls_name = records[0].class_name
184 if not isinstance(cls_name, str):
185 raise TypeError(
186 f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
187 )
188 return Instrument._from_cls_name(cls_name, collection_prefix)
190 @staticmethod
191 def from_string(
192 name: str, registry: Registry | None = None, collection_prefix: str | None = None
193 ) -> Instrument:
194 """Return an instance from the short name or class name.
196 If the instrument name is not qualified (does not contain a '.') and a
197 butler registry is provided, this will attempt to load the instrument
198 using `Instrument.fromName()`. Otherwise the instrument will be
199 imported and instantiated.
201 Parameters
202 ----------
203 name : `str`
204 The name or fully-qualified class name of an instrument.
205 registry : `lsst.daf.butler.Registry`, optional
206 Butler registry to query to find information about the instrument,
207 by default `None`.
208 collection_prefix : `str`, optional
209 Prefix for collection names to use instead of the instrument's own
210 name. This is primarily for use in simulated-data repositories,
211 where the instrument name may not be necessary and/or sufficient
212 to distinguish between collections.
214 Returns
215 -------
216 instrument : `Instrument`
217 The instantiated instrument.
219 Raises
220 ------
221 RuntimeError
222 Raised if the instrument can not be imported, instantiated, or
223 obtained from the registry.
224 TypeError
225 Raised if the instrument is not a subclass of
226 `~lsst.pipe.base.Instrument`.
228 See Also
229 --------
230 Instrument.fromName
231 """
232 if "." not in name and registry is not None:
233 try:
234 instr = Instrument.fromName(name, registry, collection_prefix=collection_prefix)
235 except Exception as err:
236 raise RuntimeError(
237 f"Could not get instrument from name: {name}. Failed with exception: {err}"
238 ) from err
239 else:
240 try:
241 instr_class = doImportType(name)
242 except Exception as err:
243 raise RuntimeError(
244 f"Could not import instrument: {name}. Failed with exception: {err}"
245 ) from err
246 instr = instr_class(collection_prefix=collection_prefix)
247 if not isinstance(instr, Instrument):
248 raise TypeError(f"{name} is not an Instrument subclass.")
249 return instr
251 @staticmethod
252 def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument:
253 """Instantiate an `Instrument` object from a fully-expanded data ID.
255 Parameters
256 ----------
257 data_id : `~lsst.daf.butler.DataCoordinate`
258 Expanded data ID that includes the instrument dimension.
259 collection_prefix : `str`, optional
260 Prefix for collection names to use instead of the instrument's own
261 name. This is primarily for use in simulated-data repositories,
262 where the instrument name may not be necessary and/or sufficient to
263 distinguish between collections.
265 Returns
266 -------
267 instrument : `Instrument`
268 An instance of the relevant `Instrument`.
270 Raises
271 ------
272 TypeError
273 Raised if the class name retrieved is not a string or the imported
274 symbol is not an `Instrument` subclass.
275 """
276 return Instrument._from_cls_name(
277 cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
278 )
280 @staticmethod
281 def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instrument:
282 """Instantiate an `Instrument` object type name.
284 This just provides common error-handling for `fromName` and
285 `from_data_id`
287 Parameters
288 ----------
289 cls_name : `str`
290 Fully-qualified name of the type.
291 collection_prefix : `str`, optional
292 Prefix for collection names to use instead of the instrument's own
293 name. This is primarily for use in simulated-data repositories,
294 where the instrument name may not be necessary and/or sufficient to
295 distinguish between collections.
297 Returns
298 -------
299 instrument : `Instrument`
300 An instance of the relevant `Instrument`.
302 Raises
303 ------
304 TypeError
305 Raised if the class name retrieved is not a string or the imported
306 symbol is not an `Instrument` subclass.
307 """
308 instrument_cls: type = doImportType(cls_name)
309 if not issubclass(instrument_cls, Instrument):
310 raise TypeError(
311 f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
312 )
313 return instrument_cls(collection_prefix=collection_prefix)
315 @staticmethod
316 def importAll(registry: Registry) -> None:
317 """Import all the instruments known to this registry.
319 This will ensure that all metadata translators have been registered.
321 Parameters
322 ----------
323 registry : `lsst.daf.butler.Registry`
324 Butler registry to query to find the information.
326 Notes
327 -----
328 It is allowed for a particular instrument class to fail on import.
329 This might simply indicate that a particular obs package has
330 not been setup.
331 """
332 records = list(registry.queryDimensionRecords("instrument"))
333 for record in records:
334 cls = record.class_name
335 with contextlib.suppress(Exception):
336 doImportType(cls)
338 @abstractmethod
339 def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
340 """Return the Formatter class that should be used to read a particular
341 raw file.
343 Parameters
344 ----------
345 dataId : `DataId`
346 Dimension-based ID for the raw file or files being ingested.
348 Returns
349 -------
350 formatter : `lsst.daf.butler.Formatter` class
351 Class to be used that reads the file into the correct
352 Python object for the raw data.
353 """
354 raise NotImplementedError()
356 def applyConfigOverrides(self, name: str, config: Config) -> None:
357 """Apply instrument-specific overrides for a task config.
359 Parameters
360 ----------
361 name : `str`
362 Name of the object being configured; typically the _DefaultName
363 of a Task.
364 config : `lsst.pex.config.Config`
365 Config instance to which overrides should be applied.
366 """
367 for root in self.configPaths:
368 path = os.path.join(root, f"{name}.py")
369 if os.path.exists(path):
370 config.load(path)
372 @staticmethod
373 def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
374 """Format a timestamp for use in a collection name.
376 Parameters
377 ----------
378 timestamp : `str` or `datetime.datetime`
379 Timestamp to format. May be a date or datetime string in extended
380 ISO format (assumed UTC), with or without a timezone specifier, a
381 datetime string in basic ISO format with a timezone specifier, a
382 naive `datetime.datetime` instance (assumed UTC) or a
383 timezone-aware `datetime.datetime` instance (converted to UTC).
384 This is intended to cover all forms that string ``CALIBDATE``
385 metadata values have taken in the past, as well as the format this
386 method itself writes out (to enable round-tripping).
388 Returns
389 -------
390 formatted : `str`
391 Standardized string form for the timestamp.
392 """
393 if isinstance(timestamp, str):
394 if "-" in timestamp:
395 # extended ISO format, with - and : delimiters
396 timestamp = datetime.datetime.fromisoformat(timestamp)
397 else:
398 # basic ISO format, with no delimiters (what this method
399 # returns)
400 timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
401 if not isinstance(timestamp, datetime.datetime):
402 raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
403 if timestamp.tzinfo is not None:
404 timestamp = timestamp.astimezone(datetime.timezone.utc)
405 return f"{timestamp:%Y%m%dT%H%M%S}Z"
407 @staticmethod
408 def makeCollectionTimestamp() -> str:
409 """Create a timestamp string for use in a collection name from the
410 current time.
412 Returns
413 -------
414 formatted : `str`
415 Standardized string form of the current time.
416 """
417 return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))
419 def makeDefaultRawIngestRunName(self) -> str:
420 """Make the default instrument-specific run collection string for raw
421 data ingest.
423 Returns
424 -------
425 coll : `str`
426 Run collection name to be used as the default for ingestion of
427 raws.
428 """
429 return self.makeCollectionName("raw", "all")
431 def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
432 """Make a RUN collection name appropriate for inserting calibration
433 datasets whose validity ranges are unbounded.
435 Parameters
436 ----------
437 *labels : `str`
438 Extra strings to be included in the base name, using the default
439 delimiter for collection names. Usually this is the name of the
440 ticket on which the calibration collection is being created.
442 Returns
443 -------
444 name : `str`
445 Run collection name.
446 """
447 return self.makeCollectionName("calib", *labels, "unbounded")
449 def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
450 """Make a RUN collection name appropriate for inserting curated
451 calibration datasets with the given ``CALIBDATE`` metadata value.
453 Parameters
454 ----------
455 calibDate : `str`
456 The ``CALIBDATE`` metadata value.
457 *labels : `str`
458 Strings to be included in the collection name (before
459 ``calibDate``, but after all other terms), using the default
460 delimiter for collection names. Usually this is the name of the
461 ticket on which the calibration collection is being created.
463 Returns
464 -------
465 name : `str`
466 Run collection name.
467 """
468 return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))
470 def makeCalibrationCollectionName(self, *labels: str) -> str:
471 """Make a CALIBRATION collection name appropriate for associating
472 calibration datasets with validity ranges.
474 Parameters
475 ----------
476 *labels : `str`
477 Strings to be appended to the base name, using the default
478 delimiter for collection names. Usually this is the name of the
479 ticket on which the calibration collection is being created.
481 Returns
482 -------
483 name : `str`
484 Calibration collection name.
485 """
486 return self.makeCollectionName("calib", *labels)
488 @staticmethod
489 def makeRefCatCollectionName(*labels: str) -> str:
490 """Return a global (not instrument-specific) name for a collection that
491 holds reference catalogs.
493 With no arguments, this returns the name of the collection that holds
494 all reference catalogs (usually a ``CHAINED`` collection, at least in
495 long-lived repos that may contain more than one reference catalog).
497 Parameters
498 ----------
499 *labels : `str`
500 Strings to be added to the global collection name, in order to
501 define a collection name for one or more reference catalogs being
502 ingested at the same time.
504 Returns
505 -------
506 name : `str`
507 Collection name.
509 Notes
510 -----
511 This is a ``staticmethod``, not a ``classmethod``, because it should
512 be the same for all instruments.
513 """
514 return "/".join(("refcats",) + labels)
516 def makeUmbrellaCollectionName(self) -> str:
517 """Return the name of the umbrella ``CHAINED`` collection for this
518 instrument that combines all standard recommended input collections.
520 This method should almost never be overridden by derived classes.
522 Returns
523 -------
524 name : `str`
525 Name for the umbrella collection.
526 """
527 return self.makeCollectionName("defaults")
529 def makeCollectionName(self, *labels: str) -> str:
530 """Get the instrument-specific collection string to use as derived
531 from the supplied labels.
533 Parameters
534 ----------
535 *labels : `str`
536 Strings to be combined with the instrument name to form a
537 collection name.
539 Returns
540 -------
541 name : `str`
542 Collection name to use that includes the instrument's recommended
543 prefix.
544 """
545 return "/".join((self.collection_prefix,) + labels)
547 @staticmethod
548 def make_dimension_packer_config_field(
549 doc: str = (
550 "How to pack visit+detector or exposure+detector data IDs into integers. "
551 "The default (None) is to delegate to the Instrument class for which "
552 "registered implementation to use (but still use the nested configuration "
553 "for that implementation)."
554 ),
555 ) -> RegistryField:
556 """Make an `lsst.pex.config.Field` that can be used to configure how
557 data IDs for this instrument are packed.
559 Parameters
560 ----------
561 doc : `str`, optional
562 Documentation for the config field.
564 Returns
565 -------
566 field : `lsst.pex.config.RegistryField`
567 A config field for which calling ``apply`` on the instance
568 attribute constructs an `lsst.daf.butler.DimensionPacker` that
569 defaults to the appropriate one for this instrument.
571 Notes
572 -----
573 This method is expected to be used whenever code requires a single
574 integer that represents the combination of a detector and either a
575 visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
576 class and its helper configs provide a simpler high-level interface
577 that should be used instead of calling this method directly.
579 This system is designed to work best when the configuration for the ID
580 packer is not overridden at all, allowing the appropriate instrument
581 class to determine the behavior for each data ID encountered. When the
582 configuration does need to be modified (most often when the scheme for
583 packing an instrument's data IDs is undergoing an upgrade), it is
584 important to ensure the overrides are only applied to data IDs with the
585 desired instrument value.
587 Unit tests of code that use a field produced by this method will often
588 want to explicitly set the packer to "observation" and manually set
589 its ``n_detectors`` and ``n_observations`` fields; this will make it
590 unnecessary for tests to provide expanded data IDs.
591 """
592 # The control flow here bounces around a bit when this RegistryField's
593 # apply() method is called, so it merits a thorough walkthrough
594 # somewhere, and that might as well be here:
595 #
596 # - If the config field's name is not `None`, that kind of packer is
597 # constructed and returned with the arguments to `apply`, in just the
598 # way it works with most RegistryFields or ConfigurableFields. But
599 # this is expected to be rare.
600 #
601 # - If the config fields' name is `None`, the `apply` method (which
602 # actually lives on the `pex.config.RegistryInstanceDict` class,
603 # since `RegistryField` is a descriptor), calls
604 # `_make_default_dimension_packer_dispatch` (which is final, and
605 # hence the base class implementation just below is the only one).
606 #
607 # - `_make_default_dimension_packer_dispatch` instantiates an
608 # `Instrument` instance of the type pointed at by the data ID (i.e.
609 # calling `Instrument.from_data_id`), then calls
610 # `_make_default_dimension_packer` on that.
611 #
612 # - The default implementation of `_make_default_dimension_packer` here
613 # in the base class picks the "observation" dimension packer, so if
614 # it's not overridden by a derived class everything proceeds as if
615 # the config field's name was set to that. Note that this sets which
616 # item in the registry is used, but it still pays attention to the
617 # configuration for that entry in the registry field.
618 #
619 # - A subclass implementation of `_make_default_dimension_packer` will
620 # take precedence over the base class, but it's expected that these
621 # will usually just delegate back to ``super()`` while changing the
622 # ``default`` argument to something other than "observation". Once
623 # again, this will control which packer entry in the registry is used
624 # but the result will still reflect the configuration for that packer
625 # in the registry field.
626 #
627 return observation_packer_registry.makeField(
628 doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
629 )
631 @staticmethod
632 @final
633 def make_default_dimension_packer(
634 data_id: DataCoordinate, is_exposure: bool | None = None
635 ) -> DimensionPacker:
636 """Return the default dimension packer for the given data ID.
638 Parameters
639 ----------
640 data_id : `lsst.daf.butler.DataCoordinate`
641 Data ID that identifies at least the ``instrument`` dimension. Must
642 have dimension records attached.
643 is_exposure : `bool`, optional
644 If `False`, construct a packer for visit+detector data IDs. If
645 `True`, construct a packer for exposure+detector data IDs. If
646 `None`, this is determined based on whether ``visit`` or
647 ``exposure`` is present in ``data_id``, with ``visit`` checked
648 first and hence used if both are present.
650 Returns
651 -------
652 packer : `lsst.daf.butler.DimensionPacker`
653 Object that packs {visit, detector} or {exposure, detector} data
654 IDs into integers.
656 Notes
657 -----
658 When using a dimension packer in task code, using
659 `make_dimension_packer_config_field` to make the packing algorithm
660 configurable is preferred over this method.
662 When obtaining a dimension packer to unpack IDs that were packed by
663 task code, it is similarly preferable to load the configuration for
664 that task and the existing packer configuration field there, to ensure
665 any config overrides are respected. That is sometimes quite difficult,
666 however, and since config overrides for dimension packers are expected
667 to be exceedingly rare, using this simpler method will almost always
668 work.
669 """
671 class _DummyConfig(Config):
672 packer = Instrument.make_dimension_packer_config_field()
674 config = _DummyConfig()
676 return config.packer.apply(data_id, is_exposure=is_exposure) # type: ignore
678 @staticmethod
679 @final
680 def _make_default_dimension_packer_dispatch(
681 config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
682 ) -> DimensionPacker:
683 """Dispatch method used to invoke `_make_dimension_packer`.
685 This method constructs the appropriate `Instrument` subclass from
686 config and then calls its `_make_default_dimension_packer`.
687 It is called when (as usual) the field returned by
688 `make_dimension_packer_config_field` is left to its default selection
689 of `None`.
691 All arguments and return values are the same as
692 `_make_default_dimension_packer.`
693 """
694 instrument = Instrument.from_data_id(data_id)
695 return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
697 def _make_default_dimension_packer(
698 self,
699 config_dict: Any,
700 data_id: DataCoordinate,
701 is_exposure: bool | None = None,
702 default: str = "observation",
703 ) -> DimensionPacker:
704 """Construct return the default dimension packer for this instrument.
706 This method is a protected hook for subclasses to override the behavior
707 of `make_dimension_packer_config_field` when the packer is not selected
708 explicitly via configuration.
710 Parameters
711 ----------
712 config_dict
713 Mapping attribute of a `lsst.pex.config.Config` instance that
714 corresponds to a field created by `make_dimension_packer_config`
715 (the actual type of this object is a `lsst.pex.config`
716 implementation detail).
717 data_id : `lsst.daf.butler.DataCoordinate`
718 Data ID that identifies at least the ``instrument`` dimension. For
719 most configurations this must have dimension records attached.
720 is_exposure : `bool`, optional
721 If `False`, construct a packer for visit+detector data IDs. If
722 `True`, construct a packer for exposure+detector data IDs. If
723 `None`, this is determined based on whether ``visit`` or
724 ``exposure`` is present in ``data_id``, with ``visit`` checked
725 first and hence used if both are present.
726 default : `str`, optional
727 Registered name of the dimension packer to select when the
728 configured packer is `None` (as is usually the case). This is
729 intended primarily for derived classes delegating to `super` in
730 reimplementations of this method.
732 Returns
733 -------
734 packer : `lsst.daf.butler.DimensionPacker`
735 Object that packs {visit, detector} or {exposure, detector} data
736 IDs into integers.
737 """
738 return config_dict.apply_with(default, data_id, is_exposure=is_exposure)