# Recovered from a coverage.py v7.5.0 report (created 2024-05-02 03:31 -0700)
# for python/lsst/pipe/base/_instrument.py (53% coverage, 126 statements).
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Instrument",)

import contextlib
import datetime
import os.path
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, Self, cast, final

from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
from lsst.daf.butler.registry import DataIdError
from lsst.pex.config import Config, RegistryField
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

from ._observation_dimension_packer import observation_packer_registry

if TYPE_CHECKING:
    from lsst.daf.butler import Registry
class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature
    as the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: str | None = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage
    class name. If `None` the ingest system will use its default definition.
    """

    def __init__(self, collection_prefix: str | None = None):
        # Default the collection prefix to the instrument's short name so
        # that collection names are instrument-specific unless overridden.
        if collection_prefix is None:
            collection_prefix = self.getName()
        self.collection_prefix = collection_prefix
89 @classmethod
90 @abstractmethod
91 def getName(cls) -> str:
92 """Return the short (dimension) name for this instrument.
94 This is not (in general) the same as the class name - it's what is used
95 as the value of the "instrument" field in data IDs, and is usually an
96 abbreviation of the full name.
97 """
98 raise NotImplementedError()
100 @abstractmethod
101 def register(self, registry: Registry, *, update: bool = False) -> None:
102 """Insert instrument, and other relevant records into `Registry`.
104 Parameters
105 ----------
106 registry : `lsst.daf.butler.Registry`
107 Registry client for the data repository to modify.
108 update : `bool`, optional
109 If `True` (`False` is default), update existing records if they
110 differ from the new ones.
112 Raises
113 ------
114 lsst.daf.butler.registry.ConflictingDefinitionError
115 Raised if any existing record has the same key but a different
116 definition as one being registered.
118 Notes
119 -----
120 New records can always be added by calling this method multiple times,
121 as long as no existing records have changed (if existing records have
122 changed, ``update=True`` must be used). Old records can never be
123 removed by this method.
125 Implementations should guarantee that registration is atomic (the
126 registry should not be modified if any error occurs) and idempotent at
127 the level of individual dimension entries; new detectors and filters
128 should be added, but changes to any existing record should not be.
129 This can generally be achieved via a block like
131 .. code-block:: python
133 with registry.transaction():
134 registry.syncDimensionData("instrument", ...)
135 registry.syncDimensionData("detector", ...)
136 self.registerFilters(registry)
137 """
138 raise NotImplementedError()
140 @classmethod
141 def fromName(cls, name: str, registry: Registry, collection_prefix: str | None = None) -> Self:
142 """Given an instrument name and a butler registry, retrieve a
143 corresponding instantiated instrument object.
145 Parameters
146 ----------
147 name : `str`
148 Name of the instrument (must match the return value of `getName`).
149 registry : `lsst.daf.butler.Registry`
150 Butler registry to query to find the information.
151 collection_prefix : `str`, optional
152 Prefix for collection names to use instead of the instrument's own
153 name. This is primarily for use in simulated-data repositories,
154 where the instrument name may not be necessary and/or sufficient to
155 distinguish between collections.
157 Returns
158 -------
159 instrument : `Instrument`
160 An instance of the relevant `Instrument`.
162 Notes
163 -----
164 The instrument must be registered in the corresponding butler.
166 Raises
167 ------
168 LookupError
169 Raised if the instrument is not known to the supplied registry.
170 ModuleNotFoundError
171 Raised if the class could not be imported. This could mean
172 that the relevant obs package has not been setup.
173 TypeError
174 Raised if the class name retrieved is not a string or the imported
175 symbol is not an `Instrument` subclass.
176 """
177 try:
178 records = list(registry.queryDimensionRecords("instrument", instrument=name))
179 except DataIdError:
180 records = None
181 if not records:
182 raise LookupError(f"No registered instrument with name '{name}'.")
183 cls_name = records[0].class_name
184 if not isinstance(cls_name, str):
185 raise TypeError(
186 f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
187 )
188 return cls._from_cls_name(cls_name, collection_prefix)
190 @classmethod
191 def from_string(
192 cls, name: str, registry: Registry | None = None, collection_prefix: str | None = None
193 ) -> Self:
194 """Return an instance from the short name or class name.
196 If the instrument name is not qualified (does not contain a '.') and a
197 butler registry is provided, this will attempt to load the instrument
198 using `Instrument.fromName()`. Otherwise the instrument will be
199 imported and instantiated.
201 Parameters
202 ----------
203 name : `str`
204 The name or fully-qualified class name of an instrument.
205 registry : `lsst.daf.butler.Registry`, optional
206 Butler registry to query to find information about the instrument,
207 by default `None`.
208 collection_prefix : `str`, optional
209 Prefix for collection names to use instead of the instrument's own
210 name. This is primarily for use in simulated-data repositories,
211 where the instrument name may not be necessary and/or sufficient
212 to distinguish between collections.
214 Returns
215 -------
216 instrument : `Instrument`
217 The instantiated instrument.
219 Raises
220 ------
221 RuntimeError
222 Raised if the instrument can not be imported, instantiated, or
223 obtained from the registry.
224 TypeError
225 Raised if the instrument is not a subclass of
226 `~lsst.pipe.base.Instrument`.
228 See Also
229 --------
230 Instrument.fromName : Constructing Instrument from a name.
231 """
232 if "." not in name and registry is not None:
233 try:
234 instr = cls.fromName(name, registry, collection_prefix=collection_prefix)
235 except Exception as err:
236 raise RuntimeError(
237 f"Could not get instrument from name: {name}. Failed with exception: {err}"
238 ) from err
239 else:
240 try:
241 instr_class = doImportType(name)
242 except Exception as err:
243 raise RuntimeError(
244 f"Could not import instrument: {name}. Failed with exception: {err}"
245 ) from err
246 instr = instr_class(collection_prefix=collection_prefix)
247 if not isinstance(instr, cls):
248 raise TypeError(f"{name} is not a {get_full_type_name(cls)} subclass.")
249 return instr
251 @classmethod
252 def from_data_id(cls, data_id: DataCoordinate, collection_prefix: str | None = None) -> Self:
253 """Instantiate an `Instrument` object from a fully-expanded data ID.
255 Parameters
256 ----------
257 data_id : `~lsst.daf.butler.DataCoordinate`
258 Expanded data ID that includes the instrument dimension.
259 collection_prefix : `str`, optional
260 Prefix for collection names to use instead of the instrument's own
261 name. This is primarily for use in simulated-data repositories,
262 where the instrument name may not be necessary and/or sufficient to
263 distinguish between collections.
265 Returns
266 -------
267 instrument : `Instrument`
268 An instance of the relevant `Instrument`.
270 Raises
271 ------
272 TypeError
273 Raised if the class name retrieved is not a string or the imported
274 symbol is not an `Instrument` subclass.
275 """
276 return cls._from_cls_name(
277 cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
278 )
280 @classmethod
281 def _from_cls_name(cls, cls_name: str, collection_prefix: str | None = None) -> Self:
282 """Instantiate an `Instrument` object type name.
284 This just provides common error-handling for `fromName` and
285 `from_data_id`
287 Parameters
288 ----------
289 cls_name : `str`
290 Fully-qualified name of the type.
291 collection_prefix : `str`, optional
292 Prefix for collection names to use instead of the instrument's own
293 name. This is primarily for use in simulated-data repositories,
294 where the instrument name may not be necessary and/or sufficient to
295 distinguish between collections.
297 Returns
298 -------
299 instrument : `Instrument`
300 An instance of the relevant `Instrument`.
302 Raises
303 ------
304 TypeError
305 Raised if the class name retrieved is not a string or the imported
306 symbol is not an `Instrument` subclass.
307 """
308 instrument_cls: type = doImportType(cls_name)
309 if not issubclass(instrument_cls, cls):
310 raise TypeError(
311 f"{instrument_cls!r}, obtained from importing {cls_name}, is not a subclass "
312 f"of {get_full_type_name(cls)}."
313 )
314 return instrument_cls(collection_prefix=collection_prefix)
316 @staticmethod
317 def importAll(registry: Registry) -> None:
318 """Import all the instruments known to this registry.
320 This will ensure that all metadata translators have been registered.
322 Parameters
323 ----------
324 registry : `lsst.daf.butler.Registry`
325 Butler registry to query to find the information.
327 Notes
328 -----
329 It is allowed for a particular instrument class to fail on import.
330 This might simply indicate that a particular obs package has
331 not been setup.
332 """
333 records = list(registry.queryDimensionRecords("instrument"))
334 for record in records:
335 cls = record.class_name
336 with contextlib.suppress(Exception):
337 doImportType(cls)
339 @abstractmethod
340 def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
341 """Return the Formatter class that should be used to read a particular
342 raw file.
344 Parameters
345 ----------
346 dataId : `DataId`
347 Dimension-based ID for the raw file or files being ingested.
349 Returns
350 -------
351 formatter : `lsst.daf.butler.Formatter` class
352 Class to be used that reads the file into the correct
353 Python object for the raw data.
354 """
355 raise NotImplementedError()
357 def applyConfigOverrides(self, name: str, config: Config) -> None:
358 """Apply instrument-specific overrides for a task config.
360 Parameters
361 ----------
362 name : `str`
363 Name of the object being configured; typically the _DefaultName
364 of a Task.
365 config : `lsst.pex.config.Config`
366 Config instance to which overrides should be applied.
367 """
368 for root in self.configPaths:
369 path = os.path.join(root, f"{name}.py")
370 if os.path.exists(path):
371 config.load(path)
373 @staticmethod
374 def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
375 """Format a timestamp for use in a collection name.
377 Parameters
378 ----------
379 timestamp : `str` or `datetime.datetime`
380 Timestamp to format. May be a date or datetime string in extended
381 ISO format (assumed UTC), with or without a timezone specifier, a
382 datetime string in basic ISO format with a timezone specifier, a
383 naive `datetime.datetime` instance (assumed UTC) or a
384 timezone-aware `datetime.datetime` instance (converted to UTC).
385 This is intended to cover all forms that string ``CALIBDATE``
386 metadata values have taken in the past, as well as the format this
387 method itself writes out (to enable round-tripping).
389 Returns
390 -------
391 formatted : `str`
392 Standardized string form for the timestamp.
393 """
394 if isinstance(timestamp, str):
395 if "-" in timestamp:
396 # extended ISO format, with - and : delimiters
397 timestamp = datetime.datetime.fromisoformat(timestamp)
398 else:
399 # basic ISO format, with no delimiters (what this method
400 # returns)
401 timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
402 if not isinstance(timestamp, datetime.datetime):
403 raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
404 if timestamp.tzinfo is not None:
405 timestamp = timestamp.astimezone(datetime.timezone.utc)
406 return f"{timestamp:%Y%m%dT%H%M%S}Z"
408 @staticmethod
409 def makeCollectionTimestamp() -> str:
410 """Create a timestamp string for use in a collection name from the
411 current time.
413 Returns
414 -------
415 formatted : `str`
416 Standardized string form of the current time.
417 """
418 return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))
420 def makeDefaultRawIngestRunName(self) -> str:
421 """Make the default instrument-specific run collection string for raw
422 data ingest.
424 Returns
425 -------
426 coll : `str`
427 Run collection name to be used as the default for ingestion of
428 raws.
429 """
430 return self.makeCollectionName("raw", "all")
432 def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
433 """Make a RUN collection name appropriate for inserting calibration
434 datasets whose validity ranges are unbounded.
436 Parameters
437 ----------
438 *labels : `str`
439 Extra strings to be included in the base name, using the default
440 delimiter for collection names. Usually this is the name of the
441 ticket on which the calibration collection is being created.
443 Returns
444 -------
445 name : `str`
446 Run collection name.
447 """
448 return self.makeCollectionName("calib", *labels, "unbounded")
450 def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
451 """Make a RUN collection name appropriate for inserting curated
452 calibration datasets with the given ``CALIBDATE`` metadata value.
454 Parameters
455 ----------
456 calibDate : `str`
457 The ``CALIBDATE`` metadata value.
458 *labels : `str`
459 Strings to be included in the collection name (before
460 ``calibDate``, but after all other terms), using the default
461 delimiter for collection names. Usually this is the name of the
462 ticket on which the calibration collection is being created.
464 Returns
465 -------
466 name : `str`
467 Run collection name.
468 """
469 return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))
471 def makeCalibrationCollectionName(self, *labels: str) -> str:
472 """Make a CALIBRATION collection name appropriate for associating
473 calibration datasets with validity ranges.
475 Parameters
476 ----------
477 *labels : `str`
478 Strings to be appended to the base name, using the default
479 delimiter for collection names. Usually this is the name of the
480 ticket on which the calibration collection is being created.
482 Returns
483 -------
484 name : `str`
485 Calibration collection name.
486 """
487 return self.makeCollectionName("calib", *labels)
489 @staticmethod
490 def makeRefCatCollectionName(*labels: str) -> str:
491 """Return a global (not instrument-specific) name for a collection that
492 holds reference catalogs.
494 With no arguments, this returns the name of the collection that holds
495 all reference catalogs (usually a ``CHAINED`` collection, at least in
496 long-lived repos that may contain more than one reference catalog).
498 Parameters
499 ----------
500 *labels : `str`
501 Strings to be added to the global collection name, in order to
502 define a collection name for one or more reference catalogs being
503 ingested at the same time.
505 Returns
506 -------
507 name : `str`
508 Collection name.
510 Notes
511 -----
512 This is a ``staticmethod``, not a ``classmethod``, because it should
513 be the same for all instruments.
514 """
515 return "/".join(("refcats",) + labels)
517 def makeUmbrellaCollectionName(self) -> str:
518 """Return the name of the umbrella ``CHAINED`` collection for this
519 instrument that combines all standard recommended input collections.
521 This method should almost never be overridden by derived classes.
523 Returns
524 -------
525 name : `str`
526 Name for the umbrella collection.
527 """
528 return self.makeCollectionName("defaults")
530 def makeCollectionName(self, *labels: str) -> str:
531 """Get the instrument-specific collection string to use as derived
532 from the supplied labels.
534 Parameters
535 ----------
536 *labels : `str`
537 Strings to be combined with the instrument name to form a
538 collection name.
540 Returns
541 -------
542 name : `str`
543 Collection name to use that includes the instrument's recommended
544 prefix.
545 """
546 return "/".join((self.collection_prefix,) + labels)
548 @staticmethod
549 def make_dimension_packer_config_field(
550 doc: str = (
551 "How to pack visit+detector or exposure+detector data IDs into integers. "
552 "The default (None) is to delegate to the Instrument class for which "
553 "registered implementation to use (but still use the nested configuration "
554 "for that implementation)."
555 ),
556 ) -> RegistryField:
557 """Make an `lsst.pex.config.Field` that can be used to configure how
558 data IDs for this instrument are packed.
560 Parameters
561 ----------
562 doc : `str`, optional
563 Documentation for the config field.
565 Returns
566 -------
567 field : `lsst.pex.config.RegistryField`
568 A config field for which calling ``apply`` on the instance
569 attribute constructs an `lsst.daf.butler.DimensionPacker` that
570 defaults to the appropriate one for this instrument.
572 Notes
573 -----
574 This method is expected to be used whenever code requires a single
575 integer that represents the combination of a detector and either a
576 visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
577 class and its helper configs provide a simpler high-level interface
578 that should be used instead of calling this method directly.
580 This system is designed to work best when the configuration for the ID
581 packer is not overridden at all, allowing the appropriate instrument
582 class to determine the behavior for each data ID encountered. When the
583 configuration does need to be modified (most often when the scheme for
584 packing an instrument's data IDs is undergoing an upgrade), it is
585 important to ensure the overrides are only applied to data IDs with the
586 desired instrument value.
588 Unit tests of code that use a field produced by this method will often
589 want to explicitly set the packer to "observation" and manually set
590 its ``n_detectors`` and ``n_observations`` fields; this will make it
591 unnecessary for tests to provide expanded data IDs.
592 """
593 # The control flow here bounces around a bit when this RegistryField's
594 # apply() method is called, so it merits a thorough walkthrough
595 # somewhere, and that might as well be here:
596 #
597 # - If the config field's name is not `None`, that kind of packer is
598 # constructed and returned with the arguments to `apply`, in just the
599 # way it works with most RegistryFields or ConfigurableFields. But
600 # this is expected to be rare.
601 #
602 # - If the config fields' name is `None`, the `apply` method (which
603 # actually lives on the `pex.config.RegistryInstanceDict` class,
604 # since `RegistryField` is a descriptor), calls
605 # `_make_default_dimension_packer_dispatch` (which is final, and
606 # hence the base class implementation just below is the only one).
607 #
608 # - `_make_default_dimension_packer_dispatch` instantiates an
609 # `Instrument` instance of the type pointed at by the data ID (i.e.
610 # calling `Instrument.from_data_id`), then calls
611 # `_make_default_dimension_packer` on that.
612 #
613 # - The default implementation of `_make_default_dimension_packer` here
614 # in the base class picks the "observation" dimension packer, so if
615 # it's not overridden by a derived class everything proceeds as if
616 # the config field's name was set to that. Note that this sets which
617 # item in the registry is used, but it still pays attention to the
618 # configuration for that entry in the registry field.
619 #
620 # - A subclass implementation of `_make_default_dimension_packer` will
621 # take precedence over the base class, but it's expected that these
622 # will usually just delegate back to ``super()`` while changing the
623 # ``default`` argument to something other than "observation". Once
624 # again, this will control which packer entry in the registry is used
625 # but the result will still reflect the configuration for that packer
626 # in the registry field.
627 #
628 return observation_packer_registry.makeField(
629 doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
630 )
632 @staticmethod
633 @final
634 def make_default_dimension_packer(
635 data_id: DataCoordinate, is_exposure: bool | None = None
636 ) -> DimensionPacker:
637 """Return the default dimension packer for the given data ID.
639 Parameters
640 ----------
641 data_id : `lsst.daf.butler.DataCoordinate`
642 Data ID that identifies at least the ``instrument`` dimension. Must
643 have dimension records attached.
644 is_exposure : `bool`, optional
645 If `False`, construct a packer for visit+detector data IDs. If
646 `True`, construct a packer for exposure+detector data IDs. If
647 `None`, this is determined based on whether ``visit`` or
648 ``exposure`` is present in ``data_id``, with ``visit`` checked
649 first and hence used if both are present.
651 Returns
652 -------
653 packer : `lsst.daf.butler.DimensionPacker`
654 Object that packs {visit, detector} or {exposure, detector} data
655 IDs into integers.
657 Notes
658 -----
659 When using a dimension packer in task code, using
660 `make_dimension_packer_config_field` to make the packing algorithm
661 configurable is preferred over this method.
663 When obtaining a dimension packer to unpack IDs that were packed by
664 task code, it is similarly preferable to load the configuration for
665 that task and the existing packer configuration field there, to ensure
666 any config overrides are respected. That is sometimes quite difficult,
667 however, and since config overrides for dimension packers are expected
668 to be exceedingly rare, using this simpler method will almost always
669 work.
670 """
672 class _DummyConfig(Config):
673 packer = Instrument.make_dimension_packer_config_field()
675 config = _DummyConfig()
677 return config.packer.apply(data_id, is_exposure=is_exposure) # type: ignore
679 @staticmethod
680 @final
681 def _make_default_dimension_packer_dispatch(
682 config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
683 ) -> DimensionPacker:
684 """Dispatch method used to invoke `_make_dimension_packer`.
686 This method constructs the appropriate `Instrument` subclass from
687 config and then calls its `_make_default_dimension_packer`.
688 It is called when (as usual) the field returned by
689 `make_dimension_packer_config_field` is left to its default selection
690 of `None`.
692 All arguments and return values are the same as
693 `_make_default_dimension_packer.`
694 """
695 instrument = Instrument.from_data_id(data_id)
696 return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
698 def _make_default_dimension_packer(
699 self,
700 config_dict: Any,
701 data_id: DataCoordinate,
702 is_exposure: bool | None = None,
703 default: str = "observation",
704 ) -> DimensionPacker:
705 """Construct return the default dimension packer for this instrument.
707 This method is a protected hook for subclasses to override the behavior
708 of `make_dimension_packer_config_field` when the packer is not selected
709 explicitly via configuration.
711 Parameters
712 ----------
713 config_dict
714 Mapping attribute of a `lsst.pex.config.Config` instance that
715 corresponds to a field created by `make_dimension_packer_config`
716 (the actual type of this object is a `lsst.pex.config`
717 implementation detail).
718 data_id : `lsst.daf.butler.DataCoordinate`
719 Data ID that identifies at least the ``instrument`` dimension. For
720 most configurations this must have dimension records attached.
721 is_exposure : `bool`, optional
722 If `False`, construct a packer for visit+detector data IDs. If
723 `True`, construct a packer for exposure+detector data IDs. If
724 `None`, this is determined based on whether ``visit`` or
725 ``exposure`` is present in ``data_id``, with ``visit`` checked
726 first and hence used if both are present.
727 default : `str`, optional
728 Registered name of the dimension packer to select when the
729 configured packer is `None` (as is usually the case). This is
730 intended primarily for derived classes delegating to `super` in
731 reimplementations of this method.
733 Returns
734 -------
735 packer : `lsst.daf.butler.DimensionPacker`
736 Object that packs {visit, detector} or {exposure, detector} data
737 IDs into integers.
738 """
739 return config_dict.apply_with(default, data_id, is_exposure=is_exposure)