Coverage for python/lsst/pipe/base/_instrument.py: 52%
125 statements
« prev ^ index » next coverage.py v7.4.2, created at 2024-02-22 11:04 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("Instrument",)
32import contextlib
33import datetime
34import os.path
35from abc import ABCMeta, abstractmethod
36from collections.abc import Sequence
37from typing import TYPE_CHECKING, Any, cast, final
39from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
40from lsst.daf.butler.registry import DataIdError
41from lsst.pex.config import Config, RegistryField
42from lsst.utils import doImportType
44from ._observation_dimension_packer import observation_packer_registry
46if TYPE_CHECKING:
47 from lsst.daf.butler import Registry
class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature as
    the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: str | None = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage class
    name. If `None` the ingest system will use its default definition."""
83 def __init__(self, collection_prefix: str | None = None):
84 if collection_prefix is None:
85 collection_prefix = self.getName()
86 self.collection_prefix = collection_prefix
88 @classmethod
89 @abstractmethod
90 def getName(cls) -> str:
91 """Return the short (dimension) name for this instrument.
93 This is not (in general) the same as the class name - it's what is used
94 as the value of the "instrument" field in data IDs, and is usually an
95 abbreviation of the full name.
96 """
97 raise NotImplementedError()
    @abstractmethod
    def register(self, registry: Registry, *, update: bool = False) -> None:
        """Insert instrument, and other relevant records into `Registry`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry client for the data repository to modify.
        update : `bool`, optional
            If `True` (`False` is default), update existing records if they
            differ from the new ones.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if any existing record has the same key but a different
            definition as one being registered.

        Notes
        -----
        New records can always be added by calling this method multiple times,
        as long as no existing records have changed (if existing records have
        changed, ``update=True`` must be used). Old records can never be
        removed by this method.

        Implementations should guarantee that registration is atomic (the
        registry should not be modified if any error occurs) and idempotent at
        the level of individual dimension entries; new detectors and filters
        should be added, but changes to any existing record should not be.
        This can generally be achieved via a block like

        .. code-block:: python

            with registry.transaction():
                registry.syncDimensionData("instrument", ...)
                registry.syncDimensionData("detector", ...)
                self.registerFilters(registry)
        """
        raise NotImplementedError()
139 @staticmethod
140 def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument:
141 """Given an instrument name and a butler registry, retrieve a
142 corresponding instantiated instrument object.
144 Parameters
145 ----------
146 name : `str`
147 Name of the instrument (must match the return value of `getName`).
148 registry : `lsst.daf.butler.Registry`
149 Butler registry to query to find the information.
150 collection_prefix : `str`, optional
151 Prefix for collection names to use instead of the instrument's own
152 name. This is primarily for use in simulated-data repositories,
153 where the instrument name may not be necessary and/or sufficient to
154 distinguish between collections.
156 Returns
157 -------
158 instrument : `Instrument`
159 An instance of the relevant `Instrument`.
161 Notes
162 -----
163 The instrument must be registered in the corresponding butler.
165 Raises
166 ------
167 LookupError
168 Raised if the instrument is not known to the supplied registry.
169 ModuleNotFoundError
170 Raised if the class could not be imported. This could mean
171 that the relevant obs package has not been setup.
172 TypeError
173 Raised if the class name retrieved is not a string or the imported
174 symbol is not an `Instrument` subclass.
175 """
176 try:
177 records = list(registry.queryDimensionRecords("instrument", instrument=name))
178 except DataIdError:
179 records = None
180 if not records:
181 raise LookupError(f"No registered instrument with name '{name}'.")
182 cls_name = records[0].class_name
183 if not isinstance(cls_name, str):
184 raise TypeError(
185 f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
186 )
187 return Instrument._from_cls_name(cls_name, collection_prefix)
189 @staticmethod
190 def from_string(
191 name: str, registry: Registry | None = None, collection_prefix: str | None = None
192 ) -> Instrument:
193 """Return an instance from the short name or class name.
195 If the instrument name is not qualified (does not contain a '.') and a
196 butler registry is provided, this will attempt to load the instrument
197 using `Instrument.fromName()`. Otherwise the instrument will be
198 imported and instantiated.
200 Parameters
201 ----------
202 name : `str`
203 The name or fully-qualified class name of an instrument.
204 registry : `lsst.daf.butler.Registry`, optional
205 Butler registry to query to find information about the instrument,
206 by default `None`.
207 collection_prefix : `str`, optional
208 Prefix for collection names to use instead of the instrument's own
209 name. This is primarily for use in simulated-data repositories,
210 where the instrument name may not be necessary and/or sufficient
211 to distinguish between collections.
213 Returns
214 -------
215 instrument : `Instrument`
216 The instantiated instrument.
218 Raises
219 ------
220 RuntimeError
221 Raised if the instrument can not be imported, instantiated, or
222 obtained from the registry.
223 TypeError
224 Raised if the instrument is not a subclass of
225 `~lsst.pipe.base.Instrument`.
227 See Also
228 --------
229 Instrument.fromName : Constructing Instrument from a name.
230 """
231 if "." not in name and registry is not None:
232 try:
233 instr = Instrument.fromName(name, registry, collection_prefix=collection_prefix)
234 except Exception as err:
235 raise RuntimeError(
236 f"Could not get instrument from name: {name}. Failed with exception: {err}"
237 ) from err
238 else:
239 try:
240 instr_class = doImportType(name)
241 except Exception as err:
242 raise RuntimeError(
243 f"Could not import instrument: {name}. Failed with exception: {err}"
244 ) from err
245 instr = instr_class(collection_prefix=collection_prefix)
246 if not isinstance(instr, Instrument):
247 raise TypeError(f"{name} is not an Instrument subclass.")
248 return instr
250 @staticmethod
251 def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument:
252 """Instantiate an `Instrument` object from a fully-expanded data ID.
254 Parameters
255 ----------
256 data_id : `~lsst.daf.butler.DataCoordinate`
257 Expanded data ID that includes the instrument dimension.
258 collection_prefix : `str`, optional
259 Prefix for collection names to use instead of the instrument's own
260 name. This is primarily for use in simulated-data repositories,
261 where the instrument name may not be necessary and/or sufficient to
262 distinguish between collections.
264 Returns
265 -------
266 instrument : `Instrument`
267 An instance of the relevant `Instrument`.
269 Raises
270 ------
271 TypeError
272 Raised if the class name retrieved is not a string or the imported
273 symbol is not an `Instrument` subclass.
274 """
275 return Instrument._from_cls_name(
276 cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
277 )
279 @staticmethod
280 def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instrument:
281 """Instantiate an `Instrument` object type name.
283 This just provides common error-handling for `fromName` and
284 `from_data_id`
286 Parameters
287 ----------
288 cls_name : `str`
289 Fully-qualified name of the type.
290 collection_prefix : `str`, optional
291 Prefix for collection names to use instead of the instrument's own
292 name. This is primarily for use in simulated-data repositories,
293 where the instrument name may not be necessary and/or sufficient to
294 distinguish between collections.
296 Returns
297 -------
298 instrument : `Instrument`
299 An instance of the relevant `Instrument`.
301 Raises
302 ------
303 TypeError
304 Raised if the class name retrieved is not a string or the imported
305 symbol is not an `Instrument` subclass.
306 """
307 instrument_cls: type = doImportType(cls_name)
308 if not issubclass(instrument_cls, Instrument):
309 raise TypeError(
310 f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
311 )
312 return instrument_cls(collection_prefix=collection_prefix)
314 @staticmethod
315 def importAll(registry: Registry) -> None:
316 """Import all the instruments known to this registry.
318 This will ensure that all metadata translators have been registered.
320 Parameters
321 ----------
322 registry : `lsst.daf.butler.Registry`
323 Butler registry to query to find the information.
325 Notes
326 -----
327 It is allowed for a particular instrument class to fail on import.
328 This might simply indicate that a particular obs package has
329 not been setup.
330 """
331 records = list(registry.queryDimensionRecords("instrument"))
332 for record in records:
333 cls = record.class_name
334 with contextlib.suppress(Exception):
335 doImportType(cls)
337 @abstractmethod
338 def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
339 """Return the Formatter class that should be used to read a particular
340 raw file.
342 Parameters
343 ----------
344 dataId : `DataId`
345 Dimension-based ID for the raw file or files being ingested.
347 Returns
348 -------
349 formatter : `lsst.daf.butler.Formatter` class
350 Class to be used that reads the file into the correct
351 Python object for the raw data.
352 """
353 raise NotImplementedError()
355 def applyConfigOverrides(self, name: str, config: Config) -> None:
356 """Apply instrument-specific overrides for a task config.
358 Parameters
359 ----------
360 name : `str`
361 Name of the object being configured; typically the _DefaultName
362 of a Task.
363 config : `lsst.pex.config.Config`
364 Config instance to which overrides should be applied.
365 """
366 for root in self.configPaths:
367 path = os.path.join(root, f"{name}.py")
368 if os.path.exists(path):
369 config.load(path)
371 @staticmethod
372 def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
373 """Format a timestamp for use in a collection name.
375 Parameters
376 ----------
377 timestamp : `str` or `datetime.datetime`
378 Timestamp to format. May be a date or datetime string in extended
379 ISO format (assumed UTC), with or without a timezone specifier, a
380 datetime string in basic ISO format with a timezone specifier, a
381 naive `datetime.datetime` instance (assumed UTC) or a
382 timezone-aware `datetime.datetime` instance (converted to UTC).
383 This is intended to cover all forms that string ``CALIBDATE``
384 metadata values have taken in the past, as well as the format this
385 method itself writes out (to enable round-tripping).
387 Returns
388 -------
389 formatted : `str`
390 Standardized string form for the timestamp.
391 """
392 if isinstance(timestamp, str):
393 if "-" in timestamp:
394 # extended ISO format, with - and : delimiters
395 timestamp = datetime.datetime.fromisoformat(timestamp)
396 else:
397 # basic ISO format, with no delimiters (what this method
398 # returns)
399 timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
400 if not isinstance(timestamp, datetime.datetime):
401 raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
402 if timestamp.tzinfo is not None:
403 timestamp = timestamp.astimezone(datetime.timezone.utc)
404 return f"{timestamp:%Y%m%dT%H%M%S}Z"
406 @staticmethod
407 def makeCollectionTimestamp() -> str:
408 """Create a timestamp string for use in a collection name from the
409 current time.
411 Returns
412 -------
413 formatted : `str`
414 Standardized string form of the current time.
415 """
416 return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))
418 def makeDefaultRawIngestRunName(self) -> str:
419 """Make the default instrument-specific run collection string for raw
420 data ingest.
422 Returns
423 -------
424 coll : `str`
425 Run collection name to be used as the default for ingestion of
426 raws.
427 """
428 return self.makeCollectionName("raw", "all")
430 def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
431 """Make a RUN collection name appropriate for inserting calibration
432 datasets whose validity ranges are unbounded.
434 Parameters
435 ----------
436 *labels : `str`
437 Extra strings to be included in the base name, using the default
438 delimiter for collection names. Usually this is the name of the
439 ticket on which the calibration collection is being created.
441 Returns
442 -------
443 name : `str`
444 Run collection name.
445 """
446 return self.makeCollectionName("calib", *labels, "unbounded")
448 def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
449 """Make a RUN collection name appropriate for inserting curated
450 calibration datasets with the given ``CALIBDATE`` metadata value.
452 Parameters
453 ----------
454 calibDate : `str`
455 The ``CALIBDATE`` metadata value.
456 *labels : `str`
457 Strings to be included in the collection name (before
458 ``calibDate``, but after all other terms), using the default
459 delimiter for collection names. Usually this is the name of the
460 ticket on which the calibration collection is being created.
462 Returns
463 -------
464 name : `str`
465 Run collection name.
466 """
467 return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))
469 def makeCalibrationCollectionName(self, *labels: str) -> str:
470 """Make a CALIBRATION collection name appropriate for associating
471 calibration datasets with validity ranges.
473 Parameters
474 ----------
475 *labels : `str`
476 Strings to be appended to the base name, using the default
477 delimiter for collection names. Usually this is the name of the
478 ticket on which the calibration collection is being created.
480 Returns
481 -------
482 name : `str`
483 Calibration collection name.
484 """
485 return self.makeCollectionName("calib", *labels)
487 @staticmethod
488 def makeRefCatCollectionName(*labels: str) -> str:
489 """Return a global (not instrument-specific) name for a collection that
490 holds reference catalogs.
492 With no arguments, this returns the name of the collection that holds
493 all reference catalogs (usually a ``CHAINED`` collection, at least in
494 long-lived repos that may contain more than one reference catalog).
496 Parameters
497 ----------
498 *labels : `str`
499 Strings to be added to the global collection name, in order to
500 define a collection name for one or more reference catalogs being
501 ingested at the same time.
503 Returns
504 -------
505 name : `str`
506 Collection name.
508 Notes
509 -----
510 This is a ``staticmethod``, not a ``classmethod``, because it should
511 be the same for all instruments.
512 """
513 return "/".join(("refcats",) + labels)
515 def makeUmbrellaCollectionName(self) -> str:
516 """Return the name of the umbrella ``CHAINED`` collection for this
517 instrument that combines all standard recommended input collections.
519 This method should almost never be overridden by derived classes.
521 Returns
522 -------
523 name : `str`
524 Name for the umbrella collection.
525 """
526 return self.makeCollectionName("defaults")
528 def makeCollectionName(self, *labels: str) -> str:
529 """Get the instrument-specific collection string to use as derived
530 from the supplied labels.
532 Parameters
533 ----------
534 *labels : `str`
535 Strings to be combined with the instrument name to form a
536 collection name.
538 Returns
539 -------
540 name : `str`
541 Collection name to use that includes the instrument's recommended
542 prefix.
543 """
544 return "/".join((self.collection_prefix,) + labels)
    @staticmethod
    def make_dimension_packer_config_field(
        doc: str = (
            "How to pack visit+detector or exposure+detector data IDs into integers. "
            "The default (None) is to delegate to the Instrument class for which "
            "registered implementation to use (but still use the nested configuration "
            "for that implementation)."
        ),
    ) -> RegistryField:
        """Make an `lsst.pex.config.Field` that can be used to configure how
        data IDs for this instrument are packed.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.

        Returns
        -------
        field : `lsst.pex.config.RegistryField`
            A config field for which calling ``apply`` on the instance
            attribute constructs an `lsst.daf.butler.DimensionPacker` that
            defaults to the appropriate one for this instrument.

        Notes
        -----
        This method is expected to be used whenever code requires a single
        integer that represents the combination of a detector and either a
        visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
        class and its helper configs provide a simpler high-level interface
        that should be used instead of calling this method directly.

        This system is designed to work best when the configuration for the ID
        packer is not overridden at all, allowing the appropriate instrument
        class to determine the behavior for each data ID encountered. When the
        configuration does need to be modified (most often when the scheme for
        packing an instrument's data IDs is undergoing an upgrade), it is
        important to ensure the overrides are only applied to data IDs with the
        desired instrument value.

        Unit tests of code that use a field produced by this method will often
        want to explicitly set the packer to "observation" and manually set
        its ``n_detectors`` and ``n_observations`` fields; this will make it
        unnecessary for tests to provide expanded data IDs.
        """
        # The control flow here bounces around a bit when this RegistryField's
        # apply() method is called, so it merits a thorough walkthrough
        # somewhere, and that might as well be here:
        #
        # - If the config field's name is not `None`, that kind of packer is
        #   constructed and returned with the arguments to `apply`, in just the
        #   way it works with most RegistryFields or ConfigurableFields. But
        #   this is expected to be rare.
        #
        # - If the config fields' name is `None`, the `apply` method (which
        #   actually lives on the `pex.config.RegistryInstanceDict` class,
        #   since `RegistryField` is a descriptor), calls
        #   `_make_default_dimension_packer_dispatch` (which is final, and
        #   hence the base class implementation just below is the only one).
        #
        # - `_make_default_dimension_packer_dispatch` instantiates an
        #   `Instrument` instance of the type pointed at by the data ID (i.e.
        #   calling `Instrument.from_data_id`), then calls
        #   `_make_default_dimension_packer` on that.
        #
        # - The default implementation of `_make_default_dimension_packer` here
        #   in the base class picks the "observation" dimension packer, so if
        #   it's not overridden by a derived class everything proceeds as if
        #   the config field's name was set to that. Note that this sets which
        #   item in the registry is used, but it still pays attention to the
        #   configuration for that entry in the registry field.
        #
        # - A subclass implementation of `_make_default_dimension_packer` will
        #   take precedence over the base class, but it's expected that these
        #   will usually just delegate back to ``super()`` while changing the
        #   ``default`` argument to something other than "observation". Once
        #   again, this will control which packer entry in the registry is used
        #   but the result will still reflect the configuration for that packer
        #   in the registry field.
        #
        return observation_packer_registry.makeField(
            doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
        )
630 @staticmethod
631 @final
632 def make_default_dimension_packer(
633 data_id: DataCoordinate, is_exposure: bool | None = None
634 ) -> DimensionPacker:
635 """Return the default dimension packer for the given data ID.
637 Parameters
638 ----------
639 data_id : `lsst.daf.butler.DataCoordinate`
640 Data ID that identifies at least the ``instrument`` dimension. Must
641 have dimension records attached.
642 is_exposure : `bool`, optional
643 If `False`, construct a packer for visit+detector data IDs. If
644 `True`, construct a packer for exposure+detector data IDs. If
645 `None`, this is determined based on whether ``visit`` or
646 ``exposure`` is present in ``data_id``, with ``visit`` checked
647 first and hence used if both are present.
649 Returns
650 -------
651 packer : `lsst.daf.butler.DimensionPacker`
652 Object that packs {visit, detector} or {exposure, detector} data
653 IDs into integers.
655 Notes
656 -----
657 When using a dimension packer in task code, using
658 `make_dimension_packer_config_field` to make the packing algorithm
659 configurable is preferred over this method.
661 When obtaining a dimension packer to unpack IDs that were packed by
662 task code, it is similarly preferable to load the configuration for
663 that task and the existing packer configuration field there, to ensure
664 any config overrides are respected. That is sometimes quite difficult,
665 however, and since config overrides for dimension packers are expected
666 to be exceedingly rare, using this simpler method will almost always
667 work.
668 """
670 class _DummyConfig(Config):
671 packer = Instrument.make_dimension_packer_config_field()
673 config = _DummyConfig()
675 return config.packer.apply(data_id, is_exposure=is_exposure) # type: ignore
677 @staticmethod
678 @final
679 def _make_default_dimension_packer_dispatch(
680 config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
681 ) -> DimensionPacker:
682 """Dispatch method used to invoke `_make_dimension_packer`.
684 This method constructs the appropriate `Instrument` subclass from
685 config and then calls its `_make_default_dimension_packer`.
686 It is called when (as usual) the field returned by
687 `make_dimension_packer_config_field` is left to its default selection
688 of `None`.
690 All arguments and return values are the same as
691 `_make_default_dimension_packer.`
692 """
693 instrument = Instrument.from_data_id(data_id)
694 return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
696 def _make_default_dimension_packer(
697 self,
698 config_dict: Any,
699 data_id: DataCoordinate,
700 is_exposure: bool | None = None,
701 default: str = "observation",
702 ) -> DimensionPacker:
703 """Construct return the default dimension packer for this instrument.
705 This method is a protected hook for subclasses to override the behavior
706 of `make_dimension_packer_config_field` when the packer is not selected
707 explicitly via configuration.
709 Parameters
710 ----------
711 config_dict
712 Mapping attribute of a `lsst.pex.config.Config` instance that
713 corresponds to a field created by `make_dimension_packer_config`
714 (the actual type of this object is a `lsst.pex.config`
715 implementation detail).
716 data_id : `lsst.daf.butler.DataCoordinate`
717 Data ID that identifies at least the ``instrument`` dimension. For
718 most configurations this must have dimension records attached.
719 is_exposure : `bool`, optional
720 If `False`, construct a packer for visit+detector data IDs. If
721 `True`, construct a packer for exposure+detector data IDs. If
722 `None`, this is determined based on whether ``visit`` or
723 ``exposure`` is present in ``data_id``, with ``visit`` checked
724 first and hence used if both are present.
725 default : `str`, optional
726 Registered name of the dimension packer to select when the
727 configured packer is `None` (as is usually the case). This is
728 intended primarily for derived classes delegating to `super` in
729 reimplementations of this method.
731 Returns
732 -------
733 packer : `lsst.daf.butler.DimensionPacker`
734 Object that packs {visit, detector} or {exposure, detector} data
735 IDs into integers.
736 """
737 return config_dict.apply_with(default, data_id, is_exposure=is_exposure)