Coverage for python/lsst/pipe/base/_instrument.py: 52% (125 statements), coverage.py v7.3.0, created at 2023-08-23 10:31 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Instrument",)

import contextlib
import datetime
import os.path
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, cast, final

from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
from lsst.daf.butler.registry import DataIdError
from lsst.pex.config import Config, RegistryField
from lsst.utils import doImportType

from ._observation_dimension_packer import observation_packer_registry

if TYPE_CHECKING:
    from lsst.daf.butler import Registry


class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature as
    the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: str | None = None
    """Instrument-specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage class
    name. If `None` the ingest system will use its default definition."""

    def __init__(self, collection_prefix: str | None = None):
        if collection_prefix is None:
            collection_prefix = self.getName()
        self.collection_prefix = collection_prefix

    @classmethod
    @abstractmethod
    def getName(cls) -> str:
        """Return the short (dimension) name for this instrument.

        This is not (in general) the same as the class name - it's what is used
        as the value of the "instrument" field in data IDs, and is usually an
        abbreviation of the full name.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, registry: Registry, *, update: bool = False) -> None:
95 """Insert instrument, and other relevant records into `Registry`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry client for the data repository to modify.
        update : `bool`, optional
            If `True` (`False` is default), update existing records if they
            differ from the new ones.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if any existing record has the same key but a different
            definition from the one being registered.

        Notes
        -----
        New records can always be added by calling this method multiple times,
        as long as no existing records have changed (if existing records have
        changed, ``update=True`` must be used). Old records can never be
        removed by this method.

        Implementations should guarantee that registration is atomic (the
        registry should not be modified if any error occurs) and idempotent at
        the level of individual dimension entries; new detectors and filters
        should be added, but changes to any existing record should not be.
        This can generally be achieved via a block like

        .. code-block:: python

            with registry.transaction():
                registry.syncDimensionData("instrument", ...)
                registry.syncDimensionData("detector", ...)
                self.registerFilters(registry)

        """
        raise NotImplementedError()
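
    # A hedged sketch of a concrete subclass following the pattern above;
    # the "MyCam" name, the record fields, and the detector count are
    # hypothetical, not part of this module:
    #
    #     class MyCam(Instrument):
    #         @classmethod
    #         def getName(cls):
    #             return "MyCam"
    #
    #         def register(self, registry, *, update=False):
    #             with registry.transaction():
    #                 registry.syncDimensionData(
    #                     "instrument",
    #                     {"name": self.getName(), "detector_max": 4,
    #                      "class_name": "mypkg.MyCam"},
    #                     update=update,
    #                 )
    #                 for det_id in range(4):
    #                     registry.syncDimensionData(
    #                         "detector",
    #                         {"instrument": self.getName(), "id": det_id,
    #                          "full_name": f"det{det_id}"},
    #                         update=update,
    #                     )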

    @staticmethod
    def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument:
        """Given an instrument name and a butler registry, retrieve a
        corresponding instantiated instrument object.

        Parameters
        ----------
        name : `str`
            Name of the instrument (must match the return value of `getName`).
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name. This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient to
            distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            An instance of the relevant `Instrument`.

        Notes
        -----
        The instrument must be registered in the corresponding butler.

        Raises
        ------
        LookupError
            Raised if the instrument is not known to the supplied registry.
        ModuleNotFoundError
            Raised if the class could not be imported. This could mean
            that the relevant obs package has not been setup.
        TypeError
            Raised if the class name retrieved is not a string or the imported
            symbol is not an `Instrument` subclass.
        """
        try:
            records = list(registry.queryDimensionRecords("instrument", instrument=name))
        except DataIdError:
            records = None
        if not records:
            raise LookupError(f"No registered instrument with name '{name}'.")
        cls_name = records[0].class_name
        if not isinstance(cls_name, str):
            raise TypeError(
                f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
            )
        return Instrument._from_cls_name(cls_name, collection_prefix)
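
    # A minimal usage sketch, assuming a butler repository in which an
    # instrument named "HSC" (illustrative only) has been registered:
    #
    #     instrument = Instrument.fromName("HSC", butler.registry)
    #     assert instrument.getName() == "HSC"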

    @staticmethod
    def from_string(
        name: str, registry: Registry | None = None, collection_prefix: str | None = None
    ) -> Instrument:
        """Return an instance from the short name or class name.

        If the instrument name is not qualified (does not contain a '.') and a
        butler registry is provided, this will attempt to load the instrument
        using `Instrument.fromName()`. Otherwise the instrument will be
        imported and instantiated.

        Parameters
        ----------
        name : `str`
            The name or fully-qualified class name of an instrument.
        registry : `lsst.daf.butler.Registry`, optional
            Butler registry to query to find information about the instrument,
            by default `None`.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name. This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient
            to distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            The instantiated instrument.

        Raises
        ------
        RuntimeError
            Raised if the instrument can not be imported, instantiated, or
            obtained from the registry.
        TypeError
            Raised if the instrument is not a subclass of
            `~lsst.pipe.base.Instrument`.

        See Also
        --------
        Instrument.fromName
        """
        if "." not in name and registry is not None:
            try:
                instr = Instrument.fromName(name, registry, collection_prefix=collection_prefix)
            except Exception as err:
                raise RuntimeError(
                    f"Could not get instrument from name: {name}. Failed with exception: {err}"
                ) from err
        else:
            try:
                instr_class = doImportType(name)
            except Exception as err:
                raise RuntimeError(
                    f"Could not import instrument: {name}. Failed with exception: {err}"
                ) from err
            instr = instr_class(collection_prefix=collection_prefix)
        if not isinstance(instr, Instrument):
            raise TypeError(f"{name} is not an Instrument subclass.")
        return instr
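
    # A hedged sketch of the two lookup paths (names are illustrative):
    #
    #     # Short name: resolved through the registry via `fromName`.
    #     instr = Instrument.from_string("HSC", butler.registry)
    #
    #     # Fully-qualified class name: imported and instantiated directly,
    #     # with no registry needed (hypothetical module path).
    #     instr = Instrument.from_string("lsst.obs.subaru.HyperSuprimeCam")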

    @staticmethod
    def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument:
        """Instantiate an `Instrument` object from a fully-expanded data ID.

        Parameters
        ----------
        data_id : `~lsst.daf.butler.DataCoordinate`
            Expanded data ID that includes the instrument dimension.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name. This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient to
            distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            An instance of the relevant `Instrument`.

        Raises
        ------
        TypeError
            Raised if the class name retrieved is not a string or the imported
            symbol is not an `Instrument` subclass.
        """
        return Instrument._from_cls_name(
            cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
        )
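
    # A minimal sketch, assuming a registry from which an expanded data ID
    # (with dimension records attached) can be obtained:
    #
    #     data_id = butler.registry.expandDataId(instrument="HSC")
    #     instrument = Instrument.from_data_id(data_id)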

    @staticmethod
    def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instrument:
        """Instantiate an `Instrument` object from a fully-qualified type
        name.

        This just provides common error-handling for `fromName` and
        `from_data_id`.

        Parameters
        ----------
        cls_name : `str`
            Fully-qualified name of the type.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name. This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient to
            distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            An instance of the relevant `Instrument`.

        Raises
        ------
        TypeError
            Raised if the class name retrieved is not a string or the imported
            symbol is not an `Instrument` subclass.
        """
        instrument_cls: type = doImportType(cls_name)
        if not issubclass(instrument_cls, Instrument):
            raise TypeError(
                f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
            )
        return instrument_cls(collection_prefix=collection_prefix)

    @staticmethod
    def importAll(registry: Registry) -> None:
        """Import all the instruments known to this registry.

        This will ensure that all metadata translators have been registered.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.

        Notes
        -----
        It is allowed for a particular instrument class to fail on import.
        This might simply indicate that a particular obs package has
        not been setup.
        """
        records = list(registry.queryDimensionRecords("instrument"))
        for record in records:
            cls = record.class_name
            with contextlib.suppress(Exception):
                doImportType(cls)

    @abstractmethod
    def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
        """Return the Formatter class that should be used to read a particular
        raw file.

        Parameters
        ----------
        dataId : `DataId`
            Dimension-based ID for the raw file or files being ingested.

        Returns
        -------
        formatter : `lsst.daf.butler.Formatter` class
            Class to be used that reads the file into the correct
            Python object for the raw data.
        """
        raise NotImplementedError()
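
    # A hedged sketch of a subclass implementation; ``MyRawFormatter`` stands
    # in for a hypothetical `lsst.daf.butler.Formatter` subclass:
    #
    #     def getRawFormatter(self, dataId):
    #         # Some cameras choose a formatter per detector; returning a
    #         # single class suffices when all raw files share one format.
    #         return MyRawFormatter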

    def applyConfigOverrides(self, name: str, config: Config) -> None:
        """Apply instrument-specific overrides for a task config.

        Parameters
        ----------
        name : `str`
            Name of the object being configured; typically the _DefaultName
            of a Task.
        config : `lsst.pex.config.Config`
            Config instance to which overrides should be applied.
        """
        for root in self.configPaths:
            path = os.path.join(root, f"{name}.py")
            if os.path.exists(path):
                config.load(path)
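
    # A minimal sketch of the lookup this method performs, assuming
    # ``configPaths = ("/path/to/obs_mycam/config",)`` (hypothetical path):
    # configuring a task whose ``_DefaultName`` is "isr" loads
    # ``/path/to/obs_mycam/config/isr.py`` if that file exists:
    #
    #     instrument.applyConfigOverrides("isr", isr_config)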

    @staticmethod
    def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
        """Format a timestamp for use in a collection name.

        Parameters
        ----------
        timestamp : `str` or `datetime.datetime`
            Timestamp to format. May be a date or datetime string in extended
            ISO format (assumed UTC), with or without a timezone specifier, a
            datetime string in basic ISO format with a timezone specifier, a
            naive `datetime.datetime` instance (assumed UTC) or a
            timezone-aware `datetime.datetime` instance (converted to UTC).
            This is intended to cover all forms that string ``CALIBDATE``
            metadata values have taken in the past, as well as the format this
            method itself writes out (to enable round-tripping).

        Returns
        -------
        formatted : `str`
            Standardized string form for the timestamp.
        """
        if isinstance(timestamp, str):
            if "-" in timestamp:
                # extended ISO format, with - and : delimiters
                timestamp = datetime.datetime.fromisoformat(timestamp)
            else:
                # basic ISO format, with no delimiters (what this method
                # returns)
                timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
        if not isinstance(timestamp, datetime.datetime):
            raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
        if timestamp.tzinfo is not None:
            timestamp = timestamp.astimezone(datetime.timezone.utc)
        return f"{timestamp:%Y%m%dT%H%M%S}Z"

    @staticmethod
    def makeCollectionTimestamp() -> str:
        """Create a timestamp string for use in a collection name from the
        current time.

        Returns
        -------
        formatted : `str`
            Standardized string form of the current time.
        """
        return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))

    def makeDefaultRawIngestRunName(self) -> str:
        """Make the default instrument-specific run collection string for raw
        data ingest.

        Returns
        -------
        coll : `str`
            Run collection name to be used as the default for ingestion of
            raws.
        """
        return self.makeCollectionName("raw", "all")

    def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
        """Make a RUN collection name appropriate for inserting calibration
        datasets whose validity ranges are unbounded.

        Parameters
        ----------
        *labels : `str`
            Extra strings to be included in the base name, using the default
            delimiter for collection names. Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Run collection name.
        """
        return self.makeCollectionName("calib", *labels, "unbounded")

    def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
        """Make a RUN collection name appropriate for inserting curated
        calibration datasets with the given ``CALIBDATE`` metadata value.

        Parameters
        ----------
        calibDate : `str`
            The ``CALIBDATE`` metadata value.
        *labels : `str`
            Strings to be included in the collection name (before
            ``calibDate``, but after all other terms), using the default
            delimiter for collection names. Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Run collection name.
        """
        return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))

    def makeCalibrationCollectionName(self, *labels: str) -> str:
        """Make a CALIBRATION collection name appropriate for associating
        calibration datasets with validity ranges.

        Parameters
        ----------
        *labels : `str`
            Strings to be appended to the base name, using the default
            delimiter for collection names. Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Calibration collection name.
        """
        return self.makeCollectionName("calib", *labels)

    @staticmethod
    def makeRefCatCollectionName(*labels: str) -> str:
        """Return a global (not instrument-specific) name for a collection that
        holds reference catalogs.

        With no arguments, this returns the name of the collection that holds
        all reference catalogs (usually a ``CHAINED`` collection, at least in
        long-lived repos that may contain more than one reference catalog).

        Parameters
        ----------
        *labels : `str`
            Strings to be added to the global collection name, in order to
            define a collection name for one or more reference catalogs being
            ingested at the same time.

        Returns
        -------
        name : `str`
            Collection name.

        Notes
        -----
        This is a ``staticmethod``, not a ``classmethod``, because it should
        be the same for all instruments.
        """
        return "/".join(("refcats",) + labels)

    def makeUmbrellaCollectionName(self) -> str:
        """Return the name of the umbrella ``CHAINED`` collection for this
        instrument that combines all standard recommended input collections.

        This method should almost never be overridden by derived classes.

        Returns
        -------
        name : `str`
            Name for the umbrella collection.
        """
        return self.makeCollectionName("defaults")

    def makeCollectionName(self, *labels: str) -> str:
        """Get the instrument-specific collection string to use as derived
        from the supplied labels.

        Parameters
        ----------
        *labels : `str`
            Strings to be combined with the instrument name to form a
            collection name.

        Returns
        -------
        name : `str`
            Collection name to use that includes the instrument's recommended
            prefix.
        """
        return "/".join((self.collection_prefix,) + labels)

    @staticmethod
    def make_dimension_packer_config_field(
        doc: str = (
            "How to pack visit+detector or exposure+detector data IDs into integers. "
            "The default (None) is to delegate to the Instrument class for which "
            "registered implementation to use (but still use the nested configuration "
            "for that implementation)."
        ),
    ) -> RegistryField:
        """Make an `lsst.pex.config.Field` that can be used to configure how
        data IDs for this instrument are packed.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.

        Returns
        -------
        field : `lsst.pex.config.RegistryField`
            A config field for which calling ``apply`` on the instance
            attribute constructs an `lsst.daf.butler.DimensionPacker` that
            defaults to the appropriate one for this instrument.

        Notes
        -----
        This method is expected to be used whenever code requires a single
        integer that represents the combination of a detector and either a
        visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
        class and its helper configs provide a simpler high-level interface
        that should be used instead of calling this method directly.

        This system is designed to work best when the configuration for the ID
        packer is not overridden at all, allowing the appropriate instrument
        class to determine the behavior for each data ID encountered. When the
        configuration does need to be modified (most often when the scheme for
        packing an instrument's data IDs is undergoing an upgrade), it is
        important to ensure the overrides are only applied to data IDs with the
        desired instrument value.

        Unit tests of code that use a field produced by this method will often
        want to explicitly set the packer to "observation" and manually set
        its ``n_detectors`` and ``n_observations`` fields; this will make it
        unnecessary for tests to provide expanded data IDs.
        """
        # The control flow here bounces around a bit when this RegistryField's
        # apply() method is called, so it merits a thorough walkthrough
        # somewhere, and that might as well be here:
        #
        # - If the config field's name is not `None`, that kind of packer is
        #   constructed and returned with the arguments to `apply`, in just the
        #   way it works with most RegistryFields or ConfigurableFields. But
        #   this is expected to be rare.
        #
        # - If the config field's name is `None`, the `apply` method (which
        #   actually lives on the `pex.config.RegistryInstanceDict` class,
        #   since `RegistryField` is a descriptor), calls
        #   `_make_default_dimension_packer_dispatch` (which is final, and
        #   hence the base class implementation just below is the only one).
        #
        # - `_make_default_dimension_packer_dispatch` instantiates an
        #   `Instrument` instance of the type pointed at by the data ID (i.e.
        #   calling `Instrument.from_data_id`), then calls
        #   `_make_default_dimension_packer` on that.
        #
        # - The default implementation of `_make_default_dimension_packer` here
        #   in the base class picks the "observation" dimension packer, so if
        #   it's not overridden by a derived class everything proceeds as if
        #   the config field's name was set to that. Note that this sets which
        #   item in the registry is used, but it still pays attention to the
        #   configuration for that entry in the registry field.
        #
        # - A subclass implementation of `_make_default_dimension_packer` will
        #   take precedence over the base class, but it's expected that these
        #   will usually just delegate back to ``super()`` while changing the
        #   ``default`` argument to something other than "observation". Once
        #   again, this will control which packer entry in the registry is used
        #   but the result will still reflect the configuration for that packer
        #   in the registry field.
        #
        return observation_packer_registry.makeField(
            doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
        )
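
    # A hedged sketch of embedding this field in a task config and applying
    # it; ``MyTaskConfig`` is hypothetical, and the "observation" packer with
    # its ``n_detectors``/``n_observations`` fields follows the unit-test
    # advice in the docstring above:
    #
    #     class MyTaskConfig(Config):
    #         id_packer = Instrument.make_dimension_packer_config_field()
    #
    #     config = MyTaskConfig()
    #     # In tests, select the packer explicitly so expanded data IDs are
    #     # not required.
    #     config.id_packer.name = "observation"
    #     config.id_packer["observation"].n_detectors = 200
    #     config.id_packer["observation"].n_observations = 10000
    #     packer = config.id_packer.apply(data_id, is_exposure=False)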

    @staticmethod
    @final
    def make_default_dimension_packer(
        data_id: DataCoordinate, is_exposure: bool | None = None
    ) -> DimensionPacker:
        """Return the default dimension packer for the given data ID.

        Parameters
        ----------
        data_id : `lsst.daf.butler.DataCoordinate`
            Data ID that identifies at least the ``instrument`` dimension. Must
            have dimension records attached.
        is_exposure : `bool`, optional
            If `False`, construct a packer for visit+detector data IDs. If
            `True`, construct a packer for exposure+detector data IDs. If
            `None`, this is determined based on whether ``visit`` or
            ``exposure`` is present in ``data_id``, with ``visit`` checked
            first and hence used if both are present.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs {visit, detector} or {exposure, detector} data
            IDs into integers.

        Notes
        -----
        When using a dimension packer in task code, using
        `make_dimension_packer_config_field` to make the packing algorithm
        configurable is preferred over this method.

        When obtaining a dimension packer to unpack IDs that were packed by
        task code, it is similarly preferable to load the configuration for
        that task and use the existing packer configuration field there, to
        ensure any config overrides are respected. That is sometimes quite
        difficult, however, and since config overrides for dimension packers
        are expected to be exceedingly rare, using this simpler method will
        almost always work.
        """

        class _DummyConfig(Config):
            packer = Instrument.make_dimension_packer_config_field()

        config = _DummyConfig()

        return config.packer.apply(data_id, is_exposure=is_exposure)  # type: ignore
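
    # A minimal usage sketch, assuming ``data_id`` is an expanded data ID
    # carrying ``instrument``, ``visit`` (or ``exposure``), and ``detector``:
    #
    #     packer = Instrument.make_default_dimension_packer(data_id)
    #     packed_int = packer.pack(data_id)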

    @staticmethod
    @final
    def _make_default_dimension_packer_dispatch(
        config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
    ) -> DimensionPacker:
        """Dispatch method used to invoke `_make_default_dimension_packer`.

        This method constructs the appropriate `Instrument` subclass from
        config and then calls its `_make_default_dimension_packer`.
        It is called when (as usual) the field returned by
        `make_dimension_packer_config_field` is left to its default selection
        of `None`.

        All arguments and return values are the same as
        `_make_default_dimension_packer`.
        """
        instrument = Instrument.from_data_id(data_id)
        return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)

    def _make_default_dimension_packer(
        self,
        config_dict: Any,
        data_id: DataCoordinate,
        is_exposure: bool | None = None,
        default: str = "observation",
    ) -> DimensionPacker:
        """Construct and return the default dimension packer for this
        instrument.

        This method is a protected hook for subclasses to override the behavior
        of `make_dimension_packer_config_field` when the packer is not selected
        explicitly via configuration.

        Parameters
        ----------
        config_dict
            Mapping attribute of a `lsst.pex.config.Config` instance that
            corresponds to a field created by
            `make_dimension_packer_config_field` (the actual type of this
            object is a `lsst.pex.config` implementation detail).
        data_id : `lsst.daf.butler.DataCoordinate`
            Data ID that identifies at least the ``instrument`` dimension. For
            most configurations this must have dimension records attached.
        is_exposure : `bool`, optional
            If `False`, construct a packer for visit+detector data IDs. If
            `True`, construct a packer for exposure+detector data IDs. If
            `None`, this is determined based on whether ``visit`` or
            ``exposure`` is present in ``data_id``, with ``visit`` checked
            first and hence used if both are present.
        default : `str`, optional
            Registered name of the dimension packer to select when the
            configured packer is `None` (as is usually the case). This is
            intended primarily for derived classes delegating to `super` in
            reimplementations of this method.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs {visit, detector} or {exposure, detector} data
            IDs into integers.
        """
        return config_dict.apply_with(default, data_id, is_exposure=is_exposure)
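
    # A hedged sketch of a subclass changing the default packer while still
    # honoring the registry field's configuration; "my_packer" stands in for
    # a hypothetical entry in ``observation_packer_registry``:
    #
    #     def _make_default_dimension_packer(
    #         self, config_dict, data_id, is_exposure=None, default="observation"
    #     ):
    #         return super()._make_default_dimension_packer(
    #             config_dict, data_id, is_exposure=is_exposure, default="my_packer"
    #         )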