Coverage for python/lsst/pipe/base/_instrument.py: 38%
124 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-17 02:45 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Instrument",)
26import datetime
27import os.path
28from abc import ABCMeta, abstractmethod
29from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union, cast, final
31from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
32from lsst.daf.butler.registry import DataIdError
33from lsst.pex.config import RegistryField
34from lsst.utils import doImportType
36from ._observation_dimension_packer import observation_packer_registry
38if TYPE_CHECKING: 38 ↛ 39line 38 didn't jump to line 39, because the condition on line 38 was never true
39 from lsst.daf.butler import Registry
40 from lsst.pex.config import Config
class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature
    as the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: Optional[str] = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage class
    name. If `None` the ingest system will use its default definition."""
76 def __init__(self, collection_prefix: Optional[str] = None):
77 if collection_prefix is None:
78 collection_prefix = self.getName()
79 self.collection_prefix = collection_prefix
    @classmethod
    @abstractmethod
    def getName(cls) -> str:
        """Return the short (dimension) name for this instrument.

        This is not (in general) the same as the class name - it's what is used
        as the value of the "instrument" field in data IDs, and is usually an
        abbreviation of the full name.

        Returns
        -------
        name : `str`
            Short instrument name.
        """
        raise NotImplementedError()
    @abstractmethod
    def register(self, registry: Registry, *, update: bool = False) -> None:
        """Insert instrument, and other relevant records into `Registry`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry client for the data repository to modify.
        update : `bool`, optional
            If `True` (`False` is default), update existing records if they
            differ from the new ones.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if any existing record has the same key but a different
            definition as one being registered.

        Notes
        -----
        New records can always be added by calling this method multiple times,
        as long as no existing records have changed (if existing records have
        changed, ``update=True`` must be used). Old records can never be
        removed by this method.

        Implementations should guarantee that registration is atomic (the
        registry should not be modified if any error occurs) and idempotent at
        the level of individual dimension entries; new detectors and filters
        should be added, but changes to any existing record should not be.
        This can generally be achieved via a block like

        .. code-block:: python

            with registry.transaction():
                registry.syncDimensionData("instrument", ...)
                registry.syncDimensionData("detector", ...)
                self.registerFilters(registry)

        """
        raise NotImplementedError()
133 @staticmethod
134 def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = None) -> Instrument:
135 """Given an instrument name and a butler registry, retrieve a
136 corresponding instantiated instrument object.
138 Parameters
139 ----------
140 name : `str`
141 Name of the instrument (must match the return value of `getName`).
142 registry : `lsst.daf.butler.Registry`
143 Butler registry to query to find the information.
144 collection_prefix : `str`, optional
145 Prefix for collection names to use instead of the intrument's own
146 name. This is primarily for use in simulated-data repositories,
147 where the instrument name may not be necessary and/or sufficient to
148 distinguish between collections.
150 Returns
151 -------
152 instrument : `Instrument`
153 An instance of the relevant `Instrument`.
155 Notes
156 -----
157 The instrument must be registered in the corresponding butler.
159 Raises
160 ------
161 LookupError
162 Raised if the instrument is not known to the supplied registry.
163 ModuleNotFoundError
164 Raised if the class could not be imported. This could mean
165 that the relevant obs package has not been setup.
166 TypeError
167 Raised if the class name retrieved is not a string or the imported
168 symbol is not an `Instrument` subclass.
169 """
170 try:
171 records = list(registry.queryDimensionRecords("instrument", instrument=name))
172 except DataIdError:
173 records = None
174 if not records:
175 raise LookupError(f"No registered instrument with name '{name}'.")
176 cls_name = records[0].class_name
177 if not isinstance(cls_name, str):
178 raise TypeError(
179 f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
180 )
181 return Instrument._from_cls_name(cls_name, collection_prefix)
183 @staticmethod
184 def from_string(
185 name: str, registry: Optional[Registry] = None, collection_prefix: Optional[str] = None
186 ) -> Instrument:
187 """Return an instance from the short name or class name.
189 If the instrument name is not qualified (does not contain a '.') and a
190 butler registry is provided, this will attempt to load the instrument
191 using `Instrument.fromName()`. Otherwise the instrument will be
192 imported and instantiated.
194 Parameters
195 ----------
196 name : `str`
197 The name or fully-qualified class name of an instrument.
198 registry : `lsst.daf.butler.Registry`, optional
199 Butler registry to query to find information about the instrument,
200 by default `None`.
201 collection_prefix : `str`, optional
202 Prefix for collection names to use instead of the intrument's own
203 name. This is primarily for use in simulated-data repositories,
204 where the instrument name may not be necessary and/or sufficient
205 to distinguish between collections.
207 Returns
208 -------
209 instrument : `Instrument`
210 The instantiated instrument.
212 Raises
213 ------
214 RuntimeError
215 Raised if the instrument can not be imported, instantiated, or
216 obtained from the registry.
217 TypeError
218 Raised if the instrument is not a subclass of
219 `~lsst.pipe.base.Instrument`.
221 See Also
222 --------
223 Instrument.fromName
224 """
225 if "." not in name and registry is not None:
226 try:
227 instr = Instrument.fromName(name, registry, collection_prefix=collection_prefix)
228 except Exception as err:
229 raise RuntimeError(
230 f"Could not get instrument from name: {name}. Failed with exception: {err}"
231 ) from err
232 else:
233 try:
234 instr_class = doImportType(name)
235 except Exception as err:
236 raise RuntimeError(
237 f"Could not import instrument: {name}. Failed with exception: {err}"
238 ) from err
239 instr = instr_class(collection_prefix=collection_prefix)
240 if not isinstance(instr, Instrument):
241 raise TypeError(f"{name} is not an Instrument subclass.")
242 return instr
244 @staticmethod
245 def from_data_id(data_id: DataCoordinate, collection_prefix: Optional[str] = None) -> Instrument:
246 """Instantiate an `Instrument` object from a fully-expanded data ID.
248 Parameters
249 ----------
250 data_id : `DataCoordinate`
251 Expanded data ID that includes the instrument dimension.
252 collection_prefix : `str`, optional
253 Prefix for collection names to use instead of the intrument's own
254 name. This is primarily for use in simulated-data repositories,
255 where the instrument name may not be necessary and/or sufficient to
256 distinguish between collections.
258 Returns
259 -------
260 instrument : `Instrument`
261 An instance of the relevant `Instrument`.
263 Raises
264 ------
265 TypeError
266 Raised if the class name retrieved is not a string or the imported
267 symbol is not an `Instrument` subclass.
268 """
269 return Instrument._from_cls_name(
270 cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
271 )
273 @staticmethod
274 def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instrument:
275 """Instantiate an `Instrument` object type name.
277 This just provides common error-handling for `fromName` and
278 `from_data_id`
280 Parameters
281 ----------
282 cls_name : `str`
283 Fully-qualified name of the type.
284 collection_prefix : `str`, optional
285 Prefix for collection names to use instead of the intrument's own
286 name. This is primarily for use in simulated-data repositories,
287 where the instrument name may not be necessary and/or sufficient to
288 distinguish between collections.
290 Returns
291 -------
292 instrument : `Instrument`
293 An instance of the relevant `Instrument`.
295 Raises
296 ------
297 TypeError
298 Raised if the class name retrieved is not a string or the imported
299 symbol is not an `Instrument` subclass.
300 """
301 instrument_cls: type = doImportType(cls_name)
302 if not issubclass(instrument_cls, Instrument):
303 raise TypeError(
304 f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
305 )
306 return instrument_cls(collection_prefix=collection_prefix)
308 @staticmethod
309 def importAll(registry: Registry) -> None:
310 """Import all the instruments known to this registry.
312 This will ensure that all metadata translators have been registered.
314 Parameters
315 ----------
316 registry : `lsst.daf.butler.Registry`
317 Butler registry to query to find the information.
319 Notes
320 -----
321 It is allowed for a particular instrument class to fail on import.
322 This might simply indicate that a particular obs package has
323 not been setup.
324 """
325 records = list(registry.queryDimensionRecords("instrument"))
326 for record in records:
327 cls = record.class_name
328 try:
329 doImportType(cls)
330 except Exception:
331 pass
    @abstractmethod
    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        """Return the Formatter class that should be used to read a particular
        raw file.

        Parameters
        ----------
        dataId : `DataId`
            Dimension-based ID for the raw file or files being ingested.

        Returns
        -------
        formatter : `lsst.daf.butler.Formatter` class
            Class to be used that reads the file into the correct
            Python object for the raw data.
        """
        raise NotImplementedError()
351 def applyConfigOverrides(self, name: str, config: Config) -> None:
352 """Apply instrument-specific overrides for a task config.
354 Parameters
355 ----------
356 name : `str`
357 Name of the object being configured; typically the _DefaultName
358 of a Task.
359 config : `lsst.pex.config.Config`
360 Config instance to which overrides should be applied.
361 """
362 for root in self.configPaths:
363 path = os.path.join(root, f"{name}.py")
364 if os.path.exists(path):
365 config.load(path)
367 @staticmethod
368 def formatCollectionTimestamp(timestamp: Union[str, datetime.datetime]) -> str:
369 """Format a timestamp for use in a collection name.
371 Parameters
372 ----------
373 timestamp : `str` or `datetime.datetime`
374 Timestamp to format. May be a date or datetime string in extended
375 ISO format (assumed UTC), with or without a timezone specifier, a
376 datetime string in basic ISO format with a timezone specifier, a
377 naive `datetime.datetime` instance (assumed UTC) or a
378 timezone-aware `datetime.datetime` instance (converted to UTC).
379 This is intended to cover all forms that string ``CALIBDATE``
380 metadata values have taken in the past, as well as the format this
381 method itself writes out (to enable round-tripping).
383 Returns
384 -------
385 formatted : `str`
386 Standardized string form for the timestamp.
387 """
388 if isinstance(timestamp, str):
389 if "-" in timestamp:
390 # extended ISO format, with - and : delimiters
391 timestamp = datetime.datetime.fromisoformat(timestamp)
392 else:
393 # basic ISO format, with no delimiters (what this method
394 # returns)
395 timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
396 if not isinstance(timestamp, datetime.datetime):
397 raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
398 if timestamp.tzinfo is not None:
399 timestamp = timestamp.astimezone(datetime.timezone.utc)
400 return f"{timestamp:%Y%m%dT%H%M%S}Z"
402 @staticmethod
403 def makeCollectionTimestamp() -> str:
404 """Create a timestamp string for use in a collection name from the
405 current time.
407 Returns
408 -------
409 formatted : `str`
410 Standardized string form of the current time.
411 """
412 return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))
414 def makeDefaultRawIngestRunName(self) -> str:
415 """Make the default instrument-specific run collection string for raw
416 data ingest.
418 Returns
419 -------
420 coll : `str`
421 Run collection name to be used as the default for ingestion of
422 raws.
423 """
424 return self.makeCollectionName("raw", "all")
426 def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
427 """Make a RUN collection name appropriate for inserting calibration
428 datasets whose validity ranges are unbounded.
430 Parameters
431 ----------
432 *labels : `str`
433 Extra strings to be included in the base name, using the default
434 delimiter for collection names. Usually this is the name of the
435 ticket on which the calibration collection is being created.
437 Returns
438 -------
439 name : `str`
440 Run collection name.
441 """
442 return self.makeCollectionName("calib", *labels, "unbounded")
444 def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
445 """Make a RUN collection name appropriate for inserting curated
446 calibration datasets with the given ``CALIBDATE`` metadata value.
448 Parameters
449 ----------
450 calibDate : `str`
451 The ``CALIBDATE`` metadata value.
452 *labels : `str`
453 Strings to be included in the collection name (before
454 ``calibDate``, but after all other terms), using the default
455 delimiter for collection names. Usually this is the name of the
456 ticket on which the calibration collection is being created.
458 Returns
459 -------
460 name : `str`
461 Run collection name.
462 """
463 return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))
465 def makeCalibrationCollectionName(self, *labels: str) -> str:
466 """Make a CALIBRATION collection name appropriate for associating
467 calibration datasets with validity ranges.
469 Parameters
470 ----------
471 *labels : `str`
472 Strings to be appended to the base name, using the default
473 delimiter for collection names. Usually this is the name of the
474 ticket on which the calibration collection is being created.
476 Returns
477 -------
478 name : `str`
479 Calibration collection name.
480 """
481 return self.makeCollectionName("calib", *labels)
483 @staticmethod
484 def makeRefCatCollectionName(*labels: str) -> str:
485 """Return a global (not instrument-specific) name for a collection that
486 holds reference catalogs.
488 With no arguments, this returns the name of the collection that holds
489 all reference catalogs (usually a ``CHAINED`` collection, at least in
490 long-lived repos that may contain more than one reference catalog).
492 Parameters
493 ----------
494 *labels : `str`
495 Strings to be added to the global collection name, in order to
496 define a collection name for one or more reference catalogs being
497 ingested at the same time.
499 Returns
500 -------
501 name : `str`
502 Collection name.
504 Notes
505 -----
506 This is a ``staticmethod``, not a ``classmethod``, because it should
507 be the same for all instruments.
508 """
509 return "/".join(("refcats",) + labels)
511 def makeUmbrellaCollectionName(self) -> str:
512 """Return the name of the umbrella ``CHAINED`` collection for this
513 instrument that combines all standard recommended input collections.
515 This method should almost never be overridden by derived classes.
517 Returns
518 -------
519 name : `str`
520 Name for the umbrella collection.
521 """
522 return self.makeCollectionName("defaults")
524 def makeCollectionName(self, *labels: str) -> str:
525 """Get the instrument-specific collection string to use as derived
526 from the supplied labels.
528 Parameters
529 ----------
530 *labels : `str`
531 Strings to be combined with the instrument name to form a
532 collection name.
534 Returns
535 -------
536 name : `str`
537 Collection name to use that includes the instrument's recommended
538 prefix.
539 """
540 return "/".join((self.collection_prefix,) + labels)
    @staticmethod
    def make_dimension_packer_config_field(
        doc: str = (
            "How to pack visit+detector or exposure+detector data IDs into integers. "
            "The default (None) is to delegate to the Instrument class for which "
            "registered implementation to use (but still use the nested configuration "
            "for that implementation)."
        ),
    ) -> RegistryField:
        """Make an `lsst.pex.config.Field` that can be used to configure how
        data IDs for this instrument are packed.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.

        Returns
        -------
        field : `lsst.pex.config.RegistryField`
            A config field for which calling ``apply`` on the instance
            attribute constructs an `lsst.daf.butler.DimensionPacker` that
            defaults to the appropriate one for this instrument.

        Notes
        -----
        This method is expected to be used whenever code requires a single
        integer that represents the combination of a detector and either a
        visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
        class and its helper configs provide a simpler high-level interface
        that should be used instead of calling this method directly.

        This system is designed to work best when the configuration for the ID
        packer is not overridden at all, allowing the appropriate instrument
        class to determine the behavior for each data ID encountered. When the
        configuration does need to be modified (most often when the scheme for
        packing an instrument's data IDs is undergoing an upgrade), it is
        important to ensure the overrides are only applied to data IDs with the
        desired instrument value.

        Unit tests of code that use a field produced by this method will often
        want to explicitly set the packer to "observation" and manually set
        its ``n_detectors`` and ``n_observations`` fields; this will make it
        unnecessary for tests to provide expanded data IDs.
        """
        # The control flow here bounces around a bit when this RegistryField's
        # apply() method is called, so it merits a thorough walkthrough
        # somewhere, and that might as well be here:
        #
        # - If the config field's name is not `None`, that kind of packer is
        #   constructed and returned with the arguments to `apply`, in just the
        #   way it works with most RegistryFields or ConfigurableFields. But
        #   this is expected to be rare.
        #
        # - If the config fields' name is `None`, the `apply` method (which
        #   actually lives on the `pex.config.RegistryInstanceDict` class,
        #   since `RegistryField` is a descriptor), calls
        #   `_make_default_dimension_packer_dispatch` (which is final, and
        #   hence the base class implementation just below is the only one).
        #
        # - `_make_default_dimension_packer_dispatch` instantiates an
        #   `Instrument` instance of the type pointed at by the data ID (i.e.
        #   calling `Instrument.from_data_id`), then calls
        #   `_make_default_dimension_packer` on that.
        #
        # - The default implementation of `_make_default_dimension_packer` here
        #   in the base class picks the "observation" dimension packer, so if
        #   it's not overridden by a derived class everything proceeds as if
        #   the config field's name was set to that. Note that this sets which
        #   item in the registry is used, but it still pays attention to the
        #   configuration for that entry in the registry field.
        #
        # - A subclass implementation of `_make_default_dimension_packer` will
        #   take precedence over the base class, but it's expected that these
        #   will usually just delegate back to ``super()`` while changing the
        #   ``default`` argument to something other than "observation". Once
        #   again, this will control which packer entry in the registry is used
        #   but the result will still reflect the configuration for that packer
        #   in the registry field.
        #
        return observation_packer_registry.makeField(
            doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
        )
626 @staticmethod
627 @final
628 def _make_default_dimension_packer_dispatch(
629 config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
630 ) -> DimensionPacker:
631 """Dispatch method used to invoke `_make_dimension_packer`.
633 This method constructs the appropriate `Instrument` subclass from
634 config and then calls its `_make_default_dimension_packer`.
635 It is called when (as usual) the field returned by
636 `make_dimension_packer_config_field` is left to its default selection
637 of `None`.
639 All arguments and return values are the same as
640 `_make_default_dimension_packer.`
641 """
642 instrument = Instrument.from_data_id(data_id)
643 return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
645 def _make_default_dimension_packer(
646 self,
647 config_dict: Any,
648 data_id: DataCoordinate,
649 is_exposure: bool | None = None,
650 default: str = "observation",
651 ) -> DimensionPacker:
652 """Construct return the default dimension packer for this instrument.
654 This method is a protected hook for subclasses to override the behavior
655 of `make_dimension_packer_config_field` when the packer is not selected
656 explicitly via configuration.
658 Parameters
659 ----------
660 config_dict
661 Mapping attribute of a `lsst.pex.config.Config` instance that
662 corresponds to a field created by `make_dimension_packer_config`
663 (the actual type of this object is a `lsst.pex.config`
664 implementation detail).
665 data_id : `lsst.daf.butler.DataCoordinate`
666 Data ID that identifies at least the ``instrument`` dimension. For
667 most configurations this must have dimension records attached.
668 is_exposure : `bool`, optional
669 If `False`, construct a packer for visit+detector data IDs. If
670 `True`, construct a packer for exposure+detector data IDs. If
671 `None`, this is determined based on whether ``visit`` or
672 ``exposure`` is present in ``data_id``, with ``visit`` checked
673 first and hence used if both are present.
674 default : `str`, optional
675 Registered name of the dimension packer to select when the
676 configured packer is `None` (as is usually the case). This is
677 intended primarily for derived classes delegating to `super` in
678 reimplementations of this method.
680 Returns
681 -------
682 packer : `lsst.daf.butler.DimensionPacker`
683 Object that packs {visit, detector} or {exposure, detector} data
684 IDs into integers.
685 """
686 return config_dict.apply_with(default, data_id, is_exposure=is_exposure)