Coverage for python/lsst/pipe/base/_instrument.py: 39%
119 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:49 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:49 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Instrument",)
26import datetime
27import os.path
28from abc import ABCMeta, abstractmethod
29from collections.abc import Sequence
30from typing import TYPE_CHECKING, Any, cast, final
32from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
33from lsst.daf.butler.registry import DataIdError
34from lsst.pex.config import RegistryField
35from lsst.utils import doImportType
37from ._observation_dimension_packer import observation_packer_registry
39if TYPE_CHECKING:
40 from lsst.daf.butler import Registry
41 from lsst.pex.config import Config
class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories, where
        the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature as
    the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: str | None = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage class
    name. If `None` the ingest system will use its default definition."""
77 def __init__(self, collection_prefix: str | None = None):
78 if collection_prefix is None:
79 collection_prefix = self.getName()
80 self.collection_prefix = collection_prefix
    @classmethod
    @abstractmethod
    def getName(cls) -> str:
        """Return the short (dimension) name for this instrument.

        This is not (in general) the same as the class name - it's what is used
        as the value of the "instrument" field in data IDs, and is usually an
        abbreviation of the full name.

        Returns
        -------
        name : `str`
            Short name used as the ``instrument`` value in data IDs.
        """
        raise NotImplementedError()
    @abstractmethod
    def register(self, registry: Registry, *, update: bool = False) -> None:
        """Insert instrument, and other relevant records into `Registry`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry client for the data repository to modify.
        update : `bool`, optional
            If `True` (`False` is default), update existing records if they
            differ from the new ones.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if any existing record has the same key but a different
            definition as one being registered.

        Notes
        -----
        New records can always be added by calling this method multiple times,
        as long as no existing records have changed (if existing records have
        changed, ``update=True`` must be used).  Old records can never be
        removed by this method.

        Implementations should guarantee that registration is atomic (the
        registry should not be modified if any error occurs) and idempotent at
        the level of individual dimension entries; new detectors and filters
        should be added, but changes to any existing record should not be.
        This can generally be achieved via a block like

        .. code-block:: python

            with registry.transaction():
                registry.syncDimensionData("instrument", ...)
                registry.syncDimensionData("detector", ...)
                self.registerFilters(registry)

        """
        raise NotImplementedError()
134 @staticmethod
135 def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument:
136 """Given an instrument name and a butler registry, retrieve a
137 corresponding instantiated instrument object.
139 Parameters
140 ----------
141 name : `str`
142 Name of the instrument (must match the return value of `getName`).
143 registry : `lsst.daf.butler.Registry`
144 Butler registry to query to find the information.
145 collection_prefix : `str`, optional
146 Prefix for collection names to use instead of the instrument's own
147 name. This is primarily for use in simulated-data repositories,
148 where the instrument name may not be necessary and/or sufficient to
149 distinguish between collections.
151 Returns
152 -------
153 instrument : `Instrument`
154 An instance of the relevant `Instrument`.
156 Notes
157 -----
158 The instrument must be registered in the corresponding butler.
160 Raises
161 ------
162 LookupError
163 Raised if the instrument is not known to the supplied registry.
164 ModuleNotFoundError
165 Raised if the class could not be imported. This could mean
166 that the relevant obs package has not been setup.
167 TypeError
168 Raised if the class name retrieved is not a string or the imported
169 symbol is not an `Instrument` subclass.
170 """
171 try:
172 records = list(registry.queryDimensionRecords("instrument", instrument=name))
173 except DataIdError:
174 records = None
175 if not records:
176 raise LookupError(f"No registered instrument with name '{name}'.")
177 cls_name = records[0].class_name
178 if not isinstance(cls_name, str):
179 raise TypeError(
180 f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
181 )
182 return Instrument._from_cls_name(cls_name, collection_prefix)
184 @staticmethod
185 def from_string(
186 name: str, registry: Registry | None = None, collection_prefix: str | None = None
187 ) -> Instrument:
188 """Return an instance from the short name or class name.
190 If the instrument name is not qualified (does not contain a '.') and a
191 butler registry is provided, this will attempt to load the instrument
192 using `Instrument.fromName()`. Otherwise the instrument will be
193 imported and instantiated.
195 Parameters
196 ----------
197 name : `str`
198 The name or fully-qualified class name of an instrument.
199 registry : `lsst.daf.butler.Registry`, optional
200 Butler registry to query to find information about the instrument,
201 by default `None`.
202 collection_prefix : `str`, optional
203 Prefix for collection names to use instead of the instrument's own
204 name. This is primarily for use in simulated-data repositories,
205 where the instrument name may not be necessary and/or sufficient
206 to distinguish between collections.
208 Returns
209 -------
210 instrument : `Instrument`
211 The instantiated instrument.
213 Raises
214 ------
215 RuntimeError
216 Raised if the instrument can not be imported, instantiated, or
217 obtained from the registry.
218 TypeError
219 Raised if the instrument is not a subclass of
220 `~lsst.pipe.base.Instrument`.
222 See Also
223 --------
224 Instrument.fromName
225 """
226 if "." not in name and registry is not None:
227 try:
228 instr = Instrument.fromName(name, registry, collection_prefix=collection_prefix)
229 except Exception as err:
230 raise RuntimeError(
231 f"Could not get instrument from name: {name}. Failed with exception: {err}"
232 ) from err
233 else:
234 try:
235 instr_class = doImportType(name)
236 except Exception as err:
237 raise RuntimeError(
238 f"Could not import instrument: {name}. Failed with exception: {err}"
239 ) from err
240 instr = instr_class(collection_prefix=collection_prefix)
241 if not isinstance(instr, Instrument):
242 raise TypeError(f"{name} is not an Instrument subclass.")
243 return instr
245 @staticmethod
246 def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument:
247 """Instantiate an `Instrument` object from a fully-expanded data ID.
249 Parameters
250 ----------
251 data_id : `~lsst.daf.butler.DataCoordinate`
252 Expanded data ID that includes the instrument dimension.
253 collection_prefix : `str`, optional
254 Prefix for collection names to use instead of the instrument's own
255 name. This is primarily for use in simulated-data repositories,
256 where the instrument name may not be necessary and/or sufficient to
257 distinguish between collections.
259 Returns
260 -------
261 instrument : `Instrument`
262 An instance of the relevant `Instrument`.
264 Raises
265 ------
266 TypeError
267 Raised if the class name retrieved is not a string or the imported
268 symbol is not an `Instrument` subclass.
269 """
270 return Instrument._from_cls_name(
271 cast(DimensionRecord, data_id.records["instrument"]).class_name, collection_prefix
272 )
274 @staticmethod
275 def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instrument:
276 """Instantiate an `Instrument` object type name.
278 This just provides common error-handling for `fromName` and
279 `from_data_id`
281 Parameters
282 ----------
283 cls_name : `str`
284 Fully-qualified name of the type.
285 collection_prefix : `str`, optional
286 Prefix for collection names to use instead of the instrument's own
287 name. This is primarily for use in simulated-data repositories,
288 where the instrument name may not be necessary and/or sufficient to
289 distinguish between collections.
291 Returns
292 -------
293 instrument : `Instrument`
294 An instance of the relevant `Instrument`.
296 Raises
297 ------
298 TypeError
299 Raised if the class name retrieved is not a string or the imported
300 symbol is not an `Instrument` subclass.
301 """
302 instrument_cls: type = doImportType(cls_name)
303 if not issubclass(instrument_cls, Instrument):
304 raise TypeError(
305 f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
306 )
307 return instrument_cls(collection_prefix=collection_prefix)
309 @staticmethod
310 def importAll(registry: Registry) -> None:
311 """Import all the instruments known to this registry.
313 This will ensure that all metadata translators have been registered.
315 Parameters
316 ----------
317 registry : `lsst.daf.butler.Registry`
318 Butler registry to query to find the information.
320 Notes
321 -----
322 It is allowed for a particular instrument class to fail on import.
323 This might simply indicate that a particular obs package has
324 not been setup.
325 """
326 records = list(registry.queryDimensionRecords("instrument"))
327 for record in records:
328 cls = record.class_name
329 try:
330 doImportType(cls)
331 except Exception:
332 pass
    @abstractmethod
    def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
        """Return the Formatter class that should be used to read a particular
        raw file.

        Parameters
        ----------
        dataId : `DataId`
            Dimension-based ID for the raw file or files being ingested.

        Returns
        -------
        formatter : `lsst.daf.butler.Formatter` class
            Class to be used that reads the file into the correct
            Python object for the raw data.
        """
        raise NotImplementedError()
352 def applyConfigOverrides(self, name: str, config: Config) -> None:
353 """Apply instrument-specific overrides for a task config.
355 Parameters
356 ----------
357 name : `str`
358 Name of the object being configured; typically the _DefaultName
359 of a Task.
360 config : `lsst.pex.config.Config`
361 Config instance to which overrides should be applied.
362 """
363 for root in self.configPaths:
364 path = os.path.join(root, f"{name}.py")
365 if os.path.exists(path):
366 config.load(path)
368 @staticmethod
369 def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
370 """Format a timestamp for use in a collection name.
372 Parameters
373 ----------
374 timestamp : `str` or `datetime.datetime`
375 Timestamp to format. May be a date or datetime string in extended
376 ISO format (assumed UTC), with or without a timezone specifier, a
377 datetime string in basic ISO format with a timezone specifier, a
378 naive `datetime.datetime` instance (assumed UTC) or a
379 timezone-aware `datetime.datetime` instance (converted to UTC).
380 This is intended to cover all forms that string ``CALIBDATE``
381 metadata values have taken in the past, as well as the format this
382 method itself writes out (to enable round-tripping).
384 Returns
385 -------
386 formatted : `str`
387 Standardized string form for the timestamp.
388 """
389 if isinstance(timestamp, str):
390 if "-" in timestamp:
391 # extended ISO format, with - and : delimiters
392 timestamp = datetime.datetime.fromisoformat(timestamp)
393 else:
394 # basic ISO format, with no delimiters (what this method
395 # returns)
396 timestamp = datetime.datetime.strptime(timestamp, "%Y%m%dT%H%M%S%z")
397 if not isinstance(timestamp, datetime.datetime):
398 raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
399 if timestamp.tzinfo is not None:
400 timestamp = timestamp.astimezone(datetime.timezone.utc)
401 return f"{timestamp:%Y%m%dT%H%M%S}Z"
403 @staticmethod
404 def makeCollectionTimestamp() -> str:
405 """Create a timestamp string for use in a collection name from the
406 current time.
408 Returns
409 -------
410 formatted : `str`
411 Standardized string form of the current time.
412 """
413 return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))
415 def makeDefaultRawIngestRunName(self) -> str:
416 """Make the default instrument-specific run collection string for raw
417 data ingest.
419 Returns
420 -------
421 coll : `str`
422 Run collection name to be used as the default for ingestion of
423 raws.
424 """
425 return self.makeCollectionName("raw", "all")
427 def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
428 """Make a RUN collection name appropriate for inserting calibration
429 datasets whose validity ranges are unbounded.
431 Parameters
432 ----------
433 *labels : `str`
434 Extra strings to be included in the base name, using the default
435 delimiter for collection names. Usually this is the name of the
436 ticket on which the calibration collection is being created.
438 Returns
439 -------
440 name : `str`
441 Run collection name.
442 """
443 return self.makeCollectionName("calib", *labels, "unbounded")
445 def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
446 """Make a RUN collection name appropriate for inserting curated
447 calibration datasets with the given ``CALIBDATE`` metadata value.
449 Parameters
450 ----------
451 calibDate : `str`
452 The ``CALIBDATE`` metadata value.
453 *labels : `str`
454 Strings to be included in the collection name (before
455 ``calibDate``, but after all other terms), using the default
456 delimiter for collection names. Usually this is the name of the
457 ticket on which the calibration collection is being created.
459 Returns
460 -------
461 name : `str`
462 Run collection name.
463 """
464 return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))
466 def makeCalibrationCollectionName(self, *labels: str) -> str:
467 """Make a CALIBRATION collection name appropriate for associating
468 calibration datasets with validity ranges.
470 Parameters
471 ----------
472 *labels : `str`
473 Strings to be appended to the base name, using the default
474 delimiter for collection names. Usually this is the name of the
475 ticket on which the calibration collection is being created.
477 Returns
478 -------
479 name : `str`
480 Calibration collection name.
481 """
482 return self.makeCollectionName("calib", *labels)
484 @staticmethod
485 def makeRefCatCollectionName(*labels: str) -> str:
486 """Return a global (not instrument-specific) name for a collection that
487 holds reference catalogs.
489 With no arguments, this returns the name of the collection that holds
490 all reference catalogs (usually a ``CHAINED`` collection, at least in
491 long-lived repos that may contain more than one reference catalog).
493 Parameters
494 ----------
495 *labels : `str`
496 Strings to be added to the global collection name, in order to
497 define a collection name for one or more reference catalogs being
498 ingested at the same time.
500 Returns
501 -------
502 name : `str`
503 Collection name.
505 Notes
506 -----
507 This is a ``staticmethod``, not a ``classmethod``, because it should
508 be the same for all instruments.
509 """
510 return "/".join(("refcats",) + labels)
512 def makeUmbrellaCollectionName(self) -> str:
513 """Return the name of the umbrella ``CHAINED`` collection for this
514 instrument that combines all standard recommended input collections.
516 This method should almost never be overridden by derived classes.
518 Returns
519 -------
520 name : `str`
521 Name for the umbrella collection.
522 """
523 return self.makeCollectionName("defaults")
525 def makeCollectionName(self, *labels: str) -> str:
526 """Get the instrument-specific collection string to use as derived
527 from the supplied labels.
529 Parameters
530 ----------
531 *labels : `str`
532 Strings to be combined with the instrument name to form a
533 collection name.
535 Returns
536 -------
537 name : `str`
538 Collection name to use that includes the instrument's recommended
539 prefix.
540 """
541 return "/".join((self.collection_prefix,) + labels)
    @staticmethod
    def make_dimension_packer_config_field(
        doc: str = (
            "How to pack visit+detector or exposure+detector data IDs into integers. "
            "The default (None) is to delegate to the Instrument class for which "
            "registered implementation to use (but still use the nested configuration "
            "for that implementation)."
        ),
    ) -> RegistryField:
        """Make an `lsst.pex.config.Field` that can be used to configure how
        data IDs for this instrument are packed.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.

        Returns
        -------
        field : `lsst.pex.config.RegistryField`
            A config field for which calling ``apply`` on the instance
            attribute constructs an `lsst.daf.butler.DimensionPacker` that
            defaults to the appropriate one for this instrument.

        Notes
        -----
        This method is expected to be used whenever code requires a single
        integer that represents the combination of a detector and either a
        visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
        class and its helper configs provide a simpler high-level interface
        that should be used instead of calling this method directly.

        This system is designed to work best when the configuration for the ID
        packer is not overridden at all, allowing the appropriate instrument
        class to determine the behavior for each data ID encountered.  When the
        configuration does need to be modified (most often when the scheme for
        packing an instrument's data IDs is undergoing an upgrade), it is
        important to ensure the overrides are only applied to data IDs with the
        desired instrument value.

        Unit tests of code that use a field produced by this method will often
        want to explicitly set the packer to "observation" and manually set
        its ``n_detectors`` and ``n_observations`` fields; this will make it
        unnecessary for tests to provide expanded data IDs.
        """
        # The control flow here bounces around a bit when this RegistryField's
        # apply() method is called, so it merits a thorough walkthrough
        # somewhere, and that might as well be here:
        #
        # - If the config field's name is not `None`, that kind of packer is
        #   constructed and returned with the arguments to `apply`, in just the
        #   way it works with most RegistryFields or ConfigurableFields.  But
        #   this is expected to be rare.
        #
        # - If the config fields' name is `None`, the `apply` method (which
        #   actually lives on the `pex.config.RegistryInstanceDict` class,
        #   since `RegistryField` is a descriptor), calls
        #   `_make_default_dimension_packer_dispatch` (which is final, and
        #   hence the base class implementation just below is the only one).
        #
        # - `_make_default_dimension_packer_dispatch` instantiates an
        #   `Instrument` instance of the type pointed at by the data ID (i.e.
        #   calling `Instrument.from_data_id`), then calls
        #   `_make_default_dimension_packer` on that.
        #
        # - The default implementation of `_make_default_dimension_packer` here
        #   in the base class picks the "observation" dimension packer, so if
        #   it's not overridden by a derived class everything proceeds as if
        #   the config field's name was set to that.  Note that this sets which
        #   item in the registry is used, but it still pays attention to the
        #   configuration for that entry in the registry field.
        #
        # - A subclass implementation of `_make_default_dimension_packer` will
        #   take precedence over the base class, but it's expected that these
        #   will usually just delegate back to ``super()`` while changing the
        #   ``default`` argument to something other than "observation".  Once
        #   again, this will control which packer entry in the registry is used
        #   but the result will still reflect the configuration for that packer
        #   in the registry field.
        #
        return observation_packer_registry.makeField(
            doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
        )
    @staticmethod
    @final
    def _make_default_dimension_packer_dispatch(
        config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
    ) -> DimensionPacker:
        """Dispatch method used to invoke `_make_dimension_packer`.

        This method constructs the appropriate `Instrument` subclass from
        config and then calls its `_make_default_dimension_packer`.
        It is called when (as usual) the field returned by
        `make_dimension_packer_config_field` is left to its default selection
        of `None`.

        All arguments and return values are the same as
        `_make_default_dimension_packer.`
        """
        # Resolve the concrete Instrument subclass from the data ID's
        # "instrument" record, then delegate to its (possibly overridden)
        # default-packer hook.
        instrument = Instrument.from_data_id(data_id)
        return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
    def _make_default_dimension_packer(
        self,
        config_dict: Any,
        data_id: DataCoordinate,
        is_exposure: bool | None = None,
        default: str = "observation",
    ) -> DimensionPacker:
        """Construct and return the default dimension packer for this
        instrument.

        This method is a protected hook for subclasses to override the behavior
        of `make_dimension_packer_config_field` when the packer is not selected
        explicitly via configuration.

        Parameters
        ----------
        config_dict
            Mapping attribute of a `lsst.pex.config.Config` instance that
            corresponds to a field created by `make_dimension_packer_config`
            (the actual type of this object is a `lsst.pex.config`
            implementation detail).
        data_id : `lsst.daf.butler.DataCoordinate`
            Data ID that identifies at least the ``instrument`` dimension.  For
            most configurations this must have dimension records attached.
        is_exposure : `bool`, optional
            If `False`, construct a packer for visit+detector data IDs.  If
            `True`, construct a packer for exposure+detector data IDs.  If
            `None`, this is determined based on whether ``visit`` or
            ``exposure`` is present in ``data_id``, with ``visit`` checked
            first and hence used if both are present.
        default : `str`, optional
            Registered name of the dimension packer to select when the
            configured packer is `None` (as is usually the case).  This is
            intended primarily for derived classes delegating to `super` in
            reimplementations of this method.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs {visit, detector} or {exposure, detector} data
            IDs into integers.
        """
        # ``apply_with`` selects the registry entry named by ``default`` while
        # still honoring any nested configuration stored for that entry.
        return config_dict.apply_with(default, data_id, is_exposure=is_exposure)