Coverage for python/lsst/obs/base/defineVisits.py: 26%
418 statements
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = [
25 "DefineVisitsConfig",
26 "DefineVisitsTask",
27 "GroupExposuresConfig",
28 "GroupExposuresTask",
29 "VisitDefinitionData",
30 "VisitSystem",
31]
33import cmath
34import dataclasses
35import enum
36import math
37import operator
38from abc import ABCMeta, abstractmethod
39from collections import defaultdict
40from collections.abc import Callable, Iterable
41from typing import Any, ClassVar, TypeVar, cast
43import lsst.geom
44from lsst.afw.cameraGeom import FOCAL_PLANE, PIXELS
45from lsst.daf.butler import Butler, DataCoordinate, DataId, DimensionRecord, Progress, Timespan
46from lsst.geom import Box2D
47from lsst.pex.config import Config, Field, makeRegistry, registerConfigurable
48from lsst.pipe.base import Task
49from lsst.sphgeom import ConvexPolygon, Region, UnitVector3d
50from lsst.utils.introspection import get_full_type_name
52from ._instrument import Instrument, loadCamera
55class VisitSystem(enum.Enum):
56 """Enumeration used to label different visit systems."""
58 ONE_TO_ONE = 0
59 """Each exposure is assigned to its own visit."""
61 BY_GROUP_METADATA = 1
62 """Visit membership is defined by the value of the group dimension or, for
63 older dimension universes, exposure.group_id."""
65 BY_SEQ_START_END = 2
66 """Visit membership is defined by the values of the ``exposure.day_obs``,
67 ``exposure.seq_start``, and ``exposure.seq_end`` values.
68 """
70 @classmethod
71 def all(cls) -> frozenset[VisitSystem]:
72 """Return a `frozenset` containing all members."""
73 return frozenset(cls.__members__.values())
75 @classmethod
76 def from_name(cls, external_name: str) -> VisitSystem:
77 """Construct the enumeration from given name."""
78 name = external_name.upper()
79 name = name.replace("-", "_")
80 try:
81 return cls.__members__[name]
82 except KeyError:
83 raise KeyError(f"Visit system named '{external_name}' not known.") from None
85 @classmethod
86 def from_names(cls, names: Iterable[str] | None) -> frozenset[VisitSystem]:
87 """Return a `frozenset` of all the visit systems matching the supplied
88 names.
90 Parameters
91 ----------
92 names : iterable of `str`, or `None`
93 Names of visit systems. Case insensitive. If `None` or empty, all
94 the visit systems are returned.
96 Returns
97 -------
98 systems : `frozenset` of `VisitSystem`
99 The matching visit systems.
100 """
101 if not names:
102 return cls.all()
104 return frozenset({cls.from_name(name) for name in names})
106 def __str__(self) -> str:
107 name = self.name.lower()
108 name = name.replace("_", "-")
109 return name
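
# Illustrative usage sketch (not part of the original module): the helpers
# above normalise hyphenated and underscored spellings to the same member,
# and ``str()`` round-trips back to the hyphenated form.
#
#     >>> VisitSystem.from_name("by-seq-start-end") is VisitSystem.BY_SEQ_START_END
#     True
#     >>> VisitSystem.from_names(None) == VisitSystem.all()
#     True
#     >>> str(VisitSystem.ONE_TO_ONE)
#     'one-to-one'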
112@dataclasses.dataclass
113class VisitDefinitionData:
114 """Struct representing a group of exposures that will be used to define a
115 visit.
116 """
118 instrument: str
119 """Name of the instrument this visit will be associated with.
120 """
122 id: int
123 """Integer ID of the visit.
125 This must be unique across all visit systems for the instrument.
126 """
128 name: str
129 """String name for the visit.
131 This must be unique across all visit systems for the instrument.
132 """
134 visit_systems: set[VisitSystem]
135 """All the visit systems associated with this visit."""
137 exposures: list[DimensionRecord] = dataclasses.field(default_factory=list)
138 """Dimension records for the exposures that are part of this visit.
139 """
142@dataclasses.dataclass
143class _VisitRecords:
144 """Struct containing the dimension records associated with a visit."""
146 visit: DimensionRecord
147 """Record for the 'visit' dimension itself.
148 """
150 visit_definition: list[DimensionRecord]
151 """Records for 'visit_definition', which relates 'visit' to 'exposure'.
152 """
154 visit_detector_region: list[DimensionRecord]
155 """Records for 'visit_detector_region', which associates the combination
156 of a 'visit' and a 'detector' with a region on the sky.
157 """
159 visit_system_membership: list[DimensionRecord]
160 """Records relating visits to an associated visit system."""
163class GroupExposuresConfig(Config):
164 """Configure exposure grouping."""
167class GroupExposuresTask(Task, metaclass=ABCMeta):
168 """Abstract base class for the subtask of `DefineVisitsTask` that is
169 responsible for grouping exposures into visits.
171 Subclasses should be registered with `GroupExposuresTask.registry` to
172 enable use by `DefineVisitsTask`, and should generally correspond to a
173 particular 'visit_system' dimension value. They are also responsible for
174 defining visit IDs and names that are unique across all visit systems in
175 use by an instrument.
177 Parameters
178 ----------
179 config : `GroupExposuresConfig`
180 Configuration information.
181 **kwargs
182 Additional keyword arguments forwarded to the `Task` constructor.
183 """
185 def __init__(self, config: GroupExposuresConfig, **kwargs: Any):
186 Task.__init__(self, config=config, **kwargs)
188 ConfigClass = GroupExposuresConfig
190 _DefaultName = "groupExposures"
192 registry = makeRegistry(
193 doc="Registry of algorithms for grouping exposures into visits.",
194 configBaseType=GroupExposuresConfig,
195 )
197 @abstractmethod
198 def find_missing(
199 self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
200 ) -> list[DimensionRecord]:
201 """Determine, if possible, which exposures might be missing.
203 Parameters
204 ----------
205 exposures : `list` of `lsst.daf.butler.DimensionRecord`
206 The exposure records to analyze.
207 registry : `lsst.daf.butler.Registry`
208 A butler registry that contains these exposure records.
210 Returns
211 -------
212 missing : `list` of `lsst.daf.butler.DimensionRecord`
213 Any exposure records present in the registry that are related to
214 the given exposures and deemed relevant, but were missing from
215 the given list.
217 Notes
218 -----
219 Only some grouping schemes are able to find missing exposures. It
220 is acceptable to return an empty list.
221 """
222 raise NotImplementedError()
224 @abstractmethod
225 def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
226 """Group the exposures in a way most natural for this visit definition.
228 Parameters
229 ----------
230 exposures : `list` of `lsst.daf.butler.DimensionRecord`
231 The exposure records to group.
233 Returns
234 -------
235 groups : `dict` [Any, `list` of `DimensionRecord`]
236 Groupings of exposure records. The key type is relevant to the
237 specific visit definition and could be a string or a tuple.
238 """
239 raise NotImplementedError()
241 @abstractmethod
242 def group(
243 self, exposures: list[DimensionRecord], instrument: Instrument
244 ) -> Iterable[VisitDefinitionData]:
245 """Group the given exposures into visits.
247 Parameters
248 ----------
249 exposures : `list` [ `DimensionRecord` ]
250 DimensionRecords (for the 'exposure' dimension) describing the
251 exposures to group.
252 instrument : `~lsst.obs.base.Instrument`
253 Instrument specification that can optionally be used to support
254 some visit ID definitions.
256 Returns
257 -------
258 visits : `Iterable` [ `VisitDefinitionData` ]
259 Structs identifying the visits and the exposures associated with
260 them. This may be an iterator or a container.
261 """
262 raise NotImplementedError()
264 def getVisitSystems(self) -> set[VisitSystem]:
265 """Return identifiers for the 'visit_system' dimension this
266 algorithm implements.
268 Returns
269 -------
270 visit_systems : `Set` [`VisitSystem`]
271 The visit systems used by this algorithm.
272 """
273 raise NotImplementedError()
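
# Hypothetical registration sketch (not in the original source): a new
# grouping algorithm would subclass GroupExposuresTask and register itself
# with the registry, following the pattern of the concrete tasks later in
# this module. The name "my-grouping" and class _MyGroupingTask are invented
# for illustration only.
#
#     @registerConfigurable("my-grouping", GroupExposuresTask.registry)
#     class _MyGroupingTask(GroupExposuresTask):
#         ConfigClass = GroupExposuresConfig
#
#         def find_missing(self, exposures, registry):
#             return []  # this scheme cannot detect missing exposures
#
#         def group_exposures(self, exposures):
#             return {exposure.id: [exposure] for exposure in exposures}
#
#         def group(self, exposures, instrument):
#             for exposure in exposures:
#                 yield VisitDefinitionData(
#                     instrument=exposure.instrument,
#                     id=exposure.id,
#                     name=exposure.obs_id,
#                     exposures=[exposure],
#                     visit_systems={VisitSystem.ONE_TO_ONE},
#                 )
#
#         def getVisitSystems(self):
#             return {VisitSystem.ONE_TO_ONE}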
276class ComputeVisitRegionsConfig(Config):
277 """Configure visit region calculations."""
279 padding: Field[int] = Field(
280 dtype=int,
281 default=250,
282 doc=(
283 "Pad raw image bounding boxes with specified number of pixels "
284 "when calculating their (conservatively large) region on the "
285 "sky. Note that the config value for pixelMargin of the "
286 "reference object loaders in meas_algorithms should be <= "
287 "the value set here."
288 ),
289 )
292class ComputeVisitRegionsTask(Task, metaclass=ABCMeta):
293 """Abstract base class for the subtask of `DefineVisitsTask` that is
294 responsible for extracting spatial regions for visits and visit+detector
295 combinations.
297 Subclasses should be registered with `ComputeVisitRegionsTask.registry` to
298 enable use by `DefineVisitsTask`.
300 Parameters
301 ----------
302 config : `ComputeVisitRegionsConfig`
303 Configuration information.
304 butler : `lsst.daf.butler.Butler`
305 The butler to use.
306 **kwargs
307 Additional keyword arguments forwarded to the `Task` constructor.
308 """
310 def __init__(self, config: ComputeVisitRegionsConfig, *, butler: Butler, **kwargs: Any):
311 Task.__init__(self, config=config, **kwargs)
312 self.butler = butler
313 self.instrumentMap: dict[str, Instrument] = {}
315 ConfigClass = ComputeVisitRegionsConfig
317 _DefaultName = "computeVisitRegions"
319 registry = makeRegistry(
320 doc="Registry of algorithms for computing on-sky regions for visits and visit+detector combinations.",
321 configBaseType=ComputeVisitRegionsConfig,
322 )
324 def getInstrument(self, instrumentName: str) -> Instrument:
325 """Retrieve an `~lsst.obs.base.Instrument` associated with this
326 instrument name.
328 Parameters
329 ----------
330 instrumentName : `str`
331 The name of the instrument.
333 Returns
334 -------
335 instrument : `~lsst.obs.base.Instrument`
336 The associated instrument object.
338 Notes
339 -----
340 The result is cached.
341 """
342 instrument = self.instrumentMap.get(instrumentName)
343 if instrument is None:
344 instrument = Instrument.fromName(instrumentName, self.butler.registry)
345 self.instrumentMap[instrumentName] = instrument
346 return instrument
348 @abstractmethod
349 def compute(
350 self, visit: VisitDefinitionData, *, collections: Any = None
351 ) -> tuple[Region, dict[int, Region]]:
352 """Compute regions for the given visit and all detectors in that visit.
354 Parameters
355 ----------
356 visit : `VisitDefinitionData`
357 Struct describing the visit and the exposures associated with it.
358 collections : Any, optional
359 Collections to be searched for raws and camera geometry, overriding
360 ``self.butler.collections``.
361 Can be any of the types supported by the ``collections`` argument
362 to butler construction.
364 Returns
365 -------
366 visitRegion : `lsst.sphgeom.Region`
367 Region for the full visit.
368 visitDetectorRegions : `dict` [ `int`, `lsst.sphgeom.Region` ]
369 Dictionary mapping detector ID to the region for that detector.
370 Should include all detectors in the visit.
371 """
372 raise NotImplementedError()
375class DefineVisitsConfig(Config):
376 """Configure visit definition."""
378 groupExposures = GroupExposuresTask.registry.makeField(
379 doc="Algorithm for grouping exposures into visits.",
380 default="one-to-one-and-by-counter",
381 )
382 computeVisitRegions = ComputeVisitRegionsTask.registry.makeField(
383 doc="Algorithm from computing visit and visit+detector regions.",
384 default="single-raw-wcs",
385 )
386 ignoreNonScienceExposures: Field[bool] = Field(
387 doc=(
388 "If True, silently ignore input exposures that do not have "
389 "observation_type=SCIENCE. If False, raise an exception if one "
390 "encountered."
391 ),
392 dtype=bool,
393 optional=False,
394 default=True,
395 )
396 updateObsCoreTable: Field[bool] = Field(
397 doc=(
398 "If True, update exposure regions in obscore table after visits "
399 "are defined. If False, do not update obscore table."
400 ),
401 dtype=bool,
402 default=True,
403 )
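
# Illustrative configuration sketch (not part of the original file), assuming
# the usual lsst.pex.config RegistryField interface for selecting and
# configuring registered subtasks:
#
#     config = DefineVisitsConfig()
#     config.groupExposures.name = "one-to-one"            # pick a grouping algorithm
#     config.computeVisitRegions.name = "single-raw-wcs"   # pick a region algorithm
#     config.computeVisitRegions["single-raw-wcs"].padding = 300
#     config.ignoreNonScienceExposures = True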
406class DefineVisitsTask(Task):
407 """Driver Task for defining visits (and their spatial regions) in Gen3
408 Butler repositories.
410 Parameters
411 ----------
412 config : `DefineVisitsConfig`
413 Configuration for the task.
414 butler : `~lsst.daf.butler.Butler`
415 Writeable butler instance. Will be used to read `raw.wcs` and `camera`
416 datasets and insert/sync dimension data.
417 **kwargs
418 Additional keyword arguments are forwarded to the `lsst.pipe.base.Task`
419 constructor.
421 Notes
422 -----
423 Each instance of `DefineVisitsTask` reads from / writes to the same Butler.
424 Each invocation of `DefineVisitsTask.run` processes an independent group of
425 exposures into one or more new visits, all belonging to the same visit
426 system and instrument.
428 The actual work of grouping exposures and computing regions is delegated
429 to pluggable subtasks (`GroupExposuresTask` and `ComputeVisitRegionsTask`),
430 respectively. The defaults are to create one visit for every exposure,
431 and to use exactly one (arbitrary) detector-level raw dataset's WCS along
432 with camera geometry to compute regions for all detectors. Other
433 implementations can be created and configured for instruments for which
434 these choices are unsuitable (e.g. because visits and exposures are not
435 one-to-one, or because ``raw.wcs`` datasets for different detectors may not
436 be consistent with camera geometry).
438 It is not necessary in general to ingest all raws for an exposure before
439 defining a visit that includes the exposure; this depends entirely on the
440 `ComputeVisitRegionsTask` subclass used. For the default configuration,
441 a single raw for each exposure is sufficient.
443 Defining the same visit the same way multiple times (e.g. via multiple
444 invocations of this task on the same exposures, with the same
445 configuration) is safe, but it may be inefficient, as most of the work must
446 be done before new visits can be compared to existing visits.
447 """
449 def __init__(self, config: DefineVisitsConfig, *, butler: Butler, **kwargs: Any):
450 config.validate() # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
451 super().__init__(config, **kwargs)
452 self.butler = butler
453 self.universe = self.butler.dimensions
454 self.progress = Progress("obs.base.DefineVisitsTask")
455 self.makeSubtask("groupExposures")
456 self.makeSubtask("computeVisitRegions", butler=self.butler)
458 def _reduce_kwargs(self) -> dict:
459 # Add extra parameters to pickle
460 return dict(**super()._reduce_kwargs(), butler=self.butler)
462 ConfigClass: ClassVar[type[Config]] = DefineVisitsConfig
464 _DefaultName: ClassVar[str] = "defineVisits"
466 config: DefineVisitsConfig
467 groupExposures: GroupExposuresTask
468 computeVisitRegions: ComputeVisitRegionsTask
470 def _buildVisitRecords(
471 self, definition: VisitDefinitionData, *, collections: Any = None
472 ) -> _VisitRecords:
473 """Build the DimensionRecords associated with a visit.
475 Parameters
476 ----------
477 definition : `VisitDefinitionData`
478 Struct with identifiers for the visit and records for its
479 constituent exposures.
480 collections : Any, optional
481 Collections to be searched for raws and camera geometry, overriding
482 ``self.butler.collections``.
483 Can be any of the types supported by the ``collections`` argument
484 to butler construction.
486 Returns
487 -------
488 records : `_VisitRecords`
489 Struct containing DimensionRecords for the visit, including
490 associated dimension elements.
491 """
492 dimension = self.universe["visit"]
494 # Some registries support additional items.
495 supported = {meta.name for meta in dimension.metadata}
497 # Compute all regions.
498 visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(
499 definition, collections=collections
500 )
501 # Aggregate other exposure quantities.
502 timespan = Timespan(
503 begin=_reduceOrNone(min, (e.timespan.begin for e in definition.exposures)),
504 end=_reduceOrNone(max, (e.timespan.end for e in definition.exposures)),
505 )
506 exposure_time = _reduceOrNone(operator.add, (e.exposure_time for e in definition.exposures))
507 physical_filter = _reduceOrNone(_value_if_equal, (e.physical_filter for e in definition.exposures))
508 target_name = _reduceOrNone(_value_if_equal, (e.target_name for e in definition.exposures))
509 science_program = _reduceOrNone(_value_if_equal, (e.science_program for e in definition.exposures))
511 # observing day for a visit is defined by the earliest observation
512 # of the visit
513 observing_day = _reduceOrNone(min, (e.day_obs for e in definition.exposures))
514 observation_reason = _reduceOrNone(
515 _value_if_equal, (e.observation_reason for e in definition.exposures)
516 )
517 if observation_reason is None:
518 # Be explicit about there being multiple reasons
519 observation_reason = "various"
521 # Use the mean zenith angle as an approximation
522 zenith_angle = _reduceOrNone(operator.add, (e.zenith_angle for e in definition.exposures))
523 if zenith_angle is not None:
524 zenith_angle /= len(definition.exposures)
526 # New records that may not be supported.
527 extras: dict[str, Any] = {}
528 if "seq_num" in supported:
529 extras["seq_num"] = _reduceOrNone(min, (e.seq_num for e in definition.exposures))
530 if "azimuth" in supported:
531 # Must take into account 0/360 problem.
532 extras["azimuth"] = _calc_mean_angle([e.azimuth for e in definition.exposures])
534 # visit_system handling changed in newer dimension universes. This is the
535 # logic for universes where visit/exposure support seq_start/seq_end.
536 if "seq_num" in supported:
537 # Map visit to exposure.
538 visit_definition = [
539 self.universe["visit_definition"].RecordClass(
540 instrument=definition.instrument,
541 visit=definition.id,
542 exposure=exposure.id,
543 )
544 for exposure in definition.exposures
545 ]
547 # Map visit to visit system.
548 visit_system_membership = []
549 for visit_system in self.groupExposures.getVisitSystems():
550 if visit_system in definition.visit_systems:
551 record = self.universe["visit_system_membership"].RecordClass(
552 instrument=definition.instrument,
553 visit=definition.id,
554 visit_system=visit_system.value,
555 )
556 visit_system_membership.append(record)
558 else:
559 # The old approach can only handle one visit system at a time.
560 # If we have been configured with multiple options, prefer the
561 # one-to-one.
562 visit_systems = self.groupExposures.getVisitSystems()
563 if len(visit_systems) > 1:
564 one_to_one = VisitSystem.from_name("one-to-one")
565 if one_to_one not in visit_systems:
566 raise ValueError(
567 f"Multiple visit systems specified ({visit_systems}) for use with old"
568 " dimension universe but unable to find one-to-one."
569 )
570 visit_system = one_to_one
571 else:
572 visit_system = visit_systems.pop()
574 extras["visit_system"] = visit_system.value
576 # The old visit_definition included visit system.
577 visit_definition = [
578 self.universe["visit_definition"].RecordClass(
579 instrument=definition.instrument,
580 visit=definition.id,
581 exposure=exposure.id,
582 visit_system=visit_system.value,
583 )
584 for exposure in definition.exposures
585 ]
587 # This concept does not exist in old schema.
588 visit_system_membership = []
590 # Construct the actual DimensionRecords.
591 return _VisitRecords(
592 visit=dimension.RecordClass(
593 instrument=definition.instrument,
594 id=definition.id,
595 name=definition.name,
596 physical_filter=physical_filter,
597 target_name=target_name,
598 science_program=science_program,
599 observation_reason=observation_reason,
600 day_obs=observing_day,
601 zenith_angle=zenith_angle,
602 exposure_time=exposure_time,
603 timespan=timespan,
604 region=visitRegion,
605 # TODO: no seeing value in exposure dimension records, so we
606 # can't set that here. But there are many other columns that
607 # both dimensions should probably have as well.
608 **extras,
609 ),
610 visit_definition=visit_definition,
611 visit_system_membership=visit_system_membership,
612 visit_detector_region=[
613 self.universe["visit_detector_region"].RecordClass(
614 instrument=definition.instrument,
615 visit=definition.id,
616 detector=detectorId,
617 region=detectorRegion,
618 )
619 for detectorId, detectorRegion in visitDetectorRegions.items()
620 ],
621 )
623 def run(
624 self,
625 dataIds: Iterable[DataId],
626 *,
627 collections: str | None = None,
628 update_records: bool = False,
629 incremental: bool = False,
630 ) -> None:
631 """Add visit definitions to the registry for the given exposures.
633 Parameters
634 ----------
635 dataIds : `Iterable` [ `dict` or `~lsst.daf.butler.DataCoordinate` ]
636 Exposure-level data IDs. These must all correspond to the same
637 instrument, and are expected to be on-sky science exposures.
638 collections : Any, optional
639 Collections to be searched for raws and camera geometry, overriding
640 ``self.butler.collections``.
641 Can be any of the types supported by the ``collections`` argument
642 to butler construction.
643 update_records : `bool`, optional
644 If `True` (`False` is default), update existing visit records that
645 conflict with the new ones instead of rejecting them (and when this
646 occurs, update visit_detector_region as well). THIS IS AN ADVANCED
647 OPTION THAT SHOULD ONLY BE USED TO FIX REGIONS AND/OR METADATA THAT
648 ARE KNOWN TO BE BAD, AND IT CANNOT BE USED TO REMOVE EXPOSURES OR
649 DETECTORS FROM A VISIT.
650 incremental : `bool`, optional
651 If `True`, indicates that exposures are being ingested incrementally
652 and visit definition will be run on partial visits. This will
653 force ``update_records`` to `True`. If there is any risk that
654 files are being ingested incrementally it is critical that this
655 parameter be set to `True`; do not rely on ``update_records`` alone.
657 Raises
658 ------
659 lsst.daf.butler.registry.ConflictingDefinitionError
660 Raised if a visit ID conflict is detected and the existing visit
661 differs from the new one.
662 """
663 # Normalize, expand, and deduplicate data IDs.
664 self.log.info("Preprocessing data IDs.")
665 dimensions = self.universe.conform(["exposure"])
666 data_id_set: set[DataCoordinate] = {
667 self.butler.registry.expandDataId(d, dimensions=dimensions) for d in dataIds
668 }
669 if not data_id_set:
670 raise RuntimeError("No exposures given.")
671 if incremental:
672 update_records = True
673 # Extract exposure DimensionRecords, check that there's only one
674 # instrument in play, and check for non-science exposures.
675 exposures = []
676 instruments = set()
677 instrument_cls_name: str | None = None
678 for dataId in data_id_set:
679 record = dataId.records["exposure"]
680 assert record is not None, "Guaranteed by expandDataIds call earlier."
681 if record.tracking_ra is None or record.tracking_dec is None or record.sky_angle is None:
682 if self.config.ignoreNonScienceExposures:
683 continue
684 else:
685 raise RuntimeError(
686 f"Input exposure {dataId} has observation_type "
687 f"{record.observation_type}, but is not on sky."
688 )
689 instruments.add(dataId["instrument"])
690 instrument_record = dataId.records["instrument"]
691 if instrument_record is not None:
692 instrument_cls_name = instrument_record.class_name
693 exposures.append(record)
694 if not exposures:
695 self.log.info("No on-sky exposures found after filtering.")
696 return
697 if len(instruments) > 1:
698 raise RuntimeError(
699 "All data IDs passed to DefineVisitsTask.run must be "
700 f"from the same instrument; got {instruments}."
701 )
702 (instrument,) = instruments
704 # Might need the instrument class for later depending on universe
705 # and grouping scheme.
706 assert instrument_cls_name is not None, "Instrument must be defined in this dataId"
707 instrument_helper = Instrument.from_string(instrument_cls_name)
709 # Ensure the visit_system our grouping algorithm uses is in the
710 # registry, if it wasn't already.
711 visitSystems = self.groupExposures.getVisitSystems()
712 for visitSystem in visitSystems:
713 self.log.info("Registering visit_system %d: %s.", visitSystem.value, visitSystem)
714 self.butler.registry.syncDimensionData(
715 "visit_system",
716 {"instrument": instrument, "id": visitSystem.value, "name": str(visitSystem)},
717 )
719 # In true incremental mode we will be given the second snap on its
720 # own on the assumption that the previous snap was already handled.
721 # For correct grouping we need access to the other exposures in the
722 # visit.
723 if incremental:
724 exposures.extend(self.groupExposures.find_missing(exposures, self.butler.registry))
726 # Group exposures into visits, delegating to subtask.
727 self.log.info("Grouping %d exposure(s) into visits.", len(exposures))
728 definitions = list(self.groupExposures.group(exposures, instrument_helper))
729 # Iterate over visits, compute regions, and insert dimension data, one
730 # transaction per visit. If a visit already exists, we skip all other
731 # inserts.
732 self.log.info("Computing regions and other metadata for %d visit(s).", len(definitions))
733 for visitDefinition in self.progress.wrap(
734 definitions, total=len(definitions), desc="Computing regions and inserting visits"
735 ):
736 visitRecords = self._buildVisitRecords(visitDefinition, collections=collections)
737 with self.butler.registry.transaction():
738 inserted_or_updated = self.butler.registry.syncDimensionData(
739 "visit",
740 visitRecords.visit,
741 update=update_records,
742 )
743 if inserted_or_updated:
744 if inserted_or_updated is True:
745 # This is a new visit, not an update to an existing
746 # one, so insert visit definition.
747 # We don't allow visit definitions to change even when
748 # asked to update, because we'd have to delete the old
749 # visit_definitions first and also worry about what
750 # this does to datasets that already use the visit.
751 self.butler.registry.insertDimensionData(
752 "visit_definition", *visitRecords.visit_definition
753 )
754 if visitRecords.visit_system_membership:
755 self.butler.registry.insertDimensionData(
756 "visit_system_membership", *visitRecords.visit_system_membership
757 )
758 elif incremental and len(visitRecords.visit_definition) > 1:
759 # The visit record was modified. This could happen
760 # if a multi-snap visit was redefined with an
761 # additional snap so play it safe and allow for the
762 # visit definition to be updated. We use update=False
763 # here since there should not be any rows updated,
764 # just additional rows added. update=True does not work
765 # correctly with multiple records. In incremental mode
766 # we assume that the caller wants the visit definition
767 # to be updated and has no worries about provenance
768 # with the previous definition.
769 for definition in visitRecords.visit_definition:
770 self.butler.registry.syncDimensionData("visit_definition", definition)
772 # [Re]Insert visit_detector_region records for both inserts
773 # and updates, because we do allow updating to affect the
774 # region calculations.
775 self.butler.registry.insertDimensionData(
776 "visit_detector_region", *visitRecords.visit_detector_region, replace=update_records
777 )
779 # Update obscore exposure records with region information
780 # from corresponding visits.
781 if self.config.updateObsCoreTable:
782 if obscore_manager := self.butler.registry.obsCoreTableManager:
783 obscore_updates: list[tuple[int, int, Region]] = []
784 exposure_ids = [rec.exposure for rec in visitRecords.visit_definition]
785 for record in visitRecords.visit_detector_region:
786 obscore_updates += [
787 (exposure, record.detector, record.region) for exposure in exposure_ids
788 ]
789 if obscore_updates:
790 obscore_manager.update_exposure_regions(
791 cast(str, instrument), obscore_updates
792 )
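
# Minimal end-to-end usage sketch (not part of the original module); the
# repository path, instrument name, and query are placeholders:
#
#     from lsst.daf.butler import Butler
#
#     butler = Butler("/path/to/repo", writeable=True)
#     task = DefineVisitsTask(config=DefineVisitsConfig(), butler=butler)
#     data_ids = butler.registry.queryDataIds(
#         ["exposure"],
#         instrument="HSC",
#         where="exposure.observation_type = 'science'",
#     )
#     task.run(data_ids)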
795_T = TypeVar("_T")
798def _reduceOrNone(func: Callable[[_T, _T], _T | None], iterable: Iterable[_T | None]) -> _T | None:
799 """Apply a binary function to pairs of elements in an iterable until a
800 single value is returned, but return `None` if any element is `None` or
801 there are no elements.
802 """
803 r: _T | None = None
804 for v in iterable:
805 if v is None:
806 return None
807 if r is None:
808 r = v
809 else:
810 r = func(r, v)
811 return r
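
# Behaviour sketch for the helper above (illustrative only):
#
#     >>> _reduceOrNone(min, [3, 1, 2])
#     1
#     >>> _reduceOrNone(min, [3, None, 2]) is None   # any None short-circuits
#     True
#     >>> _reduceOrNone(min, []) is None             # no elements also yields None
#     True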
814def _value_if_equal(a: _T, b: _T) -> _T | None:
815 """Return either argument if they are equal, or `None` if they are not."""
816 return a if a == b else None
819def _calc_mean_angle(angles: list[float]) -> float:
820 """Calculate the mean angle, taking into account 0/360 wrapping.
822 Parameters
823 ----------
824 angles : `list` [`float`]
825 Angles to average together, in degrees.
827 Returns
828 -------
829 average : `float`
830 Average angle in degrees.
831 """
832 # Save on all the math if we only have one value.
833 if len(angles) == 1:
834 return angles[0]
836 # Convert polar coordinates of unit circle to complex values.
837 # Average the complex values.
838 # Convert back to a phase angle.
839 return math.degrees(cmath.phase(sum(cmath.rect(1.0, math.radians(d)) for d in angles) / len(angles)))
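
# Worked example (illustrative only): a naive arithmetic mean of 350 and 10
# degrees gives 180, whereas averaging the corresponding unit vectors in the
# complex plane correctly yields an angle near 0 degrees.
#
#     >>> abs(_calc_mean_angle([350.0, 10.0])) < 1e-6
#     True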
842class _GroupExposuresOneToOneConfig(GroupExposuresConfig):
843 visitSystemId: Field[int] = Field(
844 doc="Integer ID of the visit_system implemented by this grouping algorithm.",
845 dtype=int,
846 default=0,
847 deprecated="No longer used. Replaced by enum.",
848 )
849 visitSystemName: Field[str] = Field(
850 doc="String name of the visit_system implemented by this grouping algorithm.",
851 dtype=str,
852 default="one-to-one",
853 deprecated="No longer used. Replaced by enum.",
854 )
857@registerConfigurable("one-to-one", GroupExposuresTask.registry)
858class _GroupExposuresOneToOneTask(GroupExposuresTask, metaclass=ABCMeta):
859 """An exposure grouping algorithm that simply defines one visit for each
860 exposure, reusing the exposures identifiers for the visit.
861 """
863 ConfigClass = _GroupExposuresOneToOneConfig
865 def find_missing(
866 self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
867 ) -> list[DimensionRecord]:
868 # By definition no exposures can be missing.
869 return []
871 def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
872 # No grouping.
873 return {exposure.id: [exposure] for exposure in exposures}
875 def group(
876 self, exposures: list[DimensionRecord], instrument: Instrument
877 ) -> Iterable[VisitDefinitionData]:
878 # Docstring inherited from GroupExposuresTask.
879 visit_systems = {VisitSystem.from_name("one-to-one")}
880 for exposure in exposures:
881 yield VisitDefinitionData(
882 instrument=exposure.instrument,
883 id=exposure.id,
884 name=exposure.obs_id,
885 exposures=[exposure],
886 visit_systems=visit_systems,
887 )
889 def getVisitSystems(self) -> set[VisitSystem]:
890 # Docstring inherited from GroupExposuresTask.
891 return set(VisitSystem.from_names(["one-to-one"]))
894class _GroupExposuresByGroupMetadataConfig(GroupExposuresConfig):
895 visitSystemId: Field[int] = Field(
896 doc="Integer ID of the visit_system implemented by this grouping algorithm.",
897 dtype=int,
898 default=1,
899 deprecated="No longer used. Replaced by enum.",
900 )
901 visitSystemName: Field[str] = Field(
902 doc="String name of the visit_system implemented by this grouping algorithm.",
903 dtype=str,
904 default="by-group-metadata",
905 deprecated="No longer used. Replaced by enum.",
906 )
909@registerConfigurable("by-group-metadata", GroupExposuresTask.registry)
910class _GroupExposuresByGroupMetadataTask(GroupExposuresTask, metaclass=ABCMeta):
911 """An exposure grouping algorithm that uses the exposure group.
913 This algorithm uses the ``group`` dimension for modern universes and the
914 ``exposure.group_id`` for older universes.
916 This algorithm *assumes* group ID (generally populated from
917 `astro_metadata_translator.ObservationInfo.visit_id`) is not just unique,
918 but disjoint from all `ObservationInfo.exposure_id` values - if it isn't,
919 it will be impossible to ever use both this grouping algorithm and the
920 one-to-one algorithm for a particular camera in the same data repository.
921 """
923 ConfigClass = _GroupExposuresByGroupMetadataConfig
925 def find_missing(
926 self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
927 ) -> list[DimensionRecord]:
928 groups = self.group_exposures(exposures)
929 # Determine which group implementation we are using.
930 if "group" in registry.dimensions["exposure"].implied:
931 group_key = "group"
932 else:
933 group_key = "group_name"
934 missing_exposures: list[DimensionRecord] = []
935 for exposures_in_group in groups.values():
936 # We can not tell how many exposures are expected to be in the
937 # visit so we have to query every time.
938 first = exposures_in_group[0]
939 records = set(
940 registry.queryDimensionRecords(
941 "exposure",
942 where=f"exposure.{group_key} = groupnam",
943 bind={"groupnam": getattr(first, group_key)},
944 instrument=first.instrument,
945 )
946 )
947 records.difference_update(set(exposures_in_group))
948 missing_exposures.extend(list(records))
949 return missing_exposures
951 def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
952 groups = defaultdict(list)
953 group_key = "group"
954 if exposures and hasattr(exposures[0], "group_name"):
955 group_key = "group_name"
956 for exposure in exposures:
957 groups[getattr(exposure, group_key)].append(exposure)
958 return groups
960 def group(
961 self, exposures: list[DimensionRecord], instrument: Instrument
962 ) -> Iterable[VisitDefinitionData]:
963 # Docstring inherited from GroupExposuresTask.
964 visit_systems = {VisitSystem.from_name("by-group-metadata")}
965 groups = self.group_exposures(exposures)
966 has_group_dimension: bool | None = None
967 for visitName, exposuresInGroup in groups.items():
968 instrument_name = exposuresInGroup[0].instrument
969 assert instrument_name == instrument.getName(), "Inconsistency in instrument name"
970 visit_ids: set[int] = set()
971 if has_group_dimension is None:
972 has_group_dimension = hasattr(exposuresInGroup[0], "group")
973 if has_group_dimension:
974 visit_ids = {instrument.group_name_to_group_id(e.group) for e in exposuresInGroup}
975 else:
976 visit_ids = {e.group_id for e in exposuresInGroup}
977 assert len(visit_ids) == 1, "Grouping by exposure group does not yield consistent group IDs"
978 yield VisitDefinitionData(
979 instrument=instrument_name,
980 id=visit_ids.pop(),
981 name=visitName,
982 exposures=exposuresInGroup,
983 visit_systems=visit_systems,
984 )
986 def getVisitSystems(self) -> set[VisitSystem]:
987 # Docstring inherited from GroupExposuresTask.
988 return set(VisitSystem.from_names(["by-group-metadata"]))
991class _GroupExposuresByCounterAndExposuresConfig(GroupExposuresConfig):
992 visitSystemId: Field[int] = Field(
993 doc="Integer ID of the visit_system implemented by this grouping algorithm.",
994 dtype=int,
995 default=2,
996 deprecated="No longer used. Replaced by enum.",
997 )
998 visitSystemName: Field[str] = Field(
999 doc="String name of the visit_system implemented by this grouping algorithm.",
1000 dtype=str,
1001 default="by-counter-and-exposures",
1002 deprecated="No longer used. Replaced by enum.",
1003 )
1006@registerConfigurable("one-to-one-and-by-counter", GroupExposuresTask.registry)
1007class _GroupExposuresByCounterAndExposuresTask(GroupExposuresTask, metaclass=ABCMeta):
1008 """An exposure grouping algorithm that uses the sequence start and
1009 sequence end metadata to create multi-exposure visits, but also
1010 creates one-to-one visits.
1012 This algorithm uses the exposure.seq_start and
1013 exposure.seq_end fields to collect related snaps.
1014 It also groups single exposures.
1015 """
1017 ConfigClass = _GroupExposuresByCounterAndExposuresConfig
1019 def find_missing(
1020 self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
1021 ) -> list[DimensionRecord]:
1022 """Analyze the exposures and return relevant exposures known to
1023 registry.
1024 """
1025 groups = self.group_exposures(exposures)
1026 missing_exposures: list[DimensionRecord] = []
1027 for exposures_in_group in groups.values():
1028 sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
1029 first = sorted_exposures[0]
1031 # Only need to look for the seq_nums that we don't already have.
1032 seq_nums = set(range(first.seq_start, first.seq_end + 1))
1033 seq_nums.difference_update({exp.seq_num for exp in sorted_exposures})
1035 if seq_nums:
1036 # Missing something. Check registry.
1037 records = list(
1038 registry.queryDimensionRecords(
1039 "exposure",
1040 where="exposure.seq_start = seq_start AND exposure.seq_end = seq_end AND "
1041 "exposure.seq_num IN (seq_nums)",
1042 bind={"seq_start": first.seq_start, "seq_end": first.seq_end, "seq_nums": seq_nums},
1043 instrument=first.instrument,
1044 )
1045 )
1046 missing_exposures.extend(records)
1048 return missing_exposures
1050 def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
1051 groups = defaultdict(list)
1052 for exposure in exposures:
1053 groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure)
1054 return groups
1056 def group(
1057 self, exposures: list[DimensionRecord], instrument: Instrument
1058 ) -> Iterable[VisitDefinitionData]:
1059 # Docstring inherited from GroupExposuresTask.
1060 system_one_to_one = VisitSystem.from_name("one-to-one")
1061 system_seq_start_end = VisitSystem.from_name("by-seq-start-end")
1063 groups = self.group_exposures(exposures)
1064 for visit_key, exposures_in_group in groups.items():
1065 instrument_name = exposures_in_group[0].instrument
1067 # It is possible that the first exposure in a visit has not
1068 # been ingested. This can be detected, and if so the
1069 # multi-exposure visit can not be reliably defined.
1070 skip_multi = False
1071 sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
1072 first = sorted_exposures.pop(0)
1073 if first.seq_num != first.seq_start:
1074 # Special case seq_num == 0 since that implies that the
1075 # instrument has no counters and therefore no multi-exposure
1076 # visits.
1077 if first.seq_num != 0:
1078 self.log.warning(
1079 "First exposure for visit %s is not present. Skipping the multi-snap definition.",
1080 visit_key,
1081 )
1082 skip_multi = True
1084 multi_exposure = False
1085 if first.seq_start != first.seq_end:
1086 # This is a multi-exposure visit regardless of the number
1087 # of exposures present.
1088 multi_exposure = True
1090 # Define the one-to-one visits.
1091 for exposure in exposures_in_group:
1092 # Default is to use the exposure ID and name unless
1093 # this is the first exposure in a multi-exposure visit.
1094 visit_name = exposure.obs_id
1095 visit_id = exposure.id
1096 visit_systems = {system_one_to_one}
1098 if not multi_exposure:
1099 # This is also a by-counter visit.
1100 # It will use the same visit_name and visit_id.
1101 visit_systems.add(system_seq_start_end)
1103 elif not skip_multi and exposure == first:
1104 # This is the first legitimate exposure in a multi-exposure
1105 # visit. It therefore needs a modified visit name and ID
1106 # so it does not clash with the multi-exposure visit
1107 # definition.
1108 visit_name = f"{visit_name}_first"
1109 visit_id = int(f"9{visit_id}")
1111 yield VisitDefinitionData(
1112 instrument=instrument_name,
1113 id=visit_id,
1114 name=visit_name,
1115 exposures=[exposure],
1116 visit_systems=visit_systems,
1117 )
1119 # Multi-exposure visit.
1120 if not skip_multi and multi_exposure:
1121 # Define the visit using the first exposure
1122 visit_name = first.obs_id
1123 visit_id = first.id
1125 yield VisitDefinitionData(
1126 instrument=instrument_name,
1127 id=visit_id,
1128 name=visit_name,
1129 exposures=exposures_in_group,
1130 visit_systems={system_seq_start_end},
1131 )
1133 def getVisitSystems(self) -> set[VisitSystem]:
1134 # Docstring inherited from GroupExposuresTask.
1135 # Using a Config for this is difficult because what this grouping
1136 # algorithm is doing is using two visit systems.
1137 # One is using metadata (but not by-group) and the other is the
1138 # one-to-one. For now hard-code in class.
1139 return set(VisitSystem.from_names(["one-to-one", "by-seq-start-end"]))
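
# Example of the definitions produced by the task above (derived from the
# logic in its group() method): for a two-snap group with seq_start=10 and
# seq_end=11, three visits are generated: one one-to-one visit per snap
# (the first snap's visit is renamed "<obs_id>_first" with ID "9<exposure_id>"
# so it does not clash with the multi-snap visit) plus a single multi-snap
# visit that reuses the first snap's obs_id and exposure ID and contains both
# exposures.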
1142class _ComputeVisitRegionsFromSingleRawWcsConfig(ComputeVisitRegionsConfig):
1143 mergeExposures: Field[bool] = Field(
1144 doc=(
1145 "If True, merge per-detector regions over all exposures in a "
1146 "visit (via convex hull) instead of using the first exposure and "
1147 "assuming its regions are valid for all others."
1148 ),
1149 dtype=bool,
1150 default=False,
1151 )
1152 detectorId: Field[int | None] = Field(
1153 doc=(
1154 "Load the WCS for the detector with this ID. If None, use an "
1155 "arbitrary detector (the first found in a query of the data "
1156 "repository for each exposure (or all exposures, if "
1157 "mergeExposures is True)."
1158 ),
1159 dtype=int,
1160 optional=True,
1161 default=None,
1162 )
1163 requireVersionedCamera: Field[bool] = Field(
1164 doc=(
1165 "If True, raise LookupError if version camera geometry cannot be "
1166 "loaded for an exposure. If False, use the nominal camera from "
1167 "the Instrument class instead."
1168 ),
1169 dtype=bool,
1170 optional=False,
1171 default=False,
1172 )
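
# Illustrative override sketch (not part of the original file), assuming the
# same RegistryField access pattern as for DefineVisitsConfig above; pinning
# the detector (the ID 50 is an arbitrary example) avoids relying on whichever
# raw happens to be found first:
#
#     config = DefineVisitsConfig()
#     config.computeVisitRegions.name = "single-raw-wcs"
#     config.computeVisitRegions["single-raw-wcs"].detectorId = 50
#     config.computeVisitRegions["single-raw-wcs"].mergeExposures = True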
1175@registerConfigurable("single-raw-wcs", ComputeVisitRegionsTask.registry)
1176class _ComputeVisitRegionsFromSingleRawWcsTask(ComputeVisitRegionsTask):
1177 """A visit region calculator that uses a single raw WCS and a camera to
1178 project the bounding boxes of all detectors onto the sky, relating
1179 different detectors by their positions in focal plane coordinates.
1181 Notes
1182 -----
1183 Most instruments should have their raw WCSs determined from a combination
1184 of boresight angle, rotator angle, and camera geometry, and hence this
1185 algorithm should produce stable results regardless of which detector the
1186 raw corresponds to. If this is not the case (e.g. because a per-file FITS
1187 WCS is used instead), either the ID of the detector should be fixed (see
1188 the ``detectorId`` config parameter) or a different algorithm used.
1189 """
1191 ConfigClass = _ComputeVisitRegionsFromSingleRawWcsConfig
1192 config: _ComputeVisitRegionsFromSingleRawWcsConfig
1194 def computeExposureBounds(
1195 self, exposure: DimensionRecord, *, collections: Any = None
1196 ) -> dict[int, list[UnitVector3d]]:
1197 """Compute the lists of unit vectors on the sphere that correspond to
1198 the sky positions of detector corners.
1200 Parameters
1201 ----------
1202 exposure : `DimensionRecord`
1203 Dimension record for the exposure.
1204 collections : Any, optional
1205 Collections to be searched for raws and camera geometry, overriding
1206 ``self.butler.collections``.
1207 Can be any of the types supported by the ``collections`` argument
1208 to butler construction.
1210 Returns
1211 -------
1212 bounds : `dict`
1213 Dictionary mapping detector ID to a list of unit vectors on the
1214 sphere representing that detector's corners projected onto the sky.
1215 """
1216 if collections is None:
1217 collections = self.butler.collections
1218 camera, versioned = loadCamera(self.butler, exposure.dataId, collections=collections)
1219 if not versioned and self.config.requireVersionedCamera:
1220 raise LookupError(f"No versioned camera found for exposure {exposure.dataId}.")
1222 # Derive WCS from boresight information -- if available in registry
1223 use_registry = True
1224 try:
1225 orientation = lsst.geom.Angle(exposure.sky_angle, lsst.geom.degrees)
1226 radec = lsst.geom.SpherePoint(
1227 lsst.geom.Angle(exposure.tracking_ra, lsst.geom.degrees),
1228 lsst.geom.Angle(exposure.tracking_dec, lsst.geom.degrees),
1229 )
1230 except AttributeError:
1231 use_registry = False
1233 if use_registry:
1234 if self.config.detectorId is None:
1235 detectorId = next(camera.getIdIter())
1236 else:
1237 detectorId = self.config.detectorId
1238 wcsDetector = camera[detectorId]
1240 # Ask the raw formatter to create the relevant WCS
1241 # This allows flips to be taken into account
1242 instrument = self.getInstrument(exposure.instrument)
1243 rawFormatter = instrument.getRawFormatter({"detector": detectorId})
1245 try:
1246 wcs = rawFormatter.makeRawSkyWcsFromBoresight(radec, orientation, wcsDetector) # type: ignore
1247 except AttributeError:
1248 raise TypeError(
1249 f"Raw formatter is {get_full_type_name(rawFormatter)} but visit"
1250 " definition requires it to support 'makeRawSkyWcsFromBoresight'"
1251 ) from None
1252 else:
1253 if self.config.detectorId is None:
1254 wcsRefsIter = self.butler.registry.queryDatasets(
1255 "raw.wcs", dataId=exposure.dataId, collections=collections
1256 )
1257 if not wcsRefsIter:
1258 raise LookupError(
1259 f"No raw.wcs datasets found for data ID {exposure.dataId} "
1260 f"in collections {collections}."
1261 )
1262 wcsRef = next(iter(wcsRefsIter))
1263 wcsDetector = camera[wcsRef.dataId["detector"]]
1264 wcs = self.butler.get(wcsRef)
1265 else:
1266 wcsDetector = camera[self.config.detectorId]
1267 wcs = self.butler.get(
1268 "raw.wcs",
1269 dataId=exposure.dataId,
1270 detector=self.config.detectorId,
1271 collections=collections,
1272 )
1273 fpToSky = wcsDetector.getTransform(FOCAL_PLANE, PIXELS).then(wcs.getTransform())
1274 bounds = {}
1275 for detector in camera:
1276 pixelsToSky = detector.getTransform(PIXELS, FOCAL_PLANE).then(fpToSky)
1277 pixCorners = Box2D(detector.getBBox().dilatedBy(self.config.padding)).getCorners()
1278 bounds[detector.getId()] = [
1279 skyCorner.getVector() for skyCorner in pixelsToSky.applyForward(pixCorners)
1280 ]
1281 return bounds
1283 def compute(
1284 self, visit: VisitDefinitionData, *, collections: Any = None
1285 ) -> tuple[Region, dict[int, Region]]:
1286 # Docstring inherited from ComputeVisitRegionsTask.
1287 if self.config.mergeExposures:
1288 detectorBounds: dict[int, list[UnitVector3d]] = defaultdict(list)
1289 for exposure in visit.exposures:
1290 exposureDetectorBounds = self.computeExposureBounds(exposure, collections=collections)
1291 for detectorId, bounds in exposureDetectorBounds.items():
1292 detectorBounds[detectorId].extend(bounds)
1293 else:
1294 detectorBounds = self.computeExposureBounds(visit.exposures[0], collections=collections)
1295 visitBounds = []
1296 detectorRegions = {}
1297 for detectorId, bounds in detectorBounds.items():
1298 detectorRegions[detectorId] = ConvexPolygon.convexHull(bounds)
1299 visitBounds.extend(bounds)
1300 return ConvexPolygon.convexHull(visitBounds), detectorRegions