Coverage for python/lsst/obs/base/defineVisits.py: 27%
399 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-01 09:53 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = [
25 "DefineVisitsConfig",
26 "DefineVisitsTask",
27 "GroupExposuresConfig",
28 "GroupExposuresTask",
29 "VisitDefinitionData",
30 "VisitSystem",
31]
33import cmath
34import dataclasses
35import enum
36import math
37import operator
38from abc import ABCMeta, abstractmethod
39from collections import defaultdict
40from collections.abc import Callable, Iterable
41from typing import Any, ClassVar, TypeVar, cast
43import lsst.geom
44from lsst.afw.cameraGeom import FOCAL_PLANE, PIXELS
45from lsst.daf.butler import (
46 Butler,
47 DataCoordinate,
48 DataId,
49 DimensionGraph,
50 DimensionRecord,
51 Progress,
52 Timespan,
53)
54from lsst.geom import Box2D
55from lsst.pex.config import Config, Field, makeRegistry, registerConfigurable
56from lsst.pipe.base import Instrument, Task
57from lsst.sphgeom import ConvexPolygon, Region, UnitVector3d
58from lsst.utils.introspection import get_full_type_name
60from ._instrument import loadCamera
class VisitSystem(enum.Enum):
    """Enumeration used to label different visit systems."""

    ONE_TO_ONE = 0
    """Each exposure is assigned to its own visit."""

    BY_GROUP_METADATA = 1
    """Visit membership is defined by the value of the exposure.group_id."""

    BY_SEQ_START_END = 2
    """Visit membership is defined by the values of the ``exposure.day_obs``,
    ``exposure.seq_start``, and ``exposure.seq_end`` values.
    """

    @classmethod
    def all(cls) -> frozenset[VisitSystem]:
        """Return a `frozenset` containing all members."""
        return frozenset(cls.__members__.values())

    @classmethod
    def from_name(cls, external_name: str) -> VisitSystem:
        """Construct the enumeration from given name.

        The lookup is case-insensitive and accepts hyphens in place of
        underscores (e.g. ``"one-to-one"``).
        """
        key = external_name.upper().replace("-", "_")
        if key not in cls.__members__:
            raise KeyError(f"Visit system named '{external_name}' not known.")
        return cls.__members__[key]

    @classmethod
    def from_names(cls, names: Iterable[str] | None) -> frozenset[VisitSystem]:
        """Return a `frozenset` of all the visit systems matching the supplied
        names.

        Parameters
        ----------
        names : iterable of `str`, or `None`
            Names of visit systems. Case insensitive. If `None` or empty, all
            the visit systems are returned.

        Returns
        -------
        systems : `frozenset` of `VisitSystem`
            The matching visit systems.
        """
        if names:
            return frozenset(cls.from_name(name) for name in names)
        return cls.all()

    def __str__(self) -> str:
        # External form: lower-case with hyphens, e.g. "by-group-metadata".
        return self.name.lower().replace("_", "-")
@dataclasses.dataclass
class VisitDefinitionData:
    """Struct representing a group of exposures that will be used to define a
    visit.
    """

    # Name of the instrument this visit will be associated with.
    instrument: str

    # Integer ID of the visit; must be unique across all visit systems for
    # the instrument.
    id: int

    # String name for the visit; must be unique across all visit systems for
    # the instrument.
    name: str

    # All the visit systems associated with this visit.
    visit_systems: set[VisitSystem]

    # Dimension records for the exposures that are part of this visit.
    exposures: list[DimensionRecord] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class _VisitRecords:
    """Struct containing the dimension records associated with a visit."""

    # Record for the 'visit' dimension itself.
    visit: DimensionRecord

    # Records for 'visit_definition', which relates 'visit' to 'exposure'.
    visit_definition: list[DimensionRecord]

    # Records for 'visit_detector_region', which associates the combination
    # of a 'visit' and a 'detector' with a region on the sky.
    visit_detector_region: list[DimensionRecord]

    # Records relating visits to an associated visit system.
    visit_system_membership: list[DimensionRecord]
class GroupExposuresConfig(Config):
    """Configure exposure grouping.

    This base class declares no options of its own; concrete
    `GroupExposuresTask` subclasses provide their own configuration
    subclasses with algorithm-specific fields.
    """
class GroupExposuresTask(Task, metaclass=ABCMeta):
    """Abstract base class for the subtask of `DefineVisitsTask` that is
    responsible for grouping exposures into visits.

    Subclasses should be registered with `GroupExposuresTask.registry` to
    enable use by `DefineVisitsTask`, and should generally correspond to a
    particular 'visit_system' dimension value. They are also responsible for
    defining visit IDs and names that are unique across all visit systems in
    use by an instrument.

    Parameters
    ----------
    config : `GroupExposuresConfig`
        Configuration information.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    def __init__(self, config: GroupExposuresConfig, **kwargs: Any):
        Task.__init__(self, config=config, **kwargs)

    ConfigClass = GroupExposuresConfig

    _DefaultName = "groupExposures"

    registry = makeRegistry(
        doc="Registry of algorithms for grouping exposures into visits.",
        configBaseType=GroupExposuresConfig,
    )

    @abstractmethod
    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        """Determine, if possible, which exposures might be missing.

        Parameters
        ----------
        exposures : `list` of `lsst.daf.butler.DimensionRecord`
            The exposure records to analyze.
        registry : `lsst.daf.butler.Registry`
            A butler registry that contains these exposure records.

        Returns
        -------
        missing : `list` of `lsst.daf.butler.DimensionRecord`
            Any exposure records present in registry that were related to
            the given exposures but were missing from that list and deemed
            to be relevant.

        Notes
        -----
        Only some grouping schemes are able to find missing exposures. It
        is acceptable to return an empty list.
        """
        raise NotImplementedError()

    @abstractmethod
    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        """Group the exposures in a way most natural for this visit definition.

        Parameters
        ----------
        exposures : `list` of `lsst.daf.butler.DimensionRecord`
            The exposure records to group.

        Returns
        -------
        groups : `dict` [Any, `list` of `DimensionRecord`]
            Groupings of exposure records. The key type is relevant to the
            specific visit definition and could be a string or a tuple.
        """
        raise NotImplementedError()

    @abstractmethod
    def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        """Group the given exposures into visits.

        Parameters
        ----------
        exposures : `list` [ `DimensionRecord` ]
            DimensionRecords (for the 'exposure' dimension) describing the
            exposures to group.

        Returns
        -------
        visits : `Iterable` [ `VisitDefinitionData` ]
            Structs identifying the visits and the exposures associated with
            them. This may be an iterator or a container.
        """
        raise NotImplementedError()

    # Not decorated with @abstractmethod, but it raises unconditionally, so
    # concrete subclasses must override it.
    def getVisitSystems(self) -> set[VisitSystem]:
        """Return identifiers for the 'visit_system' dimension this
        algorithm implements.

        Returns
        -------
        visit_systems : `Set` [`VisitSystem`]
            The visit systems used by this algorithm.
        """
        raise NotImplementedError()
class ComputeVisitRegionsConfig(Config):
    """Configure visit region calculations."""

    # Padding is in pixels, applied to detector bounding boxes before they
    # are projected to sky regions.
    padding: Field[int] = Field(
        dtype=int,
        default=250,
        doc=(
            "Pad raw image bounding boxes with specified number of pixels "
            "when calculating their (conservatively large) region on the "
            "sky. Note that the config value for pixelMargin of the "
            "reference object loaders in meas_algorithms should be <= "
            "the value set here."
        ),
    )
class ComputeVisitRegionsTask(Task, metaclass=ABCMeta):
    """Abstract base class for the subtask of `DefineVisitsTask` that is
    responsible for extracting spatial regions for visits and visit+detector
    combinations.

    Subclasses should be registered with `ComputeVisitRegionsTask.registry` to
    enable use by `DefineVisitsTask`.

    Parameters
    ----------
    config : `ComputeVisitRegionsConfig`
        Configuration information.
    butler : `lsst.daf.butler.Butler`
        The butler to use.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    ConfigClass = ComputeVisitRegionsConfig

    _DefaultName = "computeVisitRegions"

    registry = makeRegistry(
        doc="Registry of algorithms for computing on-sky regions for visits and visit+detector combinations.",
        configBaseType=ComputeVisitRegionsConfig,
    )

    def __init__(self, config: ComputeVisitRegionsConfig, *, butler: Butler, **kwargs: Any):
        Task.__init__(self, config=config, **kwargs)
        self.butler = butler
        # Lazily-populated cache of instrument name -> Instrument instance;
        # see getInstrument.
        self.instrumentMap: dict[str, Instrument] = {}

    def getInstrument(self, instrumentName: str) -> Instrument:
        """Retrieve an `~lsst.obs.base.Instrument` associated with this
        instrument name.

        Parameters
        ----------
        instrumentName : `str`
            The name of the instrument.

        Returns
        -------
        instrument : `~lsst.obs.base.Instrument`
            The associated instrument object.

        Notes
        -----
        The result is cached.
        """
        cached = self.instrumentMap.get(instrumentName)
        if cached is None:
            # First lookup for this name: construct and remember it.
            cached = Instrument.fromName(instrumentName, self.butler.registry)
            self.instrumentMap[instrumentName] = cached
        return cached

    @abstractmethod
    def compute(
        self, visit: VisitDefinitionData, *, collections: Any = None
    ) -> tuple[Region, dict[int, Region]]:
        """Compute regions for the given visit and all detectors in that visit.

        Parameters
        ----------
        visit : `VisitDefinitionData`
            Struct describing the visit and the exposures associated with it.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        visitRegion : `lsst.sphgeom.Region`
            Region for the full visit.
        visitDetectorRegions : `dict` [ `int`, `lsst.sphgeom.Region` ]
            Dictionary mapping detector ID to the region for that detector.
            Should include all detectors in the visit.
        """
        raise NotImplementedError()
class DefineVisitsConfig(Config):
    """Configure visit definition."""

    # Registry field: selects the GroupExposuresTask subclass used to group
    # exposures into visits.
    groupExposures = GroupExposuresTask.registry.makeField(
        doc="Algorithm for grouping exposures into visits.",
        default="one-to-one-and-by-counter",
    )
    # Registry field: selects the ComputeVisitRegionsTask subclass used to
    # compute on-sky regions.
    computeVisitRegions = ComputeVisitRegionsTask.registry.makeField(
        # Fixed typo: "Algorithm from computing" -> "Algorithm for computing".
        doc="Algorithm for computing visit and visit+detector regions.",
        default="single-raw-wcs",
    )
    ignoreNonScienceExposures: Field[bool] = Field(
        doc=(
            "If True, silently ignore input exposures that do not have "
            "observation_type=SCIENCE. If False, raise an exception if one "
            "is encountered."
        ),
        dtype=bool,
        optional=False,
        default=True,
    )
    updateObsCoreTable: Field[bool] = Field(
        doc=(
            "If True, update exposure regions in obscore table after visits "
            "are defined. If False, do not update obscore table."
        ),
        dtype=bool,
        default=True,
    )
class DefineVisitsTask(Task):
    """Driver Task for defining visits (and their spatial regions) in Gen3
    Butler repositories.

    Parameters
    ----------
    config : `DefineVisitsConfig`
        Configuration for the task.
    butler : `~lsst.daf.butler.Butler`
        Writeable butler instance. Will be used to read `raw.wcs` and `camera`
        datasets and insert/sync dimension data.
    **kwargs
        Additional keyword arguments are forwarded to the `lsst.pipe.base.Task`
        constructor.

    Notes
    -----
    Each instance of `DefineVisitsTask` reads from / writes to the same Butler.
    Each invocation of `DefineVisitsTask.run` processes an independent group of
    exposures into one or more new visits, all belonging to the same visit
    system and instrument.

    The actual work of grouping exposures and computing regions is delegated
    to pluggable subtasks (`GroupExposuresTask` and `ComputeVisitRegionsTask`),
    respectively. The defaults are to create one visit for every exposure,
    and to use exactly one (arbitrary) detector-level raw dataset's WCS along
    with camera geometry to compute regions for all detectors. Other
    implementations can be created and configured for instruments for which
    these choices are unsuitable (e.g. because visits and exposures are not
    one-to-one, or because ``raw.wcs`` datasets for different detectors may not
    be consistent with camera geometry).

    It is not necessary in general to ingest all raws for an exposure before
    defining a visit that includes the exposure; this depends entirely on the
    `ComputeVisitRegionsTask` subclass used. For the default configuration,
    a single raw for each exposure is sufficient.

    Defining the same visit the same way multiple times (e.g. via multiple
    invocations of this task on the same exposures, with the same
    configuration) is safe, but it may be inefficient, as most of the work must
    be done before new visits can be compared to existing visits.
    """

    def __init__(self, config: DefineVisitsConfig, *, butler: Butler, **kwargs: Any):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        self.butler = butler
        self.universe = self.butler.dimensions
        self.progress = Progress("obs.base.DefineVisitsTask")
        self.makeSubtask("groupExposures")
        # The region subtask needs butler access to read raw WCS and camera.
        self.makeSubtask("computeVisitRegions", butler=self.butler)

    def _reduce_kwargs(self) -> dict:
        # Add extra parameters to pickle so unpickling can reconstruct the
        # task with the same butler.
        return dict(**super()._reduce_kwargs(), butler=self.butler)

    ConfigClass: ClassVar[type[Config]] = DefineVisitsConfig

    _DefaultName: ClassVar[str] = "defineVisits"

    # Declared for type checkers: filled in by Task/makeSubtask machinery.
    config: DefineVisitsConfig
    groupExposures: GroupExposuresTask
    computeVisitRegions: ComputeVisitRegionsTask

    def _buildVisitRecords(
        self, definition: VisitDefinitionData, *, collections: Any = None
    ) -> _VisitRecords:
        """Build the DimensionRecords associated with a visit.

        Parameters
        ----------
        definition : `VisitDefinitionData`
            Struct with identifiers for the visit and records for its
            constituent exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        records : `_VisitRecords`
            Struct containing DimensionRecords for the visit, including
            associated dimension elements.
        """
        dimension = self.universe["visit"]

        # Some registries support additional items.
        supported = {meta.name for meta in dimension.metadata}

        # Compute all regions.
        visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(
            definition, collections=collections
        )
        # Aggregate other exposure quantities.  The visit timespan covers the
        # earliest begin to the latest end of its exposures.
        timespan = Timespan(
            begin=_reduceOrNone(min, (e.timespan.begin for e in definition.exposures)),
            end=_reduceOrNone(max, (e.timespan.end for e in definition.exposures)),
        )
        exposure_time = _reduceOrNone(operator.add, (e.exposure_time for e in definition.exposures))
        # These fields are only propagated if every exposure agrees on them.
        physical_filter = _reduceOrNone(_value_if_equal, (e.physical_filter for e in definition.exposures))
        target_name = _reduceOrNone(_value_if_equal, (e.target_name for e in definition.exposures))
        science_program = _reduceOrNone(_value_if_equal, (e.science_program for e in definition.exposures))

        # observing day for a visit is defined by the earliest observation
        # of the visit
        observing_day = _reduceOrNone(min, (e.day_obs for e in definition.exposures))
        observation_reason = _reduceOrNone(
            _value_if_equal, (e.observation_reason for e in definition.exposures)
        )
        if observation_reason is None:
            # Be explicit about there being multiple reasons
            # MyPy can't really handle DimensionRecord fields as
            # DimensionRecord classes are dynamically defined; easiest to just
            # shush it when it complains.
            observation_reason = "various"  # type: ignore

        # Use the mean zenith angle as an approximation
        zenith_angle = _reduceOrNone(operator.add, (e.zenith_angle for e in definition.exposures))
        if zenith_angle is not None:
            zenith_angle /= len(definition.exposures)

        # New records that may not be supported.
        extras: dict[str, Any] = {}
        if "seq_num" in supported:
            extras["seq_num"] = _reduceOrNone(min, (e.seq_num for e in definition.exposures))
        if "azimuth" in supported:
            # Must take into account 0/360 problem.
            extras["azimuth"] = _calc_mean_angle([e.azimuth for e in definition.exposures])

        # visit_system handling changed. This is the logic for visit/exposure
        # that has support for seq_start/seq_end.  The presence of "seq_num"
        # in the visit metadata is used as a proxy for the newer universe.
        if "seq_num" in supported:
            # Map visit to exposure.
            visit_definition = [
                self.universe["visit_definition"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    exposure=exposure.id,
                )
                for exposure in definition.exposures
            ]

            # Map visit to visit system.
            visit_system_membership = []
            for visit_system in self.groupExposures.getVisitSystems():
                if visit_system in definition.visit_systems:
                    record = self.universe["visit_system_membership"].RecordClass(
                        instrument=definition.instrument,
                        visit=definition.id,
                        visit_system=visit_system.value,
                    )
                    visit_system_membership.append(record)

        else:
            # The old approach can only handle one visit system at a time.
            # If we have been configured with multiple options, prefer the
            # one-to-one.
            visit_systems = self.groupExposures.getVisitSystems()
            if len(visit_systems) > 1:
                one_to_one = VisitSystem.from_name("one-to-one")
                if one_to_one not in visit_systems:
                    raise ValueError(
                        f"Multiple visit systems specified ({visit_systems}) for use with old"
                        " dimension universe but unable to find one-to-one."
                    )
                visit_system = one_to_one
            else:
                visit_system = visit_systems.pop()

            extras["visit_system"] = visit_system.value

            # The old visit_definition included visit system.
            visit_definition = [
                self.universe["visit_definition"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    exposure=exposure.id,
                    visit_system=visit_system.value,
                )
                for exposure in definition.exposures
            ]

            # This concept does not exist in old schema.
            visit_system_membership = []

        # Construct the actual DimensionRecords.
        return _VisitRecords(
            visit=dimension.RecordClass(
                instrument=definition.instrument,
                id=definition.id,
                name=definition.name,
                physical_filter=physical_filter,
                target_name=target_name,
                science_program=science_program,
                observation_reason=observation_reason,
                day_obs=observing_day,
                zenith_angle=zenith_angle,
                exposure_time=exposure_time,
                timespan=timespan,
                region=visitRegion,
                # TODO: no seeing value in exposure dimension records, so we
                # can't set that here. But there are many other columns that
                # both dimensions should probably have as well.
                **extras,
            ),
            visit_definition=visit_definition,
            visit_system_membership=visit_system_membership,
            visit_detector_region=[
                self.universe["visit_detector_region"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    detector=detectorId,
                    region=detectorRegion,
                )
                for detectorId, detectorRegion in visitDetectorRegions.items()
            ],
        )

    def run(
        self,
        dataIds: Iterable[DataId],
        *,
        collections: str | None = None,
        update_records: bool = False,
        incremental: bool = False,
    ) -> None:
        """Add visit definitions to the registry for the given exposures.

        Parameters
        ----------
        dataIds : `Iterable` [ `dict` or `~lsst.daf.butler.DataCoordinate` ]
            Exposure-level data IDs. These must all correspond to the same
            instrument, and are expected to be on-sky science exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        update_records : `bool`, optional
            If `True` (`False` is default), update existing visit records that
            conflict with the new ones instead of rejecting them (and when this
            occurs, update visit_detector_region as well). THIS IS AN ADVANCED
            OPTION THAT SHOULD ONLY BE USED TO FIX REGIONS AND/OR METADATA THAT
            ARE KNOWN TO BE BAD, AND IT CANNOT BE USED TO REMOVE EXPOSURES OR
            DETECTORS FROM A VISIT.
        incremental : `bool`, optional
            If `True` indicate that exposures are being ingested incrementally
            and visit definition will be run on partial visits. This will
            force ``update_records`` to `True`. If there is any risk that
            files are being ingested incrementally it is critical that this
            parameter is set to `True` and not to rely on ``update_records``.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if a visit ID conflict is detected and the existing visit
            differs from the new one.
        """
        # Normalize, expand, and deduplicate data IDs.
        self.log.info("Preprocessing data IDs.")
        dimensions = DimensionGraph(self.universe, names=["exposure"])
        data_id_set: set[DataCoordinate] = {
            self.butler.registry.expandDataId(d, graph=dimensions) for d in dataIds
        }
        if not data_id_set:
            raise RuntimeError("No exposures given.")
        if incremental:
            # Partial visits may need their existing records amended.
            update_records = True
        # Extract exposure DimensionRecords, check that there's only one
        # instrument in play, and check for non-science exposures.
        exposures = []
        instruments = set()
        for dataId in data_id_set:
            record = dataId.records["exposure"]
            assert record is not None, "Guaranteed by expandDataIds call earlier."
            # Missing tracking coordinates or sky angle means the exposure
            # was not taken on sky.
            if record.tracking_ra is None or record.tracking_dec is None or record.sky_angle is None:
                if self.config.ignoreNonScienceExposures:
                    continue
                else:
                    raise RuntimeError(
                        f"Input exposure {dataId} has observation_type "
                        f"{record.observation_type}, but is not on sky."
                    )
            instruments.add(dataId["instrument"])
            exposures.append(record)
        if not exposures:
            self.log.info("No on-sky exposures found after filtering.")
            return
        if len(instruments) > 1:
            raise RuntimeError(
                "All data IDs passed to DefineVisitsTask.run must be "
                f"from the same instrument; got {instruments}."
            )
        (instrument,) = instruments
        # Ensure the visit_system our grouping algorithm uses is in the
        # registry, if it wasn't already.
        visitSystems = self.groupExposures.getVisitSystems()
        for visitSystem in visitSystems:
            self.log.info("Registering visit_system %d: %s.", visitSystem.value, visitSystem)
            self.butler.registry.syncDimensionData(
                "visit_system",
                {"instrument": instrument, "id": visitSystem.value, "name": str(visitSystem)},
            )

        # In true incremental we will be given the second snap on its
        # own on the assumption that the previous snap was already handled.
        # For correct grouping we need access to the other exposures in the
        # visit.
        if incremental:
            exposures.extend(self.groupExposures.find_missing(exposures, self.butler.registry))

        # Group exposures into visits, delegating to subtask.
        self.log.info("Grouping %d exposure(s) into visits.", len(exposures))
        definitions = list(self.groupExposures.group(exposures))
        # Iterate over visits, compute regions, and insert dimension data, one
        # transaction per visit. If a visit already exists, we skip all other
        # inserts.
        self.log.info("Computing regions and other metadata for %d visit(s).", len(definitions))
        for visitDefinition in self.progress.wrap(
            definitions, total=len(definitions), desc="Computing regions and inserting visits"
        ):
            visitRecords = self._buildVisitRecords(visitDefinition, collections=collections)
            with self.butler.registry.transaction():
                # syncDimensionData returns True for insert, a non-empty dict
                # for update, and a falsey value when nothing changed.
                inserted_or_updated = self.butler.registry.syncDimensionData(
                    "visit",
                    visitRecords.visit,
                    update=update_records,
                )
                if inserted_or_updated:
                    if inserted_or_updated is True:
                        # This is a new visit, not an update to an existing
                        # one, so insert visit definition.
                        # We don't allow visit definitions to change even when
                        # asked to update, because we'd have to delete the old
                        # visit_definitions first and also worry about what
                        # this does to datasets that already use the visit.
                        self.butler.registry.insertDimensionData(
                            "visit_definition", *visitRecords.visit_definition
                        )
                        if visitRecords.visit_system_membership:
                            self.butler.registry.insertDimensionData(
                                "visit_system_membership", *visitRecords.visit_system_membership
                            )
                    elif incremental and len(visitRecords.visit_definition) > 1:
                        # The visit record was modified. This could happen
                        # if a multi-snap visit was redefined with an
                        # additional snap so play it safe and allow for the
                        # visit definition to be updated. We use update=False
                        # here since there should not be any rows updated,
                        # just additional rows added. update=True does not work
                        # correctly with multiple records. In incremental mode
                        # we assume that the caller wants the visit definition
                        # to be updated and has no worries about provenance
                        # with the previous definition.
                        for definition in visitRecords.visit_definition:
                            self.butler.registry.syncDimensionData("visit_definition", definition)

                    # [Re]Insert visit_detector_region records for both inserts
                    # and updates, because we do allow updating to affect the
                    # region calculations.
                    self.butler.registry.insertDimensionData(
                        "visit_detector_region", *visitRecords.visit_detector_region, replace=update_records
                    )

                    # Update obscore exposure records with region information
                    # from corresponding visits.
                    if self.config.updateObsCoreTable:
                        if obscore_manager := self.butler.registry.obsCoreTableManager:
                            obscore_updates: list[tuple[int, int, Region]] = []
                            exposure_ids = [rec.exposure for rec in visitRecords.visit_definition]
                            for record in visitRecords.visit_detector_region:
                                obscore_updates += [
                                    (exposure, record.detector, record.region) for exposure in exposure_ids
                                ]
                            if obscore_updates:
                                obscore_manager.update_exposure_regions(
                                    cast(str, instrument), obscore_updates
                                )
# Generic type variable used by the module-private reduction helpers below.
_T = TypeVar("_T")
def _reduceOrNone(func: Callable[[_T, _T], _T | None], iterable: Iterable[_T | None]) -> _T | None:
    """Apply a binary function to pairs of elements in an iterable until a
    single value is returned, but return `None` if any element is `None` or
    there are no elements.

    Parameters
    ----------
    func : `Callable`
        Binary reduction function.  A `None` result from ``func`` means
        "no consistent value" and aborts the whole reduction.
    iterable : `Iterable`
        The values to reduce.  Any `None` element aborts the reduction.

    Returns
    -------
    reduced : `object` or `None`
        The reduced value, or `None` if the iterable was empty, contained a
        `None` element, or ``func`` returned `None` at any step.
    """
    r: _T | None = None
    have_first = False
    for v in iterable:
        if v is None:
            return None
        if not have_first:
            r = v
            have_first = True
        else:
            r = func(r, v)  # type: ignore[arg-type]
            if r is None:
                # Bug fix: previously a None result from func was treated as
                # "accumulator unset", so the next element silently re-seeded
                # the reduction (e.g. values A, B, A reduced with
                # _value_if_equal yielded A instead of None).
                return None
    return r
def _value_if_equal(a: _T, b: _T) -> _T | None:
    """Return either argument if they are equal, or `None` if they are not."""
    if a == b:
        return a
    return None
def _calc_mean_angle(angles: list[float]) -> float:
    """Calculate the mean angle, taking into account 0/360 wrapping.

    Parameters
    ----------
    angles : `list` [`float`]
        Angles to average together, in degrees.

    Returns
    -------
    average : `float`
        Average angle in degrees.
    """
    # A single angle is its own mean; skip the trigonometry.
    if len(angles) == 1:
        return angles[0]

    # Represent each angle as a point on the unit circle in the complex
    # plane, average those points, and take the phase of the centroid so
    # that e.g. 350 and 10 average to 0 rather than 180.
    unit_vectors = (cmath.rect(1.0, math.radians(angle)) for angle in angles)
    centroid = sum(unit_vectors) / len(angles)
    return math.degrees(cmath.phase(centroid))
class _GroupExposuresOneToOneConfig(GroupExposuresConfig):
    """Configuration for the one-to-one exposure grouping algorithm.

    Both fields are deprecated and no longer read; the visit system is now
    identified via the `VisitSystem` enumeration.
    """

    visitSystemId: Field[int] = Field(
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        dtype=int,
        default=0,
        deprecated="No longer used. Replaced by enum.",
    )
    visitSystemName: Field[str] = Field(
        doc="String name of the visit_system implemented by this grouping algorithm.",
        dtype=str,
        default="one-to-one",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("one-to-one", GroupExposuresTask.registry)
class _GroupExposuresOneToOneTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that simply defines one visit for each
    exposure, reusing the exposures identifiers for the visit.
    """

    ConfigClass = _GroupExposuresOneToOneConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        # With one visit per exposure there is never a missing snap.
        return []

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        # Each exposure is its own singleton group, keyed by exposure ID.
        singleton_groups: dict[Any, list[DimensionRecord]] = {}
        for exposure_record in exposures:
            singleton_groups[exposure_record.id] = [exposure_record]
        return singleton_groups

    def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        systems = {VisitSystem.from_name("one-to-one")}
        for exposure_record in exposures:
            # Reuse the exposure's own ID and obs_id for the visit.
            yield VisitDefinitionData(
                instrument=exposure_record.instrument,
                id=exposure_record.id,
                name=exposure_record.obs_id,
                exposures=[exposure_record],
                visit_systems=systems,
            )

    def getVisitSystems(self) -> set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        return set(VisitSystem.from_names(["one-to-one"]))
class _GroupExposuresByGroupMetadataConfig(GroupExposuresConfig):
    """Configuration for the group-metadata exposure grouping algorithm.

    Both fields are deprecated and no longer read; the visit system is now
    identified via the `VisitSystem` enumeration.
    """

    visitSystemId: Field[int] = Field(
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        dtype=int,
        default=1,
        deprecated="No longer used. Replaced by enum.",
    )
    visitSystemName: Field[str] = Field(
        doc="String name of the visit_system implemented by this grouping algorithm.",
        dtype=str,
        default="by-group-metadata",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("by-group-metadata", GroupExposuresTask.registry)
class _GroupExposuresByGroupMetadataTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that groups on exposure.group_name and
    takes the visit ID from exposure.group_id.

    This algorithm _assumes_ exposure.group_id (generally populated from
    `astro_metadata_translator.ObservationInfo.visit_id`) is not just unique,
    but disjoint from all `ObservationInfo.exposure_id` values - if it isn't,
    it will be impossible to ever use both this grouping algorithm and the
    one-to-one algorithm for a particular camera in the same data repository.
    """

    ConfigClass = _GroupExposuresByGroupMetadataConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        """Return exposures that share a group with the given ones but are
        absent from them, according to the registry.
        """
        missing: list[DimensionRecord] = []
        for members in self.group_exposures(exposures).values():
            exemplar = members[0]
            # The expected number of exposures per visit is unknown, so the
            # registry has to be consulted for every group.
            known = set(
                registry.queryDimensionRecords(
                    "exposure",
                    where="exposure.group_name = group",
                    bind={"group": exemplar.group_name},
                    instrument=exemplar.instrument,
                )
            )
            missing.extend(known - set(members))
        return missing

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        """Partition the given exposures by their group_name."""
        by_group: defaultdict[Any, list[DimensionRecord]] = defaultdict(list)
        for exp in exposures:
            by_group[exp.group_name].append(exp)
        return by_group

    def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        systems = {VisitSystem.from_name("by-group-metadata")}
        for name, members in self.group_exposures(exposures).items():
            exemplar = members[0]
            group_id = exemplar.group_id
            assert all(
                e.group_id == group_id for e in members
            ), "Grouping by exposure.group_name does not yield consistent group IDs"
            yield VisitDefinitionData(
                instrument=exemplar.instrument,
                id=group_id,
                name=name,
                exposures=members,
                visit_systems=systems,
            )

    def getVisitSystems(self) -> set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        return set(VisitSystem.from_names(["by-group-metadata"]))
class _GroupExposuresByCounterAndExposuresConfig(GroupExposuresConfig):
    # Both fields are deprecated (see their ``deprecated`` text) and are
    # retained only so existing configurations continue to load.
    visitSystemId: Field[int] = Field(
        dtype=int,
        default=2,
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        deprecated="No longer used. Replaced by enum.",
    )
    visitSystemName: Field[str] = Field(
        dtype=str,
        default="by-counter-and-exposures",
        doc="String name of the visit_system implemented by this grouping algorithm.",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("one-to-one-and-by-counter", GroupExposuresTask.registry)
class _GroupExposuresByCounterAndExposuresTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that uses the sequence start and
    sequence end metadata to create multi-exposure visits, but also
    creates one-to-one visits.

    This algorithm uses the exposure.seq_start and
    exposure.seq_end fields to collect related snaps.
    It also groups single exposures.
    """

    ConfigClass = _GroupExposuresByCounterAndExposuresConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        """Analyze the exposures and return relevant exposures known to
        registry.

        Parameters
        ----------
        exposures : `list` [ `DimensionRecord` ]
            Exposure records to analyze.
        registry : `lsst.daf.butler.Registry`
            Registry queried for exposures in the same
            seq_start/seq_end range that were not supplied.

        Returns
        -------
        missing : `list` [ `DimensionRecord` ]
            Exposures that share a (seq_start, seq_end) range with the given
            ones but were absent from ``exposures``.
        """
        groups = self.group_exposures(exposures)
        missing_exposures: list[DimensionRecord] = []
        for exposures_in_group in groups.values():
            # All members of a group share seq_start/seq_end, so any member
            # can be used to describe the group in the query below.
            sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
            first = sorted_exposures[0]

            # Only need to look for the seq_nums that we don't already have.
            seq_nums = set(range(first.seq_start, first.seq_end + 1))
            seq_nums.difference_update({exp.seq_num for exp in sorted_exposures})

            if seq_nums:
                # Missing something. Check registry.
                records = list(
                    registry.queryDimensionRecords(
                        "exposure",
                        where="exposure.seq_start = seq_start AND exposure.seq_end = seq_end AND "
                        "exposure.seq_num IN (seq_nums)",
                        bind={"seq_start": first.seq_start, "seq_end": first.seq_end, "seq_nums": seq_nums},
                        instrument=first.instrument,
                    )
                )
                missing_exposures.extend(records)

        return missing_exposures

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        """Group exposures by their (day_obs, seq_start, seq_end) triple."""
        groups = defaultdict(list)
        for exposure in exposures:
            groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure)
        return groups

    def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        system_one_to_one = VisitSystem.from_name("one-to-one")
        system_seq_start_end = VisitSystem.from_name("by-seq-start-end")

        groups = self.group_exposures(exposures)
        for visit_key, exposures_in_group in groups.items():
            instrument = exposures_in_group[0].instrument

            # It is possible that the first exposure in a visit has not
            # been ingested. This can be determined and if that is the case
            # we can not reliably define the multi-exposure visit.
            skip_multi = False
            sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
            first = sorted_exposures.pop(0)
            if first.seq_num != first.seq_start:
                # Special case seq_num == 0 since that implies that the
                # instrument has no counters and therefore no multi-exposure
                # visits.
                if first.seq_num != 0:
                    self.log.warning(
                        "First exposure for visit %s is not present. Skipping the multi-snap definition.",
                        visit_key,
                    )
                    skip_multi = True

            multi_exposure = False
            if first.seq_start != first.seq_end:
                # This is a multi-exposure visit regardless of the number
                # of exposures present.
                multi_exposure = True

            # Define the one-to-one visits.
            for exposure in exposures_in_group:
                # Default is to use the exposure ID and name unless
                # this is the first exposure in a multi-exposure visit.
                visit_name = exposure.obs_id
                visit_id = exposure.id
                visit_systems = {system_one_to_one}

                if not multi_exposure:
                    # This is also a by-counter visit.
                    # It will use the same visit_name and visit_id.
                    visit_systems.add(system_seq_start_end)

                elif not skip_multi and exposure == first:
                    # This is the first legitimate exposure in a multi-exposure
                    # visit. It therefore needs a modified visit name and ID
                    # so it does not clash with the multi-exposure visit
                    # definition.
                    # Prefixing the decimal ID with "9" always yields a value
                    # different from first.id (used by the multi-exposure
                    # visit below), since 9 followed by N digits > N digits.
                    visit_name = f"{visit_name}_first"
                    visit_id = int(f"9{visit_id}")

                yield VisitDefinitionData(
                    instrument=instrument,
                    id=visit_id,
                    name=visit_name,
                    exposures=[exposure],
                    visit_systems=visit_systems,
                )

            # Multi-exposure visit.
            if not skip_multi and multi_exposure:
                # Define the visit using the first exposure
                visit_name = first.obs_id
                visit_id = first.id

                yield VisitDefinitionData(
                    instrument=instrument,
                    id=visit_id,
                    name=visit_name,
                    exposures=exposures_in_group,
                    visit_systems={system_seq_start_end},
                )

    def getVisitSystems(self) -> set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        # Using a Config for this is difficult because what this grouping
        # algorithm is doing is using two visit systems.
        # One is using metadata (but not by-group) and the other is the
        # one-to-one. For now hard-code in class.
        return set(VisitSystem.from_names(["one-to-one", "by-seq-start-end"]))
class _ComputeVisitRegionsFromSingleRawWcsConfig(ComputeVisitRegionsConfig):
    # Configuration for _ComputeVisitRegionsFromSingleRawWcsTask.
    mergeExposures: Field[bool] = Field(
        doc=(
            "If True, merge per-detector regions over all exposures in a "
            "visit (via convex hull) instead of using the first exposure and "
            "assuming its regions are valid for all others."
        ),
        dtype=bool,
        default=False,
    )
    detectorId: Field[int | None] = Field(
        doc=(
            # Fixed: the original help text had an unbalanced parenthesis.
            "Load the WCS for the detector with this ID. If None, use an "
            "arbitrary detector (the first found in a query of the data "
            "repository for each exposure, or all exposures if "
            "mergeExposures is True)."
        ),
        dtype=int,
        optional=True,
        default=None,
    )
    requireVersionedCamera: Field[bool] = Field(
        doc=(
            # Fixed typo: "version camera geometry" -> "versioned camera
            # geometry", matching the versioned-camera check in the task.
            "If True, raise LookupError if versioned camera geometry cannot "
            "be loaded for an exposure. If False, use the nominal camera "
            "from the Instrument class instead."
        ),
        dtype=bool,
        optional=False,
        default=False,
    )
1148@registerConfigurable("single-raw-wcs", ComputeVisitRegionsTask.registry)
1149class _ComputeVisitRegionsFromSingleRawWcsTask(ComputeVisitRegionsTask):
1150 """A visit region calculator that uses a single raw WCS and a camera to
1151 project the bounding boxes of all detectors onto the sky, relating
1152 different detectors by their positions in focal plane coordinates.
1154 Notes
1155 -----
1156 Most instruments should have their raw WCSs determined from a combination
1157 of boresight angle, rotator angle, and camera geometry, and hence this
1158 algorithm should produce stable results regardless of which detector the
1159 raw corresponds to. If this is not the case (e.g. because a per-file FITS
1160 WCS is used instead), either the ID of the detector should be fixed (see
1161 the ``detectorId`` config parameter) or a different algorithm used.
1162 """
1164 ConfigClass = _ComputeVisitRegionsFromSingleRawWcsConfig
1165 config: _ComputeVisitRegionsFromSingleRawWcsConfig
    def computeExposureBounds(
        self, exposure: DimensionRecord, *, collections: Any = None
    ) -> dict[int, list[UnitVector3d]]:
        """Compute the lists of unit vectors on the sphere that correspond to
        the sky positions of detector corners.

        Parameters
        ----------
        exposure : `DimensionRecord`
            Dimension record for the exposure.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        bounds : `dict`
            Dictionary mapping detector ID to a list of unit vectors on the
            sphere representing that detector's corners projected onto the sky.

        Raises
        ------
        LookupError
            Raised if ``requireVersionedCamera`` is set but only an unversioned
            camera was found, or if no ``raw.wcs`` datasets exist when
            boresight information is unavailable.
        TypeError
            Raised if the raw formatter does not provide
            ``makeRawSkyWcsFromBoresight``.
        """
        if collections is None:
            collections = self.butler.collections
        camera, versioned = loadCamera(self.butler, exposure.dataId, collections=collections)
        if not versioned and self.config.requireVersionedCamera:
            raise LookupError(f"No versioned camera found for exposure {exposure.dataId}.")

        # Derive WCS from boresight information -- if available in registry
        use_registry = True
        try:
            orientation = lsst.geom.Angle(exposure.sky_angle, lsst.geom.degrees)
            radec = lsst.geom.SpherePoint(
                lsst.geom.Angle(exposure.tracking_ra, lsst.geom.degrees),
                lsst.geom.Angle(exposure.tracking_dec, lsst.geom.degrees),
            )
        except AttributeError:
            # The exposure record lacks boresight fields; fall back to
            # reading a persisted raw.wcs dataset below.
            use_registry = False

        if use_registry:
            if self.config.detectorId is None:
                # Any detector will do; take the first ID the camera reports.
                detectorId = next(camera.getIdIter())
            else:
                detectorId = self.config.detectorId
            wcsDetector = camera[detectorId]

            # Ask the raw formatter to create the relevant WCS
            # This allows flips to be taken into account
            instrument = self.getInstrument(exposure.instrument)
            rawFormatter = instrument.getRawFormatter({"detector": detectorId})

            try:
                wcs = rawFormatter.makeRawSkyWcsFromBoresight(radec, orientation, wcsDetector)  # type: ignore
            except AttributeError:
                # Deliberately suppress the AttributeError context: the
                # missing-method condition is fully described here.
                raise TypeError(
                    f"Raw formatter is {get_full_type_name(rawFormatter)} but visit"
                    " definition requires it to support 'makeRawSkyWcsFromBoresight'"
                ) from None
        else:
            if self.config.detectorId is None:
                wcsRefsIter = self.butler.registry.queryDatasets(
                    "raw.wcs", dataId=exposure.dataId, collections=collections
                )
                # NOTE(review): this relies on the query-results object being
                # falsy when the query matched nothing -- confirm that
                # ``queryDatasets`` results implement ``__bool__`` that way.
                if not wcsRefsIter:
                    raise LookupError(
                        f"No raw.wcs datasets found for data ID {exposure.dataId} "
                        f"in collections {collections}."
                    )
                wcsRef = next(iter(wcsRefsIter))
                wcsDetector = camera[wcsRef.dataId["detector"]]
                wcs = self.butler.get(wcsRef)
            else:
                wcsDetector = camera[self.config.detectorId]
                wcs = self.butler.get(
                    "raw.wcs",
                    dataId=exposure.dataId,
                    detector=self.config.detectorId,
                    collections=collections,
                )
        # Compose focal plane -> reference-detector pixels -> sky, so every
        # detector can be projected through the single WCS obtained above.
        fpToSky = wcsDetector.getTransform(FOCAL_PLANE, PIXELS).then(wcs.getTransform())
        bounds = {}
        for detector in camera:
            pixelsToSky = detector.getTransform(PIXELS, FOCAL_PLANE).then(fpToSky)
            # Dilate the detector bounding box by the configured padding
            # before projecting its corners.
            pixCorners = Box2D(detector.getBBox().dilatedBy(self.config.padding)).getCorners()
            bounds[detector.getId()] = [
                skyCorner.getVector() for skyCorner in pixelsToSky.applyForward(pixCorners)
            ]
        return bounds
1256 def compute(
1257 self, visit: VisitDefinitionData, *, collections: Any = None
1258 ) -> tuple[Region, dict[int, Region]]:
1259 # Docstring inherited from ComputeVisitRegionsTask.
1260 if self.config.mergeExposures:
1261 detectorBounds: dict[int, list[UnitVector3d]] = defaultdict(list)
1262 for exposure in visit.exposures:
1263 exposureDetectorBounds = self.computeExposureBounds(exposure, collections=collections)
1264 for detectorId, bounds in exposureDetectorBounds.items():
1265 detectorBounds[detectorId].extend(bounds)
1266 else:
1267 detectorBounds = self.computeExposureBounds(visit.exposures[0], collections=collections)
1268 visitBounds = []
1269 detectorRegions = {}
1270 for detectorId, bounds in detectorBounds.items():
1271 detectorRegions[detectorId] = ConvexPolygon.convexHull(bounds)
1272 visitBounds.extend(bounds)
1273 return ConvexPolygon.convexHull(visitBounds), detectorRegions