Coverage for python/lsst/obs/base/defineVisits.py: 26%
399 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-22 09:59 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = [
25 "DefineVisitsConfig",
26 "DefineVisitsTask",
27 "GroupExposuresConfig",
28 "GroupExposuresTask",
29 "VisitDefinitionData",
30 "VisitSystem",
31]
33import cmath
34import dataclasses
35import enum
36import math
37import operator
38from abc import ABCMeta, abstractmethod
39from collections import defaultdict
40from typing import (
41 Any,
42 Callable,
43 ClassVar,
44 Dict,
45 FrozenSet,
46 Iterable,
47 List,
48 Optional,
49 Set,
50 Tuple,
51 Type,
52 TypeVar,
53 cast,
54)
56import lsst.geom
57from lsst.afw.cameraGeom import FOCAL_PLANE, PIXELS
58from lsst.daf.butler import (
59 Butler,
60 DataCoordinate,
61 DataId,
62 DimensionGraph,
63 DimensionRecord,
64 Progress,
65 Timespan,
66)
67from lsst.geom import Box2D
68from lsst.pex.config import Config, Field, makeRegistry, registerConfigurable
69from lsst.pipe.base import Instrument, Task
70from lsst.sphgeom import ConvexPolygon, Region, UnitVector3d
71from lsst.utils.introspection import get_full_type_name
73from ._instrument import loadCamera
class VisitSystem(enum.Enum):
    """Enumeration used to label different visit systems."""

    ONE_TO_ONE = 0
    """Each exposure is assigned to its own visit."""

    BY_GROUP_METADATA = 1
    """Visit membership is defined by the value of the exposure.group_id."""

    BY_SEQ_START_END = 2
    """Visit membership is defined by the values of the ``exposure.day_obs``,
    ``exposure.seq_start``, and ``exposure.seq_end`` values.
    """

    @classmethod
    def all(cls) -> FrozenSet[VisitSystem]:
        """Return a `frozenset` containing all members."""
        # Iterating the enum class yields every (non-alias) member.
        return frozenset(cls)

    @classmethod
    def from_name(cls, external_name: str) -> VisitSystem:
        """Construct the enumeration from given name."""
        # External names use dashes and are case-insensitive; member names
        # are upper case with underscores.
        key = external_name.upper().replace("-", "_")
        if key not in cls.__members__:
            raise KeyError(f"Visit system named '{external_name}' not known.")
        return cls.__members__[key]

    @classmethod
    def from_names(cls, names: Optional[Iterable[str]]) -> FrozenSet[VisitSystem]:
        """Return a `frozenset` of all the visit systems matching the supplied
        names.

        Parameters
        ----------
        names : iterable of `str`, or `None`
            Names of visit systems. Case insensitive. If `None` or empty, all
            the visit systems are returned.

        Returns
        -------
        systems : `frozenset` of `VisitSystem`
            The matching visit systems.
        """
        if not names:
            return cls.all()
        return frozenset(cls.from_name(name) for name in names)

    def __str__(self) -> str:
        # Present the member in its external, dashed, lower-case form.
        return self.name.lower().replace("_", "-")
@dataclasses.dataclass
class VisitDefinitionData:
    """Struct representing a group of exposures that will be used to define a
    visit.
    """

    # NOTE: field order matters here; the generated __init__ accepts these
    # positionally in declaration order.

    instrument: str
    """Name of the instrument this visit will be associated with.
    """

    id: int
    """Integer ID of the visit.

    This must be unique across all visit systems for the instrument.
    """

    name: str
    """String name for the visit.

    This must be unique across all visit systems for the instrument.
    """

    visit_systems: Set[VisitSystem]
    """All the visit systems associated with this visit."""

    exposures: List[DimensionRecord] = dataclasses.field(default_factory=list)
    """Dimension records for the exposures that are part of this visit.
    """
@dataclasses.dataclass
class _VisitRecords:
    """Struct containing the dimension records associated with a visit."""

    visit: DimensionRecord
    """Record for the 'visit' dimension itself.
    """

    visit_definition: List[DimensionRecord]
    """Records for 'visit_definition', which relates 'visit' to 'exposure'.
    """

    visit_detector_region: List[DimensionRecord]
    """Records for 'visit_detector_region', which associates the combination
    of a 'visit' and a 'detector' with a region on the sky.
    """

    visit_system_membership: List[DimensionRecord]
    """Records relating visits to an associated visit system."""
class GroupExposuresConfig(Config):
    # The base grouping task needs no configuration options of its own;
    # concrete grouping algorithms subclass this to add their fields.
    pass
class GroupExposuresTask(Task, metaclass=ABCMeta):
    """Abstract base class for the subtask of `DefineVisitsTask` that is
    responsible for grouping exposures into visits.

    Subclasses should be registered with `GroupExposuresTask.registry` to
    enable use by `DefineVisitsTask`, and should generally correspond to a
    particular 'visit_system' dimension value. They are also responsible for
    defining visit IDs and names that are unique across all visit systems in
    use by an instrument.

    Parameters
    ----------
    config : `GroupExposuresConfig`
        Configuration information.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    ConfigClass = GroupExposuresConfig

    _DefaultName = "groupExposures"

    registry = makeRegistry(
        doc="Registry of algorithms for grouping exposures into visits.",
        configBaseType=GroupExposuresConfig,
    )

    def __init__(self, config: GroupExposuresConfig, **kwargs: Any):
        super().__init__(config=config, **kwargs)

    @abstractmethod
    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        """Determine, if possible, which exposures might be missing.

        Parameters
        ----------
        exposures : `list` of `lsst.daf.butler.DimensionRecord`
            The exposure records to analyze.
        registry : `lsst.daf.butler.Registry`
            A butler registry that contains these exposure records.

        Returns
        -------
        missing : `list` of `lsst.daf.butler.DimensionRecord`
            Exposure records present in the registry that are related to the
            given exposures and deemed relevant, but absent from the input
            list.

        Notes
        -----
        Only some grouping schemes are able to find missing exposures. It
        is acceptable to return an empty list.
        """
        raise NotImplementedError()

    @abstractmethod
    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        """Group the exposures in a way most natural for this visit definition.

        Parameters
        ----------
        exposures : `list` of `lsst.daf.butler.DimensionRecord`
            The exposure records to group.

        Returns
        -------
        groups : `dict` [Any, `list` of `DimensionRecord`]
            Groupings of exposure records. The key type is relevant to the
            specific visit definition and could be a string or a tuple.
        """
        raise NotImplementedError()

    @abstractmethod
    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        """Group the given exposures into visits.

        Parameters
        ----------
        exposures : `list` [ `DimensionRecord` ]
            DimensionRecords (for the 'exposure' dimension) describing the
            exposures to group.

        Returns
        -------
        visits : `Iterable` [ `VisitDefinitionData` ]
            Structs identifying the visits and the exposures associated with
            them. This may be an iterator or a container.
        """
        raise NotImplementedError()

    def getVisitSystems(self) -> Set[VisitSystem]:
        """Return identifiers for the 'visit_system' dimension this
        algorithm implements.

        Returns
        -------
        visit_systems : `Set` [`VisitSystem`]
            The visit systems used by this algorithm.
        """
        # Not marked @abstractmethod, but concrete subclasses are expected
        # to override this.
        raise NotImplementedError()
class ComputeVisitRegionsConfig(Config):
    """Configuration for `ComputeVisitRegionsTask` and its subclasses."""

    padding: Field[int] = Field(
        dtype=int,
        default=250,
        doc=(
            "Pad raw image bounding boxes with specified number of pixels "
            "when calculating their (conservatively large) region on the "
            "sky. Note that the config value for pixelMargin of the "
            "reference object loaders in meas_algorithms should be <= "
            "the value set here."
        ),
    )
class ComputeVisitRegionsTask(Task, metaclass=ABCMeta):
    """Abstract base class for the subtask of `DefineVisitsTask` that is
    responsible for extracting spatial regions for visits and visit+detector
    combinations.

    Subclasses should be registered with `ComputeVisitRegionsTask.registry` to
    enable use by `DefineVisitsTask`.

    Parameters
    ----------
    config : `ComputeVisitRegionsConfig`
        Configuration information.
    butler : `lsst.daf.butler.Butler`
        The butler to use.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    ConfigClass = ComputeVisitRegionsConfig

    _DefaultName = "computeVisitRegions"

    registry = makeRegistry(
        doc="Registry of algorithms for computing on-sky regions for visits and visit+detector combinations.",
        configBaseType=ComputeVisitRegionsConfig,
    )

    def __init__(self, config: ComputeVisitRegionsConfig, *, butler: Butler, **kwargs: Any):
        super().__init__(config=config, **kwargs)
        self.butler = butler
        # Lazily-populated cache of instrument name -> Instrument, filled by
        # getInstrument.
        self.instrumentMap: Dict[str, Instrument] = {}

    def getInstrument(self, instrumentName: str) -> Instrument:
        """Retrieve an `~lsst.obs.base.Instrument` associated with this
        instrument name.

        Parameters
        ----------
        instrumentName : `str`
            The name of the instrument.

        Returns
        -------
        instrument : `~lsst.obs.base.Instrument`
            The associated instrument object.

        Notes
        -----
        The result is cached.
        """
        try:
            return self.instrumentMap[instrumentName]
        except KeyError:
            # First request for this instrument: look it up and cache it.
            instrument = Instrument.fromName(instrumentName, self.butler.registry)
            self.instrumentMap[instrumentName] = instrument
            return instrument

    @abstractmethod
    def compute(
        self, visit: VisitDefinitionData, *, collections: Any = None
    ) -> Tuple[Region, Dict[int, Region]]:
        """Compute regions for the given visit and all detectors in that visit.

        Parameters
        ----------
        visit : `VisitDefinitionData`
            Struct describing the visit and the exposures associated with it.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        visitRegion : `lsst.sphgeom.Region`
            Region for the full visit.
        visitDetectorRegions : `dict` [ `int`, `lsst.sphgeom.Region` ]
            Dictionary mapping detector ID to the region for that detector.
            Should include all detectors in the visit.
        """
        raise NotImplementedError()
class DefineVisitsConfig(Config):
    """Configuration for `DefineVisitsTask`."""

    groupExposures = GroupExposuresTask.registry.makeField(
        doc="Algorithm for grouping exposures into visits.",
        default="one-to-one-and-by-counter",
    )
    computeVisitRegions = ComputeVisitRegionsTask.registry.makeField(
        # Fixed typo in help text: "from computing" -> "for computing".
        doc="Algorithm for computing visit and visit+detector regions.",
        default="single-raw-wcs",
    )
    ignoreNonScienceExposures: Field[bool] = Field(
        doc=(
            "If True, silently ignore input exposures that do not have "
            "observation_type=SCIENCE. If False, raise an exception if one "
            "encountered."
        ),
        dtype=bool,
        optional=False,
        default=True,
    )
    updateObsCoreTable: Field[bool] = Field(
        doc=(
            "If True, update exposure regions in obscore table after visits "
            "are defined. If False, do not update obscore table."
        ),
        dtype=bool,
        default=True,
    )
class DefineVisitsTask(Task):
    """Driver Task for defining visits (and their spatial regions) in Gen3
    Butler repositories.

    Parameters
    ----------
    config : `DefineVisitsConfig`
        Configuration for the task.
    butler : `~lsst.daf.butler.Butler`
        Writeable butler instance. Will be used to read `raw.wcs` and `camera`
        datasets and insert/sync dimension data.
    **kwargs
        Additional keyword arguments are forwarded to the `lsst.pipe.base.Task`
        constructor.

    Notes
    -----
    Each instance of `DefineVisitsTask` reads from / writes to the same Butler.
    Each invocation of `DefineVisitsTask.run` processes an independent group of
    exposures into one or more new visits, all belonging to the same visit
    system and instrument.

    The actual work of grouping exposures and computing regions is delegated
    to pluggable subtasks (`GroupExposuresTask` and `ComputeVisitRegionsTask`),
    respectively. The defaults are to create one visit for every exposure,
    and to use exactly one (arbitrary) detector-level raw dataset's WCS along
    with camera geometry to compute regions for all detectors. Other
    implementations can be created and configured for instruments for which
    these choices are unsuitable (e.g. because visits and exposures are not
    one-to-one, or because ``raw.wcs`` datasets for different detectors may not
    be consistent with camera geometry).

    It is not necessary in general to ingest all raws for an exposure before
    defining a visit that includes the exposure; this depends entirely on the
    `ComputeVisitRegionTask` subclass used. For the default configuration,
    a single raw for each exposure is sufficient.

    Defining the same visit the same way multiple times (e.g. via multiple
    invocations of this task on the same exposures, with the same
    configuration) is safe, but it may be inefficient, as most of the work must
    be done before new visits can be compared to existing visits.
    """

    def __init__(self, config: DefineVisitsConfig, *, butler: Butler, **kwargs: Any):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        self.butler = butler
        self.universe = self.butler.dimensions
        self.progress = Progress("obs.base.DefineVisitsTask")
        self.makeSubtask("groupExposures")
        self.makeSubtask("computeVisitRegions", butler=self.butler)

    def _reduce_kwargs(self) -> dict:
        # Add extra parameters to pickle so the task can be reconstructed
        # with the same butler.
        return dict(**super()._reduce_kwargs(), butler=self.butler)

    ConfigClass: ClassVar[Type[Config]] = DefineVisitsConfig

    _DefaultName: ClassVar[str] = "defineVisits"

    # Class-level annotations for attributes created by makeSubtask/config.
    config: DefineVisitsConfig
    groupExposures: GroupExposuresTask
    computeVisitRegions: ComputeVisitRegionsTask

    def _buildVisitRecords(
        self, definition: VisitDefinitionData, *, collections: Any = None
    ) -> _VisitRecords:
        """Build the DimensionRecords associated with a visit.

        Parameters
        ----------
        definition : `VisitDefinitionData`
            Struct with identifiers for the visit and records for its
            constituent exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        records : `_VisitRecords`
            Struct containing DimensionRecords for the visit, including
            associated dimension elements.
        """
        dimension = self.universe["visit"]

        # Some registries support additional items.
        supported = {meta.name for meta in dimension.metadata}

        # Compute all regions.
        visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(
            definition, collections=collections
        )

        # Aggregate other exposure quantities. Fields that must be identical
        # across all exposures in the visit use _value_if_equal, which
        # collapses disagreement to None.
        timespan = Timespan(
            begin=_reduceOrNone(min, (e.timespan.begin for e in definition.exposures)),
            end=_reduceOrNone(max, (e.timespan.end for e in definition.exposures)),
        )
        exposure_time = _reduceOrNone(operator.add, (e.exposure_time for e in definition.exposures))
        physical_filter = _reduceOrNone(_value_if_equal, (e.physical_filter for e in definition.exposures))
        target_name = _reduceOrNone(_value_if_equal, (e.target_name for e in definition.exposures))
        science_program = _reduceOrNone(_value_if_equal, (e.science_program for e in definition.exposures))

        # observing day for a visit is defined by the earliest observation
        # of the visit
        observing_day = _reduceOrNone(min, (e.day_obs for e in definition.exposures))
        observation_reason = _reduceOrNone(
            _value_if_equal, (e.observation_reason for e in definition.exposures)
        )
        if observation_reason is None:
            # Be explicit about there being multiple reasons
            # MyPy can't really handle DimensionRecord fields as
            # DimensionRecord classes are dynamically defined; easiest to just
            # shush it when it complains.
            observation_reason = "various"  # type: ignore

        # Use the mean zenith angle as an approximation
        zenith_angle = _reduceOrNone(operator.add, (e.zenith_angle for e in definition.exposures))
        if zenith_angle is not None:
            zenith_angle /= len(definition.exposures)

        # New records that may not be supported.
        extras: Dict[str, Any] = {}
        if "seq_num" in supported:
            extras["seq_num"] = _reduceOrNone(min, (e.seq_num for e in definition.exposures))
        if "azimuth" in supported:
            # Must take into account 0/360 problem.
            extras["azimuth"] = _calc_mean_angle([e.azimuth for e in definition.exposures])

        # visit_system handling changed. This is the logic for visit/exposure
        # that has support for seq_start/seq_end.
        if "seq_num" in supported:
            # Map visit to exposure.
            visit_definition = [
                self.universe["visit_definition"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    exposure=exposure.id,
                )
                for exposure in definition.exposures
            ]

            # Map visit to visit system.
            visit_system_membership = []
            for visit_system in self.groupExposures.getVisitSystems():
                if visit_system in definition.visit_systems:
                    record = self.universe["visit_system_membership"].RecordClass(
                        instrument=definition.instrument,
                        visit=definition.id,
                        visit_system=visit_system.value,
                    )
                    visit_system_membership.append(record)
        else:
            # The old approach can only handle one visit system at a time.
            # If we have been configured with multiple options, prefer the
            # one-to-one.
            visit_systems = self.groupExposures.getVisitSystems()
            if len(visit_systems) > 1:
                one_to_one = VisitSystem.from_name("one-to-one")
                if one_to_one not in visit_systems:
                    raise ValueError(
                        f"Multiple visit systems specified ({visit_systems}) for use with old"
                        " dimension universe but unable to find one-to-one."
                    )
                visit_system = one_to_one
            else:
                visit_system = visit_systems.pop()

            extras["visit_system"] = visit_system.value

            # The old visit_definition included visit system.
            visit_definition = [
                self.universe["visit_definition"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    exposure=exposure.id,
                    visit_system=visit_system.value,
                )
                for exposure in definition.exposures
            ]

            # This concept does not exist in old schema.
            visit_system_membership = []

        # Construct the actual DimensionRecords.
        return _VisitRecords(
            visit=dimension.RecordClass(
                instrument=definition.instrument,
                id=definition.id,
                name=definition.name,
                physical_filter=physical_filter,
                target_name=target_name,
                science_program=science_program,
                observation_reason=observation_reason,
                day_obs=observing_day,
                zenith_angle=zenith_angle,
                exposure_time=exposure_time,
                timespan=timespan,
                region=visitRegion,
                # TODO: no seeing value in exposure dimension records, so we
                # can't set that here. But there are many other columns that
                # both dimensions should probably have as well.
                **extras,
            ),
            visit_definition=visit_definition,
            visit_system_membership=visit_system_membership,
            visit_detector_region=[
                self.universe["visit_detector_region"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    detector=detectorId,
                    region=detectorRegion,
                )
                for detectorId, detectorRegion in visitDetectorRegions.items()
            ],
        )

    def run(
        self,
        dataIds: Iterable[DataId],
        *,
        collections: Any = None,
        update_records: bool = False,
        incremental: bool = False,
    ) -> None:
        """Add visit definitions to the registry for the given exposures.

        Parameters
        ----------
        dataIds : `Iterable` [ `dict` or `DataCoordinate` ]
            Exposure-level data IDs. These must all correspond to the same
            instrument, and are expected to be on-sky science exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        update_records : `bool`, optional
            If `True` (`False` is default), update existing visit records that
            conflict with the new ones instead of rejecting them (and when this
            occurs, update visit_detector_region as well). THIS IS AN ADVANCED
            OPTION THAT SHOULD ONLY BE USED TO FIX REGIONS AND/OR METADATA THAT
            ARE KNOWN TO BE BAD, AND IT CANNOT BE USED TO REMOVE EXPOSURES OR
            DETECTORS FROM A VISIT.
        incremental : `bool`, optional
            If `True` indicate that exposures are being ingested incrementally
            and visit definition will be run on partial visits. This will
            force ``update_records`` to `True`. If there is any risk that
            files are being ingested incrementally it is critical that this
            parameter is set to `True` and not to rely on ``update_records``.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if a visit ID conflict is detected and the existing visit
            differs from the new one.
        """
        # Normalize, expand, and deduplicate data IDs.
        self.log.info("Preprocessing data IDs.")
        dimensions = DimensionGraph(self.universe, names=["exposure"])
        data_id_set: Set[DataCoordinate] = {
            self.butler.registry.expandDataId(d, graph=dimensions) for d in dataIds
        }
        if not data_id_set:
            raise RuntimeError("No exposures given.")
        if incremental:
            # Partial visits may be redefined later when more snaps arrive,
            # so record updates must be allowed.
            update_records = True
        # Extract exposure DimensionRecords, check that there's only one
        # instrument in play, and check for non-science exposures.
        exposures = []
        instruments = set()
        for dataId in data_id_set:
            record = dataId.records["exposure"]
            assert record is not None, "Guaranteed by expandDataIds call earlier."
            if record.tracking_ra is None or record.tracking_dec is None or record.sky_angle is None:
                if self.config.ignoreNonScienceExposures:
                    continue
                else:
                    raise RuntimeError(
                        f"Input exposure {dataId} has observation_type "
                        f"{record.observation_type}, but is not on sky."
                    )
            instruments.add(dataId["instrument"])
            exposures.append(record)
        if not exposures:
            self.log.info("No on-sky exposures found after filtering.")
            return
        if len(instruments) > 1:
            raise RuntimeError(
                "All data IDs passed to DefineVisitsTask.run must be "
                f"from the same instrument; got {instruments}."
            )
        (instrument,) = instruments
        # Ensure the visit_system our grouping algorithm uses is in the
        # registry, if it wasn't already.
        visitSystems = self.groupExposures.getVisitSystems()
        for visitSystem in visitSystems:
            self.log.info("Registering visit_system %d: %s.", visitSystem.value, visitSystem)
            self.butler.registry.syncDimensionData(
                "visit_system",
                {"instrument": instrument, "id": visitSystem.value, "name": str(visitSystem)},
            )

        # In true incremental we will be given the second snap on its
        # own on the assumption that the previous snap was already handled.
        # For correct grouping we need access to the other exposures in the
        # visit.
        if incremental:
            exposures.extend(self.groupExposures.find_missing(exposures, self.butler.registry))

        # Group exposures into visits, delegating to subtask.
        self.log.info("Grouping %d exposure(s) into visits.", len(exposures))
        definitions = list(self.groupExposures.group(exposures))
        # Iterate over visits, compute regions, and insert dimension data, one
        # transaction per visit. If a visit already exists, we skip all other
        # inserts.
        self.log.info("Computing regions and other metadata for %d visit(s).", len(definitions))
        for visitDefinition in self.progress.wrap(
            definitions, total=len(definitions), desc="Computing regions and inserting visits"
        ):
            visitRecords = self._buildVisitRecords(visitDefinition, collections=collections)
            with self.butler.registry.transaction():
                inserted_or_updated = self.butler.registry.syncDimensionData(
                    "visit",
                    visitRecords.visit,
                    update=update_records,
                )
                if inserted_or_updated:
                    if inserted_or_updated is True:
                        # This is a new visit, not an update to an existing
                        # one, so insert visit definition.
                        # We don't allow visit definitions to change even when
                        # asked to update, because we'd have to delete the old
                        # visit_definitions first and also worry about what
                        # this does to datasets that already use the visit.
                        self.butler.registry.insertDimensionData(
                            "visit_definition", *visitRecords.visit_definition
                        )
                        if visitRecords.visit_system_membership:
                            self.butler.registry.insertDimensionData(
                                "visit_system_membership", *visitRecords.visit_system_membership
                            )
                    elif incremental and len(visitRecords.visit_definition) > 1:
                        # The visit record was modified. This could happen
                        # if a multi-snap visit was redefined with an
                        # additional snap so play it safe and allow for the
                        # visit definition to be updated. We use update=False
                        # here since there should not be any rows updated,
                        # just additional rows added. update=True does not work
                        # correctly with multiple records. In incremental mode
                        # we assume that the caller wants the visit definition
                        # to be updated and has no worries about provenance
                        # with the previous definition.
                        for definition in visitRecords.visit_definition:
                            self.butler.registry.syncDimensionData("visit_definition", definition)

                    # [Re]Insert visit_detector_region records for both inserts
                    # and updates, because we do allow updating to affect the
                    # region calculations.
                    self.butler.registry.insertDimensionData(
                        "visit_detector_region", *visitRecords.visit_detector_region, replace=update_records
                    )

                    # Update obscore exposure records with region information
                    # from corresponding visits.
                    if self.config.updateObsCoreTable:
                        if obscore_manager := self.butler.registry.obsCoreTableManager:
                            obscore_updates: list[tuple[int, int, Region]] = []
                            exposure_ids = [rec.exposure for rec in visitRecords.visit_definition]
                            for record in visitRecords.visit_detector_region:
                                obscore_updates += [
                                    (exposure, record.detector, record.region) for exposure in exposure_ids
                                ]
                            if obscore_updates:
                                obscore_manager.update_exposure_regions(
                                    cast(str, instrument), obscore_updates
                                )
799_T = TypeVar("_T")
802def _reduceOrNone(func: Callable[[_T, _T], Optional[_T]], iterable: Iterable[Optional[_T]]) -> Optional[_T]:
803 """Apply a binary function to pairs of elements in an iterable until a
804 single value is returned, but return `None` if any element is `None` or
805 there are no elements.
806 """
807 r: Optional[_T] = None
808 for v in iterable:
809 if v is None:
810 return None
811 if r is None:
812 r = v
813 else:
814 r = func(r, v)
815 return r
818def _value_if_equal(a: _T, b: _T) -> Optional[_T]:
819 """Return either argument if they are equal, or `None` if they are not."""
820 return a if a == b else None
823def _calc_mean_angle(angles: List[float]) -> float:
824 """Calculate the mean angle, taking into account 0/360 wrapping.
826 Parameters
827 ----------
828 angles : `list` [`float`]
829 Angles to average together, in degrees.
831 Returns
832 -------
833 average : `float`
834 Average angle in degrees.
835 """
836 # Save on all the math if we only have one value.
837 if len(angles) == 1:
838 return angles[0]
840 # Convert polar coordinates of unit circle to complex values.
841 # Average the complex values.
842 # Convert back to a phase angle.
843 return math.degrees(cmath.phase(sum(cmath.rect(1.0, math.radians(d)) for d in angles) / len(angles)))
class _GroupExposuresOneToOneConfig(GroupExposuresConfig):
    """Configuration for the one-to-one exposure grouping algorithm.

    Both fields below are deprecated and no longer used; the visit system
    is now identified by the `VisitSystem` enumeration.
    """

    visitSystemId: Field[int] = Field(
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        dtype=int,
        default=0,
        deprecated="No longer used. Replaced by enum.",
    )
    visitSystemName: Field[str] = Field(
        doc="String name of the visit_system implemented by this grouping algorithm.",
        dtype=str,
        default="one-to-one",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("one-to-one", GroupExposuresTask.registry)
class _GroupExposuresOneToOneTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that simply defines one visit for each
    exposure, reusing the exposures identifiers for the visit.
    """

    ConfigClass = _GroupExposuresOneToOneConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        # Docstring inherited from GroupExposuresTask.
        # A one-exposure visit can never be partially ingested, so nothing
        # can ever be missing.
        return []

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        # Docstring inherited from GroupExposuresTask.
        # Every exposure forms its own singleton group, keyed by exposure ID.
        groups: dict[Any, list[DimensionRecord]] = {}
        for exposure in exposures:
            groups[exposure.id] = [exposure]
        return groups

    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        systems = {VisitSystem.ONE_TO_ONE}
        return (
            VisitDefinitionData(
                instrument=exposure.instrument,
                id=exposure.id,
                name=exposure.obs_id,
                exposures=[exposure],
                visit_systems=systems,
            )
            for exposure in exposures
        )

    def getVisitSystems(self) -> Set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        return {VisitSystem.ONE_TO_ONE}
class _GroupExposuresByGroupMetadataConfig(GroupExposuresConfig):
    """Configuration for the by-group-metadata exposure grouping algorithm.

    Both fields below are deprecated and no longer used; the visit system
    is now identified by the `VisitSystem` enumeration.
    """

    visitSystemId: Field[int] = Field(
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        dtype=int,
        default=1,
        deprecated="No longer used. Replaced by enum.",
    )
    visitSystemName: Field[str] = Field(
        doc="String name of the visit_system implemented by this grouping algorithm.",
        dtype=str,
        default="by-group-metadata",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("by-group-metadata", GroupExposuresTask.registry)
class _GroupExposuresByGroupMetadataTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that uses exposure.group_name and
    exposure.group_id.

    This algorithm _assumes_ exposure.group_id (generally populated from
    `astro_metadata_translator.ObservationInfo.visit_id`) is not just unique,
    but disjoint from all `ObservationInfo.exposure_id` values - if it isn't,
    it will be impossible to ever use both this grouping algorithm and the
    one-to-one algorithm for a particular camera in the same data repository.
    """

    ConfigClass = _GroupExposuresByGroupMetadataConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        # A group's expected size is unknowable from the records alone, so
        # each group requires a registry query to find members we were not
        # handed.
        missing: list[DimensionRecord] = []
        for members in self.group_exposures(exposures).values():
            exemplar = members[0]
            known = set(
                registry.queryDimensionRecords(
                    "exposure",
                    where="exposure.group_name = group",
                    bind={"group": exemplar.group_name},
                    instrument=exemplar.instrument,
                )
            )
            # Whatever the registry knows about that we were not given is
            # missing from this group.
            missing.extend(known - set(members))
        return missing

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        # Partition exposures by their group_name metadata.
        by_name: dict[Any, list[DimensionRecord]] = defaultdict(list)
        for exp in exposures:
            by_name[exp.group_name].append(exp)
        return by_name

    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        systems = {VisitSystem.from_name("by-group-metadata")}
        for visit_name, members in self.group_exposures(exposures).items():
            distinct_ids = {exp.group_id for exp in members}
            assert (
                len(distinct_ids) == 1
            ), "Grouping by exposure.group_name does not yield consistent group IDs"
            (visit_id,) = distinct_ids
            yield VisitDefinitionData(
                instrument=members[0].instrument,
                id=visit_id,
                name=visit_name,
                exposures=members,
                visit_systems=systems,
            )

    def getVisitSystems(self) -> Set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        return {VisitSystem.from_name("by-group-metadata")}
class _GroupExposuresByCounterAndExposuresConfig(GroupExposuresConfig):
    """Configuration for `_GroupExposuresByCounterAndExposuresTask`.

    Both fields below are deprecated and no longer read; they are retained
    only so existing persisted configs keep loading. The visit systems are
    now selected via an enum (per the ``deprecated`` messages).
    """

    # Deprecated; ignored by the task.
    visitSystemId: Field[int] = Field(
        doc="Integer ID of the visit_system implemented by this grouping algorithm.",
        dtype=int,
        default=2,
        deprecated="No longer used. Replaced by enum.",
    )
    # Deprecated; ignored by the task.
    visitSystemName: Field[str] = Field(
        doc="String name of the visit_system implemented by this grouping algorithm.",
        dtype=str,
        default="by-counter-and-exposures",
        deprecated="No longer used. Replaced by enum.",
    )
@registerConfigurable("one-to-one-and-by-counter", GroupExposuresTask.registry)
class _GroupExposuresByCounterAndExposuresTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that uses the sequence start and
    sequence end metadata to create multi-exposure visits, but also
    creates one-to-one visits.

    This algorithm uses the exposure.seq_start and
    exposure.seq_end fields to collect related snaps.
    It also groups single exposures.
    """

    ConfigClass = _GroupExposuresByCounterAndExposuresConfig

    def find_missing(
        self, exposures: list[DimensionRecord], registry: lsst.daf.butler.Registry
    ) -> list[DimensionRecord]:
        """Analyze the exposures and return relevant exposures known to
        registry.

        The expected membership of each group is derived from the
        ``seq_start``/``seq_end`` counters of its first exposure; only the
        sequence numbers not already present trigger a registry query.
        """
        groups = self.group_exposures(exposures)
        missing_exposures: list[DimensionRecord] = []
        for exposures_in_group in groups.values():
            # Use the exposure with the smallest seq_num as the exemplar for
            # the group's seq_start/seq_end range.
            sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
            first = sorted_exposures[0]

            # Only need to look for the seq_nums that we don't already have.
            seq_nums = set(range(first.seq_start, first.seq_end + 1))
            seq_nums.difference_update({exp.seq_num for exp in sorted_exposures})

            if seq_nums:
                # Missing something. Check registry.
                records = list(
                    registry.queryDimensionRecords(
                        "exposure",
                        where="exposure.seq_start = seq_start AND exposure.seq_end = seq_end AND "
                        "exposure.seq_num IN (seq_nums)",
                        bind={"seq_start": first.seq_start, "seq_end": first.seq_end, "seq_nums": seq_nums},
                        instrument=first.instrument,
                    )
                )
                missing_exposures.extend(records)

        return missing_exposures

    def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[DimensionRecord]]:
        # Exposures sharing a (day_obs, seq_start, seq_end) triple belong to
        # the same candidate multi-exposure visit.
        groups = defaultdict(list)
        for exposure in exposures:
            groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure)
        return groups

    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        system_one_to_one = VisitSystem.from_name("one-to-one")
        system_seq_start_end = VisitSystem.from_name("by-seq-start-end")

        groups = self.group_exposures(exposures)
        for visit_key, exposures_in_group in groups.items():
            instrument = exposures_in_group[0].instrument

            # It is possible that the first exposure in a visit has not
            # been ingested. This can be determined and if that is the case
            # we can not reliably define the multi-exposure visit.
            skip_multi = False
            sorted_exposures = sorted(exposures_in_group, key=lambda e: e.seq_num)
            first = sorted_exposures.pop(0)
            if first.seq_num != first.seq_start:
                # Special case seq_num == 0 since that implies that the
                # instrument has no counters and therefore no multi-exposure
                # visits.
                if first.seq_num != 0:
                    self.log.warning(
                        "First exposure for visit %s is not present. Skipping the multi-snap definition.",
                        visit_key,
                    )
                    skip_multi = True

            multi_exposure = False
            if first.seq_start != first.seq_end:
                # This is a multi-exposure visit regardless of the number
                # of exposures present.
                multi_exposure = True

            # Define the one-to-one visits.
            for exposure in exposures_in_group:
                # Default is to use the exposure ID and name unless
                # this is the first exposure in a multi-exposure visit.
                visit_name = exposure.obs_id
                visit_id = exposure.id
                visit_systems = {system_one_to_one}

                if not multi_exposure:
                    # This is also a by-counter visit.
                    # It will use the same visit_name and visit_id.
                    visit_systems.add(system_seq_start_end)

                elif not skip_multi and exposure == first:
                    # This is the first legitimate exposure in a multi-exposure
                    # visit. It therefore needs a modified visit name and ID
                    # so it does not clash with the multi-exposure visit
                    # definition.
                    # Prepending "9" keeps the derived ID numeric while making
                    # it distinct from any plain exposure ID.
                    visit_name = f"{visit_name}_first"
                    visit_id = int(f"9{visit_id}")

                yield VisitDefinitionData(
                    instrument=instrument,
                    id=visit_id,
                    name=visit_name,
                    exposures=[exposure],
                    visit_systems=visit_systems,
                )

            # Multi-exposure visit.
            if not skip_multi and multi_exposure:
                # Define the visit using the first exposure
                visit_name = first.obs_id
                visit_id = first.id

                yield VisitDefinitionData(
                    instrument=instrument,
                    id=visit_id,
                    name=visit_name,
                    exposures=exposures_in_group,
                    visit_systems={system_seq_start_end},
                )

    def getVisitSystems(self) -> Set[VisitSystem]:
        # Docstring inherited from GroupExposuresTask.
        # Using a Config for this is difficult because what this grouping
        # algorithm is doing is using two visit systems.
        # One is using metadata (but not by-group) and the other is the
        # one-to-one. For now hard-code in class.
        return set(VisitSystem.from_names(["one-to-one", "by-seq-start-end"]))
class _ComputeVisitRegionsFromSingleRawWcsConfig(ComputeVisitRegionsConfig):
    """Configuration for `_ComputeVisitRegionsFromSingleRawWcsTask`.

    Controls which detector's WCS anchors the projection and whether
    per-detector regions are merged across all exposures in a visit.
    """

    mergeExposures: Field[bool] = Field(
        doc=(
            "If True, merge per-detector regions over all exposures in a "
            "visit (via convex hull) instead of using the first exposure and "
            "assuming its regions are valid for all others."
        ),
        dtype=bool,
        default=False,
    )
    detectorId: Field[Optional[int]] = Field(
        doc=(
            # Fixed: the original help text left the outer parenthesis
            # unbalanced.
            "Load the WCS for the detector with this ID. If None, use an "
            "arbitrary detector (the first found in a query of the data "
            "repository for each exposure (or all exposures, if "
            "mergeExposures is True))."
        ),
        dtype=int,
        optional=True,
        default=None,
    )
    requireVersionedCamera: Field[bool] = Field(
        doc=(
            # Fixed: "version camera" -> "versioned camera" typo in help text.
            "If True, raise LookupError if versioned camera geometry cannot "
            "be loaded for an exposure. If False, use the nominal camera "
            "from the Instrument class instead."
        ),
        dtype=bool,
        optional=False,
        default=False,
    )
1157@registerConfigurable("single-raw-wcs", ComputeVisitRegionsTask.registry)
1158class _ComputeVisitRegionsFromSingleRawWcsTask(ComputeVisitRegionsTask):
1159 """A visit region calculator that uses a single raw WCS and a camera to
1160 project the bounding boxes of all detectors onto the sky, relating
1161 different detectors by their positions in focal plane coordinates.
1163 Notes
1164 -----
1165 Most instruments should have their raw WCSs determined from a combination
1166 of boresight angle, rotator angle, and camera geometry, and hence this
1167 algorithm should produce stable results regardless of which detector the
1168 raw corresponds to. If this is not the case (e.g. because a per-file FITS
1169 WCS is used instead), either the ID of the detector should be fixed (see
1170 the ``detectorId`` config parameter) or a different algorithm used.
1171 """
1173 ConfigClass = _ComputeVisitRegionsFromSingleRawWcsConfig
1174 config: _ComputeVisitRegionsFromSingleRawWcsConfig
    def computeExposureBounds(
        self, exposure: DimensionRecord, *, collections: Any = None
    ) -> Dict[int, List[UnitVector3d]]:
        """Compute the lists of unit vectors on the sphere that correspond to
        the sky positions of detector corners.

        Parameters
        ----------
        exposure : `DimensionRecord`
            Dimension record for the exposure.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        bounds : `dict`
            Dictionary mapping detector ID to a list of unit vectors on the
            sphere representing that detector's corners projected onto the sky.

        Raises
        ------
        LookupError
            Raised if ``requireVersionedCamera`` is set and no versioned
            camera is found, or if no ``raw.wcs`` dataset exists when falling
            back to persisted WCSs.
        TypeError
            Raised if the raw formatter does not support
            ``makeRawSkyWcsFromBoresight`` when deriving the WCS from
            boresight metadata.
        """
        if collections is None:
            collections = self.butler.collections
        camera, versioned = loadCamera(self.butler, exposure.dataId, collections=collections)
        if not versioned and self.config.requireVersionedCamera:
            raise LookupError(f"No versioned camera found for exposure {exposure.dataId}.")

        # Derive WCS from boresight information -- if available in registry
        use_registry = True
        try:
            orientation = lsst.geom.Angle(exposure.sky_angle, lsst.geom.degrees)
            radec = lsst.geom.SpherePoint(
                lsst.geom.Angle(exposure.tracking_ra, lsst.geom.degrees),
                lsst.geom.Angle(exposure.tracking_dec, lsst.geom.degrees),
            )
        except AttributeError:
            # Missing boresight fields on the exposure record: fall back to a
            # persisted raw.wcs dataset below.
            use_registry = False

        if use_registry:
            # Use the configured detector if given, otherwise an arbitrary
            # detector from the camera.
            if self.config.detectorId is None:
                detectorId = next(camera.getIdIter())
            else:
                detectorId = self.config.detectorId
            wcsDetector = camera[detectorId]

            # Ask the raw formatter to create the relevant WCS
            # This allows flips to be taken into account
            instrument = self.getInstrument(exposure.instrument)
            rawFormatter = instrument.getRawFormatter({"detector": detectorId})

            try:
                wcs = rawFormatter.makeRawSkyWcsFromBoresight(radec, orientation, wcsDetector)  # type: ignore
            except AttributeError:
                raise TypeError(
                    f"Raw formatter is {get_full_type_name(rawFormatter)} but visit"
                    " definition requires it to support 'makeRawSkyWcsFromBoresight'"
                ) from None
        else:
            if self.config.detectorId is None:
                # Any raw.wcs for this exposure will do; take the first one
                # the registry returns.
                wcsRefsIter = self.butler.registry.queryDatasets(
                    "raw.wcs", dataId=exposure.dataId, collections=collections
                )
                if not wcsRefsIter:
                    raise LookupError(
                        f"No raw.wcs datasets found for data ID {exposure.dataId} "
                        f"in collections {collections}."
                    )
                wcsRef = next(iter(wcsRefsIter))
                wcsDetector = camera[wcsRef.dataId["detector"]]
                wcs = self.butler.get(wcsRef)
            else:
                wcsDetector = camera[self.config.detectorId]
                wcs = self.butler.get(
                    "raw.wcs",
                    dataId=exposure.dataId,
                    detector=self.config.detectorId,
                    collections=collections,
                )
        # Compose focal plane -> reference detector pixels -> sky, then chain
        # each detector's pixels -> focal plane transform in front of it so
        # every detector can be projected through the single reference WCS.
        fpToSky = wcsDetector.getTransform(FOCAL_PLANE, PIXELS).then(wcs.getTransform())
        bounds = {}
        for detector in camera:
            pixelsToSky = detector.getTransform(PIXELS, FOCAL_PLANE).then(fpToSky)
            # Pad the bounding box (config.padding) before projecting corners.
            pixCorners = Box2D(detector.getBBox().dilatedBy(self.config.padding)).getCorners()
            bounds[detector.getId()] = [
                skyCorner.getVector() for skyCorner in pixelsToSky.applyForward(pixCorners)
            ]
        return bounds
1265 def compute(
1266 self, visit: VisitDefinitionData, *, collections: Any = None
1267 ) -> Tuple[Region, Dict[int, Region]]:
1268 # Docstring inherited from ComputeVisitRegionsTask.
1269 if self.config.mergeExposures:
1270 detectorBounds: Dict[int, List[UnitVector3d]] = defaultdict(list)
1271 for exposure in visit.exposures:
1272 exposureDetectorBounds = self.computeExposureBounds(exposure, collections=collections)
1273 for detectorId, bounds in exposureDetectorBounds.items():
1274 detectorBounds[detectorId].extend(bounds)
1275 else:
1276 detectorBounds = self.computeExposureBounds(visit.exposures[0], collections=collections)
1277 visitBounds = []
1278 detectorRegions = {}
1279 for detectorId, bounds in detectorBounds.items():
1280 detectorRegions[detectorId] = ConvexPolygon.convexHull(bounds)
1281 visitBounds.extend(bounds)
1282 return ConvexPolygon.convexHull(visitBounds), detectorRegions