Coverage for python/lsst/obs/base/defineVisits.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23__all__ = [
24 "DefineVisitsConfig",
25 "DefineVisitsTask",
26 "GroupExposuresConfig",
27 "GroupExposuresTask",
28 "VisitDefinitionData",
29]
31from abc import ABCMeta, abstractmethod
32from collections import defaultdict
33import dataclasses
34from typing import Any, Dict, Iterable, List, Optional, Tuple
35from multiprocessing import Pool
37from lsst.daf.butler import (
38 Butler,
39 DataId,
40 DimensionGraph,
41 DimensionRecord,
42 Timespan,
43 TIMESPAN_FIELD_SPECS,
44)
45from lsst.geom import Box2D
46from lsst.pex.config import Config, Field, makeRegistry, registerConfigurable
47from lsst.afw.cameraGeom import FOCAL_PLANE, PIXELS
48from lsst.pipe.base import Task
49from lsst.sphgeom import ConvexPolygon, Region, UnitVector3d
50from ._instrument import loadCamera
@dataclasses.dataclass
class VisitDefinitionData:
    """Description of one visit-to-be: its identifiers plus the exposures
    that will be combined to define it.

    Attributes
    ----------
    instrument : `str`
        Name of the instrument the visit is associated with.
    id : `int`
        Integer ID of the visit; must be unique across all visit systems
        for the instrument.
    name : `str`
        String name of the visit; must be unique across all visit systems
        for the instrument.
    exposures : `list` [ `DimensionRecord` ]
        Dimension records of the exposures that make up the visit.  A new
        empty list is created for each instance when not given.
    """

    instrument: str
    id: int
    name: str
    exposures: List[DimensionRecord] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class _VisitRecords:
    """Bundle of the dimension records that together describe one visit.

    Attributes
    ----------
    visit : `DimensionRecord`
        Record for the 'visit' dimension itself.
    visit_definition : `list` [ `DimensionRecord` ]
        Records for 'visit_definition', which relates 'visit' to 'exposure'.
    visit_detector_region : `list` [ `DimensionRecord` ]
        Records for 'visit_detector_region', which associates each
        combination of a 'visit' and a 'detector' with a region on the sky.
    """

    visit: DimensionRecord
    visit_definition: List[DimensionRecord]
    visit_detector_region: List[DimensionRecord]
class GroupExposuresConfig(Config):
    """Base configuration class for `GroupExposuresTask` implementations.

    It declares no fields of its own.
    """
class GroupExposuresTask(Task, metaclass=ABCMeta):
    """Interface for the `DefineVisitsTask` subtask that decides which
    exposures belong together in a visit.

    Concrete implementations should be added to
    `GroupExposuresTask.registry` so that `DefineVisitsTask` can select them,
    and each should generally correspond to one 'visit_system' dimension
    value.  Implementations are also responsible for producing visit IDs and
    names that are unique across every visit system used by an instrument.

    Parameters
    ----------
    config : `GroupExposuresConfig`
        Configuration information.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    ConfigClass = GroupExposuresConfig

    _DefaultName = "groupExposures"

    registry = makeRegistry(
        doc="Registry of algorithms for grouping exposures into visits.",
        configBaseType=GroupExposuresConfig,
    )

    def __init__(self, config: GroupExposuresConfig, **kwargs: Any):
        Task.__init__(self, config=config, **kwargs)

    @abstractmethod
    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        """Partition a list of exposures into visits.

        Parameters
        ----------
        exposures : `list` [ `DimensionRecord` ]
            'exposure' dimension records describing the exposures to group.

        Returns
        -------
        visits : `Iterable` [ `VisitDefinitionData` ]
            One struct per visit, identifying the visit and listing its
            exposures.  Either an iterator or a container is acceptable.
        """
        raise NotImplementedError()

    @abstractmethod
    def getVisitSystem(self) -> Tuple[int, str]:
        """Identify the 'visit_system' dimension value this algorithm
        implements.

        Returns
        -------
        id : `int`
            Integer ID for the visit system (given an instrument).
        name : `str`
            Unique string identifier for the visit system (given an
            instrument).
        """
        raise NotImplementedError()
class ComputeVisitRegionsConfig(Config):
    """Base configuration class for `ComputeVisitRegionsTask`
    implementations.
    """

    padding = Field(
        doc=(
            "Pad raw image bounding boxes with specified number of "
            "pixels when calculating their (conservatively large) "
            "region on the sky."
        ),
        dtype=int,
        default=0,
    )
class ComputeVisitRegionsTask(Task, metaclass=ABCMeta):
    """Interface for the `DefineVisitsTask` subtask that works out the
    spatial regions covered by a visit and by each visit+detector
    combination.

    Concrete implementations should be added to
    `ComputeVisitRegionsTask.registry` so that `DefineVisitsTask` can select
    them.

    Parameters
    ----------
    config : `ComputeVisitRegionsConfig`
        Configuration information.
    butler : `lsst.daf.butler.Butler`
        Butler instance, stored as ``self.butler`` for use by subclasses.
    **kwargs
        Additional keyword arguments forwarded to the `Task` constructor.
    """

    ConfigClass = ComputeVisitRegionsConfig

    _DefaultName = "computeVisitRegions"

    registry = makeRegistry(
        doc=(
            "Registry of algorithms for computing on-sky regions for visits "
            "and visit+detector combinations."
        ),
        configBaseType=ComputeVisitRegionsConfig,
    )

    def __init__(self, config: ComputeVisitRegionsConfig, *, butler: Butler, **kwargs: Any):
        Task.__init__(self, config=config, **kwargs)
        self.butler = butler

    @abstractmethod
    def compute(
        self, visit: VisitDefinitionData, *, collections: Any = None
    ) -> Tuple[Region, Dict[int, Region]]:
        """Compute the region of a visit and of every detector in it.

        Parameters
        ----------
        visit : `VisitDefinitionData`
            Struct describing the visit and the exposures associated with it.
        collections : Any, optional
            Collections to be searched for raws and camera geometry,
            overriding ``self.butler.collections``.  Can be any of the types
            supported by the ``collections`` argument to butler construction.

        Returns
        -------
        visitRegion : `lsst.sphgeom.Region`
            Region for the full visit.
        visitDetectorRegions : `dict` [ `int`, `lsst.sphgeom.Region` ]
            Mapping from detector ID to that detector's region.  Should
            include all detectors in the visit.
        """
        raise NotImplementedError()
class DefineVisitsConfig(Config):
    """Configuration for `DefineVisitsTask`: selects the exposure-grouping
    and region-computing algorithms and controls how non-science exposures
    are treated.
    """

    groupExposures = GroupExposuresTask.registry.makeField(
        default="one-to-one",
        doc="Algorithm for grouping exposures into visits.",
    )
    computeVisitRegions = ComputeVisitRegionsTask.registry.makeField(
        default="single-raw-wcs",
        doc="Algorithm from computing visit and visit+detector regions.",
    )
    ignoreNonScienceExposures = Field(
        dtype=bool,
        default=True,
        optional=False,
        doc=(
            "If True, silently ignore input exposures that do not have "
            "observation_type=SCIENCE. "
            "If False, raise an exception if one encountered."
        ),
    )
class DefineVisitsTask(Task):
    """Driver Task for defining visits (and their spatial regions) in Gen3
    Butler repositories.

    Parameters
    ----------
    config : `DefineVisitsConfig`, optional
        Configuration for the task.  If `None`, a default-constructed
        ``ConfigClass`` instance is used.
    butler : `~lsst.daf.butler.Butler`
        Writeable butler instance.  Will be used to read `raw.wcs` and `camera`
        datasets and insert/sync dimension data.
    **kwargs
        Additional keyword arguments are forwarded to the `lsst.pipe.base.Task`
        constructor.

    Notes
    -----
    Each instance of `DefineVisitsTask` reads from / writes to the same Butler.
    Each invocation of `DefineVisitsTask.run` processes an independent group of
    exposures into one or more new visits, all belonging to the same visit
    system and instrument.

    The actual work of grouping exposures and computing regions is delegated
    to pluggable subtasks (`GroupExposuresTask` and `ComputeVisitRegionsTask`),
    respectively.  The defaults are to create one visit for every exposure,
    and to use exactly one (arbitrary) detector-level raw dataset's WCS along
    with camera geometry to compute regions for all detectors.  Other
    implementations can be created and configured for instruments for which
    these choices are unsuitable (e.g. because visits and exposures are not
    one-to-one, or because ``raw.wcs`` datasets for different detectors may not
    be consistent with camera geometry).

    It is not necessary in general to ingest all raws for an exposure before
    defining a visit that includes the exposure; this depends entirely on the
    `ComputeVisitRegionsTask` subclass used.  For the default configuration,
    a single raw for each exposure is sufficient.
    """

    ConfigClass = DefineVisitsConfig

    _DefaultName = "defineVisits"

    def __init__(self, config: Optional[DefineVisitsConfig] = None, *, butler: Butler, **kwargs: Any):
        # ``config`` is optional, so build the default before validating;
        # calling ``validate`` on `None` would raise AttributeError.
        if config is None:
            config = self.ConfigClass()
        # Not a CmdlineTask nor PipelineTask, so have to validate the config
        # here.
        config.validate()
        super().__init__(config, **kwargs)
        self.butler = butler
        self.universe = self.butler.registry.dimensions
        self.makeSubtask("groupExposures")
        self.makeSubtask("computeVisitRegions", butler=self.butler)

    def _expandExposureId(self, dataId: DataId):
        """Expand one exposure-level data ID via the registry.

        This is a method rather than a lambda so that it can be handed to
        `multiprocessing.Pool` mapping functions, which must pickle their
        callable.

        Parameters
        ----------
        dataId : `dict` or `DataCoordinate`
            Exposure-level data ID.

        Returns
        -------
        expanded
            Result of ``registry.expandDataId`` for the 'exposure' dimension
            graph.
        """
        dimensions = DimensionGraph(self.universe, names=["exposure"])
        return self.butler.registry.expandDataId(dataId, graph=dimensions)

    def _buildVisitRecordsSingle(self, args: Tuple[VisitDefinitionData, Any]) -> _VisitRecords:
        """Single-argument adapter around `_buildVisitRecords`, for use with
        ``map``-like functions.

        Parameters
        ----------
        args : `tuple`
            A ``(definition, collections)`` pair.

        Returns
        -------
        records : `_VisitRecords`
            Result of `_buildVisitRecords`.
        """
        definition, collections = args
        return self._buildVisitRecords(definition, collections=collections)

    def _buildVisitRecords(self, definition: VisitDefinitionData, *,
                           collections: Any = None) -> _VisitRecords:
        """Build the DimensionRecords associated with a visit.

        Parameters
        ----------
        definition : `VisitDefinitionData`
            Struct with identifiers for the visit and records for its
            constituent exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        records : `_VisitRecords`
            Struct containing DimensionRecords for the visit, including
            associated dimension elements.
        """
        # Compute all regions.
        visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(definition,
                                                                             collections=collections)
        # Aggregate other exposure quantities.
        timespan = Timespan(
            begin=_reduceOrNone(min, (e.timespan.begin for e in definition.exposures)),
            end=_reduceOrNone(max, (e.timespan.end for e in definition.exposures)),
        )
        # The reducer must be a *binary* function; the builtin ``sum`` takes
        # (iterable, start) and fails when called with two numbers.
        exposure_time = _reduceOrNone(lambda a, b: a + b,
                                      (e.exposure_time for e in definition.exposures))
        # The visit only has a physical_filter if every exposure agrees on
        # one; any disagreement (or no exposures at all) yields None.
        filters = {e.physical_filter for e in definition.exposures}
        physical_filter = filters.pop() if len(filters) == 1 else None
        # Same for every record below, so look it up once.
        visitSystemId = self.groupExposures.getVisitSystem()[0]
        # Construct the actual DimensionRecords.
        return _VisitRecords(
            visit=self.universe["visit"].RecordClass.fromDict({
                "instrument": definition.instrument,
                "id": definition.id,
                "name": definition.name,
                "physical_filter": physical_filter,
                "visit_system": visitSystemId,
                "exposure_time": exposure_time,
                TIMESPAN_FIELD_SPECS.begin.name: timespan.begin,
                TIMESPAN_FIELD_SPECS.end.name: timespan.end,
                "region": visitRegion,
                # TODO: no seeing value in exposure dimension records, so we
                # can't set that here.  But there are many other columns that
                # both dimensions should probably have as well.
            }),
            visit_definition=[
                self.universe["visit_definition"].RecordClass.fromDict({
                    "instrument": definition.instrument,
                    "visit": definition.id,
                    "exposure": exposure.id,
                    "visit_system": visitSystemId,
                })
                for exposure in definition.exposures
            ],
            visit_detector_region=[
                self.universe["visit_detector_region"].RecordClass.fromDict({
                    "instrument": definition.instrument,
                    "visit": definition.id,
                    "detector": detectorId,
                    "region": detectorRegion,
                })
                for detectorId, detectorRegion in visitDetectorRegions.items()
            ]
        )

    def run(self, dataIds: Iterable[DataId], *,
            pool: Optional[Pool] = None,
            processes: int = 1,
            collections: Any = None):
        """Add visit definitions to the registry for the given exposures.

        Parameters
        ----------
        dataIds : `Iterable` [ `dict` or `DataCoordinate` ]
            Exposure-level data IDs.  These must all correspond to the same
            instrument, and are expected to be on-sky science exposures.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.  A pool passed in by the caller is left open.
        processes : `int`, optional
            The number of processes to use.  Ignored if ``pool`` is not `None`.
            If greater than one (and no ``pool`` is given) a pool is created
            for the duration of this call and closed before returning.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Raises
        ------
        RuntimeError
            Raised if no data IDs are given, if the data IDs span more than
            one instrument, or if a non-science exposure is encountered while
            ``config.ignoreNonScienceExposures`` is `False`.

        Notes
        -----
        When a pool is used the task's bound methods are sent to the worker
        processes, so the task (including its butler) must be picklable.
        NOTE(review): whether that holds depends on `Task` / `Butler` pickle
        support, which is not visible from this module - confirm before
        relying on ``processes > 1``.
        """
        # Set up multiprocessing, if desired, remembering whether the pool is
        # ours to clean up.
        ownsPool = pool is None and processes > 1
        if ownsPool:
            pool = Pool(processes)
        mapFunc = map if pool is None else pool.imap_unordered
        try:
            self._runWithMap(dataIds, mapFunc, collections)
        finally:
            if ownsPool:
                pool.close()
                pool.join()

    def _runWithMap(self, dataIds: Iterable[DataId], mapFunc: Any, collections: Any) -> None:
        """Implementation of `run` for a given ``map``-like function.

        Parameters
        ----------
        dataIds : `Iterable` [ `dict` or `DataCoordinate` ]
            Exposure-level data IDs, as passed to `run`.
        mapFunc : callable
            Either the builtin ``map`` or a pool method with the same
            ``(func, iterable)`` calling convention.
        collections : Any
            Collections argument, as passed to `run`.
        """
        # Normalize, expand, and deduplicate data IDs.
        self.log.info("Preprocessing data IDs.")
        dataIds = set(mapFunc(self._expandExposureId, dataIds))
        if not dataIds:
            raise RuntimeError("No exposures given.")
        # Extract exposure DimensionRecords, check that there's only one
        # instrument in play, and check for non-science exposures.
        exposures = []
        instruments = set()
        for dataId in dataIds:
            record = dataId.records["exposure"]
            if record.observation_type != "science":
                if self.config.ignoreNonScienceExposures:
                    continue
                else:
                    raise RuntimeError(f"Input exposure {dataId} has observation_type "
                                       f"{record.observation_type}, not 'science'.")
            instruments.add(dataId["instrument"])
            exposures.append(record)
        if not exposures:
            self.log.info("No science exposures found after filtering.")
            return
        if len(instruments) > 1:
            raise RuntimeError(
                f"All data IDs passed to DefineVisitsTask.run must be "
                f"from the same instrument; got {instruments}."
            )
        instrument, = instruments
        # Ensure the visit_system our grouping algorithm uses is in the
        # registry, if it wasn't already.
        visitSystemId, visitSystemName = self.groupExposures.getVisitSystem()
        self.log.info("Registering visit_system %d: %s.", visitSystemId, visitSystemName)
        self.butler.registry.syncDimensionData(
            "visit_system",
            {"instrument": instrument, "id": visitSystemId, "name": visitSystemName}
        )
        # Group exposures into visits, delegating to subtask.
        self.log.info("Grouping %d exposure(s) into visits.", len(exposures))
        definitions = list(self.groupExposures.group(exposures))
        # Compute regions and build DimensionRecords for each visit.
        # This is the only parallel step, but it _should_ be the most expensive
        # one (unless DB operations are slow).
        self.log.info("Computing regions and other metadata for %d visit(s).", len(definitions))
        allRecords = mapFunc(self._buildVisitRecordsSingle,
                             [(definition, collections) for definition in definitions])
        # Iterate over visits and insert dimension data, one transaction per
        # visit.
        for visitRecords in allRecords:
            with self.butler.registry.transaction():
                self.butler.registry.insertDimensionData("visit", visitRecords.visit)
                self.butler.registry.insertDimensionData("visit_definition",
                                                         *visitRecords.visit_definition)
                self.butler.registry.insertDimensionData("visit_detector_region",
                                                         *visitRecords.visit_detector_region)
def _reduceOrNone(func, iterable):
    """Apply a binary function to pairs of elements in an iterable until a
    single value is returned, but return `None` if any element is `None` or
    there are no elements.

    Parameters
    ----------
    func : callable
        Binary function ``func(accumulated, element)``.  It is not called at
        all when the iterable has fewer than two elements.
    iterable : iterable
        Values to reduce.

    Returns
    -------
    result
        The reduced value; `None` if the iterable is empty, if any element
        seen is `None`, or if ``func`` itself returns `None` at any step.
    """
    result = None
    # Track "have we seen an element yet" separately from the accumulator, so
    # that a `None` produced by ``func`` is not mistaken for "no value yet"
    # (which would silently restart the reduction from the next element).
    first = True
    for value in iterable:
        if value is None:
            return None
        if first:
            result = value
            first = False
            continue
        result = func(result, value)
        if result is None:
            # ``func`` reported that no single value exists; that is final.
            return None
    return result
class _GroupExposuresOneToOneConfig(GroupExposuresConfig):
    """Configuration for the one-to-one exposure grouping algorithm."""

    visitSystemId = Field(
        dtype=int,
        default=0,
        doc=(
            "Integer ID of the visit_system implemented by this "
            "grouping algorithm."
        ),
    )
    visitSystemName = Field(
        dtype=str,
        default="one-to-one",
        doc=(
            "String name of the visit_system implemented by this "
            "grouping algorithm."
        ),
    )
@registerConfigurable("one-to-one", GroupExposuresTask.registry)
class _GroupExposuresOneToOneTask(GroupExposuresTask, metaclass=ABCMeta):
    """Grouping algorithm in which every exposure becomes its own visit,
    with the visit taking over the exposure's instrument, ID and name.
    """

    ConfigClass = _GroupExposuresOneToOneConfig

    def getVisitSystem(self) -> Tuple[int, str]:
        # Docstring inherited from GroupExposuresTask.
        cfg = self.config
        return (cfg.visitSystemId, cfg.visitSystemName)

    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        for record in exposures:
            # A visit holding exactly this one exposure.
            visit = VisitDefinitionData(
                instrument=record.instrument,
                id=record.id,
                name=record.name,
                exposures=[record],
            )
            yield visit
class _GroupExposuresByGroupMetadataConfig(GroupExposuresConfig):
    """Configuration for the by-group-metadata exposure grouping algorithm."""

    visitSystemId = Field(
        dtype=int,
        default=1,
        doc=(
            "Integer ID of the visit_system implemented by this "
            "grouping algorithm."
        ),
    )
    visitSystemName = Field(
        dtype=str,
        default="by-group-metadata",
        doc=(
            "String name of the visit_system implemented by this "
            "grouping algorithm."
        ),
    )
@registerConfigurable("by-group-metadata", GroupExposuresTask.registry)
class _GroupExposuresByGroupMetadataTask(GroupExposuresTask, metaclass=ABCMeta):
    """An exposure grouping algorithm that uses exposure.group_name and
    exposure.group_id.

    This algorithm _assumes_ exposure.group_id (generally populated from
    `astro_metadata_translator.ObservationInfo.visit_id`) is not just unique,
    but disjoint from all `ObservationInfo.exposure_id` values - if it isn't,
    it will be impossible to ever use both this grouping algorithm and the
    one-to-one algorithm for a particular camera in the same data repository.
    """

    # Must be this algorithm's own config class: its defaults give this
    # visit system an ID and name distinct from the one-to-one algorithm's.
    ConfigClass = _GroupExposuresByGroupMetadataConfig

    def group(self, exposures: List[DimensionRecord]) -> Iterable[VisitDefinitionData]:
        # Docstring inherited from GroupExposuresTask.
        # Bucket exposures by group name; each bucket becomes one visit.
        groups = defaultdict(list)
        for exposure in exposures:
            groups[exposure.group_name].append(exposure)
        for visitName, exposuresInGroup in groups.items():
            # Instrument and visit ID are taken from the first exposure of
            # the group; all members are expected to share the group ID.
            instrument = exposuresInGroup[0].instrument
            visitId = exposuresInGroup[0].group_id
            assert all(e.group_id == visitId for e in exposuresInGroup), \
                "Grouping by exposure.group_name does not yield consistent group IDs"
            yield VisitDefinitionData(instrument=instrument, id=visitId, name=visitName,
                                      exposures=exposuresInGroup)

    def getVisitSystem(self) -> Tuple[int, str]:
        # Docstring inherited from GroupExposuresTask.
        return (self.config.visitSystemId, self.config.visitSystemName)
class _ComputeVisitRegionsFromSingleRawWcsConfig(ComputeVisitRegionsConfig):
    """Configuration for the single-raw-WCS visit region algorithm."""

    mergeExposures = Field(
        dtype=bool,
        default=False,
        doc=(
            "If True, merge per-detector regions over all exposures in a "
            "visit (via convex hull) instead of using the first exposure "
            "and assuming its regions are valid for all others."
        ),
    )
    detectorId = Field(
        dtype=int,
        default=None,
        optional=True,
        doc=(
            "Load the WCS for the detector with this ID. "
            "If None, use an arbitrary detector (the first found in a query "
            "of the data repository for each exposure (or all exposures, if "
            "mergeExposures is True)."
        ),
    )
    requireVersionedCamera = Field(
        dtype=bool,
        default=False,
        optional=False,
        doc=(
            "If True, raise LookupError if version camera geometry cannot "
            "be loaded for an exposure. "
            "If False, use the nominal camera from the Instrument class "
            "instead."
        ),
    )
@registerConfigurable("single-raw-wcs", ComputeVisitRegionsTask.registry)
class _ComputeVisitRegionsFromSingleRawWcsTask(ComputeVisitRegionsTask):
    """A visit region calculator that uses a single raw WCS and a camera to
    project the bounding boxes of all detectors onto the sky, relating
    different detectors by their positions in focal plane coordinates.

    Notes
    -----
    Most instruments should have their raw WCSs determined from a combination
    of boresight angle, rotator angle, and camera geometry, and hence this
    algorithm should produce stable results regardless of which detector the
    raw corresponds to.  If this is not the case (e.g. because a per-file FITS
    WCS is used instead), either the ID of the detector should be fixed (see
    the ``detectorId`` config parameter) or a different algorithm used.
    """

    ConfigClass = _ComputeVisitRegionsFromSingleRawWcsConfig

    def computeExposureBounds(self, exposure: DimensionRecord, *, collections: Any = None
                              ) -> Dict[int, List[UnitVector3d]]:
        """Compute the lists of unit vectors on the sphere that correspond to
        the sky positions of detector corners.

        Parameters
        ----------
        exposure : `DimensionRecord`
            Dimension record for the exposure.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        bounds : `dict`
            Dictionary mapping detector ID to a list of unit vectors on the
            sphere representing that detector's corners projected onto the sky.

        Raises
        ------
        LookupError
            Raised if no versioned camera is found while
            ``config.requireVersionedCamera`` is `True`, or if
            ``config.detectorId`` is `None` and no ``raw.wcs`` dataset is
            found for the exposure.
        """
        if collections is None:
            collections = self.butler.collections
        camera, versioned = loadCamera(self.butler, exposure.dataId, collections=collections)
        if not versioned and self.config.requireVersionedCamera:
            raise LookupError(f"No versioned camera found for exposure {exposure.dataId}.")
        if self.config.detectorId is None:
            # No detector configured: use the first raw.wcs the registry
            # query returns for this exposure.
            wcsRefs = list(self.butler.registry.queryDatasets("raw.wcs", dataId=exposure.dataId,
                                                              collections=collections))
            if not wcsRefs:
                raise LookupError(f"No raw.wcs datasets found for data ID {exposure.dataId} "
                                  f"in collections {collections}.")
            wcsDetector = camera[wcsRefs[0].dataId["detector"]]
            wcs = self.butler.getDirect(wcsRefs[0])
        else:
            wcsDetector = camera[self.config.detectorId]
            wcs = self.butler.get("raw.wcs", dataId=exposure.dataId, detector=self.config.detectorId,
                                  collections=collections)
        # Chain the WCS detector's focal-plane-to-pixels transform with its
        # WCS transform, so other detectors can reach the sky via the focal
        # plane.
        fpToSky = wcsDetector.getTransform(FOCAL_PLANE, PIXELS).then(wcs.getTransform())
        bounds = {}
        for detector in camera:
            pixelsToSky = detector.getTransform(PIXELS, FOCAL_PLANE).then(fpToSky)
            # Corners of the (optionally padded) detector bounding box.
            pixCorners = Box2D(detector.getBBox().dilatedBy(self.config.padding)).getCorners()
            bounds[detector.getId()] = [
                skyCorner.getVector() for skyCorner in pixelsToSky.applyForward(pixCorners)
            ]
        return bounds

    def compute(self, visit: VisitDefinitionData, *, collections: Any = None
                ) -> Tuple[Region, Dict[int, Region]]:
        # Docstring inherited from ComputeVisitRegionsTask.
        if self.config.mergeExposures:
            # Pool the corner vectors of every exposure, per detector.
            detectorBounds = defaultdict(list)
            for exposure in visit.exposures:
                exposureDetectorBounds = self.computeExposureBounds(exposure, collections=collections)
                for detectorId, bounds in exposureDetectorBounds.items():
                    detectorBounds[detectorId].extend(bounds)
        else:
            # Only the first exposure is used.  The caller's ``collections``
            # must be forwarded here as well, otherwise the override is
            # ignored in the (default) non-merging mode.
            detectorBounds = self.computeExposureBounds(visit.exposures[0], collections=collections)
        visitBounds = []
        detectorRegions = {}
        for detectorId, bounds in detectorBounds.items():
            detectorRegions[detectorId] = ConvexPolygon.convexHull(bounds)
            visitBounds.extend(bounds)
        return ConvexPolygon.convexHull(visitBounds), detectorRegions