Coverage for python / lsst / dax / images / cutout / _image_cutout.py: 22%
210 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 09:35 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 09:35 +0000
1# This file is part of dax_images_cutout.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("CutoutMode", "Extraction", "ImageCutoutFactory")
26import dataclasses
27import logging
28from collections.abc import Sequence
29from enum import Enum, auto
30from uuid import UUID, uuid4
32import astropy.io.fits
33import astropy.time
35import lsst.geom
36from lsst.afw.geom import getImageXY0FromMetadata, makeSkyWcs
37from lsst.afw.image import Exposure, Image, Mask, MaskedImage, makeExposure, makeMaskedImage
38from lsst.daf.base import PropertyList
39from lsst.daf.butler import Butler, DataId, DatasetRef
40from lsst.resources import ResourcePath, ResourcePathExpression
41from lsst.utils.timer import time_this
43from .projection_finders import ProjectionFinder
44from .stencils import PixelStencil, SkyStencil
45from .version import __version__
47# Default logger.
48_LOG = logging.getLogger(__name__)
49_TIMER_LOG_LEVEL = logging.INFO
52class CutoutMode(Enum):
53 # The entire Exposure plus all associated metadata.
54 FULL_EXPOSURE = auto()
55 # Retrieve the full Exposure but only return MaskedImage/SkyWcs/metadata
56 STRIPPED_EXPOSURE = auto()
57 # Retrieve and return Image with no WCS or extra metadata.
58 IMAGE_ONLY = auto()
59 # MaskedImage using butler+afw
60 MASKED_IMAGE = auto()
61 # Astropy Image + primary HDU + WCS
62 ASTROPY_IMAGE = auto()
63 # Astropy with masked image.
64 ASTROPY_MASKED_IMAGE = auto()
67@dataclasses.dataclass
68class Extraction:
69 """A struct that aggregates the results of extracting subimages in the
70 image cutout backend.
71 """
73 cutout: Image | Mask | MaskedImage | Exposure | astropy.io.fits.HDUList
74 """The image cutout itself.
75 """
77 sky_stencil: SkyStencil
78 """The original sky-coordinate stencil.
79 """
81 pixel_stencil: PixelStencil
82 """A pixel-coordinate representation of the stencil.
83 """
85 metadata: PropertyList
86 """Additional FITS metadata about the cutout process.
88 This should be merged with ``cutout.getMetadata()`` on write, for types
89 that carry their own metadata.
90 """
92 origin_ref: DatasetRef
93 """Fully-resolved reference to the dataset the cutout is from.
94 """
96 def mask(self, name: str = "STENCIL") -> None:
97 """Set the bitmask to show the approximate coverage of nonrectangular
98 stencils.
100 Parameters
101 ----------
102 name : `str`, optional
103 Name of the mask plane to add and set.
105 Notes
106 -----
107 This method modifies `cutout` in place if it is a `Mask`,
108 `MaskedImage`, or `Exposure`. It does nothing if `cutout` is an
109 `Image`.
110 """
111 if isinstance(self.cutout, Exposure):
112 mask = self.cutout.mask
113 elif isinstance(self.cutout, MaskedImage):
114 mask = self.cutout.mask
115 elif isinstance(self.cutout, Mask):
116 mask = self.cutout
117 else:
118 return
119 mask.addMaskPlane(name)
120 bits = mask.getPlaneBitMask(name)
121 self.pixel_stencil.set_mask(mask, bits)
123 def write_fits(self, path: str, logger: logging.Logger | None = None) -> None:
124 """Write the cutout to a FITS file.
126 Parameters
127 ----------
128 path : `str`
129 Local path to the file.
130 logger : `logging.Logger`, optional
131 Logger to use for timing messages. If `None`, a default logger
132 will be used.
134 Notes
135 -----
136 If ``cutout`` is an `Exposure`, this will merge `metadata` into the
137 cutout's attached metadata. In other cases, `metadata` is written
138 to the primary header without modifying `cutout`.
139 """
140 logger = logger if logger is not None else _LOG
141 with time_this(logger, msg="Writing FITS file to %s", args=(path,), level=_TIMER_LOG_LEVEL):
142 if isinstance(self.cutout, Exposure):
143 self.cutout.getMetadata().update(self.metadata)
144 self.cutout.writeFits(path)
145 elif isinstance(self.cutout, astropy.io.fits.HDUList):
146 self.cutout[0].header.update(self.metadata)
147 with open(path, "wb") as fh:
148 self.cutout.writeto(fh)
149 else:
150 self.cutout.writeFits(path, metadata=self.metadata)
153class ImageCutoutFactory:
154 """High-level interface to the image cutout functionality.
156 Parameters
157 ----------
158 butler : `Butler`
159 Butler that subimages are extracted from.
160 projection_finder : `ProjectionObject`
161 Object that obtains the WCS and bounding box for butler datasets of
162 different types. May include caches.
163 output_root : convertible to `ResourcePath`
164 Root of output file URIs. This will be combined with the originating
165 dataset's UUID and an encoding of the stencil to form the complete URI.
166 temporary_root : convertible to `ResourcePath`, optional
167 Local filesystem root to write files to before they are transferred to
168 ``output_root`` (passed as the prefix argument to
169 `ResourcePath.temporary_uri`).
170 logger : `logging.Logger`, optional
171 Logger to use for timing messages. If `None`, a default logger
172 will be used.
173 """
175 def __init__(
176 self,
177 butler: Butler,
178 projection_finder: ProjectionFinder,
179 output_root: ResourcePathExpression,
180 temporary_root: ResourcePathExpression | None = None,
181 logger: logging.Logger | None = None,
182 ):
183 self.butler = butler
184 self.projection_finder = projection_finder
185 self.output_root = ResourcePath(output_root, forceAbsolute=True, forceDirectory=True)
186 self.temporary_root = (
187 ResourcePath(temporary_root, forceDirectory=True) if temporary_root is not None else None
188 )
189 self.logger = logger if logger is not None else _LOG
191 butler: Butler
192 """Butler that subimage are extracted from (`Butler`).
193 """
195 projection_finder: ProjectionFinder
196 """Object that obtains the WCS and bounding box for butler datasets of
197 different types (`ProjectionFinder`).
198 """
200 output_root: ResourcePath
201 """Root path that extracted cutouts are written to (`ResourcePath`).
202 """
204 temporary_root: ResourcePath | None
205 """Local filesystem root to write files to before they are transferred to
206 ``output_root``
207 """
209 def process_ref(
210 self,
211 stencil: SkyStencil,
212 ref: DatasetRef,
213 *,
214 mask_plane: str | None = "STENCIL",
215 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE,
216 ) -> ResourcePath:
217 """Extract and write a cutout from a fully-resolved `DatasetRef`.
219 Parameters
220 ----------
221 stencil : `SkyStencil`
222 Definition of the cutout region, in sky coordinates.
223 ref : `DatasetRef`
224 Fully-resolved reference to the dataset to obtain the cutout from.
225 Must have ``DatasetRef.id`` not `None` (use `extract_search`
226 instead when this is not the case). Need not have an expanded data
227 ID. May represent an image-like dataset component.
228 mask : `str`, optional
229 If not `None`, set this mask plane in the extracted cutout showing
230 the approximate stencil region. Does nothing if the image type
231 does not have a mask plane. Defaults to ``STENCIL``.
233 Returns
234 -------
235 uri : `ResourcePath`
236 Full path to the extracted cutout.
237 """
238 extract_result = self.extract_ref(stencil, ref, cutout_mode=cutout_mode)
239 if mask_plane is not None:
240 extract_result.mask(mask_plane)
241 return self.write_fits(extract_result)
243 def process_uuid(
244 self,
245 stencil: SkyStencil,
246 uuid: UUID,
247 *,
248 component: str | None = None,
249 mask_plane: str | None = "STENCIL",
250 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE,
251 ) -> ResourcePath:
252 """Extract and write a cutout from a dataset identified by its UUID.
254 Parameters
255 ----------
256 stencil : `SkyStencil`
257 Definition of the cutout region, in sky coordinates.
258 uuid : `UUID`
259 Unique ID of the dataset to extract the subimage from.
260 component : `str`, optional
261 If not `None` (default), read this component instead of the
262 composite dataset.
263 mask : `str`, optional
264 If not `None`, set this mask plane in the extracted cutout showing
265 the approximate stencil region. Does nothing if the image type
266 does not have a mask plane. Defaults to ``STENCIL``.
268 Returns
269 -------
270 uri : `ResourcePath`
271 Full path to the extracted cutout.
272 """
273 extract_result = self.extract_uuid(stencil, uuid, component=component, cutout_mode=cutout_mode)
274 if mask_plane is not None:
275 extract_result.mask(mask_plane)
276 return self.write_fits(extract_result)
278 def process_search(
279 self,
280 stencil: SkyStencil,
281 dataset_type_name: str,
282 data_id: DataId,
283 collections: Sequence[str],
284 *,
285 mask_plane: str | None = "STENCIL",
286 ) -> ResourcePath:
287 """Extract and write a cutout from a dataset identified by a
288 (dataset type, data ID, collection path) tuple.
290 Parameters
291 ----------
292 stencil : `SkyStencil`
293 Definition of the cutout region, in sky coordinates.
294 dataset_type_name : `str`
295 Name of the butler dataset. Use ``.``-separate terms to read an
296 image-like component.
297 data_id : `dict` or `DataCoordinate`
298 Mapping-of-dimensions identifier for the dataset within its
299 collection.
300 collections : `Iterable` [ `str` ]
301 Collections to search for the dataset, in the order they should be
302 searched.
303 mask : `str`, optional
304 If not `None`, set this mask plane in the extracted cutout showing
305 the approximate stencil region. Does nothing if the image type
306 does not have a mask plane. Defaults to ``STENCIL``.
308 Returns
309 -------
310 uri : `ResourcePath`
311 Full path to the extracted cutout.
312 """
313 extract_result = self.extract_search(stencil, dataset_type_name, data_id, collections)
314 if mask_plane is not None:
315 extract_result.mask(mask_plane)
316 return self.write_fits(extract_result)
318 def extract_ref(
319 self, stencil: SkyStencil, ref: DatasetRef, cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE
320 ) -> Extraction:
321 """Extract a subimage from a fully-resolved `DatasetRef`.
323 Parameters
324 ----------
325 stencil : `SkyStencil`
326 Definition of the cutout region, in sky coordinates.
327 ref : `DatasetRef`
328 Fully-resolved reference to the dataset to obtain the cutout from.
329 Must have ``DatasetRef.id`` not `None` (use `extract_search`
330 instead when this is not the case). Need not have an expanded data
331 ID. May represent an image-like dataset component.
333 Returns
334 -------
335 result : `Extraction`
336 Struct that combines the cutout itself with additional metadata
337 and the pixel-coordinate stencil. The cutout is not masked;
338 `Extraction.mask` must be called explicitly if desired.
339 """
340 if ref.id is None:
341 raise ValueError(f"A resolved DatasetRef is required; got {ref}.")
342 # Timestamp of the cutout extraction.
343 now = astropy.time.Time.now()
344 timesys = "UTC"
345 # Get the WCS and bbox of this dataset unless we are in astropy mode.
346 wcs = None
347 bbox = None
348 pixel_stencil = None
349 if cutout_mode not in (CutoutMode.ASTROPY_IMAGE, CutoutMode.ASTROPY_MASKED_IMAGE):
350 wcs, bbox = self.projection_finder(ref, self.butler, logger=self.logger)
351 # Transform the stencil to pixel coordinates.
352 pixel_stencil = stencil.to_pixels(wcs, bbox)
353 # Somewhere to store metadata.
354 metadata = PropertyList()
355 # Actually read the cutout. Leave it to the butler to cache remote
356 # files locally or do partial remote reads.
357 with time_this(
358 self.logger,
359 msg="Extract cutout",
360 kwargs={"id": str(ref.id), "cutout_mode": str(cutout_mode), "stencil": str(stencil)},
361 level=_TIMER_LOG_LEVEL,
362 ):
363 match cutout_mode:
364 case CutoutMode.FULL_EXPOSURE:
365 cutout = self.butler.get(ref, parameters={"bbox": pixel_stencil.bbox})
366 timesys = cutout.metadata.get("TIMESYS", timesys)
367 case CutoutMode.STRIPPED_EXPOSURE:
368 cutout = self.butler.get(ref, parameters={"bbox": pixel_stencil.bbox})
369 metadata = cutout.metadata # Track metadata externally.
370 timesys = metadata.get("TIMESYS", timesys)
371 cutout = makeExposure(cutout.maskedImage, wcs=cutout.wcs)
372 case CutoutMode.IMAGE_ONLY:
373 cutout = self.butler.get(
374 ref.makeComponentRef("image"), parameters={"bbox": pixel_stencil.bbox}
375 )
376 # No metadata so UTC is default.
377 timesys = "UTC"
378 case CutoutMode.MASKED_IMAGE:
379 # Rely on the file being cached on first read. Faster than
380 # reading entire exposure.
381 image = self.butler.get(
382 ref.makeComponentRef("image"), parameters={"bbox": pixel_stencil.bbox}
383 )
384 variance = self.butler.get(
385 ref.makeComponentRef("variance"), parameters={"bbox": pixel_stencil.bbox}
386 )
387 mask = self.butler.get(
388 ref.makeComponentRef("mask"), parameters={"bbox": pixel_stencil.bbox}
389 )
390 wcs = self.butler.get(ref.makeComponentRef("wcs"))
391 masked_image = makeMaskedImage(image, mask, variance)
392 cutout = makeExposure(masked_image, wcs=wcs)
394 metadata = self.butler.get(ref.makeComponentRef("metadata"))
395 timesys = metadata.get("TIMESYS", timesys)
396 case CutoutMode.ASTROPY_IMAGE | CutoutMode.ASTROPY_MASKED_IMAGE:
397 # Bypass butler and try to find the pixel HDU directly.
398 # Approximate WCS is attached to DP1 image data but needs
399 # to be shifted.
400 pixel_components = {"image", "mask", "variance"}
401 if cutout_mode == CutoutMode.ASTROPY_IMAGE:
402 pixel_components = {"image"}
403 uri = self.butler.getURI(ref)
404 fs, fspath = uri.to_fsspec()
405 hdul = []
406 # Want the primary header and the requested pixel HDU.
407 # Stop scanning once IMAGE has been located.
408 found_primary = False
409 with fs.open(fspath) as f, astropy.io.fits.open(f) as fits_obj:
410 for hdu in fits_obj:
411 if not found_primary:
412 hdul.append(hdu.copy())
413 timesys = hdul[0].header.get("TIMESYS", timesys)
414 found_primary = True
415 continue
417 hdr = hdu.header
418 extname = hdr.get("EXTNAME")
419 if extname and extname.lower() in pixel_components:
420 pixel_components.remove(extname.lower())
421 # Get BBOX for full HDU.
422 # Use shape to prevent reading the data array.
423 shape = hdu.shape
424 dimensions = lsst.geom.Extent2I(shape[1], shape[0])
426 # XY0 is defined in the A WCS.
427 pl = PropertyList()
428 pl.update(hdr)
429 xy0 = getImageXY0FromMetadata(pl, "A", strip=False)
431 # This is the PARENT bbox.
432 full_bbox = lsst.geom.Box2I(xy0, dimensions)
434 # Calculate the cutout bbox from the stencil.
435 if pixel_stencil is None:
436 # Use FITS WCS.
437 wcs = makeSkyWcs(pl)
438 pixel_stencil = stencil.to_pixels(wcs, full_bbox)
439 bbox = pixel_stencil.bbox
441 # Work out the required cutout of the HDU.
442 minX = bbox.getBeginX() - full_bbox.getBeginX()
443 maxX = bbox.getEndX() - full_bbox.getBeginX()
444 minY = bbox.getBeginY() - full_bbox.getBeginY()
445 maxY = bbox.getEndY() - full_bbox.getBeginY()
446 # Get the cutout and detach from remote.
447 data = hdu.section[minY:maxY, minX:maxX].copy()
449 # Must correct the header WCS to take into
450 # account the offset.
451 if (k := "CRPIX1") in hdr:
452 hdr[k] -= minX
453 if (k := "CRPIX2") in hdr:
454 hdr[k] -= minY
455 if (k := "LTV1") in hdr:
456 hdr[k] = -bbox.getBeginX()
457 if (k := "LTV2") in hdr:
458 hdr[k] = -bbox.getBeginY()
459 if (k := "CRVAL1A") in hdr:
460 hdr[k] = bbox.getBeginX()
461 if (k := "CRVAL2A") in hdr:
462 hdr[k] = bbox.getBeginY()
464 # Create new HDU with the cutout.
465 cutout_hdu = astropy.io.fits.ImageHDU(data=data, header=hdr.copy())
466 hdul.append(cutout_hdu)
468 # Stop looking for HDUs.
469 if not pixel_components:
470 break
471 cutout = astropy.io.fits.HDUList(hdus=hdul)
472 case _:
473 raise ValueError(f"Unsupported cutout mode: {cutout_mode}")
475 # Create some FITS metadata with the cutout parameters.
476 metadata.set("BTLRUUID", ref.id.hex, "Butler dataset UUID this cutout was extracted from.")
477 metadata.set(
478 "BTLRNAME", ref.datasetType.name, "Butler dataset type name this cutout was extracted from."
479 )
480 for n, (k, v) in enumerate(ref.dataId.required.items()):
481 # Write data ID dictionary sort of like a list of 2-tuples, to make
482 # it easier to stay within the FITS 8-char key limit.
483 metadata.set(f"BTLRK{n:03}", k, f"Name of dimension {n} in the data ID.")
484 metadata.set(f"BTLRV{n:03}", v, f"Value of dimension {n} in the data ID.")
485 stencil.to_fits_metadata(metadata)
487 # Record the time and software version.
488 now.format = "fits"
489 now = now.tai if timesys.lower() == "tai" else now.utc
490 metadata.set("DATE-CUT", str(now), "Time of cutout extraction")
491 metadata.set("CUTVERS", __version__, "dax_images_cutout software version")
493 return Extraction(
494 cutout=cutout,
495 sky_stencil=stencil,
496 pixel_stencil=pixel_stencil,
497 metadata=metadata,
498 origin_ref=ref,
499 )
501 def extract_uuid(
502 self,
503 stencil: SkyStencil,
504 uuid: UUID,
505 *,
506 component: str | None = None,
507 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE,
508 ) -> Extraction:
509 """Extract a subimage from a dataset identified by its UUID.
511 Parameters
512 ----------
513 stencil : `SkyStencil`
514 Definition of the cutout region, in sky coordinates.
515 uuid : `UUID`
516 Unique ID of the dataset to read from.
517 component : `str`, optional
518 If not `None` (default), read this component instead of the
519 composite dataset.
521 Returns
522 -------
523 result : `Extraction`
524 Struct that combines the cutout itself with additional metadata
525 and the pixel-coordinate stencil. The cutout is not masked;
526 `Extraction.mask` must be called explicitly if desired.
527 """
528 ref = self.butler.get_dataset(uuid)
529 if ref is None:
530 raise LookupError(f"No dataset found with UUID {uuid}.")
531 if component is not None:
532 ref = ref.makeComponentRef(component)
533 return self.extract_ref(stencil, ref, cutout_mode=cutout_mode)
535 def extract_search(
536 self, stencil: SkyStencil, dataset_type_name: str, data_id: DataId, collections: Sequence[str]
537 ) -> Extraction:
538 """Extract a subimage from a dataset identified by a (dataset type,
539 data ID, collection path) tuple.
541 Parameters
542 ----------
543 stencil : `SkyStencil`
544 Definition of the cutout region, in sky coordinates.
545 dataset_type_name : `str`
546 Name of the butler dataset. Use ``.``-separate terms to read an
547 image-like component.
548 data_id : `dict` or `DataCoordinate`
549 Mapping-of-dimensions identifier for the dataset within its
550 collection.
551 collections : `Iterable` [ `str` ]
552 Collections to search for the dataset, in the order they should be
553 searched.
555 Returns
556 -------
557 result : `Extraction`
558 Struct that combines the cutout itself with additional metadata
559 and the pixel-coordinate stencil. The cutout is not masked;
560 `Extraction.mask` must be called explicitly if desired.
561 """
562 ref = self.butler.find_dataset(dataset_type_name, data_id, collections=collections)
563 if ref is None:
564 raise LookupError(
565 f"No {dataset_type_name} dataset found with data ID {data_id} in {collections}."
566 )
567 return self.extract_ref(stencil, ref)
569 def write_fits(self, extract_result: Extraction) -> ResourcePath:
570 """Write a `Extraction` to a remote FITS file in `output_root`.
572 Parameters
573 ----------
574 extract_result : `Extraction`
575 Result of a call to a ``extract_*`` method.
577 Returns
578 -------
579 uri : `ResourcePath`
580 Full path to the extracted cutout.
581 """
582 output_uuid = uuid4()
583 remote_uri = self.output_root.join(output_uuid.hex + ".fits")
584 with ResourcePath.temporary_uri(prefix=self.temporary_root, suffix=".fits") as tmp_uri:
585 tmp_uri.parent().mkdir()
586 extract_result.write_fits(tmp_uri.ospath, logger=self.logger)
587 remote_uri.transfer_from(tmp_uri, transfer="copy")
588 return remote_uri