Coverage for python / lsst / dax / images / cutout / _image_cutout.py: 22%

210 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-15 00:22 +0000

1# This file is part of dax_images_cutout. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("CutoutMode", "Extraction", "ImageCutoutFactory") 

25 

26import dataclasses 

27import logging 

28from collections.abc import Sequence 

29from enum import Enum, auto 

30from uuid import UUID, uuid4 

31 

32import astropy.io.fits 

33import astropy.time 

34 

35import lsst.geom 

36from lsst.afw.geom import getImageXY0FromMetadata, makeSkyWcs 

37from lsst.afw.image import Exposure, Image, Mask, MaskedImage, makeExposure, makeMaskedImage 

38from lsst.daf.base import PropertyList 

39from lsst.daf.butler import Butler, DataId, DatasetRef 

40from lsst.resources import ResourcePath, ResourcePathExpression 

41from lsst.utils.timer import time_this 

42 

43from .projection_finders import ProjectionFinder 

44from .stencils import PixelStencil, SkyStencil 

45from .version import __version__ 

46 

47# Default logger. 

48_LOG = logging.getLogger(__name__) 

49_TIMER_LOG_LEVEL = logging.INFO 

50 

51 

52class CutoutMode(Enum): 

53 # The entire Exposure plus all associated metadata. 

54 FULL_EXPOSURE = auto() 

55 # Retrieve the full Exposure but only return MaskedImage/SkyWcs/metadata 

56 STRIPPED_EXPOSURE = auto() 

57 # Retrieve and return Image with no WCS or extra metadata. 

58 IMAGE_ONLY = auto() 

59 # MaskedImage using butler+afw 

60 MASKED_IMAGE = auto() 

61 # Astropy Image + primary HDU + WCS 

62 ASTROPY_IMAGE = auto() 

63 # Astropy with masked image. 

64 ASTROPY_MASKED_IMAGE = auto() 

65 

66 

67@dataclasses.dataclass 

68class Extraction: 

69 """A struct that aggregates the results of extracting subimages in the 

70 image cutout backend. 

71 """ 

72 

73 cutout: Image | Mask | MaskedImage | Exposure | astropy.io.fits.HDUList 

74 """The image cutout itself. 

75 """ 

76 

77 sky_stencil: SkyStencil 

78 """The original sky-coordinate stencil. 

79 """ 

80 

81 pixel_stencil: PixelStencil 

82 """A pixel-coordinate representation of the stencil. 

83 """ 

84 

85 metadata: PropertyList 

86 """Additional FITS metadata about the cutout process. 

87 

88 This should be merged with ``cutout.getMetadata()`` on write, for types 

89 that carry their own metadata. 

90 """ 

91 

92 origin_ref: DatasetRef 

93 """Fully-resolved reference to the dataset the cutout is from. 

94 """ 

95 

96 def mask(self, name: str = "STENCIL") -> None: 

97 """Set the bitmask to show the approximate coverage of nonrectangular 

98 stencils. 

99 

100 Parameters 

101 ---------- 

102 name : `str`, optional 

103 Name of the mask plane to add and set. 

104 

105 Notes 

106 ----- 

107 This method modifies `cutout` in place if it is a `Mask`, 

108 `MaskedImage`, or `Exposure`. It does nothing if `cutout` is an 

109 `Image`. 

110 """ 

111 if isinstance(self.cutout, Exposure): 

112 mask = self.cutout.mask 

113 elif isinstance(self.cutout, MaskedImage): 

114 mask = self.cutout.mask 

115 elif isinstance(self.cutout, Mask): 

116 mask = self.cutout 

117 else: 

118 return 

119 mask.addMaskPlane(name) 

120 bits = mask.getPlaneBitMask(name) 

121 self.pixel_stencil.set_mask(mask, bits) 

122 

123 def write_fits(self, path: str, logger: logging.Logger | None = None) -> None: 

124 """Write the cutout to a FITS file. 

125 

126 Parameters 

127 ---------- 

128 path : `str` 

129 Local path to the file. 

130 logger : `logging.Logger`, optional 

131 Logger to use for timing messages. If `None`, a default logger 

132 will be used. 

133 

134 Notes 

135 ----- 

136 If ``cutout`` is an `Exposure`, this will merge `metadata` into the 

137 cutout's attached metadata. In other cases, `metadata` is written 

138 to the primary header without modifying `cutout`. 

139 """ 

140 logger = logger if logger is not None else _LOG 

141 with time_this(logger, msg="Writing FITS file to %s", args=(path,), level=_TIMER_LOG_LEVEL): 

142 if isinstance(self.cutout, Exposure): 

143 self.cutout.getMetadata().update(self.metadata) 

144 self.cutout.writeFits(path) 

145 elif isinstance(self.cutout, astropy.io.fits.HDUList): 

146 self.cutout[0].header.update(self.metadata) 

147 with open(path, "wb") as fh: 

148 self.cutout.writeto(fh) 

149 else: 

150 self.cutout.writeFits(path, metadata=self.metadata) 

151 

152 

153class ImageCutoutFactory: 

154 """High-level interface to the image cutout functionality. 

155 

156 Parameters 

157 ---------- 

158 butler : `Butler` 

159 Butler that subimages are extracted from. 

160 projection_finder : `ProjectionObject` 

161 Object that obtains the WCS and bounding box for butler datasets of 

162 different types. May include caches. 

163 output_root : convertible to `ResourcePath` 

164 Root of output file URIs. This will be combined with the originating 

165 dataset's UUID and an encoding of the stencil to form the complete URI. 

166 temporary_root : convertible to `ResourcePath`, optional 

167 Local filesystem root to write files to before they are transferred to 

168 ``output_root`` (passed as the prefix argument to 

169 `ResourcePath.temporary_uri`). 

170 logger : `logging.Logger`, optional 

171 Logger to use for timing messages. If `None`, a default logger 

172 will be used. 

173 """ 

174 

175 def __init__( 

176 self, 

177 butler: Butler, 

178 projection_finder: ProjectionFinder, 

179 output_root: ResourcePathExpression, 

180 temporary_root: ResourcePathExpression | None = None, 

181 logger: logging.Logger | None = None, 

182 ): 

183 self.butler = butler 

184 self.projection_finder = projection_finder 

185 self.output_root = ResourcePath(output_root, forceAbsolute=True, forceDirectory=True) 

186 self.temporary_root = ( 

187 ResourcePath(temporary_root, forceDirectory=True) if temporary_root is not None else None 

188 ) 

189 self.logger = logger if logger is not None else _LOG 

190 

191 butler: Butler 

192 """Butler that subimage are extracted from (`Butler`). 

193 """ 

194 

195 projection_finder: ProjectionFinder 

196 """Object that obtains the WCS and bounding box for butler datasets of 

197 different types (`ProjectionFinder`). 

198 """ 

199 

200 output_root: ResourcePath 

201 """Root path that extracted cutouts are written to (`ResourcePath`). 

202 """ 

203 

204 temporary_root: ResourcePath | None 

205 """Local filesystem root to write files to before they are transferred to 

206 ``output_root`` 

207 """ 

208 

209 def process_ref( 

210 self, 

211 stencil: SkyStencil, 

212 ref: DatasetRef, 

213 *, 

214 mask_plane: str | None = "STENCIL", 

215 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE, 

216 ) -> ResourcePath: 

217 """Extract and write a cutout from a fully-resolved `DatasetRef`. 

218 

219 Parameters 

220 ---------- 

221 stencil : `SkyStencil` 

222 Definition of the cutout region, in sky coordinates. 

223 ref : `DatasetRef` 

224 Fully-resolved reference to the dataset to obtain the cutout from. 

225 Must have ``DatasetRef.id`` not `None` (use `extract_search` 

226 instead when this is not the case). Need not have an expanded data 

227 ID. May represent an image-like dataset component. 

228 mask : `str`, optional 

229 If not `None`, set this mask plane in the extracted cutout showing 

230 the approximate stencil region. Does nothing if the image type 

231 does not have a mask plane. Defaults to ``STENCIL``. 

232 

233 Returns 

234 ------- 

235 uri : `ResourcePath` 

236 Full path to the extracted cutout. 

237 """ 

238 extract_result = self.extract_ref(stencil, ref, cutout_mode=cutout_mode) 

239 if mask_plane is not None: 

240 extract_result.mask(mask_plane) 

241 return self.write_fits(extract_result) 

242 

243 def process_uuid( 

244 self, 

245 stencil: SkyStencil, 

246 uuid: UUID, 

247 *, 

248 component: str | None = None, 

249 mask_plane: str | None = "STENCIL", 

250 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE, 

251 ) -> ResourcePath: 

252 """Extract and write a cutout from a dataset identified by its UUID. 

253 

254 Parameters 

255 ---------- 

256 stencil : `SkyStencil` 

257 Definition of the cutout region, in sky coordinates. 

258 uuid : `UUID` 

259 Unique ID of the dataset to extract the subimage from. 

260 component : `str`, optional 

261 If not `None` (default), read this component instead of the 

262 composite dataset. 

263 mask : `str`, optional 

264 If not `None`, set this mask plane in the extracted cutout showing 

265 the approximate stencil region. Does nothing if the image type 

266 does not have a mask plane. Defaults to ``STENCIL``. 

267 

268 Returns 

269 ------- 

270 uri : `ResourcePath` 

271 Full path to the extracted cutout. 

272 """ 

273 extract_result = self.extract_uuid(stencil, uuid, component=component, cutout_mode=cutout_mode) 

274 if mask_plane is not None: 

275 extract_result.mask(mask_plane) 

276 return self.write_fits(extract_result) 

277 

278 def process_search( 

279 self, 

280 stencil: SkyStencil, 

281 dataset_type_name: str, 

282 data_id: DataId, 

283 collections: Sequence[str], 

284 *, 

285 mask_plane: str | None = "STENCIL", 

286 ) -> ResourcePath: 

287 """Extract and write a cutout from a dataset identified by a 

288 (dataset type, data ID, collection path) tuple. 

289 

290 Parameters 

291 ---------- 

292 stencil : `SkyStencil` 

293 Definition of the cutout region, in sky coordinates. 

294 dataset_type_name : `str` 

295 Name of the butler dataset. Use ``.``-separate terms to read an 

296 image-like component. 

297 data_id : `dict` or `DataCoordinate` 

298 Mapping-of-dimensions identifier for the dataset within its 

299 collection. 

300 collections : `Iterable` [ `str` ] 

301 Collections to search for the dataset, in the order they should be 

302 searched. 

303 mask : `str`, optional 

304 If not `None`, set this mask plane in the extracted cutout showing 

305 the approximate stencil region. Does nothing if the image type 

306 does not have a mask plane. Defaults to ``STENCIL``. 

307 

308 Returns 

309 ------- 

310 uri : `ResourcePath` 

311 Full path to the extracted cutout. 

312 """ 

313 extract_result = self.extract_search(stencil, dataset_type_name, data_id, collections) 

314 if mask_plane is not None: 

315 extract_result.mask(mask_plane) 

316 return self.write_fits(extract_result) 

317 

318 def extract_ref( 

319 self, stencil: SkyStencil, ref: DatasetRef, cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE 

320 ) -> Extraction: 

321 """Extract a subimage from a fully-resolved `DatasetRef`. 

322 

323 Parameters 

324 ---------- 

325 stencil : `SkyStencil` 

326 Definition of the cutout region, in sky coordinates. 

327 ref : `DatasetRef` 

328 Fully-resolved reference to the dataset to obtain the cutout from. 

329 Must have ``DatasetRef.id`` not `None` (use `extract_search` 

330 instead when this is not the case). Need not have an expanded data 

331 ID. May represent an image-like dataset component. 

332 

333 Returns 

334 ------- 

335 result : `Extraction` 

336 Struct that combines the cutout itself with additional metadata 

337 and the pixel-coordinate stencil. The cutout is not masked; 

338 `Extraction.mask` must be called explicitly if desired. 

339 """ 

340 if ref.id is None: 

341 raise ValueError(f"A resolved DatasetRef is required; got {ref}.") 

342 # Timestamp of the cutout extraction. 

343 now = astropy.time.Time.now() 

344 timesys = "UTC" 

345 # Get the WCS and bbox of this dataset unless we are in astropy mode. 

346 wcs = None 

347 bbox = None 

348 pixel_stencil = None 

349 if cutout_mode not in (CutoutMode.ASTROPY_IMAGE, CutoutMode.ASTROPY_MASKED_IMAGE): 

350 wcs, bbox = self.projection_finder(ref, self.butler, logger=self.logger) 

351 # Transform the stencil to pixel coordinates. 

352 pixel_stencil = stencil.to_pixels(wcs, bbox) 

353 # Somewhere to store metadata. 

354 metadata = PropertyList() 

355 # Actually read the cutout. Leave it to the butler to cache remote 

356 # files locally or do partial remote reads. 

357 with time_this( 

358 self.logger, 

359 msg="Extract cutout", 

360 kwargs={"id": str(ref.id), "cutout_mode": str(cutout_mode), "stencil": str(stencil)}, 

361 level=_TIMER_LOG_LEVEL, 

362 ): 

363 match cutout_mode: 

364 case CutoutMode.FULL_EXPOSURE: 

365 cutout = self.butler.get(ref, parameters={"bbox": pixel_stencil.bbox}) 

366 timesys = cutout.metadata.get("TIMESYS", timesys) 

367 case CutoutMode.STRIPPED_EXPOSURE: 

368 cutout = self.butler.get(ref, parameters={"bbox": pixel_stencil.bbox}) 

369 metadata = cutout.metadata # Track metadata externally. 

370 timesys = metadata.get("TIMESYS", timesys) 

371 cutout = makeExposure(cutout.maskedImage, wcs=cutout.wcs) 

372 case CutoutMode.IMAGE_ONLY: 

373 cutout = self.butler.get( 

374 ref.makeComponentRef("image"), parameters={"bbox": pixel_stencil.bbox} 

375 ) 

376 # No metadata so UTC is default. 

377 timesys = "UTC" 

378 case CutoutMode.MASKED_IMAGE: 

379 # Rely on the file being cached on first read. Faster than 

380 # reading entire exposure. 

381 image = self.butler.get( 

382 ref.makeComponentRef("image"), parameters={"bbox": pixel_stencil.bbox} 

383 ) 

384 variance = self.butler.get( 

385 ref.makeComponentRef("variance"), parameters={"bbox": pixel_stencil.bbox} 

386 ) 

387 mask = self.butler.get( 

388 ref.makeComponentRef("mask"), parameters={"bbox": pixel_stencil.bbox} 

389 ) 

390 wcs = self.butler.get(ref.makeComponentRef("wcs")) 

391 masked_image = makeMaskedImage(image, mask, variance) 

392 cutout = makeExposure(masked_image, wcs=wcs) 

393 

394 metadata = self.butler.get(ref.makeComponentRef("metadata")) 

395 timesys = metadata.get("TIMESYS", timesys) 

396 case CutoutMode.ASTROPY_IMAGE | CutoutMode.ASTROPY_MASKED_IMAGE: 

397 # Bypass butler and try to find the pixel HDU directly. 

398 # Approximate WCS is attached to DP1 image data but needs 

399 # to be shifted. 

400 pixel_components = {"image", "mask", "variance"} 

401 if cutout_mode == CutoutMode.ASTROPY_IMAGE: 

402 pixel_components = {"image"} 

403 uri = self.butler.getURI(ref) 

404 fs, fspath = uri.to_fsspec() 

405 hdul = [] 

406 # Want the primary header and the requested pixel HDU. 

407 # Stop scanning once IMAGE has been located. 

408 found_primary = False 

409 with fs.open(fspath) as f, astropy.io.fits.open(f) as fits_obj: 

410 for hdu in fits_obj: 

411 if not found_primary: 

412 hdul.append(hdu.copy()) 

413 timesys = hdul[0].header.get("TIMESYS", timesys) 

414 found_primary = True 

415 continue 

416 

417 hdr = hdu.header 

418 extname = hdr.get("EXTNAME") 

419 if extname and extname.lower() in pixel_components: 

420 pixel_components.remove(extname.lower()) 

421 # Get BBOX for full HDU. 

422 # Use shape to prevent reading the data array. 

423 shape = hdu.shape 

424 dimensions = lsst.geom.Extent2I(shape[1], shape[0]) 

425 

426 # XY0 is defined in the A WCS. 

427 pl = PropertyList() 

428 pl.update(hdr) 

429 xy0 = getImageXY0FromMetadata(pl, "A", strip=False) 

430 

431 # This is the PARENT bbox. 

432 full_bbox = lsst.geom.Box2I(xy0, dimensions) 

433 

434 # Calculate the cutout bbox from the stencil. 

435 if pixel_stencil is None: 

436 # Use FITS WCS. 

437 wcs = makeSkyWcs(pl) 

438 pixel_stencil = stencil.to_pixels(wcs, full_bbox) 

439 bbox = pixel_stencil.bbox 

440 

441 # Work out the required cutout of the HDU. 

442 minX = bbox.getBeginX() - full_bbox.getBeginX() 

443 maxX = bbox.getEndX() - full_bbox.getBeginX() 

444 minY = bbox.getBeginY() - full_bbox.getBeginY() 

445 maxY = bbox.getEndY() - full_bbox.getBeginY() 

446 # Get the cutout and detach from remote. 

447 data = hdu.section[minY:maxY, minX:maxX].copy() 

448 

449 # Must correct the header WCS to take into 

450 # account the offset. 

451 if (k := "CRPIX1") in hdr: 

452 hdr[k] -= minX 

453 if (k := "CRPIX2") in hdr: 

454 hdr[k] -= minY 

455 if (k := "LTV1") in hdr: 

456 hdr[k] = -bbox.getBeginX() 

457 if (k := "LTV2") in hdr: 

458 hdr[k] = -bbox.getBeginY() 

459 if (k := "CRVAL1A") in hdr: 

460 hdr[k] = bbox.getBeginX() 

461 if (k := "CRVAL2A") in hdr: 

462 hdr[k] = bbox.getBeginY() 

463 

464 # Create new HDU with the cutout. 

465 cutout_hdu = astropy.io.fits.ImageHDU(data=data, header=hdr.copy()) 

466 hdul.append(cutout_hdu) 

467 

468 # Stop looking for HDUs. 

469 if not pixel_components: 

470 break 

471 cutout = astropy.io.fits.HDUList(hdus=hdul) 

472 case _: 

473 raise ValueError(f"Unsupported cutout mode: {cutout_mode}") 

474 

475 # Create some FITS metadata with the cutout parameters. 

476 metadata.set("BTLRUUID", ref.id.hex, "Butler dataset UUID this cutout was extracted from.") 

477 metadata.set( 

478 "BTLRNAME", ref.datasetType.name, "Butler dataset type name this cutout was extracted from." 

479 ) 

480 for n, (k, v) in enumerate(ref.dataId.required.items()): 

481 # Write data ID dictionary sort of like a list of 2-tuples, to make 

482 # it easier to stay within the FITS 8-char key limit. 

483 metadata.set(f"BTLRK{n:03}", k, f"Name of dimension {n} in the data ID.") 

484 metadata.set(f"BTLRV{n:03}", v, f"Value of dimension {n} in the data ID.") 

485 stencil.to_fits_metadata(metadata) 

486 

487 # Record the time and software version. 

488 now.format = "fits" 

489 now = now.tai if timesys.lower() == "tai" else now.utc 

490 metadata.set("DATE-CUT", str(now), "Time of cutout extraction") 

491 metadata.set("CUTVERS", __version__, "dax_images_cutout software version") 

492 

493 return Extraction( 

494 cutout=cutout, 

495 sky_stencil=stencil, 

496 pixel_stencil=pixel_stencil, 

497 metadata=metadata, 

498 origin_ref=ref, 

499 ) 

500 

501 def extract_uuid( 

502 self, 

503 stencil: SkyStencil, 

504 uuid: UUID, 

505 *, 

506 component: str | None = None, 

507 cutout_mode: CutoutMode = CutoutMode.FULL_EXPOSURE, 

508 ) -> Extraction: 

509 """Extract a subimage from a dataset identified by its UUID. 

510 

511 Parameters 

512 ---------- 

513 stencil : `SkyStencil` 

514 Definition of the cutout region, in sky coordinates. 

515 uuid : `UUID` 

516 Unique ID of the dataset to read from. 

517 component : `str`, optional 

518 If not `None` (default), read this component instead of the 

519 composite dataset. 

520 

521 Returns 

522 ------- 

523 result : `Extraction` 

524 Struct that combines the cutout itself with additional metadata 

525 and the pixel-coordinate stencil. The cutout is not masked; 

526 `Extraction.mask` must be called explicitly if desired. 

527 """ 

528 ref = self.butler.get_dataset(uuid) 

529 if ref is None: 

530 raise LookupError(f"No dataset found with UUID {uuid}.") 

531 if component is not None: 

532 ref = ref.makeComponentRef(component) 

533 return self.extract_ref(stencil, ref, cutout_mode=cutout_mode) 

534 

535 def extract_search( 

536 self, stencil: SkyStencil, dataset_type_name: str, data_id: DataId, collections: Sequence[str] 

537 ) -> Extraction: 

538 """Extract a subimage from a dataset identified by a (dataset type, 

539 data ID, collection path) tuple. 

540 

541 Parameters 

542 ---------- 

543 stencil : `SkyStencil` 

544 Definition of the cutout region, in sky coordinates. 

545 dataset_type_name : `str` 

546 Name of the butler dataset. Use ``.``-separate terms to read an 

547 image-like component. 

548 data_id : `dict` or `DataCoordinate` 

549 Mapping-of-dimensions identifier for the dataset within its 

550 collection. 

551 collections : `Iterable` [ `str` ] 

552 Collections to search for the dataset, in the order they should be 

553 searched. 

554 

555 Returns 

556 ------- 

557 result : `Extraction` 

558 Struct that combines the cutout itself with additional metadata 

559 and the pixel-coordinate stencil. The cutout is not masked; 

560 `Extraction.mask` must be called explicitly if desired. 

561 """ 

562 ref = self.butler.find_dataset(dataset_type_name, data_id, collections=collections) 

563 if ref is None: 

564 raise LookupError( 

565 f"No {dataset_type_name} dataset found with data ID {data_id} in {collections}." 

566 ) 

567 return self.extract_ref(stencil, ref) 

568 

569 def write_fits(self, extract_result: Extraction) -> ResourcePath: 

570 """Write a `Extraction` to a remote FITS file in `output_root`. 

571 

572 Parameters 

573 ---------- 

574 extract_result : `Extraction` 

575 Result of a call to a ``extract_*`` method. 

576 

577 Returns 

578 ------- 

579 uri : `ResourcePath` 

580 Full path to the extracted cutout. 

581 """ 

582 output_uuid = uuid4() 

583 remote_uri = self.output_root.join(output_uuid.hex + ".fits") 

584 with ResourcePath.temporary_uri(prefix=self.temporary_root, suffix=".fits") as tmp_uri: 

585 tmp_uri.parent().mkdir() 

586 extract_result.write_fits(tmp_uri.ospath, logger=self.logger) 

587 remote_uri.transfer_from(tmp_uri, transfer="copy") 

588 return remote_uri