Coverage for python/lsst/cell_coadds/_fits.py: 24%

155 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 03:48 -0700

1# This file is part of cell_coadds. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Module to handle FITS serialization and de-serialization. 

23 

24The routines to write and read the files are in the same module, as a change to 

25one is typically accompanied by a corresponding change to another. Code changes 

26relating to writing the file must bump the version number denoted by the 

27module constant FILE_FORMAT_VERSION. 

28 

29Although the typical use case is for newer versions of the code to read files 

30written by an older version, for the purposes of deciding the newer version 

31string, it is helpful to think about an older version of the reader attempting 

32to read a newer version of the file on disk. The policy for bumping the version 

33is as follows: 

34 

351. When the on-disk file format written by this module changes such that the 

36previous version of the reader can still read files written by the newer 

37version, then there should be a minor bump. 

38 

392. When the on-disk format written by this module changes in a way that will 

40prevent the previous version of the reader from reading a file produced by the 

41current version of the module, then there should be a major bump. This usually 

42means that the new version of the reader cannot read older files either, 

43save the temporary support with deprecation warnings, possibly until a new 

44release of the Science Pipelines is made. 

45 

46Examples 

47-------- 

481. A file with VERSION=1.3 should still be readable by the reader in 

49this module when the module-level constant FILE_FORMAT_VERSION=1.4. A file 

50written with VERSION=1.4 will typically be readable by a reader when the 

51module-level FILE_FORMAT_VERSION=1.3, although such a use case is not expected. 

52A concrete example of change 

53that requires only a minor bump is adding another BinTable that keeps track of 

54the input visits. 

55 

562. An example of major change would be migrating from using 

57BinTableHDU to ImageHDU to save data. Even if the reader supports reading 

58either of these formats based on the value of VERSION from the header, it should 

59be a major change because the previous version of the reader cannot read data 

60from ImageHDUs. 

61 

62Unit tests only check that a file written can be read by the concurrent version 

63of the module, but not by any of the previous ones. Hence, bumping 

64FILE_FORMAT_VERSION to the appropriate value is ultimately at the discretion of 

65the developers. 

66 

67A major bump must also be recorded in the `isCompatibleWith` method. 

68It is plausible that different (non-consecutive) major format versions can be 

69read by the same reader (due to reverting back to an earlier format, or to 

70something very similar). `isCompatibleWith` method offers the convenience of 

71checking if a particular format version can be read by the current reader. 

72 

73Note that major version 0 is considered unstable and experimental and none of 

74the guarantees above apply. 

75""" 

76 

77from __future__ import annotations 

78 

# Names exported by ``from ... import *``; this is the public API of the
# module.
__all__ = (
    "CellCoaddFitsFormatter",
    "CellCoaddFitsReader",
    "IncompatibleVersionError",
    "writeMultipleCellCoaddAsFits",
)

85 

86import logging 

87import os 

88from collections.abc import Iterable, Mapping 

89from dataclasses import dataclass 

90from typing import Any 

91 

92import lsst.afw.geom as afwGeom 

93import lsst.afw.image as afwImage 

94import numpy as np 

95from astropy.io import fits 

96from lsst.afw.image import ImageD, ImageF 

97from lsst.daf.base import PropertySet 

98from lsst.geom import Box2I, Extent2I, Point2I 

99from lsst.obs.base.formatters.fitsGeneric import FitsGenericFormatter 

100from lsst.skymap import Index2D 

101from packaging import version 

102 

103from ._common_components import CoaddUnits, CommonComponents 

104from ._grid_container import GridContainer 

105from ._identifiers import CellIdentifiers, ObservationIdentifiers, PatchIdentifiers 

106from ._image_planes import OwnedImagePlanes 

107from ._multiple_cell_coadd import MultipleCellCoadd, SingleCellCoadd 

108from ._uniform_grid import UniformGrid 

109 

FILE_FORMAT_VERSION = "0.3"
"""Version of the on-disk file format written by this module, as a string of
the form M.m, where M is the major version and m is the minor version.
"""

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

116 

117 

class IncompatibleVersionError(RuntimeError):
    """Error signalling that a FITS file cannot be read by this reader.

    Raised when the format version recorded in the file is not one that
    CellCoaddFitsReader is compatible with.
    """

122 

123 

@dataclass
class VisitRecord:
    """Container for the per-visit information stored alongside a coadd.

    This is a helper intended for use within this module.
    """

    # Field order is part of the positional constructor; do not reorder.
    visit: int
    day_obs: int
    physical_filter: str

134 

135 

class CellCoaddFitsFormatter(FitsGenericFormatter):
    """Formatter used to write cell coadds to, and read them from, FITS files.

    No behavior is added on top of FitsGenericFormatter; for now this relies
    on readFits and writeFits methods being available.
    """

141 

142 

class CellCoaddFitsReader:
    """A reader class to read from a FITS file and produce cell-based coadds.

    The reader offers methods that return a single component without
    assembling the full coadd (e.g., `readWcs`) and methods that return a
    full coadd (e.g., `readAsMultipleCellCoadd`).

    Parameters
    ----------
    filename : `str`
        The name of the FITS file to read.

    Raises
    ------
    FileNotFoundError
        Raised if ``filename`` does not exist.
    """

    # Minimum and maximum compatible file format versions are listed as
    # iterables so as to allow for discontiguous intervals. The i-th minimum
    # is paired with the i-th maximum, and each interval is half-open:
    # minimum <= version < maximum.
    MINIMUM_FILE_FORMAT_VERSIONS = ("0.1",)
    MAXIMUM_FILE_FORMAT_VERSIONS = ("1.0",)

    def __init__(self, filename: str) -> None:
        # Fail early with a clear message instead of at the first read.
        if not os.path.exists(filename):
            raise FileNotFoundError(f"File {filename} not found")

        self.filename = filename

    @classmethod
    def isCompatibleWith(cls, written_version: str, /) -> bool:
        """Check if the serialization version is compatible with the reader.

        This is a convenience method to ask if the current version of this
        class can read a file, based on the VERSION in its header.

        Parameters
        ----------
        written_version : `str`
            The VERSION of the file to be read.

        Returns
        -------
        compatible : `bool`
            Whether the reader can read a file whose VERSION is
            ``written_version``.

        Notes
        -----
        This accepts the other version as a positional argument only.
        """
        written = version.parse(written_version)
        return any(
            version.parse(lower) <= written < version.parse(upper)
            for lower, upper in zip(
                cls.MINIMUM_FILE_FORMAT_VERSIONS,
                cls.MAXIMUM_FILE_FORMAT_VERSIONS,
                strict=True,
            )
        )

    @staticmethod
    def _makeWcs(primary_header: Any) -> afwGeom.SkyWcs:
        """Build a SkyWcs from the cards of the primary HDU header."""
        ps = PropertySet()
        ps.update(primary_header)
        return afwGeom.makeSkyWcs(ps)

    def readAsMultipleCellCoadd(self) -> MultipleCellCoadd:
        """Read the FITS file as a MultipleCellCoadd object.

        Returns
        -------
        coadd : `MultipleCellCoadd`
            The coadd assembled from the rows of the binary table in HDU 1.

        Raises
        ------
        IncompatibleVersionError
            Raised if the version of this module that wrote the file is
            incompatible with this module that is reading it in.
        """
        with fits.open(self.filename) as hdu_list:
            header = hdu_list[1].header
            # Files that carry no VERSION card are treated as version 0.1.
            written_version_str = header.get("VERSION", "0.1")
            if not self.isCompatibleWith(written_version_str):
                raise IncompatibleVersionError(
                    f"{self.filename} was written with version {written_version_str} "
                    f"but attempting to read it with a reader designed for {FILE_FORMAT_VERSION}"
                )
            if written_version_str != FILE_FORMAT_VERSION:
                logger.info(
                    "Reading %s having version %s with reader designed for %s",
                    self.filename,
                    written_version_str,
                    FILE_FORMAT_VERSION,
                )

            written_version = version.parse(written_version_str)
            has_cell_inputs = written_version >= version.parse("0.3")

            # TODO: Remove this when FILE_FORMAT_VERSION is bumped to 1.0
            if not has_cell_inputs:
                # Files older than 0.3 are read as if their BAND card were
                # named FILTER.
                header.rename_keyword("BAND", "FILTER")

            data = hdu_list[1].data

            # The WCS is stored in the primary HDU header.
            wcs = self._makeWcs(hdu_list[0].header)

            # Build the quantities needed to construct a MultipleCellCoadd.
            common = CommonComponents(
                units=CoaddUnits(1),  # TODO: read from FITS TUNIT1 (DM-40562)
                wcs=wcs,
                band=header["FILTER"],
                identifiers=PatchIdentifiers(
                    skymap=header["SKYMAP"],
                    tract=header["TRACT"],
                    patch=Index2D(x=header["PATCH_X"], y=header["PATCH_Y"]),
                    band=header["FILTER"],
                ),
            )

            grid_cell_size = Extent2I(header["GRCELL1"], header["GRCELL2"])  # Inner size of a single cell.
            grid_shape = Extent2I(header["GRSHAPE1"], header["GRSHAPE2"])
            grid_min = Point2I(header["GRMIN1"], header["GRMIN2"])
            grid = UniformGrid(cell_size=grid_cell_size, shape=grid_shape, min=grid_min)

            # This is the inner bounding box for the multiple cell coadd
            inner_bbox = Box2I(
                Point2I(header["INBBOX11"], header["INBBOX12"]),
                Point2I(header["INBBOX21"], header["INBBOX22"]),
            )

            outer_cell_size = Extent2I(header["OCELL1"], header["OCELL2"])
            psf_image_size = Extent2I(header["PSFSIZE1"], header["PSFSIZE2"])

            # Attempt to get inputs for each cell.
            inputs = GridContainer[list[ObservationIdentifiers]](shape=grid.shape)
            if has_cell_inputs:
                # The VISIT table holds one row per visit; the CELL table
                # links each cell to the (visit, detector) pairs that
                # contributed to it.
                visit_dict = {
                    row["visit"]: VisitRecord(
                        visit=row["visit"],
                        physical_filter=row["physical_filter"],
                        day_obs=row["day_obs"],
                    )
                    for row in hdu_list[hdu_list.index_of("VISIT")].data
                }
                link_table = hdu_list[hdu_list.index_of("CELL")].data
                for link_row in link_table:
                    cell_id = Index2D(link_row["cell_x"], link_row["cell_y"])
                    visit = link_row["visit"]
                    obs_id = ObservationIdentifiers(
                        instrument=header["INSTRUME"],
                        visit=visit,
                        detector=link_row["detector"],
                        day_obs=visit_dict[visit].day_obs,
                        physical_filter=visit_dict[visit].physical_filter,
                    )
                    if cell_id in inputs:
                        inputs[cell_id] += [obs_id]
                    else:
                        inputs[cell_id] = [obs_id]
            else:
                # A single format string: the previous two-literal form
                # passed "VERSION = %s" as a stray formatting argument.
                logger.info(
                    "Cell inputs are available for VERSION=0.3 or later. The file provided has "
                    "VERSION = %s",
                    written_version,
                )

            def iter_cells() -> Any:
                """Yield one SingleCellCoadd per row of the binary table."""
                for row in data:
                    cell_index = Index2D(row["cell_id"][0], row["cell_id"][1])
                    # Cells with no recorded inputs (always the case for
                    # files older than 0.3) get an empty list rather than a
                    # failed lookup.
                    # NOTE(review): presumably indexing a missing cell in
                    # GridContainer raises -- confirm.
                    cell_inputs = inputs[cell_index] if cell_index in inputs else []
                    yield self._readSingleCellCoadd(
                        data=row,
                        header=header,
                        common=common,
                        inputs=cell_inputs,
                        outer_cell_size=outer_cell_size,
                        psf_image_size=psf_image_size,
                        inner_cell_size=grid_cell_size,
                    )

            # Construct the coadd while the file is still open, as the rows
            # are read lazily by the generator.
            coadd = MultipleCellCoadd(
                iter_cells(),
                grid=grid,
                outer_cell_size=outer_cell_size,
                psf_image_size=psf_image_size,
                inner_bbox=inner_bbox,
                common=common,
            )

        return coadd

    @staticmethod
    def _readSingleCellCoadd(
        data: Mapping[str, Any],
        common: CommonComponents,
        header: Mapping[str, Any],
        *,
        inputs: Iterable[ObservationIdentifiers],
        outer_cell_size: Extent2I,
        inner_cell_size: Extent2I,
        psf_image_size: Extent2I,
    ) -> SingleCellCoadd:
        """Read a coadd from a FITS file.

        Parameters
        ----------
        data : `Mapping`
            The data from the FITS file. Usually, a single row from the binary
            table representation.
        common : `CommonComponents`
            The common components of the coadd.
        header : `Mapping`
            The header of the FITS file as a dictionary.
        inputs : `Iterable` [`ObservationIdentifiers`]
            Any iterable of ObservationIdentifiers instances that contributed
            to this cell.
        outer_cell_size : `Extent2I`
            The size of the outer cell.
        inner_cell_size : `Extent2I`
            The size of the inner cell.
        psf_image_size : `Extent2I`
            The size of the PSF image.

        Returns
        -------
        coadd : `SingleCellCoadd`
            The coadd read from the file.
        """
        # The writer stores cell_id in a FITS "2I" column (16-bit integers).
        # Convert to Python ints so the pixel arithmetic below cannot
        # overflow a fixed-width integer type.
        cell_x = int(data["cell_id"][0])
        cell_y = int(data["cell_id"][1])

        # Width of the border between the inner and outer cell on each side.
        buffer = (outer_cell_size - inner_cell_size) // 2

        # Minimum corner of the inner cell, offset by the grid minimum.
        inner_min_x = inner_cell_size.x * cell_x + header["GRMIN1"]
        inner_min_y = inner_cell_size.y * cell_y + header["GRMIN2"]

        psf = ImageD(
            array=data["psf"].astype(np.float64),
            # Divide first, then negate: integer division and negation do
            # not commute.
            xy0=(-(psf_image_size // 2)).asPoint(),
        )

        # Origin of the outer cell: the inner corner moved out by the buffer.
        xy0 = Point2I(inner_min_x - buffer.x, inner_min_y - buffer.y)
        mask = afwImage.Mask(data["mask"].astype(np.int32), xy0=xy0)
        image_planes = OwnedImagePlanes(
            image=ImageF(
                data["image"].astype(np.float32),
                xy0=xy0,
            ),
            mask=mask,
            variance=ImageF(data["variance"].astype(np.float32), xy0=xy0),
            noise_realizations=[],
            mask_fractions=None,
        )

        identifiers = CellIdentifiers(
            cell=Index2D(cell_x, cell_y),
            skymap=common.identifiers.skymap,
            tract=common.identifiers.tract,
            patch=common.identifiers.patch,
            band=common.identifiers.band,
        )

        return SingleCellCoadd(
            outer=image_planes,
            psf=psf,
            inner_bbox=Box2I(
                corner=Point2I(inner_min_x, inner_min_y),
                dimensions=inner_cell_size,
            ),
            common=common,
            identifiers=identifiers,
            inputs=inputs,
        )

    def readWcs(self) -> afwGeom.SkyWcs:
        """Read the WCS information from the FITS file.

        Returns
        -------
        wcs : `~lsst.afw.geom.SkyWcs`
            The WCS information read from the FITS file.
        """
        # Only the primary HDU header is needed for the WCS.
        with fits.open(self.filename) as hdu_list:
            wcs = self._makeWcs(hdu_list[0].header)
        return wcs

417 

418 

def _makeImageColumn(name: str, arrays: list[Any], format_code: str, unit: str | None = None) -> fits.Column:
    """Build a binary-table column that holds one 2-D array per row.

    The column format and TDIM are derived from the first array in
    ``arrays``.
    """
    first = arrays[0]
    return fits.Column(
        name=name,
        unit=unit,
        format=f"{first.size}{format_code}",
        # TDIM lists the fastest-varying axis first, hence (columns, rows).
        dim=f"({first.shape[1]}, {first.shape[0]})",
        array=arrays,
    )


def writeMultipleCellCoaddAsFits(
    multiple_cell_coadd: MultipleCellCoadd,
    filename: str,
    overwrite: bool = False,
    metadata: PropertySet | None = None,
) -> fits.HDUList:
    """Write a MultipleCellCoadd object to a FITS file.

    Parameters
    ----------
    multiple_cell_coadd : `MultipleCellCoadd`
        The multiple cell coadd to write to a FITS file.
    filename : `str`
        The name of the file to write to.
    overwrite : `bool`, optional
        Whether to overwrite the file if it already exists.
    metadata : `~lsst.daf.base.PropertySet`, optional
        Additional metadata to write to the header of the HDU that holds
        the cell data.

    Returns
    -------
    hdu_list : `~astropy.io.fits.HDUList`
        The FITS file as an HDUList.

    Raises
    ------
    ValueError
        Raised if the inputs of the cells do not all share exactly one
        instrument.

    Notes
    -----
    Changes to this function that modify the way the file is written to disk
    must be accompanied with a change to FILE_FORMAT_VERSION.
    """
    # Create metadata tables:
    # 1. Visit table containing information about the visits.
    # 2. Cell table containing info about the visit+detector for each cell.
    visit_records: set[Any] = set()  # A set, so each visit is listed once.
    cell_records: list[Any] = []
    instrument_set = set()
    for cell_index, single_cell_coadd in multiple_cell_coadd.cells.items():
        for observation_id in single_cell_coadd.inputs:
            visit_records.add((observation_id.visit, observation_id.physical_filter, observation_id.day_obs))
            cell_records.append(
                (cell_index.x, cell_index.y, observation_id.visit, observation_id.detector)
            )
            instrument_set.add(observation_id.instrument)

    # Validate explicitly rather than with ``assert``, which is stripped
    # when Python runs with -O.
    if len(instrument_set) != 1:
        raise ValueError("All cells must have the same instrument.")
    (instrument,) = instrument_set

    visit_recarray = np.rec.fromrecords(
        recList=sorted(visit_records, key=lambda x: x[0]),  # Sort by visit.
        formats=None,  # formats is specified to please mypy. See numpy#26376.
        names=(
            "visit",
            "physical_filter",
            "day_obs",
        ),
    )
    cell_recarray = np.rec.fromrecords(
        recList=cell_records,
        formats=None,  # formats is specified to please mypy. See numpy#26376.
        names=(
            "cell_x",
            "cell_y",
            "visit",
            "detector",
        ),
    )

    visit_hdu = fits.BinTableHDU.from_columns(visit_recarray, name="VISIT")
    cell_hdu = fits.BinTableHDU.from_columns(cell_recarray, name="CELL")

    # Materialize the cells once; every column below has one row per cell,
    # in this order.
    cells = list(multiple_cell_coadd.cells.values())

    cell_id_column = fits.Column(
        name="cell_id",
        format="2I",
        array=[cell.identifiers.cell for cell in cells],
    )
    image_column = _makeImageColumn(
        "image",
        [cell.outer.image.array for cell in cells],
        "E",
        unit=cells[0].common.units.name,
    )
    # NOTE(review): "I" is a 16-bit FITS integer column, while the reader
    # casts the mask to int32 -- confirm no mask bits above 15 are in use.
    mask_column = _makeImageColumn("mask", [cell.outer.mask.array for cell in cells], "I")
    variance_column = _makeImageColumn("variance", [cell.outer.variance.array for cell in cells], "E")
    psf_column = _makeImageColumn("psf", [cell.psf_image.array for cell in cells], "D")

    col_defs = fits.ColDefs([cell_id_column, image_column, mask_column, variance_column, psf_column])
    hdu = fits.BinTableHDU.from_columns(col_defs)

    grid_cell_size = multiple_cell_coadd.grid.cell_size
    grid_shape = multiple_cell_coadd.grid.shape
    grid_min = multiple_cell_coadd.grid.bbox.getMin()
    grid_cards = {
        "GRCELL1": grid_cell_size.x,
        "GRCELL2": grid_cell_size.y,
        "GRSHAPE1": grid_shape.x,
        "GRSHAPE2": grid_shape.y,
        "GRMIN1": grid_min.x,
        "GRMIN2": grid_min.y,
    }
    outer_cell_size_cards = {
        "OCELL1": multiple_cell_coadd.outer_cell_size.x,
        "OCELL2": multiple_cell_coadd.outer_cell_size.y,
    }
    psf_image_size_cards = {
        "PSFSIZE1": multiple_cell_coadd.psf_image_size.x,
        "PSFSIZE2": multiple_cell_coadd.psf_image_size.y,
    }
    inner_bbox_cards = {
        "INBBOX11": multiple_cell_coadd.inner_bbox.minX,
        "INBBOX12": multiple_cell_coadd.inner_bbox.minY,
        "INBBOX21": multiple_cell_coadd.inner_bbox.maxX,
        "INBBOX22": multiple_cell_coadd.inner_bbox.maxY,
    }
    # Extend group by group so the cards keep this order in the header.
    for cards in (grid_cards, outer_cell_size_cards, psf_image_size_cards, inner_bbox_cards):
        hdu.header.extend(cards)

    # The WCS goes into the primary HDU header.
    wcs = multiple_cell_coadd.common.wcs
    wcs_cards = wcs.getFitsMetadata().toDict()
    primary_hdu = fits.PrimaryHDU()
    primary_hdu.header.extend(wcs_cards)

    hdu.header["VERSION"] = FILE_FORMAT_VERSION
    # NOTE(review): TUNIT1 is the unit keyword of the first column, which
    # here is cell_id rather than image -- confirm this is intended before
    # changing it (a change requires a FILE_FORMAT_VERSION bump).
    hdu.header["TUNIT1"] = multiple_cell_coadd.common.units.name
    hdu.header["INSTRUME"] = instrument
    # This is assumed to be the same as
    # multiple_cell_coadd.common.identifiers.band. See DM-38843.
    hdu.header["FILTER"] = multiple_cell_coadd.common.band
    hdu.header["SKYMAP"] = multiple_cell_coadd.common.identifiers.skymap
    hdu.header["TRACT"] = multiple_cell_coadd.common.identifiers.tract
    hdu.header["PATCH_X"] = multiple_cell_coadd.common.identifiers.patch.x
    hdu.header["PATCH_Y"] = multiple_cell_coadd.common.identifiers.patch.y

    if metadata is not None:
        hdu.header.extend(metadata.toDict())

    hdu_list = fits.HDUList([primary_hdu, hdu, cell_hdu, visit_hdu])
    hdu_list.writeto(filename, overwrite=overwrite)

    return hdu_list