Coverage for python/lsst/analysis/ap/plotImageSubtractionCutouts.py: 15%

340 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-18 12:10 +0000

1# This file is part of analysis_ap. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Construct template/image/difference cutouts for upload to Zooniverse, or 

23to just to view as images. 

24""" 

25 

26__all__ = ["PlotImageSubtractionCutoutsConfig", "PlotImageSubtractionCutoutsTask", "CutoutPath"] 

27 

import argparse
import functools
import io
import logging
import multiprocessing
import os
import pathlib
from math import log10

import astropy.units as u
import lsst.daf.butler
import lsst.dax.apdb
import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pex.exceptions
import lsst.pipe.base
import lsst.utils
import numpy as np
import pandas as pd
import sqlalchemy

from . import apdb

48 

49 

class _ButlerCache:
    """Global class to handle butler queries, to allow lru_cache and
    `multiprocessing.Pool` to work together.

    A module-level singleton (``butler_cache`` below) holds the butler and
    config; each multiprocessing worker gets its own copy of the singleton,
    and the per-method ``lru_cache`` avoids re-reading the same exposures
    for consecutive sources from the same detector/visit.

    If we redo this all to work with BPS or other parallelized systems, or get
    good butler-side caching, we could remove this lru_cache system.
    """

    def set(self, butler, config):
        """Call this to store a Butler and Config instance before using the
        global class instance.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler instance to store.
        config : `lsst.pex.config.Config`
            Config instance to store.
        """
        self._butler = butler
        self._config = config
        # Ensure the caches are empty if we've been re-set: cached exposures
        # from a previous butler/config would otherwise be returned.
        self.get_exposures.cache_clear()
        self.get_catalog.cache_clear()

    # NOTE: lru_cache on an instance method keeps `self` alive via the cache
    # key; that is acceptable here because this class is only ever used as a
    # long-lived module-level singleton.
    @functools.lru_cache(maxsize=4)
    def get_exposures(self, instrument, detector, visit):
        """Return science, template, difference exposures, using a small
        cache so we don't have to re-read files as often.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        exposures : `tuple` [`lsst.afw.image.ExposureF`]
            Science, template, and difference exposure for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return (self._butler.get(self._config.science_image_type, data_id),
                self._butler.get(f'{self._config.diff_image_type}_templateExp', data_id),
                self._butler.get(f'{self._config.diff_image_type}_differenceExp', data_id))

    @functools.lru_cache(maxsize=4)
    def get_catalog(self, instrument, detector, visit):
        """Return the diaSrc catalog from the butler.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            DiaSource catalog for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return self._butler.get(f'{self._config.diff_image_type}_diaSrc', data_id)

119 

120 

# Global used within each multiprocessing worker (or single process).
# Call ``butler_cache.set(...)`` before use; see _ButlerCache docstring.
butler_cache = _ButlerCache()

123 

124 

class PlotImageSubtractionCutoutsConfig(pexConfig.Config):
    """Configuration for PlotImageSubtractionCutoutsTask."""
    # NOTE: the `sizes` doc was previously a backslash continuation that
    # embedded stray indentation whitespace in the runtime doc string;
    # use implicit string concatenation instead.
    sizes = pexConfig.ListField(
        doc="List of widths of cutout to extract for image from science, "
            "template, and difference exposures.",
        dtype=int,
        default=[30],
    )
    use_footprint = pexConfig.Field(
        doc="Use source footprint to define cutout region; "
            "If set, ignore `size` and use the footprint bbox instead.",
        dtype=bool,
        default=False,
    )
    url_root = pexConfig.Field(
        doc="URL that the resulting images will be served to Zooniverse from, for the manifest file. "
            "If not set, no manifest file will be written.",
        dtype=str,
        default=None,
        optional=True,
    )
    diff_image_type = pexConfig.Field(
        doc="Dataset type of template and difference image to use for cutouts; "
            "Will have '_templateExp' and '_differenceExp' appended for butler.get(), respectively.",
        dtype=str,
        default="goodSeeingDiff",
    )
    science_image_type = pexConfig.Field(
        doc="Dataset type of science image to use for cutouts.",
        dtype=str,
        default="calexp",
    )
    add_metadata = pexConfig.Field(
        doc="Annotate the cutouts with catalog metadata, including coordinates, fluxes, flags, etc.",
        dtype=bool,
        default=True
    )
    chunk_size = pexConfig.Field(
        doc="Chunk up files into subdirectories, with at most this many files per directory."
            " None means write all the files to one `images/` directory.",
        dtype=int,
        default=10000,
        optional=True
    )
    save_as_numpy = pexConfig.Field(
        doc="Save the raw cutout images in numpy format.",
        dtype=bool,
        default=False
    )

173 

174 

class PlotImageSubtractionCutoutsTask(lsst.pipe.base.Task):
    """Generate template/science/difference image cutouts of DiaSources and an
    optional manifest for upload to a Zooniverse project.

    Parameters
    ----------
    output_path : `str`
        The path to write the output to; manifest goes here, while the
        images themselves go into ``output_path/images/``.
    """
    ConfigClass = PlotImageSubtractionCutoutsConfig
    _DefaultName = "plotImageSubtractionCutouts"

    def __init__(self, *, output_path, **kwargs):
        super().__init__(**kwargs)
        self._output_path = output_path
        self.cutout_path = CutoutPath(output_path, chunk_size=self.config.chunk_size)

    def _reduce_kwargs(self):
        # Include output_path in the reduced state, to allow pickling of
        # this Task (needed for multiprocessing workers).
        kwargs = super()._reduce_kwargs()
        kwargs["output_path"] = self._output_path
        return kwargs

    def run(self, data, butler, njobs=0):
        """Generate cutout images and a manifest for upload to Zooniverse
        from a collection of DiaSources.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        source_ids : `list` [`int`]
            DiaSourceIds of cutout images that were generated.
        """
        result = self.write_images(data, butler, njobs=njobs)
        self.write_manifest(result)
        self.log.info("Wrote %d images to %s", len(result), self._output_path)
        return result

    def write_manifest(self, sources):
        """Save a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully
            made.
        """
        if self.config.url_root is not None:
            manifest = self._make_manifest(sources)
            manifest.to_csv(os.path.join(self._output_path, "manifest.csv"), index=False)
        else:
            self.log.info("No url_root config provided, so no Zooniverse manifest file was written.")

    def _make_manifest(self, sources):
        """Return a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully
            made.

        Returns
        -------
        manifest : `pandas.DataFrame`
            The formatted URL manifest for upload to Zooniverse.
        """
        # BUGFIX: pass chunk_size so the manifest URLs include the same
        # chunked subdirectory layout that write_images actually wrote;
        # previously the URLs omitted the chunk directory.
        cutout_path = CutoutPath(self.config.url_root, chunk_size=self.config.chunk_size)
        manifest = pd.DataFrame()
        manifest["external_id"] = sources
        manifest["location:1"] = [cutout_path(x) for x in sources]
        manifest["metadata:diaSourceId"] = sources
        return manifest

    def write_images(self, data, butler, njobs=0):
        """Make the 3-part cutout images for each requested source and write
        them to disk.

        Creates a ``images/`` subdirectory via cutout_path if one
        does not already exist; images are written there as PNG files.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        sources : `list`
            DiaSourceIds that had cutouts made.
        """
        # Ignore divide-by-zero and log-of-negative-value messages.
        seterr_dict = np.seterr(divide="ignore", invalid="ignore")

        # Create a subdirectory for the images.
        pathlib.Path(os.path.join(self._output_path, "images")).mkdir(exist_ok=True)

        sources = []
        butler_cache.set(butler, self.config)
        if njobs > 0:
            with multiprocessing.Pool(njobs) as pool:
                # BUGFIX: use `map`, not `starmap`: _do_one_source takes one
                # `source` argument, and starmap would unpack each record's
                # fields as separate positional arguments.
                sources = pool.map(self._do_one_source, data.to_records())
        else:
            for source in data.to_records():
                sources.append(self._do_one_source(source))

        # restore numpy error message state
        np.seterr(**seterr_dict)
        # Only return successful ids, not failures.
        return [s for s in sources if s is not None]

    def _do_one_source(self, source):
        """Make cutouts for one diaSource.

        Parameters
        ----------
        source : `numpy.record`
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        diaSourceId : `int` or None
            Id of the source that was generated, or None if there was an error.
        """
        try:
            center = lsst.geom.SpherePoint(source["ra"], source["dec"], lsst.geom.degrees)
            science, template, difference = butler_cache.get_exposures(source["instrument"],
                                                                       source["detector"],
                                                                       source["visit"])
            if self.config.use_footprint:
                catalog = butler_cache.get_catalog(source["instrument"],
                                                   source["detector"],
                                                   source["visit"])
                # The input catalogs must be sorted, so that `find` works.
                if not catalog.isSorted():
                    data_id = {'instrument': source["instrument"],
                               'detector': source["detector"],
                               'visit': source["visit"]}
                    msg = f"{self.config.diff_image_type}_diaSrc catalog for {data_id} is not sorted!"
                    raise RuntimeError(msg)
                record = catalog.find(source['diaSourceId'])
                footprint = record.getFootprint()

            scale = science.wcs.getPixelScale().asArcseconds()
            image = self.generate_image(science, template, difference, center, scale,
                                        dia_source_id=source['diaSourceId'],
                                        save_as_numpy=self.config.save_as_numpy,
                                        source=source if self.config.add_metadata else None,
                                        footprint=footprint if self.config.use_footprint else None)
            self.cutout_path.mkdir(source["diaSourceId"])
            with open(self.cutout_path(source["diaSourceId"]), "wb") as outfile:
                outfile.write(image.getbuffer())
            return source["diaSourceId"]
        except (LookupError, lsst.pex.exceptions.Exception) as e:
            # Expected failure modes (e.g. source too close to the image
            # edge to cut out): log and skip, don't kill the whole run.
            self.log.error(
                f"{e.__class__.__name__} processing diaSourceId {source['diaSourceId']}: {e}"
            )
            return None
        except Exception:
            # Ensure other exceptions are interpretable when multiprocessing.
            import traceback
            traceback.print_exc()
            raise

    def generate_image(self, science, template, difference, center, scale, dia_source_id=None,
                       save_as_numpy=False, source=None, footprint=None):
        """Get a 3-part cutout image to save to disk, for a single source.

        Parameters
        ----------
        science : `lsst.afw.image.ExposureF`
            Science exposure to include in the cutout.
        template : `lsst.afw.image.ExposureF`
            Matched template exposure to include in the cutout.
        difference : `lsst.afw.image.ExposureF`
            Matched science minus template exposure to include in the cutout.
        center : `lsst.geom.SpherePoint`
            Center of the source to be cut out of each image.
        scale : `float`
            Pixel scale in arcseconds.
        dia_source_id : `int`, optional
            DiaSourceId to use in the filename, if saving to disk.
        save_as_numpy : `bool`, optional
            Save the raw cutout images in numpy format.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.
        footprint : `lsst.afw.detection.Footprint`, optional
            Detected source footprint; if specified, extract a square
            surrounding the footprint bbox, otherwise use ``config.size``.

        Returns
        -------
        image: `io.BytesIO`
            The generated image, to be output to a file or displayed on screen.
        """
        numpy_cutouts = {}
        if not self.config.use_footprint:
            sizes = self.config.sizes
            cutout_science, cutout_template, cutout_difference = [], [], []
            for s in sizes:
                extent = lsst.geom.Extent2I(s, s)
                science_cutout = science.getCutout(center, extent)
                template_cutout = template.getCutout(center, extent)
                difference_cutout = difference.getCutout(center, extent)
                if save_as_numpy:
                    numpy_cutouts[f"sci_{s}"] = science_cutout.image.array
                    numpy_cutouts[f"temp_{s}"] = template_cutout.image.array
                    numpy_cutouts[f"diff_{s}"] = difference_cutout.image.array
                cutout_science.append(science_cutout)
                cutout_template.append(template_cutout)
                cutout_difference.append(difference_cutout)
            if numpy_cutouts:
                # Write the raw arrays once, after all sizes are collected;
                # previously this loop ran inside the sizes loop and rewrote
                # every earlier file on each pass.
                path = os.path.join(self._output_path, "raw_npy")
                pathlib.Path(path).mkdir(exist_ok=True)
                for cutout_type, cutout in numpy_cutouts.items():
                    np.save(f"{path}/{dia_source_id}_{cutout_type}.npy",
                            np.expand_dims(cutout, axis=0))
        else:
            if self.config.save_as_numpy:
                raise RuntimeError("Cannot save as numpy when using footprints.")
            cutout_science = [science.getCutout(footprint.getBBox())]
            cutout_template = [template.getCutout(footprint.getBBox())]
            cutout_difference = [difference.getCutout(footprint.getBBox())]
            extent = footprint.getBBox().getDimensions()
            # Plot a square equal to the largest dimension.
            sizes = [extent.x if extent.x > extent.y else extent.y]

        return self._plot_cutout(cutout_science,
                                 cutout_template,
                                 cutout_difference,
                                 scale,
                                 sizes,
                                 source=source)

    def _plot_cutout(self, science, template, difference, scale, sizes, source=None):
        """Plot the cutouts for a source in one image.

        Parameters
        ----------
        science : `list` [`lsst.afw.image.ExposureF`]
            List of cutout Science exposure(s) to include in the image.
        template : `list` [`lsst.afw.image.ExposureF`]
            List of cutout template exposure(s) to include in the image.
        difference : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science minus template exposure(s) to include
            in the image.
        scale : `float`
            Pixel scale in arcseconds.
        sizes : `list` [`int`]
            List of x/y dimensions of the images passed in, to set imshow
            extent.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        image: `io.BytesIO`
            The generated image, to be output to a file via
            `image.write(filename)` or displayed on screen.
        """
        import astropy.visualization as aviz
        import matplotlib
        matplotlib.use("AGG")
        # Force matplotlib defaults
        matplotlib.rcParams.update(matplotlib.rcParamsDefault)
        import matplotlib.pyplot as plt
        from matplotlib import cm

        # TODO DM-32014: how do we color masked pixels (including edges)?

        def plot_one_image(ax, data, size, name=None):
            """Plot a normalized image on an axis."""
            if name == "Difference":
                norm = aviz.ImageNormalize(
                    # Compute the stretch from a rect of dim 15 at the center
                    # of the image.
                    data[data.shape[0] // 2 - 7:data.shape[0] // 2 + 8,
                         data.shape[1] // 2 - 7:data.shape[1] // 2 + 8],
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            else:
                norm = aviz.ImageNormalize(
                    data,
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            ax.imshow(data, cmap=cm.bone, interpolation="none", norm=norm,
                      extent=(0, size, 0, size), origin="lower", aspect="equal")
            # Draw a 1-arcsecond scale bar: a thin yellow line over a wider
            # blue one, so it is visible on both light and dark backgrounds.
            x_line = 1
            y_line = 1
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="blue", lw=6)
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="yellow", lw=2)
            ax.axis("off")
            if name is not None:
                ax.set_title(name)

        fig = None
        try:
            len_sizes = len(sizes)
            fig, axs = plt.subplots(len_sizes, 3, constrained_layout=True)
            if len_sizes == 1:
                plot_one_image(axs[0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[2], difference[0].image.array, sizes[0], "Difference")
            else:
                plot_one_image(axs[0][0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[0][1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[0][2], difference[0].image.array, sizes[0], "Difference")
                for i in range(1, len(axs)):
                    plot_one_image(axs[i][0], template[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][1], science[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][2], difference[i].image.array, sizes[i], None)
            plt.tight_layout()
            if source is not None:
                _annotate_image(fig, source, len_sizes)

            output = io.BytesIO()
            plt.savefig(output, bbox_inches="tight", format="png")
            output.seek(0)  # to ensure opening the image starts from the front
        finally:
            # BUGFIX: if plt.subplots itself raised, `fig` was never bound
            # and the bare close() would raise NameError, masking the error.
            if fig is not None:
                plt.close(fig)

        return output

514 

515 

def _annotate_image(fig, source, len_sizes):
    """Annotate the cutouts image with metadata and flags.

    Parameters
    ----------
    fig : `matplotlib.Figure`
        Figure to be annotated.
    source : `numpy.record`
        DiaSource record of the object being plotted.
    len_sizes : `int`
        Length of the ``size`` array set in configuration.
    """
    # Names of flags fields to add a flag label to the image, using any().
    flags_psf = ["slot_PsfFlux_flag", "slot_PsfFlux_flag_noGoodPixels", "slot_PsfFlux_flag_edge"]
    flags_aperture = ["slot_ApFlux_flag", "slot_ApFlux_flag_apertureTruncated"]
    flags_forced = ["ip_diffim_forced_PsfFlux_flag", "ip_diffim_forced_PsfFlux_flag_noGoodPixels",
                    "ip_diffim_forced_PsfFlux_flag_edge"]
    flags_edge = ["pixelFlags_edge"]
    flags_interp = ["pixelFlags_interpolated", "pixelFlags_interpolatedCenter"]
    flags_saturated = ["pixelFlags_saturated", "pixelFlags_saturatedCenter"]
    flags_cr = ["pixelFlags_cr", "pixelFlags_crCenter"]
    flags_bad = ["pixelFlags_bad"]
    flags_suspect = ["pixelFlags_suspect", "pixelFlags_suspectCenter"]
    flags_centroid = ["slot_Centroid_flag"]
    flags_shape = ["slot_Shape_flag", "slot_Shape_flag_no_pixels", "slot_Shape_flag_not_contained",
                   "slot_Shape_flag_parent_source"]

    # Labels are drawn red when the corresponding flags fired, grey otherwise.
    flag_color = "red"
    text_color = "grey"

    # With a single cutout row the annotations fit inside the figure; with
    # multiple rows they are stacked above it (y > 1 in figure coordinates).
    if len_sizes == 1:
        heights = [0.95, 0.91, 0.87, 0.83, 0.79]
    else:
        heights = [1.2, 1.15, 1.1, 1.05, 1.0]

    # NOTE: fig.text coordinates are in fractions of the figure.
    # Row 0: data id (diaSourceId, instrument, detector, visit, band).
    fig.text(0, heights[0], "diaSourceId:", color=text_color)
    fig.text(0.145, heights[0], f"{source['diaSourceId']}")
    fig.text(0.43, heights[0], f"{source['instrument']}", fontweight="bold")
    fig.text(0.64, heights[0], "detector:", color=text_color)
    fig.text(0.74, heights[0], f"{source['detector']}")
    fig.text(0.795, heights[0], "visit:", color=text_color)
    fig.text(0.85, heights[0], f"{source['visit']}")
    fig.text(0.95, heights[0], f"{source['band']}")

    # Row 1: coordinates, detection S/N, and reduced PSF-fit chi2.
    fig.text(0.0, heights[1], "ra:", color=text_color)
    fig.text(0.037, heights[1], f"{source['ra']:.8f}")
    fig.text(0.21, heights[1], "dec:", color=text_color)
    fig.text(0.265, heights[1], f"{source['dec']:+.8f}")
    fig.text(0.50, heights[1], "detection S/N:", color=text_color)
    fig.text(0.66, heights[1], f"{source['snr']:6.1f}")
    fig.text(0.75, heights[1], "PSF chi2:", color=text_color)
    fig.text(0.85, heights[1], f"{source['psfChi2']/source['psfNdata']:6.2f}")

    # Row 2: PSF flux measurement and per-pixel flag labels.
    fig.text(0.0, heights[2], "PSF (nJy):", color=flag_color if any(source[flags_psf]) else text_color)
    fig.text(0.25, heights[2], f"{source['psfFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[2], "+/-", color=text_color)
    fig.text(0.29, heights[2], f"{source['psfFluxErr']:8.1f}")
    fig.text(0.40, heights[2], "S/N:", color=text_color)
    fig.text(0.45, heights[2], f"{abs(source['psfFlux']/source['psfFluxErr']):6.2f}")

    # NOTE: yellow is hard to read on white; use goldenrod instead.
    if any(source[flags_edge]):
        fig.text(0.55, heights[2], "EDGE", color="goldenrod", fontweight="bold")
    if any(source[flags_interp]):
        fig.text(0.62, heights[2], "INTERP", color="green", fontweight="bold")
    if any(source[flags_saturated]):
        fig.text(0.72, heights[2], "SAT", color="green", fontweight="bold")
    if any(source[flags_cr]):
        fig.text(0.77, heights[2], "CR", color="magenta", fontweight="bold")
    if any(source[flags_bad]):
        fig.text(0.81, heights[2], "BAD", color="red", fontweight="bold")
    if source['isDipole']:
        fig.text(0.87, heights[2], "DIPOLE", color="indigo", fontweight="bold")

    # Row 3: aperture flux measurement and measurement-quality flag labels.
    fig.text(0.0, heights[3], "ap (nJy):", color=flag_color if any(source[flags_aperture]) else text_color)
    fig.text(0.25, heights[3], f"{source['apFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[3], "+/-", color=text_color)
    fig.text(0.29, heights[3], f"{source['apFluxErr']:8.1f}")
    fig.text(0.40, heights[3], "S/N:", color=text_color)
    fig.text(0.45, heights[3], f"{abs(source['apFlux']/source['apFluxErr']):#6.2f}")

    if any(source[flags_suspect]):
        fig.text(0.55, heights[3], "SUS", color="goldenrod", fontweight="bold")
    if any(source[flags_centroid]):
        fig.text(0.60, heights[3], "CENTROID", color="red", fontweight="bold")
    if any(source[flags_shape]):
        fig.text(0.73, heights[3], "SHAPE", color="red", fontweight="bold")
    # Future option: to add two more flag flavors to the legend,
    # use locations 0.80 and 0.87

    # rb score: green if the source looks real (>= 0.5), red otherwise.
    if source['reliability'] is not None and np.isfinite(source['reliability']):
        fig.text(0.73, heights[4], f"RB:{source['reliability']:.03f}",
                 color='#e41a1c' if source['reliability'] < 0.5 else '#4daf4a',
                 fontweight="bold")

    # Row 4: forced science flux measurement and AB magnitude.
    fig.text(0.0, heights[4], "sci (nJy):", color=flag_color if any(source[flags_forced]) else text_color)
    fig.text(0.25, heights[4], f"{source['scienceFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[4], "+/-", color=text_color)
    fig.text(0.29, heights[4], f"{source['scienceFluxErr']:8.1f}")
    fig.text(0.40, heights[4], "S/N:", color=text_color)
    fig.text(0.45, heights[4], f"{abs(source['scienceFlux']/source['scienceFluxErr']):6.2f}")
    fig.text(0.55, heights[4], "ABmag:", color=text_color)
    fig.text(0.635, heights[4], f"{(source['scienceFlux']*u.nanojansky).to_value(u.ABmag):.3f}")

621 

622 

class CutoutPath:
    """Manage paths to image cutouts with filenames based on diaSourceId.

    Supports local files, and id-chunked directories.

    Parameters
    ----------
    root : `str`
        Root file path to manage.
    chunk_size : `int`, optional
        At most this many files per directory. Must be a power of 10.

    Raises
    ------
    RuntimeError
        Raised if chunk_size is not a power of 10.
    """

    def __init__(self, root, chunk_size=None):
        self._root = root
        if chunk_size is not None:
            exponent = log10(chunk_size)
            if exponent != int(exponent):
                raise RuntimeError(
                    f"CutoutPath file chunk_size must be a power of 10, got {chunk_size}.")
        self._chunk_size = chunk_size

    def __call__(self, id):
        """Return the full path to a diaSource cutout.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.

        Returns
        -------
        path : `str`
            Full path to the requested file.
        """
        if self._chunk_size is None:
            subpath = f"images/{id}.png"
        else:
            # Round the id down to a multiple of chunk_size to pick
            # its subdirectory.
            bucket = (id // self._chunk_size) * self._chunk_size
            subpath = f"images/{bucket}/{id}.png"
        return os.path.join(self._root, subpath)

    def mkdir(self, id):
        """Make the directory tree to write this cutout id to.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.
        """
        os.makedirs(os.path.dirname(self(id)), exist_ok=True)

678 

679 

def build_argparser():
    """Construct an argument parser for the ``plotImageSubtractionCutouts``
    script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``plotImageSubtractionCutouts``
        command-line interface.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="More information is available at https://pipelines.lsst.io.",
    )

    # Exactly one of the two database connection flavors must be given.
    db_group = parser.add_mutually_exclusive_group(required=True)
    db_group.add_argument(
        "--sqlitefile",
        default=None,
        help="Path to sqlite file to load from; required for sqlite connection.",
    )
    db_group.add_argument(
        "--namespace",
        default=None,
        help="Postgres namespace (aka schema) to connect to; "
             " required for postgres connections."
    )

    parser.add_argument(
        "--postgres_url",
        default="rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl",
        help="Postgres connection path, or default (None) to use ApdbPostgresQuery default."
    )

    # Batching and parallelism controls.
    parser.add_argument(
        "--limit",
        default=5,
        type=int,
        help="Number of sources to load from the APDB (default=5), or the "
             "number of sources to load per 'page' when `--all` is set. "
             "This should be significantly larger (100x or more) than the value of `-j`, "
             "to ensure efficient use of each process.",
    )
    parser.add_argument(
        "--all",
        default=False,
        action="store_true",
        help="Process all the sources; --limit then becomes the 'page size' to chunk the DB into.",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        default=0,
        type=int,
        help="Number of processes to use when generating cutouts. "
             "Specify 0 (the default) to not use multiprocessing at all. "
             "Note that `--limit` determines how efficiently each process is filled."
    )

    # Data selection.
    parser.add_argument(
        "--instrument",
        required=True,
        help="Instrument short-name (e.g. 'DECam') of the data being loaded.",
    )
    parser.add_argument(
        "-C",
        "--configFile",
        help="File containing the PlotImageSubtractionCutoutsConfig to load.",
    )
    parser.add_argument(
        "--collections",
        nargs="*",
        help=(
            "Butler collection(s) to load data from."
            " If not specified, will search all butler collections, "
            "which may be very slow."
        ),
    )
    parser.add_argument("repo", help="Path to Butler repository to load data from.")
    parser.add_argument(
        "outputPath",
        help="Path to write the output images and manifest to; "
             "manifest is written here, while the images go to `OUTPUTPATH/images/`.",
    )
    parser.add_argument(
        "--reliabilityMin",
        type=float,
        default=None,
        help="Minimum reliability value (default=None) on which to filter the DiaSources.",
    )
    parser.add_argument(
        "--reliabilityMax",
        type=float,
        default=None,
        help="Maximum reliability value (default=None) on which to filter the DiaSources.",
    )
    return parser

779 

780 

781def _make_apdbQuery(instrument, sqlitefile=None, postgres_url=None, namespace=None): 

782 """Return a query connection to the specified APDB. 

783 

784 Parameters 

785 ---------- 

786 instrument : `lsst.obs.base.Instrument` 

787 Instrument associated with this data, to get detector/visit data. 

788 sqlitefile : `str`, optional 

789 SQLite file to load APDB from; if set, postgres kwargs are ignored. 

790 postgres_url : `str`, optional 

791 Postgres connection URL to connect to APDB. 

792 namespace : `str`, optional 

793 Postgres schema to load from; required with postgres_url. 

794 

795 Returns 

796 ------- 

797 apdb_query : `lsst.analysis.ap.ApdbQuery` 

798 Query instance to use to load data from APDB. 

799 

800 Raises 

801 ------ 

802 RuntimeError 

803 Raised if the APDB connection kwargs are invalid in some way. 

804 """ 

805 if sqlitefile is not None: 

806 apdb_query = apdb.ApdbSqliteQuery(sqlitefile, instrument=instrument) 

807 elif postgres_url is not None and namespace is not None: 

808 apdb_query = apdb.ApdbPostgresQuery(namespace, postgres_url, instrument=instrument) 

809 else: 

810 raise RuntimeError("Cannot handle database connection args: " 

811 f"sqlitefile={sqlitefile}, postgres_url={postgres_url}, namespace={namespace}") 

812 return apdb_query 

813 

814 

def select_sources(apdb_query, limit, reliabilityMin=None, reliabilityMax=None):
    """Load an APDB and yield pages of sources from it.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.
    limit : `int`
        Number of sources to select from the APDB per page.
    reliabilityMin : `float`, optional
        Minimum reliability value on which to filter the DiaSources.
    reliabilityMax : `float`, optional
        Maximum reliability value on which to filter the DiaSources.

    Yields
    ------
    sources : `pandas.DataFrame`
        The loaded DiaSource data, at most ``limit`` rows at a time.
    """
    offset = 0
    connection = None
    try:
        while True:
            with apdb_query.connection as connection:
                table = apdb_query._tables["DiaSource"]
                query = table.select()
                if reliabilityMin is not None:
                    query = query.where(table.columns['reliability'] >= reliabilityMin)
                if reliabilityMax is not None:
                    query = query.where(table.columns['reliability'] <= reliabilityMax)
                # Deterministic ordering, so paging via limit/offset neither
                # skips nor repeats rows between successive queries.
                query = query.order_by(table.columns["visit"],
                                       table.columns["detector"],
                                       table.columns["diaSourceId"])
                query = query.limit(limit).offset(offset)
                sources = pd.read_sql_query(query, connection)
                if len(sources) == 0:
                    break
                apdb_query._fill_from_instrument(sources)

                yield sources
                offset += limit
    finally:
        # BUGFIX: guard against `connection` never having been bound (e.g.
        # if acquiring apdb_query.connection raised); the unconditional
        # close() would turn the original error into a NameError.
        if connection is not None:
            connection.close()

857 

858 

def len_sources(apdb_query):
    """Return the number of DiaSources in the supplied APDB.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.

    Returns
    -------
    count : `int`
        Number of diaSources in this APDB.
    """
    count_query = sqlalchemy.text('select count(*) FROM "DiaSource";')
    with apdb_query.connection as connection:
        result = connection.execute(count_query)
        count = result.scalar()
    return count

875 

876 

def run_cutouts(args):
    """Run PlotImageSubtractionCutoutsTask on the parsed commandline arguments.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed commandline arguments.
    """
    # We have to initialize the logger manually on the commandline.
    logging.basicConfig(
        level=logging.INFO, format="{name} {levelname}: {message}", style="{"
    )

    butler = lsst.daf.butler.Butler(args.repo, collections=args.collections)
    apdb_query = _make_apdbQuery(args.instrument,
                                 sqlitefile=args.sqlitefile,
                                 postgres_url=args.postgres_url,
                                 namespace=args.namespace)

    config = PlotImageSubtractionCutoutsConfig()
    if args.configFile is not None:
        config.load(os.path.expanduser(args.configFile))
    config.freeze()
    cutouts = PlotImageSubtractionCutoutsTask(config=config, output_path=args.outputPath)

    # BUGFIX: create the page generator exactly once; a duplicate, never-used
    # select_sources() call was previously made before this point.
    getter = select_sources(apdb_query, args.limit, args.reliabilityMin, args.reliabilityMax)
    # Process just one block of length "limit", or all sources in the database?
    if not args.all:
        data = next(getter)
        sources = cutouts.run(data, butler, njobs=args.jobs)
    else:
        sources = []
        count = len_sources(apdb_query)
        for i, data in enumerate(getter):
            sources.extend(cutouts.write_images(data, butler, njobs=args.jobs))
            print(f"Completed {i+1} batches of {args.limit} size, out of {count} diaSources.")
        # Write one manifest covering every batch.
        cutouts.write_manifest(sources)

    print(f"Generated {len(sources)} diaSource cutouts to {args.outputPath}.")

917 

918 

def main():
    """Commandline entry point: parse arguments and generate cutouts."""
    parser = build_argparser()
    run_cutouts(parser.parse_args())