# This file is part of analysis_ap.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Construct template/image/difference cutouts for upload to Zooniverse, or
just to view as images.
"""

__all__ = ["PlotImageSubtractionCutoutsConfig", "PlotImageSubtractionCutoutsTask", "CutoutPath"]

import argparse
import functools
import io
import logging
import multiprocessing
import os
import pathlib
from math import log10

import astropy.units as u
import lsst.daf.butler
import lsst.dax.apdb
import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pex.exceptions
import lsst.pipe.base
import lsst.utils
import numpy as np
import pandas as pd
import sqlalchemy

from . import apdb


class _ButlerCache:
    """Global class to handle butler queries, to allow lru_cache and
    `multiprocessing.Pool` to work together.

    If we redo this all to work with BPS or other parallelized systems, or get
    good butler-side caching, we could remove this lru_cache system.
    """

    def set(self, butler, config):
        """Call this to store a Butler and Config instance before using the
        global class instance.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler instance to store.
        config : `lsst.pex.config.Config`
            Config instance to store.
        """
        self._butler = butler
        self._config = config
        # Ensure the caches are empty if we've been re-set.
        self.get_exposures.cache_clear()
        self.get_catalog.cache_clear()

    @functools.lru_cache(maxsize=4)
    def get_exposures(self, instrument, detector, visit):
        """Return science, template, difference exposures, using a small
        cache so we don't have to re-read files as often.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        exposures : `tuple` [`lsst.afw.image.ExposureF`]
            Science, template, and difference exposure for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return (self._butler.get(self._config.science_image_type, data_id),
                self._butler.get(f'{self._config.diff_image_type}_templateExp', data_id),
                self._butler.get(f'{self._config.diff_image_type}_differenceExp', data_id))

    @functools.lru_cache(maxsize=4)
    def get_catalog(self, instrument, detector, visit):
        """Return the diaSrc catalog from the butler.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            DiaSource catalog for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return self._butler.get(f'{self._config.diff_image_type}_diaSrc', data_id)


# Global used within each multiprocessing worker (or single process).
butler_cache = _ButlerCache()
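# A minimal usage sketch (the repo path, collection, and data id values below
# are illustrative, not part of this module):
#
#     butler = lsst.daf.butler.Butler("/repo/main", collections=["my/collection"])
#     butler_cache.set(butler, PlotImageSubtractionCutoutsConfig())
#     science, template, difference = butler_cache.get_exposures("DECam", 5, 123456)
#
# With the default config this reads the "calexp", "goodSeeingDiff_templateExp",
# and "goodSeeingDiff_differenceExp" datasets for that data id.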


class PlotImageSubtractionCutoutsConfig(pexConfig.Config):
    sizes = pexConfig.ListField(
        doc="List of widths of cutouts to extract from the science, template, "
            "and difference exposures.",
        dtype=int,
        default=[30],
    )
    use_footprint = pexConfig.Field(
        doc="Use source footprint to define the cutout region; "
            "if set, ignore `sizes` and use the footprint bbox instead.",
        dtype=bool,
        default=False,
    )
    url_root = pexConfig.Field(
        doc="URL that the resulting images will be served to Zooniverse from, for the manifest file. "
            "If not set, no manifest file will be written.",
        dtype=str,
        default=None,
        optional=True,
    )
    diff_image_type = pexConfig.Field(
        doc="Dataset type of template and difference image to use for cutouts; "
            "will have '_templateExp' and '_differenceExp' appended for butler.get(), respectively.",
        dtype=str,
        default="goodSeeingDiff",
    )
    science_image_type = pexConfig.Field(
        doc="Dataset type of science image to use for cutouts.",
        dtype=str,
        default="calexp",
    )
    add_metadata = pexConfig.Field(
        doc="Annotate the cutouts with catalog metadata, including coordinates, fluxes, flags, etc.",
        dtype=bool,
        default=True
    )
    chunk_size = pexConfig.Field(
        doc="Chunk up files into subdirectories, with at most this many files per directory."
            " None means write all the files to one `images/` directory.",
        dtype=int,
        default=10000,
        optional=True
    )
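
# A sketch of a config override file for use with ``--configFile`` (the values
# below are illustrative; the field names are those defined above):
#
#     config.sizes = [30, 100]
#     config.use_footprint = False
#     config.chunk_size = 1000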


class PlotImageSubtractionCutoutsTask(lsst.pipe.base.Task):
    """Generate template/science/difference image cutouts of DiaSources and an
    optional manifest for upload to a Zooniverse project.

    Parameters
    ----------
    output_path : `str`
        The path to write the output to; manifest goes here, while the
        images themselves go into ``output_path/images/``.
    """
    ConfigClass = PlotImageSubtractionCutoutsConfig
    _DefaultName = "plotImageSubtractionCutouts"

    def __init__(self, *, output_path, **kwargs):
        super().__init__(**kwargs)
        self._output_path = output_path
        self.cutout_path = CutoutPath(output_path, chunk_size=self.config.chunk_size)

    def _reduce_kwargs(self):
        # to allow pickling of this Task
        kwargs = super()._reduce_kwargs()
        kwargs["output_path"] = self._output_path
        return kwargs

    def run(self, data, butler, njobs=0):
        """Generate cutout images and a manifest for upload to Zooniverse
        from a collection of DiaSources.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        source_ids : `list` [`int`]
            DiaSourceIds of cutout images that were generated.
        """
        result = self.write_images(data, butler, njobs=njobs)
        self.write_manifest(result)
        self.log.info("Wrote %d images to %s", len(result), self._output_path)
        return result

    def write_manifest(self, sources):
        """Save a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.
        """
        if self.config.url_root is not None:
            manifest = self._make_manifest(sources)
            manifest.to_csv(os.path.join(self._output_path, "manifest.csv"), index=False)
        else:
            self.log.info("No url_root config provided, so no Zooniverse manifest file was written.")

    def _make_manifest(self, sources):
        """Return a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.

        Returns
        -------
        manifest : `pandas.DataFrame`
            The formatted URL manifest for upload to Zooniverse.
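
        Notes
        -----
        A sketch of the resulting CSV (the URL root here is illustrative): with
        ``url_root="https://example.org/cutouts"``, the file has the header row
        ``external_id,location:1,metadata:diaSourceId`` and rows like
        ``123456789,https://example.org/cutouts/images/123456789.png,123456789``.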

        """
        cutout_path = CutoutPath(self.config.url_root)
        manifest = pd.DataFrame()
        manifest["external_id"] = sources
        manifest["location:1"] = [cutout_path(x) for x in sources]
        manifest["metadata:diaSourceId"] = sources
        return manifest

    def write_images(self, data, butler, njobs=0):
        """Make the 3-part cutout images for each requested source and write
        them to disk.

        Creates an ``images/`` subdirectory via cutout_path if one
        does not already exist; images are written there as PNG files.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        sources : `list`
            DiaSourceIds that had cutouts made.
        """
        # Ignore divide-by-zero and log-of-negative-value messages.
        seterr_dict = np.seterr(divide="ignore", invalid="ignore")

        # Create a subdirectory for the images.
        pathlib.Path(os.path.join(self._output_path, "images")).mkdir(exist_ok=True)

        sources = []
        butler_cache.set(butler, self.config)
        if njobs > 0:
            with multiprocessing.Pool(njobs) as pool:
                # Each record is a single argument to _do_one_source, so use
                # map rather than starmap.
                sources = pool.map(self._do_one_source, data.to_records())
        else:
            for source in data.to_records():
                id = self._do_one_source(source)
                sources.append(id)

        # restore numpy error message state
        np.seterr(**seterr_dict)
        # Only return successful ids, not failures.
        return [s for s in sources if s is not None]

    def _do_one_source(self, source):
        """Make cutouts for one diaSource.

        Parameters
        ----------
        source : `numpy.record`
            DiaSource record to make the cutout for.

        Returns
        -------
        diaSourceId : `int` or None
            Id of the source whose cutout was generated, or None if there was
            an error.
        """
        try:
            center = lsst.geom.SpherePoint(source["ra"], source["dec"], lsst.geom.degrees)
            science, template, difference = butler_cache.get_exposures(source["instrument"],
                                                                       source["detector"],
                                                                       source["visit"])
            if self.config.use_footprint:
                catalog = butler_cache.get_catalog(source["instrument"],
                                                   source["detector"],
                                                   source["visit"])
                # The input catalogs must be sorted.
                if not catalog.isSorted():
                    data_id = {'instrument': source["instrument"],
                               'detector': source["detector"],
                               'visit': source["visit"]}
                    msg = f"{self.config.diff_image_type}_diaSrc catalog for {data_id} is not sorted!"
                    raise RuntimeError(msg)
                record = catalog.find(source['diaSourceId'])
                footprint = record.getFootprint()

            scale = science.wcs.getPixelScale().asArcseconds()
            image = self.generate_image(science, template, difference, center, scale,
                                        source=source if self.config.add_metadata else None,
                                        footprint=footprint if self.config.use_footprint else None)
            self.cutout_path.mkdir(source["diaSourceId"])
            with open(self.cutout_path(source["diaSourceId"]), "wb") as outfile:
                outfile.write(image.getbuffer())
            return source["diaSourceId"]
        except (LookupError, lsst.pex.exceptions.Exception) as e:
            self.log.error(
                f"{e.__class__.__name__} processing diaSourceId {source['diaSourceId']}: {e}"
            )
            return None
        except Exception:
            # Ensure other exceptions are interpretable when multiprocessing.
            import traceback
            traceback.print_exc()
            raise

    def generate_image(self, science, template, difference, center, scale,
                       source=None, footprint=None):
        """Get a 3-part cutout image to save to disk, for a single source.

        Parameters
        ----------
        science : `lsst.afw.image.ExposureF`
            Science exposure to include in the cutout.
        template : `lsst.afw.image.ExposureF`
            Matched template exposure to include in the cutout.
        difference : `lsst.afw.image.ExposureF`
            Matched science minus template exposure to include in the cutout.
        center : `lsst.geom.SpherePoint`
            Center of the source to be cut out of each image.
        scale : `float`
            Pixel scale in arcseconds.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.
        footprint : `lsst.afw.detection.Footprint`, optional
            Detected source footprint; if specified, extract a square
            surrounding the footprint bbox, otherwise use ``config.sizes``.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file or displayed on screen.
        """
        if not self.config.use_footprint:
            sizes = self.config.sizes
            cutout_science, cutout_template, cutout_difference = [], [], []
            for s in sizes:
                extent = lsst.geom.Extent2I(s, s)
                cutout_science.append(science.getCutout(center, extent))
                cutout_template.append(template.getCutout(center, extent))
                cutout_difference.append(difference.getCutout(center, extent))
        else:
            cutout_science = [science.getCutout(footprint.getBBox())]
            cutout_template = [template.getCutout(footprint.getBBox())]
            cutout_difference = [difference.getCutout(footprint.getBBox())]
            extent = footprint.getBBox().getDimensions()
            # Plot a square equal to the largest dimension.
            sizes = [extent.x if extent.x > extent.y else extent.y]
        return self._plot_cutout(cutout_science,
                                 cutout_template,
                                 cutout_difference,
                                 scale,
                                 sizes,
                                 source=source)

    def _plot_cutout(self, science, template, difference, scale, sizes, source=None):
        """Plot the cutouts for a source in one image.

        Parameters
        ----------
        science : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science exposure(s) to include in the image.
        template : `list` [`lsst.afw.image.ExposureF`]
            List of cutout template exposure(s) to include in the image.
        difference : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science minus template exposure(s) to include
            in the image.
        scale : `float`
            Pixel scale in arcseconds.
        sizes : `list` [`int`]
            List of x/y dimensions of the images passed in, to set imshow
            extent.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file via
            `image.write(filename)` or displayed on screen.
        """
        import astropy.visualization as aviz
        import matplotlib
        matplotlib.use("AGG")
        # Force matplotlib defaults
        matplotlib.rcParams.update(matplotlib.rcParamsDefault)
        import matplotlib.pyplot as plt
        from matplotlib import cm

        # TODO DM-32014: how do we color masked pixels (including edges)?

        def plot_one_image(ax, data, size, name=None):
            """Plot a normalized image on an axis."""
            if name == "Difference":
                norm = aviz.ImageNormalize(
                    # focus on a rect of dim 15 at the center of the image.
                    data[data.shape[0] // 2 - 7:data.shape[0] // 2 + 8,
                         data.shape[1] // 2 - 7:data.shape[1] // 2 + 8],
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            else:
                norm = aviz.ImageNormalize(
                    data,
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            ax.imshow(data, cmap=cm.bone, interpolation="none", norm=norm,
                      extent=(0, size, 0, size), origin="lower", aspect="equal")
            # Draw a 1-arcsecond scale bar (yellow line with a blue border).
            x_line = 1
            y_line = 1
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="blue", lw=6)
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="yellow", lw=2)
            ax.axis("off")
            if name is not None:
                ax.set_title(name)

        try:
            len_sizes = len(sizes)
            fig, axs = plt.subplots(len_sizes, 3, constrained_layout=True)
            if len_sizes == 1:
                plot_one_image(axs[0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[2], difference[0].image.array, sizes[0], "Difference")
            else:
                plot_one_image(axs[0][0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[0][1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[0][2], difference[0].image.array, sizes[0], "Difference")
                for i in range(1, len(axs)):
                    plot_one_image(axs[i][0], template[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][1], science[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][2], difference[i].image.array, sizes[i], None)
            plt.tight_layout()
            if source is not None:
                _annotate_image(fig, source, len_sizes)

            output = io.BytesIO()
            plt.savefig(output, bbox_inches="tight", format="png")
            output.seek(0)  # to ensure opening the image starts from the front
        finally:
            plt.close(fig)

        return output

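# A minimal sketch of using the Task API directly (the repo path, collection
# name, and DataFrame are illustrative; in practice `data` usually comes from
# an APDB query such as `select_sources` below):
#
#     butler = lsst.daf.butler.Butler("/repo/main", collections=["my/collection"])
#     task = PlotImageSubtractionCutoutsTask(output_path="/path/to/output")
#     # `data` must have ra, dec, diaSourceId, detector, visit, instrument columns.
#     source_ids = task.run(data, butler, njobs=4)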

def _annotate_image(fig, source, len_sizes):
    """Annotate the cutouts image with metadata and flags.

    Parameters
    ----------
    fig : `matplotlib.Figure`
        Figure to be annotated.
    source : `numpy.record`
        DiaSource record of the object being plotted.
    len_sizes : `int`
        Length of the ``sizes`` array set in configuration.
    """
    # Names of flags fields to add a flag label to the image, using any().
    flags_psf = ["slot_PsfFlux_flag", "slot_PsfFlux_flag_noGoodPixels", "slot_PsfFlux_flag_edge"]
    flags_aperture = ["slot_ApFlux_flag", "slot_ApFlux_flag_apertureTruncated"]
    flags_forced = ["ip_diffim_forced_PsfFlux_flag", "ip_diffim_forced_PsfFlux_flag_noGoodPixels",
                    "ip_diffim_forced_PsfFlux_flag_edge"]
    flags_edge = ["pixelFlags_edge"]
    flags_interp = ["pixelFlags_interpolated", "pixelFlags_interpolatedCenter"]
    flags_saturated = ["pixelFlags_saturated", "pixelFlags_saturatedCenter"]
    flags_cr = ["pixelFlags_cr", "pixelFlags_crCenter"]
    flags_bad = ["pixelFlags_bad"]
    flags_suspect = ["pixelFlags_suspect", "pixelFlags_suspectCenter"]
    flags_centroid = ["slot_Centroid_flag"]
    flags_shape = ["slot_Shape_flag", "slot_Shape_flag_no_pixels", "slot_Shape_flag_not_contained",
                   "slot_Shape_flag_parent_source"]

    flag_color = "red"
    text_color = "grey"

    if len_sizes == 1:
        heights = [0.95, 0.91, 0.87, 0.83, 0.79]
    else:
        heights = [1.2, 1.15, 1.1, 1.05, 1.0]

    # NOTE: fig.text coordinates are in fractions of the figure.
    fig.text(0, heights[0], "diaSourceId:", color=text_color)
    fig.text(0.145, heights[0], f"{source['diaSourceId']}")
    fig.text(0.43, heights[0], f"{source['instrument']}", fontweight="bold")
    fig.text(0.64, heights[0], "detector:", color=text_color)
    fig.text(0.74, heights[0], f"{source['detector']}")
    fig.text(0.795, heights[0], "visit:", color=text_color)
    fig.text(0.85, heights[0], f"{source['visit']}")
    fig.text(0.95, heights[0], f"{source['band']}")

    fig.text(0.0, heights[1], "ra:", color=text_color)
    fig.text(0.037, heights[1], f"{source['ra']:.8f}")
    fig.text(0.21, heights[1], "dec:", color=text_color)
    fig.text(0.265, heights[1], f"{source['dec']:+.8f}")
    fig.text(0.50, heights[1], "detection S/N:", color=text_color)
    fig.text(0.66, heights[1], f"{source['snr']:6.1f}")
    fig.text(0.75, heights[1], "PSF chi2:", color=text_color)
    fig.text(0.85, heights[1], f"{source['psfChi2']/source['psfNdata']:6.2f}")

    fig.text(0.0, heights[2], "PSF (nJy):", color=flag_color if any(source[flags_psf]) else text_color)
    fig.text(0.25, heights[2], f"{source['psfFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[2], "+/-", color=text_color)
    fig.text(0.29, heights[2], f"{source['psfFluxErr']:8.1f}")
    fig.text(0.40, heights[2], "S/N:", color=text_color)
    fig.text(0.45, heights[2], f"{abs(source['psfFlux']/source['psfFluxErr']):6.2f}")

    # NOTE: yellow is hard to read on white; use goldenrod instead.
    if any(source[flags_edge]):
        fig.text(0.55, heights[2], "EDGE", color="goldenrod", fontweight="bold")
    if any(source[flags_interp]):
        fig.text(0.62, heights[2], "INTERP", color="green", fontweight="bold")
    if any(source[flags_saturated]):
        fig.text(0.72, heights[2], "SAT", color="green", fontweight="bold")
    if any(source[flags_cr]):
        fig.text(0.77, heights[2], "CR", color="magenta", fontweight="bold")
    if any(source[flags_bad]):
        fig.text(0.81, heights[2], "BAD", color="red", fontweight="bold")
    if source['isDipole']:
        fig.text(0.87, heights[2], "DIPOLE", color="indigo", fontweight="bold")

    fig.text(0.0, heights[3], "ap (nJy):", color=flag_color if any(source[flags_aperture]) else text_color)
    fig.text(0.25, heights[3], f"{source['apFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[3], "+/-", color=text_color)
    fig.text(0.29, heights[3], f"{source['apFluxErr']:8.1f}")
    fig.text(0.40, heights[3], "S/N:", color=text_color)
    fig.text(0.45, heights[3], f"{abs(source['apFlux']/source['apFluxErr']):#6.2f}")

    if any(source[flags_suspect]):
        fig.text(0.55, heights[3], "SUS", color="goldenrod", fontweight="bold")
    if any(source[flags_centroid]):
        fig.text(0.60, heights[3], "CENTROID", color="red", fontweight="bold")
    if any(source[flags_shape]):
        fig.text(0.73, heights[3], "SHAPE", color="red", fontweight="bold")
    # Future option: to add two more flag flavors to the legend,
    # use locations 0.80 and 0.87

    # rb score
    if source['reliability'] is not None and np.isfinite(source['reliability']):
        fig.text(0.73, heights[4], f"RB:{source['reliability']:.03f}",
                 color='#e41a1c' if source['reliability'] < 0.5 else '#4daf4a',
                 fontweight="bold")

    fig.text(0.0, heights[4], "sci (nJy):", color=flag_color if any(source[flags_forced]) else text_color)
    fig.text(0.25, heights[4], f"{source['scienceFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[4], "+/-", color=text_color)
    fig.text(0.29, heights[4], f"{source['scienceFluxErr']:8.1f}")
    fig.text(0.40, heights[4], "S/N:", color=text_color)
    fig.text(0.45, heights[4], f"{abs(source['scienceFlux']/source['scienceFluxErr']):6.2f}")
    fig.text(0.55, heights[4], "ABmag:", color=text_color)
    fig.text(0.635, heights[4], f"{(source['scienceFlux']*u.nanojansky).to_value(u.ABmag):.3f}")

class CutoutPath:
    """Manage paths to image cutouts with filenames based on diaSourceId.

    Supports local files, and id-chunked directories.

    Parameters
    ----------
    root : `str`
        Root file path to manage.
    chunk_size : `int`, optional
        At most this many files per directory. Must be a power of 10.

    Raises
    ------
    RuntimeError
        Raised if chunk_size is not a power of 10.
    """

    def __init__(self, root, chunk_size=None):
        self._root = root
        if chunk_size is not None and (log10(chunk_size) != int(log10(chunk_size))):
            raise RuntimeError(f"CutoutPath file chunk_size must be a power of 10, got {chunk_size}.")
        self._chunk_size = chunk_size

    def __call__(self, id):
        """Return the full path to a diaSource cutout.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.

        Returns
        -------
        path : `str`
            Full path to the requested file.
        """
        def chunker(id, size):
            return (id // size)*size

        if self._chunk_size is not None:
            return os.path.join(self._root, f"images/{chunker(id, self._chunk_size)}/{id}.png")
        else:
            return os.path.join(self._root, f"images/{id}.png")

    def mkdir(self, id):
        """Make the directory tree to write this cutout id to.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.
        """
        path = os.path.dirname(self(id))
        os.makedirs(path, exist_ok=True)

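# A sketch of the chunked layout (the root path here is illustrative): ids are
# grouped by (id // chunk_size) * chunk_size, so
#
#     CutoutPath("/data/cutouts", chunk_size=10000)(123456789)
#
# returns "/data/cutouts/images/123450000/123456789.png", while the unchunked
# CutoutPath("/data/cutouts")(123456789) returns "/data/cutouts/images/123456789.png".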

def build_argparser():
    """Construct an argument parser for the ``plotImageSubtractionCutouts``
    script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``plotImageSubtractionCutouts``
        command-line interface.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="More information is available at https://pipelines.lsst.io.",
    )

    apdbArgs = parser.add_mutually_exclusive_group(required=True)
    apdbArgs.add_argument(
        "--sqlitefile",
        default=None,
        help="Path to sqlite file to load from; required for sqlite connection.",
    )
    apdbArgs.add_argument(
        "--namespace",
        default=None,
        help="Postgres namespace (aka schema) to connect to; "
             "required for postgres connections.",
    )

    parser.add_argument(
        "--postgres_url",
        default="rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl",
        help="Postgres connection path, or default (None) to use ApdbPostgresQuery default.",
    )

    parser.add_argument(
        "--limit",
        default=5,
        type=int,
        help="Number of sources to load from the APDB (default=5), or the "
             "number of sources to load per 'page' when `--all` is set. "
             "This should be significantly larger (100x or more) than the value of `-j`, "
             "to ensure efficient use of each process.",
    )
    parser.add_argument(
        "--all",
        default=False,
        action="store_true",
        help="Process all the sources; --limit then becomes the 'page size' to chunk the DB into.",
    )

    parser.add_argument(
        "-j",
        "--jobs",
        default=0,
        type=int,
        help="Number of processes to use when generating cutouts. "
             "Specify 0 (the default) to not use multiprocessing at all. "
             "Note that `--limit` determines how efficiently each process is filled.",
    )

    parser.add_argument(
        "--instrument",
        required=True,
        help="Instrument short-name (e.g. 'DECam') of the data being loaded.",
    )
    parser.add_argument(
        "-C",
        "--configFile",
        help="File containing the PlotImageSubtractionCutoutsConfig to load.",
    )
    parser.add_argument(
        "--collections",
        nargs="*",
        help=(
            "Butler collection(s) to load data from."
            " If not specified, will search all butler collections, "
            "which may be very slow."
        ),
    )
    parser.add_argument("repo", help="Path to Butler repository to load data from.")
    parser.add_argument(
        "outputPath",
        help="Path to write the output images and manifest to; "
             "manifest is written here, while the images go to `OUTPUTPATH/images/`.",
    )
    parser.add_argument(
        "--reliabilityMin",
        type=float,
        default=None,
        help="Minimum reliability value (default=None) on which to filter the DiaSources.",
    )
    parser.add_argument(
        "--reliabilityMax",
        type=float,
        default=None,
        help="Maximum reliability value (default=None) on which to filter the DiaSources.",
    )
    return parser

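# An example invocation of the command-line script this parser defines (the
# paths, collection, and limits below are illustrative):
#
#     plotImageSubtractionCutouts --instrument DECam --sqlitefile /path/to/apdb.sqlite3 \
#         --collections my/collection -j 4 --limit 500 /repo/main /path/to/output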

def _make_apdbQuery(instrument, sqlitefile=None, postgres_url=None, namespace=None):
    """Return a query connection to the specified APDB.

    Parameters
    ----------
    instrument : `str`
        Short name (e.g. 'DECam') of the instrument associated with this data,
        to get detector/visit data.
    sqlitefile : `str`, optional
        SQLite file to load APDB from; if set, postgres kwargs are ignored.
    postgres_url : `str`, optional
        Postgres connection URL to connect to APDB.
    namespace : `str`, optional
        Postgres schema to load from; required with postgres_url.

    Returns
    -------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        Query instance to use to load data from APDB.

    Raises
    ------
    RuntimeError
        Raised if the APDB connection kwargs are invalid in some way.
    """
    if sqlitefile is not None:
        apdb_query = apdb.ApdbSqliteQuery(sqlitefile, instrument=instrument)
    elif postgres_url is not None and namespace is not None:
        apdb_query = apdb.ApdbPostgresQuery(namespace, postgres_url, instrument=instrument)
    else:
        raise RuntimeError("Cannot handle database connection args: "
                           f"sqlitefile={sqlitefile}, postgres_url={postgres_url}, namespace={namespace}")
    return apdb_query

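# A sketch of paging through an APDB with the helpers below (the sqlite path
# and page size are illustrative):
#
#     apdb_query = _make_apdbQuery("DECam", sqlitefile="/path/to/apdb.sqlite3")
#     for page in select_sources(apdb_query, limit=1000):
#         ...  # each `page` is a DataFrame of up to 1000 DiaSources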

def select_sources(apdb_query, limit, reliabilityMin=None, reliabilityMax=None):
    """Load DiaSources from an APDB, yielding them in pages of ``limit``
    sources.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.
    limit : `int`
        Number of sources to select from the APDB per page.
    reliabilityMin : `float`, optional
        Minimum reliability value on which to filter the DiaSources.
    reliabilityMax : `float`, optional
        Maximum reliability value on which to filter the DiaSources.

    Yields
    ------
    sources : `pandas.DataFrame`
        The loaded DiaSource data.
    """
    offset = 0
    try:
        while True:
            with apdb_query.connection as connection:
                table = apdb_query._tables["DiaSource"]
                query = table.select()
                if reliabilityMin is not None:
                    query = query.where(table.columns['reliability'] >= reliabilityMin)
                if reliabilityMax is not None:
                    query = query.where(table.columns['reliability'] <= reliabilityMax)
                query = query.order_by(table.columns["visit"],
                                       table.columns["detector"],
                                       table.columns["diaSourceId"])
                query = query.limit(limit).offset(offset)
                sources = pd.read_sql_query(query, connection)
                if len(sources) == 0:
                    break
                apdb_query._fill_from_instrument(sources)

            yield sources
            offset += limit
    finally:
        connection.close()


def len_sources(apdb_query):
    """Return the number of DiaSources in the supplied APDB.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.

    Returns
    -------
    count : `int`
        Number of diaSources in this APDB.
    """
    with apdb_query.connection as connection:
        count = connection.execute(sqlalchemy.text('select count(*) FROM "DiaSource";')).scalar()
    return count


def run_cutouts(args):
    """Run PlotImageSubtractionCutoutsTask on the parsed commandline arguments.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed commandline arguments.
    """
    # We have to initialize the logger manually on the commandline.
    logging.basicConfig(
        level=logging.INFO, format="{name} {levelname}: {message}", style="{"
    )

    butler = lsst.daf.butler.Butler(args.repo, collections=args.collections)
    apdb_query = _make_apdbQuery(args.instrument,
                                 sqlitefile=args.sqlitefile,
                                 postgres_url=args.postgres_url,
                                 namespace=args.namespace)

    config = PlotImageSubtractionCutoutsConfig()
    if args.configFile is not None:
        config.load(os.path.expanduser(args.configFile))
    config.freeze()
    cutouts = PlotImageSubtractionCutoutsTask(config=config, output_path=args.outputPath)

    getter = select_sources(apdb_query, args.limit, args.reliabilityMin, args.reliabilityMax)
    # Process just one block of length "limit", or all sources in the database?
    if not args.all:
        data = next(getter)
        sources = cutouts.run(data, butler, njobs=args.jobs)
    else:
        sources = []
        count = len_sources(apdb_query)
        for i, data in enumerate(getter):
            sources.extend(cutouts.write_images(data, butler, njobs=args.jobs))
            print(f"Completed {i+1} batches of {args.limit} size, out of {count} diaSources.")
        cutouts.write_manifest(sources)

    print(f"Generated {len(sources)} diaSource cutouts to {args.outputPath}.")


def main():
    args = build_argparser().parse_args()
    run_cutouts(args)