Coverage for python/lsst/analysis/ap/plotImageSubtractionCutouts.py: 15%
340 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 12:10 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 12:10 +0000
1# This file is part of analysis_ap.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Construct template/image/difference cutouts for upload to Zooniverse, or
23to just to view as images.
24"""
26__all__ = ["PlotImageSubtractionCutoutsConfig", "PlotImageSubtractionCutoutsTask", "CutoutPath"]
import argparse
import functools
import io
import logging
import multiprocessing
import os
import pathlib
from math import log10

import astropy.units as u
import lsst.daf.butler
import lsst.dax.apdb
import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pex.exceptions
import lsst.pipe.base
import lsst.utils
import numpy as np
import pandas as pd
import sqlalchemy

from . import apdb
class _ButlerCache:
    """Global class to handle butler queries, to allow lru_cache and
    `multiprocessing.Pool` to work together.

    Each process (main process or multiprocessing worker) holds its own
    module-level instance, so the lru_caches below are per-process.
    NOTE: `lru_cache` on methods keys on ``self`` and keeps the instance
    alive; that is acceptable here because the only instance is the
    module-level singleton ``butler_cache``.

    If we redo this all to work with BPS or other parallelized systems, or get
    good butler-side caching, we could remove this lru_cache system.
    """

    def set(self, butler, config):
        """Call this to store a Butler and Config instance before using the
        global class instance.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler instance to store.
        config : `lsst.pex.config.Config`
            Config instance to store.
        """
        self._butler = butler
        self._config = config
        # Ensure the caches are empty if we've been re-set: cached entries
        # would otherwise refer to the previously stored butler/config.
        self.get_exposures.cache_clear()
        self.get_catalog.cache_clear()

    @functools.lru_cache(maxsize=4)
    def get_exposures(self, instrument, detector, visit):
        """Return science, template, difference exposures, using a small
        cache so we don't have to re-read files as often.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        exposures : `tuple` [`lsst.afw.image.ExposureF`]
            Science, template, and difference exposure for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        # Dataset type names come from config; template/difference names are
        # derived from the configured diff_image_type prefix.
        return (self._butler.get(self._config.science_image_type, data_id),
                self._butler.get(f'{self._config.diff_image_type}_templateExp', data_id),
                self._butler.get(f'{self._config.diff_image_type}_differenceExp', data_id))

    @functools.lru_cache(maxsize=4)
    def get_catalog(self, instrument, detector, visit):
        """Return the diaSrc catalog from the butler.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            DiaSource catalog for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return self._butler.get(f'{self._config.diff_image_type}_diaSrc', data_id)
# Global used within each multiprocessing worker (or single process).
# Callers must invoke ``butler_cache.set(butler, config)`` before using it;
# see _ButlerCache for why this is module-level state.
butler_cache = _ButlerCache()
class PlotImageSubtractionCutoutsConfig(pexConfig.Config):
    """Configuration for PlotImageSubtractionCutoutsTask."""
    sizes = pexConfig.ListField(
        # Implicit string concatenation here (instead of a backslash line
        # continuation inside the literal, which embedded the continuation
        # line's leading whitespace into the rendered doc text).
        doc="List of widths of cutout to extract for image from science, "
            "template, and difference exposures.",
        dtype=int,
        default=[30],
    )
    use_footprint = pexConfig.Field(
        doc="Use source footprint to define cutout region; "
            "If set, ignore `sizes` and use the footprint bbox instead.",
        dtype=bool,
        default=False,
    )
    url_root = pexConfig.Field(
        doc="URL that the resulting images will be served to Zooniverse from, for the manifest file. "
            "If not set, no manifest file will be written.",
        dtype=str,
        default=None,
        optional=True,
    )
    diff_image_type = pexConfig.Field(
        doc="Dataset type of template and difference image to use for cutouts; "
            "Will have '_templateExp' and '_differenceExp' appended for butler.get(), respectively.",
        dtype=str,
        default="goodSeeingDiff",
    )
    science_image_type = pexConfig.Field(
        doc="Dataset type of science image to use for cutouts.",
        dtype=str,
        default="calexp",
    )
    add_metadata = pexConfig.Field(
        doc="Annotate the cutouts with catalog metadata, including coordinates, fluxes, flags, etc.",
        dtype=bool,
        default=True,
    )
    chunk_size = pexConfig.Field(
        doc="Chunk up files into subdirectories, with at most this many files per directory."
            " None means write all the files to one `images/` directory.",
        dtype=int,
        default=10000,
        optional=True,
    )
    save_as_numpy = pexConfig.Field(
        doc="Save the raw cutout images in numpy format.",
        dtype=bool,
        default=False,
    )
class PlotImageSubtractionCutoutsTask(lsst.pipe.base.Task):
    """Generate template/science/difference image cutouts of DiaSources and an
    optional manifest for upload to a Zooniverse project.

    Parameters
    ----------
    output_path : `str`
        The path to write the output to; manifest goes here, while the
        images themselves go into ``output_path/images/``.
    """
    ConfigClass = PlotImageSubtractionCutoutsConfig
    _DefaultName = "plotImageSubtractionCutouts"

    def __init__(self, *, output_path, **kwargs):
        super().__init__(**kwargs)
        self._output_path = output_path
        self.cutout_path = CutoutPath(output_path, chunk_size=self.config.chunk_size)

    def _reduce_kwargs(self):
        # to allow pickling of this Task (e.g. for use with multiprocessing)
        kwargs = super()._reduce_kwargs()
        kwargs["output_path"] = self._output_path
        return kwargs

    def run(self, data, butler, njobs=0):
        """Generate cutout images and a manifest for upload to Zooniverse
        from a collection of DiaSources.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        source_ids : `list` [`int`]
            DiaSourceIds of cutout images that were generated.
        """
        result = self.write_images(data, butler, njobs=njobs)
        self.write_manifest(result)
        self.log.info("Wrote %d images to %s", len(result), self._output_path)
        return result

    def write_manifest(self, sources):
        """Save a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.
        """
        if self.config.url_root is not None:
            manifest = self._make_manifest(sources)
            manifest.to_csv(os.path.join(self._output_path, "manifest.csv"), index=False)
        else:
            self.log.info("No url_root config provided, so no Zooniverse manifest file was written.")

    def _make_manifest(self, sources):
        """Return a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.

        Returns
        -------
        manifest : `pandas.DataFrame`
            The formatted URL manifest for upload to Zooniverse.
        """
        # Use the same chunk_size as the on-disk writer, so the generated
        # URLs match the chunked directory layout the files were written to.
        cutout_path = CutoutPath(self.config.url_root, chunk_size=self.config.chunk_size)
        manifest = pd.DataFrame()
        manifest["external_id"] = sources
        manifest["location:1"] = [cutout_path(x) for x in sources]
        manifest["metadata:diaSourceId"] = sources
        return manifest

    def write_images(self, data, butler, njobs=0):
        """Make the 3-part cutout images for each requested source and write
        them to disk.

        Creates an ``images/`` subdirectory via cutout_path if one
        does not already exist; images are written there as PNG files.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        sources : `list`
            DiaSourceIds that had cutouts made.
        """
        # Ignore divide-by-zero and log-of-negative-value messages.
        seterr_dict = np.seterr(divide="ignore", invalid="ignore")

        # Create a subdirectory for the images.
        pathlib.Path(os.path.join(self._output_path, "images")).mkdir(exist_ok=True)

        sources = []
        butler_cache.set(butler, self.config)
        if njobs > 0:
            with multiprocessing.Pool(njobs) as pool:
                # NOTE: use map, not starmap: each record is a single
                # positional argument to _do_one_source; starmap would unpack
                # the record's fields into separate arguments.
                sources = pool.map(self._do_one_source, data.to_records())
        else:
            for source in data.to_records():
                sources.append(self._do_one_source(source))

        # restore numpy error message state
        np.seterr(**seterr_dict)
        # Only return successful ids, not failures.
        return [s for s in sources if s is not None]

    def _do_one_source(self, source):
        """Make cutouts for one diaSource.

        Parameters
        ----------
        source : `numpy.record`
            DiaSource record to generate cutouts for.

        Returns
        -------
        diaSourceId : `int` or None
            Id of the source that was generated, or None if there was an error.
        """
        try:
            center = lsst.geom.SpherePoint(source["ra"], source["dec"], lsst.geom.degrees)
            science, template, difference = butler_cache.get_exposures(source["instrument"],
                                                                       source["detector"],
                                                                       source["visit"])
            footprint = None
            if self.config.use_footprint:
                catalog = butler_cache.get_catalog(source["instrument"],
                                                   source["detector"],
                                                   source["visit"])
                # The input catalogs must be sorted, otherwise catalog.find()
                # below cannot locate the source record.
                if not catalog.isSorted():
                    data_id = {'instrument': source["instrument"],
                               'detector': source["detector"],
                               'visit': source["visit"]}
                    msg = f"{self.config.diff_image_type}_diaSrc catalog for {data_id} is not sorted!"
                    raise RuntimeError(msg)
                record = catalog.find(source['diaSourceId'])
                footprint = record.getFootprint()

            scale = science.wcs.getPixelScale().asArcseconds()
            image = self.generate_image(science, template, difference, center, scale,
                                        dia_source_id=source['diaSourceId'],
                                        save_as_numpy=self.config.save_as_numpy,
                                        source=source if self.config.add_metadata else None,
                                        footprint=footprint)
            self.cutout_path.mkdir(source["diaSourceId"])
            with open(self.cutout_path(source["diaSourceId"]), "wb") as outfile:
                outfile.write(image.getbuffer())
            return source["diaSourceId"]
        except (LookupError, lsst.pex.exceptions.Exception) as e:
            # Expected per-source failures (e.g. source too close to the
            # detector edge to extract a cutout): log and keep going.
            self.log.error(
                "%s processing diaSourceId %s: %s", e.__class__.__name__, source['diaSourceId'], e
            )
            return None
        except Exception:
            # Ensure other exceptions are interpretable when multiprocessing.
            import traceback
            traceback.print_exc()
            raise

    def generate_image(self, science, template, difference, center, scale, dia_source_id=None,
                       save_as_numpy=False, source=None, footprint=None):
        """Get a 3-part cutout image to save to disk, for a single source.

        Parameters
        ----------
        science : `lsst.afw.image.ExposureF`
            Science exposure to include in the cutout.
        template : `lsst.afw.image.ExposureF`
            Matched template exposure to include in the cutout.
        difference : `lsst.afw.image.ExposureF`
            Matched science minus template exposure to include in the cutout.
        center : `lsst.geom.SpherePoint`
            Center of the source to be cut out of each image.
        scale : `float`
            Pixel scale in arcseconds.
        dia_source_id : `int`, optional
            DiaSourceId to use in the filename, if saving to disk.
        save_as_numpy : `bool`, optional
            Save the raw cutout images in numpy format.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.
        footprint : `lsst.afw.detection.Footprint`, optional
            Detected source footprint; if specified, extract a square
            surrounding the footprint bbox, otherwise use ``config.sizes``.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file or displayed on screen.

        Raises
        ------
        RuntimeError
            Raised if ``save_as_numpy`` is configured together with
            ``use_footprint``; footprint cutouts have no fixed size.
        """
        numpy_cutouts = {}
        if not self.config.use_footprint:
            sizes = self.config.sizes
            cutout_science, cutout_template, cutout_difference = [], [], []
            for s in sizes:
                extent = lsst.geom.Extent2I(s, s)
                science_cutout = science.getCutout(center, extent)
                template_cutout = template.getCutout(center, extent)
                difference_cutout = difference.getCutout(center, extent)
                if save_as_numpy:
                    numpy_cutouts[f"sci_{s}"] = science_cutout.image.array
                    numpy_cutouts[f"temp_{s}"] = template_cutout.image.array
                    numpy_cutouts[f"diff_{s}"] = difference_cutout.image.array
                cutout_science.append(science_cutout)
                cutout_template.append(template_cutout)
                cutout_difference.append(difference_cutout)
            if numpy_cutouts:
                # Write each raw array exactly once, after all cutouts have
                # been extracted. Leading axis of length 1 is added so files
                # can be stacked/concatenated downstream.
                path = os.path.join(self._output_path, "raw_npy")
                pathlib.Path(path).mkdir(exist_ok=True)
                for cutout_type, cutout in numpy_cutouts.items():
                    np.save(f"{path}/{dia_source_id}_{cutout_type}.npy",
                            np.expand_dims(cutout, axis=0))
        else:
            if self.config.save_as_numpy:
                raise RuntimeError("Cannot save as numpy when using footprints.")
            bbox = footprint.getBBox()
            cutout_science = [science.getCutout(bbox)]
            cutout_template = [template.getCutout(bbox)]
            cutout_difference = [difference.getCutout(bbox)]
            extent = bbox.getDimensions()
            # Plot a square equal to the largest dimension.
            sizes = [max(extent.x, extent.y)]

        return self._plot_cutout(cutout_science,
                                 cutout_template,
                                 cutout_difference,
                                 scale,
                                 sizes,
                                 source=source)

    def _plot_cutout(self, science, template, difference, scale, sizes, source=None):
        """Plot the cutouts for a source in one image.

        Parameters
        ----------
        science : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science exposure(s) to include in the image.
        template : `list` [`lsst.afw.image.ExposureF`]
            List of cutout template exposure(s) to include in the image.
        difference : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science minus template exposure(s) to include
            in the image.
        scale : `float`
            Pixel scale in arcseconds.
        sizes : `list` [`int`]
            List of x/y dimensions of the images passed in, to set imshow
            extent.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file via
            `image.write(filename)` or displayed on screen.
        """
        import astropy.visualization as aviz
        import matplotlib
        matplotlib.use("AGG")
        # Force matplotlib defaults
        matplotlib.rcParams.update(matplotlib.rcParamsDefault)
        import matplotlib.pyplot as plt
        from matplotlib import cm

        # TODO DM-32014: how do we color masked pixels (including edges)?

        def plot_one_image(ax, data, size, name=None):
            """Plot a normalized image on an axis."""
            if name == "Difference":
                norm = aviz.ImageNormalize(
                    # Normalize on a 15x15 pixel region at the image center,
                    # so outliers away from the source don't dominate.
                    data[data.shape[0] // 2 - 7:data.shape[0] // 2 + 8,
                         data.shape[1] // 2 - 7:data.shape[1] // 2 + 8],
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            else:
                norm = aviz.ImageNormalize(
                    data,
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            ax.imshow(data, cmap=cm.bone, interpolation="none", norm=norm,
                      extent=(0, size, 0, size), origin="lower", aspect="equal")
            # 1-arcsecond scale bar: yellow line on a wider blue background.
            x_line = 1
            y_line = 1
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="blue", lw=6)
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="yellow", lw=2)
            ax.axis("off")
            if name is not None:
                ax.set_title(name)

        fig = None
        try:
            len_sizes = len(sizes)
            fig, axs = plt.subplots(len_sizes, 3, constrained_layout=True)
            if len_sizes == 1:
                # A single row of axes is returned as a 1-d array.
                plot_one_image(axs[0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[2], difference[0].image.array, sizes[0], "Difference")
            else:
                # Titles only on the first row; subsequent rows are the same
                # source at the other configured cutout sizes.
                plot_one_image(axs[0][0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[0][1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[0][2], difference[0].image.array, sizes[0], "Difference")
                for i in range(1, len(axs)):
                    plot_one_image(axs[i][0], template[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][1], science[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][2], difference[i].image.array, sizes[i], None)
            plt.tight_layout()
            if source is not None:
                _annotate_image(fig, source, len_sizes)

            output = io.BytesIO()
            plt.savefig(output, bbox_inches="tight", format="png")
            output.seek(0)  # to ensure opening the image starts from the front
        finally:
            # fig is None if plt.subplots itself raised; nothing to close.
            if fig is not None:
                plt.close(fig)

        return output
def _annotate_image(fig, source, len_sizes):
    """Annotate the cutouts image with metadata and flags.

    Parameters
    ----------
    fig : `matplotlib.Figure`
        Figure to be annotated.
    source : `numpy.record`
        DiaSource record of the object being plotted.
    len_sizes : `int`
        Length of the ``size`` array set in configuration.
    """
    # Names of flags fields to add a flag label to the image, using any().
    flags_psf = ["slot_PsfFlux_flag", "slot_PsfFlux_flag_noGoodPixels", "slot_PsfFlux_flag_edge"]
    flags_aperture = ["slot_ApFlux_flag", "slot_ApFlux_flag_apertureTruncated"]
    flags_forced = ["ip_diffim_forced_PsfFlux_flag", "ip_diffim_forced_PsfFlux_flag_noGoodPixels",
                    "ip_diffim_forced_PsfFlux_flag_edge"]
    flags_edge = ["pixelFlags_edge"]
    flags_interp = ["pixelFlags_interpolated", "pixelFlags_interpolatedCenter"]
    flags_saturated = ["pixelFlags_saturated", "pixelFlags_saturatedCenter"]
    flags_cr = ["pixelFlags_cr", "pixelFlags_crCenter"]
    flags_bad = ["pixelFlags_bad"]
    flags_suspect = ["pixelFlags_suspect", "pixelFlags_suspectCenter"]
    flags_centroid = ["slot_Centroid_flag"]
    flags_shape = ["slot_Shape_flag", "slot_Shape_flag_no_pixels", "slot_Shape_flag_not_contained",
                   "slot_Shape_flag_parent_source"]

    # Field labels are grey; a label is drawn red when any of its associated
    # flags is set on the source.
    flag_color = "red"
    text_color = "grey"

    # Five rows of text. With a single cutout row the text fits inside the
    # figure (y < 1); with multiple rows it is placed above the axes (y >= 1).
    if len_sizes == 1:
        heights = [0.95, 0.91, 0.87, 0.83, 0.79]
    else:
        heights = [1.2, 1.15, 1.1, 1.05, 1.0]

    # NOTE: fig.text coordinates are in fractions of the figure.
    # Row 1: identifiers (diaSourceId, instrument, detector, visit, band).
    fig.text(0, heights[0], "diaSourceId:", color=text_color)
    fig.text(0.145, heights[0], f"{source['diaSourceId']}")
    fig.text(0.43, heights[0], f"{source['instrument']}", fontweight="bold")
    fig.text(0.64, heights[0], "detector:", color=text_color)
    fig.text(0.74, heights[0], f"{source['detector']}")
    fig.text(0.795, heights[0], "visit:", color=text_color)
    fig.text(0.85, heights[0], f"{source['visit']}")
    fig.text(0.95, heights[0], f"{source['band']}")

    # Row 2: coordinates, detection S/N, and reduced PSF-fit chi2.
    fig.text(0.0, heights[1], "ra:", color=text_color)
    fig.text(0.037, heights[1], f"{source['ra']:.8f}")
    fig.text(0.21, heights[1], "dec:", color=text_color)
    fig.text(0.265, heights[1], f"{source['dec']:+.8f}")
    fig.text(0.50, heights[1], "detection S/N:", color=text_color)
    fig.text(0.66, heights[1], f"{source['snr']:6.1f}")
    fig.text(0.75, heights[1], "PSF chi2:", color=text_color)
    fig.text(0.85, heights[1], f"{source['psfChi2']/source['psfNdata']:6.2f}")

    # Row 3: PSF flux measurement, plus pixel-flag labels.
    fig.text(0.0, heights[2], "PSF (nJy):", color=flag_color if any(source[flags_psf]) else text_color)
    fig.text(0.25, heights[2], f"{source['psfFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[2], "+/-", color=text_color)
    fig.text(0.29, heights[2], f"{source['psfFluxErr']:8.1f}")
    fig.text(0.40, heights[2], "S/N:", color=text_color)
    fig.text(0.45, heights[2], f"{abs(source['psfFlux']/source['psfFluxErr']):6.2f}")

    # NOTE: yellow is hard to read on white; use goldenrod instead.
    if any(source[flags_edge]):
        fig.text(0.55, heights[2], "EDGE", color="goldenrod", fontweight="bold")
    if any(source[flags_interp]):
        fig.text(0.62, heights[2], "INTERP", color="green", fontweight="bold")
    if any(source[flags_saturated]):
        fig.text(0.72, heights[2], "SAT", color="green", fontweight="bold")
    if any(source[flags_cr]):
        fig.text(0.77, heights[2], "CR", color="magenta", fontweight="bold")
    if any(source[flags_bad]):
        fig.text(0.81, heights[2], "BAD", color="red", fontweight="bold")
    if source['isDipole']:
        fig.text(0.87, heights[2], "DIPOLE", color="indigo", fontweight="bold")

    # Row 4: aperture flux measurement, plus measurement-flag labels.
    fig.text(0.0, heights[3], "ap (nJy):", color=flag_color if any(source[flags_aperture]) else text_color)
    fig.text(0.25, heights[3], f"{source['apFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[3], "+/-", color=text_color)
    fig.text(0.29, heights[3], f"{source['apFluxErr']:8.1f}")
    fig.text(0.40, heights[3], "S/N:", color=text_color)
    fig.text(0.45, heights[3], f"{abs(source['apFlux']/source['apFluxErr']):#6.2f}")

    if any(source[flags_suspect]):
        fig.text(0.55, heights[3], "SUS", color="goldenrod", fontweight="bold")
    if any(source[flags_centroid]):
        fig.text(0.60, heights[3], "CENTROID", color="red", fontweight="bold")
    if any(source[flags_shape]):
        fig.text(0.73, heights[3], "SHAPE", color="red", fontweight="bold")
    # Future option: to add two more flag flavors to the legend,
    # use locations 0.80 and 0.87

    # rb score: green if >= 0.5, red otherwise; skipped if NaN/None.
    if source['reliability'] is not None and np.isfinite(source['reliability']):
        fig.text(0.73, heights[4], f"RB:{source['reliability']:.03f}",
                 color='#e41a1c' if source['reliability'] < 0.5 else '#4daf4a',
                 fontweight="bold")

    # Row 5: forced science-image flux and the equivalent AB magnitude.
    fig.text(0.0, heights[4], "sci (nJy):", color=flag_color if any(source[flags_forced]) else text_color)
    fig.text(0.25, heights[4], f"{source['scienceFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[4], "+/-", color=text_color)
    fig.text(0.29, heights[4], f"{source['scienceFluxErr']:8.1f}")
    fig.text(0.40, heights[4], "S/N:", color=text_color)
    fig.text(0.45, heights[4], f"{abs(source['scienceFlux']/source['scienceFluxErr']):6.2f}")
    fig.text(0.55, heights[4], "ABmag:", color=text_color)
    fig.text(0.635, heights[4], f"{(source['scienceFlux']*u.nanojansky).to_value(u.ABmag):.3f}")
class CutoutPath:
    """Manage paths to image cutouts with filenames based on diaSourceId.

    Supports local files, and id-chunked directories.

    Parameters
    ----------
    root : `str`
        Root file path to manage.
    chunk_size : `int`, optional
        At most this many files per directory. Must be a positive power of 10.

    Raises
    ------
    RuntimeError
        Raised if chunk_size is not a positive power of 10.
    """

    def __init__(self, root, chunk_size=None):
        self._root = root
        # Validate with exact integer arithmetic rather than comparing
        # log10() floats; this also rejects zero/negative values with the
        # documented RuntimeError instead of math.log10's ValueError.
        if chunk_size is not None and not self._is_power_of_ten(chunk_size):
            raise RuntimeError(f"CutoutPath file chunk_size must be a power of 10, got {chunk_size}.")
        self._chunk_size = chunk_size

    @staticmethod
    def _is_power_of_ten(value):
        """Return True if ``value`` is a positive integer power of 10
        (1, 10, 100, ...)."""
        if value < 1:
            return False
        while value % 10 == 0:
            value //= 10
        return value == 1

    def __call__(self, id):
        """Return the full path to a diaSource cutout.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.

        Returns
        -------
        path : `str`
            Full path to the requested file.
        """
        def chunker(id, size):
            # Round the id down to its chunk's lower bound.
            return (id // size)*size

        if self._chunk_size is not None:
            return os.path.join(self._root, f"images/{chunker(id, self._chunk_size)}/{id}.png")
        else:
            return os.path.join(self._root, f"images/{id}.png")

    def mkdir(self, id):
        """Make the directory tree to write this cutout id to.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.
        """
        path = os.path.dirname(self(id))
        os.makedirs(path, exist_ok=True)
def build_argparser():
    """Construct an argument parser for the ``plotImageSubtractionCutouts``
    script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``plotImageSubtractionCutouts``
        command-line interface.
    """
    argparser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="More information is available at https://pipelines.lsst.io.",
    )

    # Exactly one APDB flavor (sqlite file, or postgres namespace) must be
    # specified on the commandline.
    database_group = argparser.add_mutually_exclusive_group(required=True)
    database_group.add_argument(
        "--sqlitefile",
        default=None,
        help="Path to sqlite file to load from; required for sqlite connection.",
    )
    database_group.add_argument(
        "--namespace",
        default=None,
        help="Postgres namespace (aka schema) to connect to; "
             " required for postgres connections."
    )

    argparser.add_argument(
        "--postgres_url",
        default="rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl",
        help="Postgres connection path, or default (None) to use ApdbPostgresQuery default."
    )

    # Paging/parallelism knobs.
    argparser.add_argument(
        "--limit",
        default=5,
        type=int,
        help="Number of sources to load from the APDB (default=5), or the "
             "number of sources to load per 'page' when `--all` is set. "
             "This should be significantly larger (100x or more) than the value of `-j`, "
             "to ensure efficient use of each process.",
    )
    argparser.add_argument(
        "--all",
        default=False,
        action="store_true",
        help="Process all the sources; --limit then becomes the 'page size' to chunk the DB into.",
    )

    argparser.add_argument(
        "-j",
        "--jobs",
        default=0,
        type=int,
        help="Number of processes to use when generating cutouts. "
             "Specify 0 (the default) to not use multiprocessing at all. "
             "Note that `--limit` determines how efficiently each process is filled."
    )

    # Butler-side options.
    argparser.add_argument(
        "--instrument",
        required=True,
        help="Instrument short-name (e.g. 'DECam') of the data being loaded.",
    )
    argparser.add_argument(
        "-C",
        "--configFile",
        help="File containing the PlotImageSubtractionCutoutsConfig to load.",
    )
    argparser.add_argument(
        "--collections",
        nargs="*",
        help=(
            "Butler collection(s) to load data from."
            " If not specified, will search all butler collections, "
            "which may be very slow."
        ),
    )
    argparser.add_argument("repo", help="Path to Butler repository to load data from.")
    argparser.add_argument(
        "outputPath",
        help="Path to write the output images and manifest to; "
             "manifest is written here, while the images go to `OUTPUTPATH/images/`.",
    )

    # Optional reliability-score filters on the selected DiaSources.
    argparser.add_argument(
        "--reliabilityMin",
        type=float,
        default=None,
        help="Minimum reliability value (default=None) on which to filter the DiaSources.",
    )
    argparser.add_argument(
        "--reliabilityMax",
        type=float,
        default=None,
        help="Maximum reliability value (default=None) on which to filter the DiaSources.",
    )
    return argparser
def _make_apdbQuery(instrument, sqlitefile=None, postgres_url=None, namespace=None):
    """Return a query connection to the specified APDB.

    Parameters
    ----------
    instrument : `lsst.obs.base.Instrument`
        Instrument associated with this data, to get detector/visit data.
    sqlitefile : `str`, optional
        SQLite file to load APDB from; if set, postgres kwargs are ignored.
    postgres_url : `str`, optional
        Postgres connection URL to connect to APDB.
    namespace : `str`, optional
        Postgres schema to load from; required with postgres_url.

    Returns
    -------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        Query instance to use to load data from APDB.

    Raises
    ------
    RuntimeError
        Raised if the APDB connection kwargs are invalid in some way.
    """
    # sqlite takes precedence; postgres needs both a URL and a namespace.
    if sqlitefile is not None:
        return apdb.ApdbSqliteQuery(sqlitefile, instrument=instrument)
    if postgres_url is not None and namespace is not None:
        return apdb.ApdbPostgresQuery(namespace, postgres_url, instrument=instrument)
    raise RuntimeError("Cannot handle database connection args: "
                       f"sqlitefile={sqlitefile}, postgres_url={postgres_url}, namespace={namespace}")
def select_sources(apdb_query, limit, reliabilityMin=None, reliabilityMax=None):
    """Load an APDB and yield pages of up to ``limit`` sources from it.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.
    limit : `int`
        Number of sources to select from the APDB per page.
    reliabilityMin : `float`, optional
        Minimum reliability value on which to filter the DiaSources.
    reliabilityMax : `float`, optional
        Maximum reliability value on which to filter the DiaSources.

    Yields
    ------
    sources : `pandas.DataFrame`
        The loaded DiaSource data, in pages of at most ``limit`` rows.
    """
    offset = 0
    # Guard for the finally block: only close a connection that was
    # actually opened (apdb_query.connection could raise before binding).
    connection = None
    try:
        while True:
            with apdb_query.connection as connection:
                table = apdb_query._tables["DiaSource"]
                query = table.select()
                if reliabilityMin is not None:
                    query = query.where(table.columns['reliability'] >= reliabilityMin)
                if reliabilityMax is not None:
                    query = query.where(table.columns['reliability'] <= reliabilityMax)
                # Deterministic ordering so successive pages neither skip
                # nor repeat sources.
                query = query.order_by(table.columns["visit"],
                                       table.columns["detector"],
                                       table.columns["diaSourceId"])
                query = query.limit(limit).offset(offset)
                sources = pd.read_sql_query(query, connection)
                if len(sources) == 0:
                    break
                apdb_query._fill_from_instrument(sources)

                yield sources
                offset += limit
    finally:
        if connection is not None:
            connection.close()
def len_sources(apdb_query):
    """Return the number of DiaSources in the supplied APDB.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.

    Returns
    -------
    count : `int`
        Number of diaSources in this APDB.
    """
    count_query = sqlalchemy.text('select count(*) FROM "DiaSource";')
    with apdb_query.connection as connection:
        count = connection.execute(count_query).scalar()
    return count
def run_cutouts(args):
    """Run PlotImageSubtractionCutoutsTask on the parsed commandline arguments.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed commandline arguments.
    """
    # We have to initialize the logger manually on the commandline.
    logging.basicConfig(
        level=logging.INFO, format="{name} {levelname}: {message}", style="{"
    )

    butler = lsst.daf.butler.Butler(args.repo, collections=args.collections)
    apdb_query = _make_apdbQuery(args.instrument,
                                 sqlitefile=args.sqlitefile,
                                 postgres_url=args.postgres_url,
                                 namespace=args.namespace)

    config = PlotImageSubtractionCutoutsConfig()
    if args.configFile is not None:
        config.load(os.path.expanduser(args.configFile))
    config.freeze()
    cutouts = PlotImageSubtractionCutoutsTask(config=config, output_path=args.outputPath)

    # A single paging generator over the selected sources.
    getter = select_sources(apdb_query, args.limit, args.reliabilityMin, args.reliabilityMax)
    # Process just one block of length "limit", or all sources in the database?
    if not args.all:
        data = next(getter)
        sources = cutouts.run(data, butler, njobs=args.jobs)
    else:
        sources = []
        count = len_sources(apdb_query)
        for i, data in enumerate(getter):
            sources.extend(cutouts.write_images(data, butler, njobs=args.jobs))
            print(f"Completed {i+1} batches of {args.limit} size, out of {count} diaSources.")
        cutouts.write_manifest(sources)

    print(f"Generated {len(sources)} diaSource cutouts to {args.outputPath}.")
def main():
    """Command-line entry point: parse arguments and generate cutouts."""
    run_cutouts(build_argparser().parse_args())