Coverage for python/lsst/analysis/ap/plotImageSubtractionCutouts.py: 15%
325 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-15 10:33 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-15 10:33 +0000
1# This file is part of analysis_ap.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Construct template/image/difference cutouts for upload to Zooniverse, or
23to just to view as images.
24"""
26__all__ = ["PlotImageSubtractionCutoutsConfig", "PlotImageSubtractionCutoutsTask", "CutoutPath"]
import argparse
import functools
import io
import logging
import multiprocessing
import os
import pathlib
from math import log10

import astropy.units as u
import lsst.daf.butler
import lsst.dax.apdb
import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pex.exceptions
import lsst.pipe.base
import lsst.utils
import numpy as np
import pandas as pd
import sqlalchemy

from . import apdb
class _ButlerCache:
    """Global class to handle butler queries, to allow lru_cache and
    `multiprocessing.Pool` to work together.

    If we redo this all to work with BPS or other parallelized systems, or get
    good butler-side caching, we could remove this lru_cache system.
    """

    def set(self, butler, config):
        """Call this to store a Butler and Config instance before using the
        global class instance.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler instance to store.
        config : `lsst.pex.config.Config`
            Config instance to store.
        """
        self._butler = butler
        self._config = config
        # Ensure the caches are empty if we've been re-set: cached exposures
        # and catalogs from a previous butler/config would be stale.
        self.get_exposures.cache_clear()
        self.get_catalog.cache_clear()

    # NOTE(review): lru_cache on an instance method keys on ``self`` and keeps
    # it alive (ruff B019); presumably acceptable here because the only
    # instance is the module-level singleton ``butler_cache`` below — confirm.
    @functools.lru_cache(maxsize=4)
    def get_exposures(self, instrument, detector, visit):
        """Return science, template, difference exposures, using a small
        cache so we don't have to re-read files as often.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        exposures : `tuple` [`lsst.afw.image.ExposureF`]
            Science, template, and difference exposure for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        # Dataset type names for template/difference are derived from the
        # configured diff_image_type (e.g. "goodSeeingDiff_templateExp").
        return (self._butler.get(self._config.science_image_type, data_id),
                self._butler.get(f'{self._config.diff_image_type}_templateExp', data_id),
                self._butler.get(f'{self._config.diff_image_type}_differenceExp', data_id))

    @functools.lru_cache(maxsize=4)
    def get_catalog(self, instrument, detector, visit):
        """Return the diaSrc catalog from the butler.

        Parameters
        ----------
        instrument : `str`
            Instrument name to define the data id.
        detector : `int`
            Detector id to define the data id.
        visit : `int`
            Visit id to define the data id.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            DiaSource catalog for this data id.
        """
        data_id = {'instrument': instrument, 'detector': detector, 'visit': visit}
        return self._butler.get(f'{self._config.diff_image_type}_diaSrc', data_id)


# Global used within each multiprocessing worker (or single process).
butler_cache = _ButlerCache()
class PlotImageSubtractionCutoutsConfig(pexConfig.Config):
    """Configuration for PlotImageSubtractionCutoutsTask."""

    # Ignored when ``use_footprint`` is set.
    sizes = pexConfig.ListField(
        doc="List of widths of cutout to extract for image from science, \
            template, and difference exposures.",
        dtype=int,
        default=[30],
    )
    use_footprint = pexConfig.Field(
        doc="Use source footprint to to define cutout region; "
        "If set, ignore `size` and use the footprint bbox instead.",
        dtype=bool,
        default=False,
    )
    url_root = pexConfig.Field(
        doc="URL that the resulting images will be served to Zooniverse from, for the manifest file. "
        "If not set, no manifest file will be written.",
        dtype=str,
        default=None,
        optional=True,
    )
    diff_image_type = pexConfig.Field(
        doc="Dataset type of template and difference image to use for cutouts; "
        "Will have '_templateExp' and '_differenceExp' appended for butler.get(), respectively.",
        dtype=str,
        default="goodSeeingDiff",
    )
    science_image_type = pexConfig.Field(
        doc="Dataset type of science image to use for cutouts.",
        dtype=str,
        default="calexp",
    )
    add_metadata = pexConfig.Field(
        doc="Annotate the cutouts with catalog metadata, including coordinates, fluxes, flags, etc.",
        dtype=bool,
        default=True
    )
    # Must be a power of 10 (enforced by CutoutPath).
    chunk_size = pexConfig.Field(
        doc="Chunk up files into subdirectories, with at most this many files per directory."
        " None means write all the files to one `images/` directory.",
        dtype=int,
        default=10000,
        optional=True
    )
class PlotImageSubtractionCutoutsTask(lsst.pipe.base.Task):
    """Generate template/science/difference image cutouts of DiaSources and an
    optional manifest for upload to a Zooniverse project.

    Parameters
    ----------
    output_path : `str`
        The path to write the output to; manifest goes here, while the
        images themselves go into ``output_path/images/``.
    """
    ConfigClass = PlotImageSubtractionCutoutsConfig
    _DefaultName = "plotImageSubtractionCutouts"

    def __init__(self, *, output_path, **kwargs):
        super().__init__(**kwargs)
        self._output_path = output_path
        self.cutout_path = CutoutPath(output_path, chunk_size=self.config.chunk_size)

    def _reduce_kwargs(self):
        # Include output_path in the pickled state, to allow pickling of this
        # Task for multiprocessing workers.
        kwargs = super()._reduce_kwargs()
        kwargs["output_path"] = self._output_path
        return kwargs

    def run(self, data, butler, njobs=0):
        """Generate cutout images and a manifest for upload to Zooniverse
        from a collection of DiaSources.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        source_ids : `list` [`int`]
            DiaSourceIds of cutout images that were generated.
        """
        result = self.write_images(data, butler, njobs=njobs)
        self.write_manifest(result)
        self.log.info("Wrote %d images to %s", len(result), self._output_path)
        return result

    def write_manifest(self, sources):
        """Save a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.
        """
        if self.config.url_root is not None:
            manifest = self._make_manifest(sources)
            manifest.to_csv(os.path.join(self._output_path, "manifest.csv"), index=False)
        else:
            self.log.info("No url_root config provided, so no Zooniverse manifest file was written.")

    def _make_manifest(self, sources):
        """Return a Zooniverse manifest attaching image URLs to source ids.

        Parameters
        ----------
        sources : `list` [`int`]
            The diaSourceIds of the sources that had cutouts successfully made.

        Returns
        -------
        manifest : `pandas.DataFrame`
            The formatted URL manifest for upload to Zooniverse.
        """
        # NOTE(review): this CutoutPath has no chunk_size, so URLs are not
        # chunked even when the local files are (config.chunk_size is set by
        # default) — confirm the served layout is intentionally flat.
        cutout_path = CutoutPath(self.config.url_root)
        manifest = pd.DataFrame()
        manifest["external_id"] = sources
        manifest["location:1"] = [cutout_path(x) for x in sources]
        manifest["metadata:diaSourceId"] = sources
        return manifest

    def write_images(self, data, butler, njobs=0):
        """Make the 3-part cutout images for each requested source and write
        them to disk.

        Creates a ``images/`` subdirectory via cutout_path if one
        does not already exist; images are written there as PNG files.

        Parameters
        ----------
        data : `pandas.DataFrame`
            The DiaSources to extract cutouts for. Must contain at least these
            fields: ``ra, dec, diaSourceId, detector, visit, instrument``.
        butler : `lsst.daf.butler.Butler`
            The butler connection to use to load the data; create it with the
            collections you wish to load images from.
        njobs : `int`, optional
            Number of multiprocessing jobs to make cutouts with; default of 0
            means don't use multiprocessing at all.

        Returns
        -------
        sources : `list`
            DiaSourceIds that had cutouts made.
        """
        # Ignore divide-by-zero and log-of-negative-value messages.
        seterr_dict = np.seterr(divide="ignore", invalid="ignore")

        # Create a subdirectory for the images.
        pathlib.Path(os.path.join(self._output_path, "images")).mkdir(exist_ok=True)

        butler_cache.set(butler, self.config)
        if njobs > 0:
            with multiprocessing.Pool(njobs) as pool:
                # Use map, not starmap: each record is a single argument to
                # _do_one_source. starmap would iterate each numpy record and
                # unpack its fields as separate arguments, raising TypeError.
                sources = pool.map(self._do_one_source, data.to_records())
        else:
            sources = [self._do_one_source(source) for source in data.to_records()]

        # restore numpy error message state
        np.seterr(**seterr_dict)
        # Only return successful ids, not failures.
        return [s for s in sources if s is not None]

    def _do_one_source(self, source):
        """Make cutouts for one diaSource.

        Parameters
        ----------
        source : `numpy.record`
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        diaSourceId : `int` or None
            Id of the source that was generated, or None if there was an error.
        """
        try:
            center = lsst.geom.SpherePoint(source["ra"], source["dec"], lsst.geom.degrees)
            science, template, difference = butler_cache.get_exposures(source["instrument"],
                                                                       source["detector"],
                                                                       source["visit"])
            if self.config.use_footprint:
                catalog = butler_cache.get_catalog(source["instrument"],
                                                   source["detector"],
                                                   source["visit"])
                # catalog.find() below requires a sorted input catalog.
                if not catalog.isSorted():
                    data_id = {'instrument': source["instrument"],
                               'detector': source["detector"],
                               'visit': source["visit"]}
                    msg = f"{self.config.diff_image_type}_diaSrc catalog for {data_id} is not sorted!"
                    raise RuntimeError(msg)
                record = catalog.find(source['diaSourceId'])
                footprint = record.getFootprint()

            scale = science.wcs.getPixelScale().asArcseconds()
            image = self.generate_image(science, template, difference, center, scale,
                                        source=source if self.config.add_metadata else None,
                                        footprint=footprint if self.config.use_footprint else None)
            self.cutout_path.mkdir(source["diaSourceId"])
            with open(self.cutout_path(source["diaSourceId"]), "wb") as outfile:
                outfile.write(image.getbuffer())
            return source["diaSourceId"]
        except (LookupError, lsst.pex.exceptions.Exception) as e:
            # Expected per-source failures (e.g. no data at this point, or a
            # cutout off the edge): log and skip this source.
            self.log.error(
                f"{e.__class__.__name__} processing diaSourceId {source['diaSourceId']}: {e}"
            )
            return None
        except Exception:
            # Ensure other exceptions are interpretable when multiprocessing.
            import traceback
            traceback.print_exc()
            raise

    def generate_image(self, science, template, difference, center, scale,
                       source=None, footprint=None):
        """Get a 3-part cutout image to save to disk, for a single source.

        Parameters
        ----------
        science : `lsst.afw.image.ExposureF`
            Science exposure to include in the cutout.
        template : `lsst.afw.image.ExposureF`
            Matched template exposure to include in the cutout.
        difference : `lsst.afw.image.ExposureF`
            Matched science minus template exposure to include in the cutout.
        center : `lsst.geom.SpherePoint`
            Center of the source to be cut out of each image.
        scale : `float`
            Pixel scale in arcseconds.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.
        footprint : `lsst.afw.detection.Footprint`, optional
            Detected source footprint; if specified, extract a square
            surrounding the footprint bbox, otherwise use ``config.sizes``.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file or displayed on screen.
        """
        if not self.config.use_footprint:
            # One row of cutouts per configured size.
            sizes = self.config.sizes
            cutout_science, cutout_template, cutout_difference = [], [], []
            for size in sizes:
                extent = lsst.geom.Extent2I(size, size)
                cutout_science.append(science.getCutout(center, extent))
                cutout_template.append(template.getCutout(center, extent))
                cutout_difference.append(difference.getCutout(center, extent))
        else:
            bbox = footprint.getBBox()
            cutout_science = [science.getCutout(bbox)]
            cutout_template = [template.getCutout(bbox)]
            cutout_difference = [difference.getCutout(bbox)]
            # Plot a square equal to the largest dimension.
            dims = bbox.getDimensions()
            sizes = [max(dims.x, dims.y)]
        return self._plot_cutout(cutout_science,
                                 cutout_template,
                                 cutout_difference,
                                 scale,
                                 sizes,
                                 source=source)

    def _plot_cutout(self, science, template, difference, scale, sizes, source=None):
        """Plot the cutouts for a source in one image.

        Parameters
        ----------
        science : `list` [`lsst.afw.image.ExposureF`]
            List of cutout Science exposure(s) to include in the image.
        template : `list` [`lsst.afw.image.ExposureF`]
            List of cutout template exposure(s) to include in the image.
        difference : `list` [`lsst.afw.image.ExposureF`]
            List of cutout science minus template exposure(s) to include
            in the image.
        scale : `float`
            Pixel scale in arcseconds.
        sizes : `list` [`int`]
            List of x/y dimensions of the images passed in, to set imshow
            extent.
        source : `numpy.record`, optional
            DiaSource record for this cutout, to add metadata to the image.

        Returns
        -------
        image : `io.BytesIO`
            The generated image, to be output to a file via
            `image.write(filename)` or displayed on screen.
        """
        import astropy.visualization as aviz
        import matplotlib
        matplotlib.use("AGG")
        # Force matplotlib defaults
        matplotlib.rcParams.update(matplotlib.rcParamsDefault)
        import matplotlib.pyplot as plt
        from matplotlib import cm

        # TODO DM-32014: how do we color masked pixels (including edges)?

        def plot_one_image(ax, data, size, name=None):
            """Plot a normalized image on an axis."""
            if name == "Difference":
                norm = aviz.ImageNormalize(
                    # Compute the stretch from a 15x15 region at the center of
                    # the image only, so single bright outliers don't flatten it.
                    data[data.shape[0] // 2 - 7:data.shape[0] // 2 + 8,
                         data.shape[1] // 2 - 7:data.shape[1] // 2 + 8],
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            else:
                norm = aviz.ImageNormalize(
                    data,
                    interval=aviz.MinMaxInterval(),
                    stretch=aviz.AsinhStretch(a=0.1),
                )
            ax.imshow(data, cmap=cm.bone, interpolation="none", norm=norm,
                      extent=(0, size, 0, size), origin="lower", aspect="equal")
            # Draw a 1-arcsecond scale bar: thin yellow line on a wider blue
            # one, for visibility on any background.
            x_line = 1
            y_line = 1
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="blue", lw=6)
            ax.plot((x_line, x_line + 1.0/scale), (y_line, y_line), color="yellow", lw=2)
            ax.axis("off")
            if name is not None:
                ax.set_title(name)

        fig = None
        try:
            len_sizes = len(sizes)
            fig, axs = plt.subplots(len_sizes, 3, constrained_layout=True)
            # With a single row, plt.subplots returns a 1-d array of axes.
            if len_sizes == 1:
                plot_one_image(axs[0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[2], difference[0].image.array, sizes[0], "Difference")
            else:
                plot_one_image(axs[0][0], template[0].image.array, sizes[0], "Template")
                plot_one_image(axs[0][1], science[0].image.array, sizes[0], "Science")
                plot_one_image(axs[0][2], difference[0].image.array, sizes[0], "Difference")
                # Subsequent rows get no titles.
                for i in range(1, len(axs)):
                    plot_one_image(axs[i][0], template[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][1], science[i].image.array, sizes[i], None)
                    plot_one_image(axs[i][2], difference[i].image.array, sizes[i], None)
            plt.tight_layout()
            if source is not None:
                _annotate_image(fig, source, len_sizes)

            output = io.BytesIO()
            plt.savefig(output, bbox_inches="tight", format="png")
            output.seek(0)  # to ensure opening the image starts from the front
        finally:
            # fig is None if plt.subplots itself raised; nothing to close then.
            if fig is not None:
                plt.close(fig)

        return output
def _annotate_image(fig, source, len_sizes):
    """Annotate the cutouts image with metadata and flags.

    Parameters
    ----------
    fig : `matplotlib.Figure`
        Figure to be annotated.
    source : `numpy.record`
        DiaSource record of the object being plotted.
    len_sizes : `int`
        Length of the ``sizes`` array set in configuration.
    """
    # Names of flags fields to add a flag label to the image, using any().
    flags_psf = ["slot_PsfFlux_flag", "slot_PsfFlux_flag_noGoodPixels", "slot_PsfFlux_flag_edge"]
    flags_aperture = ["slot_ApFlux_flag", "slot_ApFlux_flag_apertureTruncated"]
    flags_forced = ["ip_diffim_forced_PsfFlux_flag", "ip_diffim_forced_PsfFlux_flag_noGoodPixels",
                    "ip_diffim_forced_PsfFlux_flag_edge"]
    flags_edge = ["pixelFlags_edge"]
    flags_interp = ["pixelFlags_interpolated", "pixelFlags_interpolatedCenter"]
    flags_saturated = ["pixelFlags_saturated", "pixelFlags_saturatedCenter"]
    flags_cr = ["pixelFlags_cr", "pixelFlags_crCenter"]
    flags_bad = ["pixelFlags_bad"]
    flags_suspect = ["pixelFlags_suspect", "pixelFlags_suspectCenter"]
    flags_centroid = ["slot_Centroid_flag"]
    flags_shape = ["slot_Shape_flag", "slot_Shape_flag_no_pixels", "slot_Shape_flag_not_contained",
                   "slot_Shape_flag_parent_source"]

    # Flux labels are drawn in red when any of their measurement flags are
    # set; plain metadata labels are grey.
    flag_color = "red"
    text_color = "grey"

    # Vertical positions (figure fraction) of the five annotation rows: a
    # single cutout row leaves room inside the canvas, multiple rows push the
    # text above y=1.0.
    if len_sizes == 1:
        heights = [0.95, 0.91, 0.87, 0.83, 0.79]
    else:
        heights = [1.2, 1.15, 1.1, 1.05, 1.0]

    # NOTE: fig.text coordinates are in fractions of the figure.
    # Row 0: identifiers and the data id (instrument/detector/visit/band).
    fig.text(0, heights[0], "diaSourceId:", color=text_color)
    fig.text(0.145, heights[0], f"{source['diaSourceId']}")
    fig.text(0.43, heights[0], f"{source['instrument']}", fontweight="bold")
    fig.text(0.64, heights[0], "detector:", color=text_color)
    fig.text(0.74, heights[0], f"{source['detector']}")
    fig.text(0.795, heights[0], "visit:", color=text_color)
    fig.text(0.85, heights[0], f"{source['visit']}")
    fig.text(0.95, heights[0], f"{source['band']}")

    # Row 1: coordinates and detection statistics.
    fig.text(0.0, heights[1], "ra:", color=text_color)
    fig.text(0.037, heights[1], f"{source['ra']:.8f}")
    fig.text(0.21, heights[1], "dec:", color=text_color)
    fig.text(0.265, heights[1], f"{source['dec']:+.8f}")
    fig.text(0.50, heights[1], "detection S/N:", color=text_color)
    fig.text(0.66, heights[1], f"{source['snr']:6.1f}")
    fig.text(0.75, heights[1], "PSF chi2:", color=text_color)
    fig.text(0.85, heights[1], f"{source['psfChi2']/source['psfNdata']:6.2f}")

    # Row 2: PSF flux, its S/N, and pixel-flag labels.
    fig.text(0.0, heights[2], "PSF (nJy):", color=flag_color if any(source[flags_psf]) else text_color)
    fig.text(0.25, heights[2], f"{source['psfFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[2], "+/-", color=text_color)
    fig.text(0.29, heights[2], f"{source['psfFluxErr']:8.1f}")
    fig.text(0.40, heights[2], "S/N:", color=text_color)
    fig.text(0.45, heights[2], f"{abs(source['psfFlux']/source['psfFluxErr']):6.2f}")

    # NOTE: yellow is hard to read on white; use goldenrod instead.
    if any(source[flags_edge]):
        fig.text(0.55, heights[2], "EDGE", color="goldenrod", fontweight="bold")
    if any(source[flags_interp]):
        fig.text(0.62, heights[2], "INTERP", color="green", fontweight="bold")
    if any(source[flags_saturated]):
        fig.text(0.72, heights[2], "SAT", color="green", fontweight="bold")
    if any(source[flags_cr]):
        fig.text(0.77, heights[2], "CR", color="magenta", fontweight="bold")
    if any(source[flags_bad]):
        fig.text(0.81, heights[2], "BAD", color="red", fontweight="bold")
    if source['isDipole']:
        fig.text(0.87, heights[2], "DIPOLE", color="indigo", fontweight="bold")

    # Row 3: aperture flux, its S/N, and measurement-flag labels.
    fig.text(0.0, heights[3], "ap (nJy):", color=flag_color if any(source[flags_aperture]) else text_color)
    fig.text(0.25, heights[3], f"{source['apFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[3], "+/-", color=text_color)
    fig.text(0.29, heights[3], f"{source['apFluxErr']:8.1f}")
    fig.text(0.40, heights[3], "S/N:", color=text_color)
    fig.text(0.45, heights[3], f"{abs(source['apFlux']/source['apFluxErr']):#6.2f}")

    if any(source[flags_suspect]):
        fig.text(0.55, heights[3], "SUS", color="goldenrod", fontweight="bold")
    if any(source[flags_centroid]):
        fig.text(0.60, heights[3], "CENTROID", color="red", fontweight="bold")
    if any(source[flags_shape]):
        fig.text(0.73, heights[3], "SHAPE", color="red", fontweight="bold")
    # Future option: to add two more flag flavors to the legend,
    # use locations 0.80 and 0.87

    # rb score: red below 0.5, green at or above.
    if source['reliability'] is not None and np.isfinite(source['reliability']):
        fig.text(0.73, heights[4], f"RB:{source['reliability']:.03f}",
                 color='#e41a1c' if source['reliability'] < 0.5 else '#4daf4a',
                 fontweight="bold")

    # Row 4: forced science-image flux, its S/N, and AB magnitude.
    fig.text(0.0, heights[4], "sci (nJy):", color=flag_color if any(source[flags_forced]) else text_color)
    fig.text(0.25, heights[4], f"{source['scienceFlux']:8.1f}", horizontalalignment='right')
    fig.text(0.252, heights[4], "+/-", color=text_color)
    fig.text(0.29, heights[4], f"{source['scienceFluxErr']:8.1f}")
    fig.text(0.40, heights[4], "S/N:", color=text_color)
    fig.text(0.45, heights[4], f"{abs(source['scienceFlux']/source['scienceFluxErr']):6.2f}")
    fig.text(0.55, heights[4], "ABmag:", color=text_color)
    fig.text(0.635, heights[4], f"{(source['scienceFlux']*u.nanojansky).to_value(u.ABmag):.3f}")
class CutoutPath:
    """Manage paths to image cutouts with filenames based on diaSourceId.

    Supports local files, and id-chunked directories.

    Parameters
    ----------
    root : `str`
        Root file path to manage.
    chunk_size : `int`, optional
        At most this many files per directory. Must be a power of 10.

    Raises
    ------
    RuntimeError
        Raised if chunk_size is not a power of 10.
    """

    def __init__(self, root, chunk_size=None):
        self._root = root
        # Integer power-of-10 check: the previous float comparison
        # (log10(chunk_size) != int(log10(chunk_size))) is fragile for large
        # powers of 10 and raised ValueError instead of the documented
        # RuntimeError for non-positive values.
        if chunk_size is not None and not self._is_power_of_ten(chunk_size):
            raise RuntimeError(f"CutoutPath file chunk_size must be a power of 10, got {chunk_size}.")
        self._chunk_size = chunk_size

    @staticmethod
    def _is_power_of_ten(value):
        """Return True if ``value`` is a positive power of 10 (including 1)."""
        if value < 1:
            return False
        while value % 10 == 0:
            value //= 10
        return value == 1

    def __call__(self, id):
        """Return the full path to a diaSource cutout.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.

        Returns
        -------
        path : `str`
            Full path to the requested file.
        """
        if self._chunk_size is not None:
            # Round down to the containing chunk boundary for the subdirectory.
            chunk = (id // self._chunk_size) * self._chunk_size
            return os.path.join(self._root, f"images/{chunk}/{id}.png")
        else:
            return os.path.join(self._root, f"images/{id}.png")

    def mkdir(self, id):
        """Make the directory tree to write this cutout id to.

        Parameters
        ----------
        id : `int`
            Source id to create the path for.
        """
        os.makedirs(os.path.dirname(self(id)), exist_ok=True)
def build_argparser():
    """Construct an argument parser for the ``plotImageSubtractionCutouts``
    script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``plotImageSubtractionCutouts``
        command-line interface.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="More information is available at https://pipelines.lsst.io.",
    )

    # Exactly one of sqlite/postgres must be selected.
    db_group = parser.add_mutually_exclusive_group(required=True)
    db_group.add_argument(
        "--sqlitefile",
        default=None,
        help="Path to sqlite file to load from; required for sqlite connection.",
    )
    db_group.add_argument(
        "--namespace",
        default=None,
        help="Postgres namespace (aka schema) to connect to; "
             " required for postgres connections."
    )
    parser.add_argument(
        "--postgres_url",
        default="rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl",
        help="Postgres connection path, or default (None) to use ApdbPostgresQuery default."
    )

    parser.add_argument(
        "--limit",
        type=int,
        default=5,
        help="Number of sources to load from the APDB (default=5), or the "
             "number of sources to load per 'page' when `--all` is set. "
             "This should be significantly larger (100x or more) than the value of `-j`, "
             "to ensure efficient use of each process.",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        default=False,
        help="Process all the sources; --limit then becomes the 'page size' to chunk the DB into.",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=0,
        help="Number of processes to use when generating cutouts. "
             "Specify 0 (the default) to not use multiprocessing at all. "
             "Note that `--limit` determines how efficiently each process is filled."
    )
    parser.add_argument(
        "--instrument",
        required=True,
        help="Instrument short-name (e.g. 'DECam') of the data being loaded.",
    )
    parser.add_argument(
        "-C",
        "--configFile",
        help="File containing the PlotImageSubtractionCutoutsConfig to load.",
    )
    parser.add_argument(
        "--collections",
        nargs="*",
        help=(
            "Butler collection(s) to load data from."
            " If not specified, will search all butler collections, "
            "which may be very slow."
        ),
    )
    parser.add_argument("repo", help="Path to Butler repository to load data from.")
    parser.add_argument(
        "outputPath",
        help="Path to write the output images and manifest to; "
             "manifest is written here, while the images go to `OUTPUTPATH/images/`.",
    )
    parser.add_argument(
        "--reliabilityMin",
        type=float,
        default=None,
        help="Minimum reliability value (default=None) on which to filter the DiaSources.",
    )
    parser.add_argument(
        "--reliabilityMax",
        type=float,
        default=None,
        help="Maximum reliability value (default=None) on which to filter the DiaSources.",
    )
    return parser
754def _make_apdbQuery(instrument, sqlitefile=None, postgres_url=None, namespace=None):
755 """Return a query connection to the specified APDB.
757 Parameters
758 ----------
759 instrument : `lsst.obs.base.Instrument`
760 Instrument associated with this data, to get detector/visit data.
761 sqlitefile : `str`, optional
762 SQLite file to load APDB from; if set, postgres kwargs are ignored.
763 postgres_url : `str`, optional
764 Postgres connection URL to connect to APDB.
765 namespace : `str`, optional
766 Postgres schema to load from; required with postgres_url.
768 Returns
769 -------
770 apdb_query : `lsst.analysis.ap.ApdbQuery`
771 Query instance to use to load data from APDB.
773 Raises
774 ------
775 RuntimeError
776 Raised if the APDB connection kwargs are invalid in some way.
777 """
778 if sqlitefile is not None:
779 apdb_query = apdb.ApdbSqliteQuery(sqlitefile, instrument=instrument)
780 elif postgres_url is not None and namespace is not None:
781 apdb_query = apdb.ApdbPostgresQuery(namespace, postgres_url, instrument=instrument)
782 else:
783 raise RuntimeError("Cannot handle database connection args: "
784 f"sqlitefile={sqlitefile}, postgres_url={postgres_url}, namespace={namespace}")
785 return apdb_query
def select_sources(apdb_query, limit, reliabilityMin=None, reliabilityMax=None):
    """Load an APDB and yield pages of sources from it.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.
    limit : `int`
        Number of sources to select from the APDB per page.
    reliabilityMin : `float`, optional
        Minimum reliability value on which to filter the DiaSources.
    reliabilityMax : `float`, optional
        Maximum reliability value on which to filter the DiaSources.

    Yields
    ------
    sources : `pandas.DataFrame`
        The loaded DiaSource data, at most ``limit`` rows per page.
    """
    offset = 0
    try:
        while True:
            with apdb_query.connection as connection:
                table = apdb_query._tables["DiaSource"]
                query = table.select()
                if reliabilityMin is not None:
                    query = query.where(table.columns['reliability'] >= reliabilityMin)
                if reliabilityMax is not None:
                    query = query.where(table.columns['reliability'] <= reliabilityMax)
                # Deterministic ordering so that successive limit/offset pages
                # neither skip nor repeat rows.
                query = query.order_by(table.columns["visit"],
                                       table.columns["detector"],
                                       table.columns["diaSourceId"])
                query = query.limit(limit).offset(offset)
                sources = pd.read_sql_query(query, connection)
            # An empty page means we've consumed the whole table.
            if len(sources) == 0:
                break
            apdb_query._fill_from_instrument(sources)

            yield sources
            offset += limit
    finally:
        # NOTE(review): ``connection`` is only bound after the first ``with``
        # succeeds, so this raises NameError if that setup fails; presumably
        # the context manager does not itself close the connection — confirm
        # against ApdbQuery.connection's semantics.
        connection.close()
def len_sources(apdb_query):
    """Return the number of DiaSources in the supplied APDB.

    Parameters
    ----------
    apdb_query : `lsst.analysis.ap.ApdbQuery`
        APDB query interface to load from.

    Returns
    -------
    count : `int`
        Number of diaSources in this APDB.
    """
    count_query = sqlalchemy.text('select count(*) FROM "DiaSource";')
    with apdb_query.connection as connection:
        return connection.execute(count_query).scalar()
def run_cutouts(args):
    """Run PlotImageSubtractionCutoutsTask on the parsed commandline arguments.

    Parameters
    ----------
    args : `argparse.Namespace`
        Parsed commandline arguments.
    """
    # We have to initialize the logger manually on the commandline.
    logging.basicConfig(
        level=logging.INFO, format="{name} {levelname}: {message}", style="{"
    )

    butler = lsst.daf.butler.Butler(args.repo, collections=args.collections)
    apdb_query = _make_apdbQuery(args.instrument,
                                 sqlitefile=args.sqlitefile,
                                 postgres_url=args.postgres_url,
                                 namespace=args.namespace)

    config = PlotImageSubtractionCutoutsConfig()
    if args.configFile is not None:
        config.load(os.path.expanduser(args.configFile))
    config.freeze()
    cutouts = PlotImageSubtractionCutoutsTask(config=config, output_path=args.outputPath)

    # Lazy generator that pages sources out of the APDB, ``limit`` at a time.
    # (A duplicate, never-consumed select_sources() generator that was created
    # before this one has been removed.)
    getter = select_sources(apdb_query, args.limit, args.reliabilityMin, args.reliabilityMax)
    # Process just one block of length "limit", or all sources in the database?
    if not args.all:
        data = next(getter)
        sources = cutouts.run(data, butler, njobs=args.jobs)
    else:
        sources = []
        count = len_sources(apdb_query)
        for i, data in enumerate(getter):
            sources.extend(cutouts.write_images(data, butler, njobs=args.jobs))
            print(f"Completed {i+1} batches of {args.limit} size, out of {count} diaSources.")
        # Write the manifest once, covering every batch.
        cutouts.write_manifest(sources)

    print(f"Generated {len(sources)} diaSource cutouts to {args.outputPath}.")
def main():
    """Commandline entry point: parse arguments and generate cutouts."""
    run_cutouts(build_argparser().parse_args())