Coverage for python/lsst/meas/algorithms/convertReferenceCatalog.py: 31%
62 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-07 01:50 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-07 01:50 -0700
1# This file is part of meas_algorithms.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""
23Convert an external reference catalog into the hierarchical triangular mesh
24(HTM) sharded LSST-style format, to be ingested into the butler.
25"""
27__all__ = ["ConvertReferenceCatalogTask"]
29import argparse
30import glob
31import os
32import pathlib
33import logging
35import astropy
37from . import ConvertReferenceCatalogBase
class ConvertReferenceCatalogTask(ConvertReferenceCatalogBase):
    """Class for producing HTM-indexed reference catalogs from external
    catalog data.

    Parameters
    ----------
    output_dir : `str`
        The path to write the output files to, in a subdirectory defined by
        ``DatasetConfig.ref_dataset_name``.
    **kwargs
        Passed through unchanged to the base class constructor.

    Raises
    ------
    RuntimeError
        Raised if ``output_dir`` is not specified.
    """
    _DefaultName = 'ConvertReferenceCatalogTask'

    def __init__(self, *, output_dir=None, **kwargs):
        super().__init__(**kwargs)
        if output_dir is None:
            raise RuntimeError("Must specify output_dir.")
        # Keep the user-supplied directory separately: it is what gets
        # passed back to the constructor by `_reduce_kwargs`, and it is where
        # the ingest table is written.
        self.base_dir = output_dir
        self.output_dir = os.path.join(output_dir, self.config.dataset_config.ref_dataset_name)
        self.ingest_table_file = os.path.join(self.base_dir, "filename_to_htm.ecsv")

    def _preRun(self):
        """Create the output directory, failing if it already exists."""
        # Create the output path, if it doesn't exist; fail if the path exists:
        # we don't want to accidentally append to existing files.
        pathlib.Path(self.output_dir).mkdir(exist_ok=False)

    def _postRun(self, result):
        """Write the astropy table containing the htm->filename relationship.

        Parameters
        ----------
        result : mapping
            Mapping that is iterated over for its keys (written to the
            ``htmN`` column) and indexed by key for the corresponding
            filename (written to the ``filename`` column).
        """
        # A bare ``import astropy`` does not guarantee that the ``table``
        # subpackage is loaded, so import it explicitly before using it.
        import astropy.table

        dimension = f"htm{self.config.dataset_config.indexer.active.depth}"
        # Build the columns up front and construct the table once, rather
        # than growing it one row at a time.
        keys = list(result)
        filenames = [result[key] for key in keys]
        table = astropy.table.Table([filenames, keys],
                                    names=("filename", dimension),
                                    dtype=('str', 'int'))
        table.write(self.ingest_table_file)

    def _persistConfig(self):
        """Save the dataset config to ``config.py`` in the output directory."""
        filename = os.path.join(self.output_dir, "config.py")
        with open(filename, 'w') as file:
            self.config.dataset_config.saveToStream(file)

    def _getOnePixelFilename(self, start):
        """Return the path of a ``.fits`` file in the output directory.

        NOTE(review): ``start`` is not used here; the filename is built only
        from ``self.indexer.htm``. Confirm against the base class that this
        is the intended behavior.
        """
        return os.path.join(self.output_dir, f"{self.indexer.htm}.fits")

    def _writeMasterSchema(self, catalog):
        """Write ``catalog`` to ``master_schema.fits`` in the output directory."""
        filename = os.path.join(self.output_dir, "master_schema.fits")
        catalog.writeFits(filename)

    def _reduce_kwargs(self):
        """Return the base class kwargs extended with ``output_dir``."""
        # Need to be able to pickle this class to use the multiprocess manager.
        kwargs = super()._reduce_kwargs()
        kwargs['output_dir'] = self.base_dir
        return kwargs
def build_argparser():
    """Build the command-line parser for the ``convertReferenceCatalog`` script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        Parser with three positional arguments: ``outputDir``,
        ``configFile`` and ``fileglob`` (one or more values).
    """
    output_help = "Path to write the output shard files, configs, and `ingest-files` table to."
    config_help = "File containing the ConvertReferenceCatalogConfig fields."
    glob_help = ("Quoted glob for the files to be read in and converted."
                 " Example (note required quotes to prevent shell expansion):"
                 ' "gaia_source/csv/GaiaSource*"')

    argparser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.',
    )
    argparser.add_argument("outputDir", help=output_help)
    argparser.add_argument("configFile", help=config_help)
    # Accept one-or-more values so that an unquoted, shell-expanded glob can
    # be detected and reported with a useful error by the caller.
    argparser.add_argument("fileglob", nargs="+", help=glob_help)
    return argparser
def run_convert(outputDir, configFile, fileglob):
    """Run `ConvertReferenceCatalogTask` on the input arguments.

    After the conversion, the config that was used is saved to
    ``convertReferenceCatalogConfig.py`` in ``outputDir`` and the butler
    commands needed to ingest the output are printed.

    Parameters
    ----------
    outputDir : `str`
        Path to write the output files to.
    configFile : `str`
        File specifying the ``ConvertReferenceCatalogConfig`` fields.
    fileglob : `str`
        Quoted glob for the files to be read in and converted.
    """
    # We have to initialize the logger manually when running from the commandline.
    logging.basicConfig(level=logging.INFO, format="{name} {levelname}: {message}", style="{")

    config = ConvertReferenceCatalogTask.ConfigClass()
    config.load(configFile)
    config.validate()
    converter = ConvertReferenceCatalogTask(output_dir=outputDir, config=config)
    files = glob.glob(fileglob)
    converter.run(files)
    with open(os.path.join(outputDir, "convertReferenceCatalogConfig.py"), "w") as outfile:
        converter.config.saveToStream(outfile)

    # Take the dataset type name and HTM dimension from the config, so the
    # suggested commands match what was actually written instead of assuming
    # a particular catalog name and pixelization depth.
    dataset_name = config.dataset_config.ref_dataset_name
    dimension = f"htm{config.dataset_config.indexer.active.depth}"
    msg = ("Completed refcat conversion.\n\n"
           "Ingest the resulting files with the following commands, substituting the path\n"
           "to your butler repo for `REPO`, and the ticket number you are tracking this\n"
           "ingest on for `DM-NNNNN`:\n"
           f"\n butler register-dataset-type REPO {dataset_name} "
           f"SimpleCatalog {dimension}"
           f"\n butler ingest-files -t direct REPO {dataset_name} refcats/DM-NNNNN "
           f"{converter.ingest_table_file}"
           "\n butler collection-chain REPO --mode extend refcats refcats/DM-NNNNN")
    print(msg)
def main():
    """Parse the command line and run the reference catalog conversion.

    Raises
    ------
    RuntimeError
        Raised if more than one ``fileglob`` value is given, which happens
        when the shell expands an unquoted glob.
    """
    parsed = build_argparser().parse_args()
    patterns = parsed.fileglob
    if len(patterns) > 1:
        raise RuntimeError("Final argument must be a quoted file glob, not a shell-expanded list of files.")
    # The parser always yields a list for ``fileglob`` (which is what makes
    # the check above possible); pass on its single element.
    run_convert(parsed.outputDir, parsed.configFile, patterns[0])