Coverage for python/lsst/meas/algorithms/convertReferenceCatalog.py: 26%

214 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-28 09:55 +0000

1# This file is part of meas_algorithms. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22""" 

23Convert an external reference catalog into the hierarchical triangular mesh 

24(HTM) sharded LSST-style format, to be ingested into the butler. 

25""" 

26 

27__all__ = ["ConvertReferenceCatalogTask", "ConvertReferenceCatalogConfig", "DatasetConfig"] 

28 

import argparse
import glob
import itertools
import logging
import os
import pathlib

import astropy
import astropy.table
import astropy.units
import numpy

import lsst.afw.table
import lsst.pex.config as pexConfig
import lsst.pipe.base
import lsst.sphgeom
from lsst.daf.base import PropertyList

from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask
from . import convertRefcatManager

47 

48# The most recent Indexed Reference Catalog on-disk format version. 

49# See DatasetConfig.format_version for details of version numbers. 

50LATEST_FORMAT_VERSION = 2 

51 

52 

def addRefCatMetadata(catalog):
    """Attach the on-disk format version to a new, empty reference catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog to which metadata should be attached. Will be modified
        in-place.
    """
    metadata = catalog.getMetadata()
    if metadata is None:
        # The catalog was created without a metadata object; make one.
        metadata = PropertyList()
    metadata.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(metadata)

67 

68 

def _makeSchema(filterNameList, *, addCentroid=False,
                addIsPhotometric=False, addIsResolved=False,
                addIsVariable=False, fullPositionInformation=False):
    """Make a standard schema for reference object catalogs.

    Parameters
    ----------
    filterNameList : `list` of `str`
        List of filter names. Used to create <filterName>_flux fields.
    addCentroid : `bool`
        If True then add fields "centroid" and "hasCentroid".
    addIsPhotometric : `bool`
        If True then add field "photometric".
    addIsResolved : `bool`
        If True then add field "resolved".
    addIsVariable : `bool`
        If True then add field "variable".
    fullPositionInformation : `bool`
        If True then add epoch, proper motion, and parallax, along with the
        full five-dimensional covariance between ra and dec coordinates,
        proper motion in ra and dec, and parallax.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Schema for reference catalog, an
        `lsst.afw.table.SimpleCatalog`.

    Notes
    -----
    The order in which fields are added here determines the field order of
    the persisted catalogs, so new fields should only be appended.
    """
    # Start from the minimal SimpleTable schema (id + coord); everything
    # else below is optional and driven by the keyword arguments.
    schema = lsst.afw.table.SimpleTable.makeMinimalSchema()
    if addCentroid:
        lsst.afw.table.Point2DKey.addFields(
            schema,
            "centroid",
            "centroid on an exposure, if relevant",
            "pixel",
        )
        schema.addField(
            field="hasCentroid",
            type="Flag",
            doc="is position known?",
        )
    # Two separate loops so that all _flux fields come before all
    # _fluxErr fields in the schema.
    for filterName in filterNameList:
        schema.addField(
            field="%s_flux" % (filterName,),
            type=numpy.float64,
            doc="flux in filter %s" % (filterName,),
            units="nJy",
        )
    for filterName in filterNameList:
        schema.addField(
            field="%s_fluxErr" % (filterName,),
            type=numpy.float64,
            doc="flux uncertainty in filter %s" % (filterName,),
            units="nJy",
        )
    if addIsPhotometric:
        schema.addField(
            field="photometric",
            type="Flag",
            doc="set if the object can be used for photometric calibration",
        )
    if addIsResolved:
        schema.addField(
            field="resolved",
            type="Flag",
            doc="set if the object is spatially resolved",
        )
    if addIsVariable:
        schema.addField(
            field="variable",
            type="Flag",
            doc="set if the object has variable brightness",
        )
    # Diagonal (error) terms for the sky coordinates; the off-diagonal
    # covariances are only added under fullPositionInformation below.
    lsst.afw.table.CovarianceMatrix2fKey.addFields(
        schema=schema,
        prefix="coord",
        names=["ra", "dec"],
        units=["rad", "rad"],
        diagonalOnly=True,
    )

    if fullPositionInformation:
        schema.addField(
            field="epoch",
            type=numpy.float64,
            doc="date of observation (TAI, MJD)",
            units="day",
        )
        schema.addField(
            field="pm_ra",
            type="Angle",
            doc="proper motion in the right ascension direction = dra/dt * cos(dec)",
            units="rad/year",
        )
        schema.addField(
            field="pm_dec",
            type="Angle",
            doc="proper motion in the declination direction",
            units="rad/year",
        )
        # Diagonal (error) terms for the proper motion components.
        lsst.afw.table.CovarianceMatrix2fKey.addFields(
            schema=schema,
            prefix="pm",
            names=["ra", "dec"],
            units=["rad/year", "rad/year"],
            diagonalOnly=True,
        )
        schema.addField(
            field="pm_flag",
            type="Flag",
            doc="Set if proper motion or proper motion error is bad",
        )
        schema.addField(
            field="parallax",
            type="Angle",
            doc="parallax",
            units="rad",
        )
        schema.addField(
            field="parallaxErr",
            type="Angle",
            doc="uncertainty in parallax",
            units="rad",
        )
        schema.addField(
            field="parallax_flag",
            type="Flag",
            doc="Set if parallax or parallax error is bad",
        )
        # Add all the off-diagonal covariance terms
        fields = ["coord_ra", "coord_dec", "pm_ra", "pm_dec", "parallax"]
        units = ["rad", "rad", "rad/year", "rad/year", "rad"]
        for field, unit in zip(itertools.combinations(fields, r=2), itertools.combinations(units, r=2)):
            i_field = field[0]
            i_unit = unit[0]
            j_field = field[1]
            j_unit = unit[1]
            # The covariance unit is the product of the two field units:
            # rad^2 with 0, 1, or 2 factors of 1/year depending on how many
            # of the pair are proper-motion terms.
            formatted_unit = "rad^2"
            if ("year" in i_unit) and ("year" in j_unit):
                formatted_unit += "/year^2"
            elif ("year" in i_unit) or ("year" in j_unit):
                formatted_unit += "/year"
            schema.addField(
                field=f"{i_field}_{j_field}_Cov",
                type="F",
                doc=f"Covariance between {i_field} and {j_field}",
                units=formatted_unit
            )
    return schema

218 

219 

class DatasetConfig(pexConfig.Config):
    """Description of the on-disk storage format for the converted reference
    catalog.
    """
    format_version = pexConfig.Field(
        dtype=int,
        doc="Version number of the persisted on-disk storage format."
        "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
        "\nVersion 1 had nJy as flux units."
        "\nVersion 2 had position-related covariances.",
        default=0  # This needs to always be 0, so that unversioned catalogs are interpreted as version 0.
    )
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        doc="Name of this reference catalog; this should match the name used during butler ingest.",
    )
    indexer = IndexerRegistry.makeField(
        # Fixed typo in user-visible doc string: "algoritm" -> "algorithm".
        default='HTM',
        doc='Name of indexer algorithm to use. Default is HTM',
    )

240 

241 

class ConvertReferenceCatalogConfig(pexConfig.Config):
    """Configuration for ConvertReferenceCatalogTask: names of the input
    catalog columns, unit scale factors, and the on-disk dataset description.
    """
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    n_processes = pexConfig.Field(
        dtype=int,
        doc=("Number of python processes to use when ingesting."),
        default=1
    )
    manager = pexConfig.ConfigurableField(
        target=convertRefcatManager.ConvertRefcatManager,
        doc="Multiprocessing manager to perform the actual conversion of values, file-by-file."
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files. Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column (values in decimal degrees)",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column (values in decimal degrees)",
    )
    ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA error column",
        optional=True,
    )
    dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec error column",
        optional=True,
    )
    coord_err_unit = pexConfig.Field(
        dtype=str,
        doc="Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
        optional=True
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information. At least one entry is required."
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    pm_ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA column",
        optional=True,
    )
    pm_dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec column",
        optional=True,
    )
    pm_ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA error column",
        optional=True,
    )
    pm_dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec error column",
        optional=True,
    )
    pm_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
        default=1.0,
    )
    parallax_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax column",
        optional=True,
    )
    parallax_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax error column",
        optional=True,
    )
    parallax_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply parallax values to obtain units of milliarcsec",
        default=1.0,
    )
    full_position_information = pexConfig.Field(
        dtype=bool,
        doc="Include epoch, proper motions, parallax, and covariances between sky coordinates, proper motion,"
            " and parallax in the schema. If true, a custom ``ConvertRefcatManager`` class must exist to"
            " compute the output covariances.",
        default=False
    )
    epoch_name = pexConfig.Field(
        dtype=str,
        doc="Name of epoch column",
        optional=True,
    )
    epoch_format = pexConfig.Field(
        dtype=str,
        doc="Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
        optional=True,
    )
    epoch_scale = pexConfig.Field(
        dtype=str,
        doc="Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
        optional=True,
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def setDefaults(self):
        # Newly ingested reference catalogs always have the latest format_version.
        self.dataset_config.format_version = LATEST_FORMAT_VERSION
        # gen3 refcats are all depth=7
        self.dataset_config.indexer['HTM'].depth = 7

    def validate(self):
        super().validate()

        def assertAllOrNone(*names):
            """Raise ValueError unless all the named fields are set or are
            all none (or blank).
            """
            setNames = [name for name in names if bool(getattr(self, name))]
            if len(setNames) in (len(names), 0):
                return
            prefix = "Both or neither" if len(names) == 2 else "All or none"
            raise ValueError("{} of {} must be set, but only {} are set".format(
                prefix, ", ".join(names), ", ".join(setNames)))

        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError(
                "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
                    sorted(self.mag_err_column_map.keys()), sorted(self.mag_column_list)))
        assertAllOrNone("ra_err_name", "dec_err_name", "coord_err_unit")
        if self.coord_err_unit is not None:
            # parse_strict='silent' returns an UnrecognizedUnit instead of
            # raising, so we can raise a FieldValidationError ourselves.
            result = astropy.units.Unit(self.coord_err_unit, parse_strict='silent')
            if isinstance(result, astropy.units.UnrecognizedUnit):
                msg = f"{self.coord_err_unit} is not a valid astropy unit string."
                raise pexConfig.FieldValidationError(ConvertReferenceCatalogConfig.coord_err_unit, self, msg)

        assertAllOrNone("epoch_name", "epoch_format", "epoch_scale")
        assertAllOrNone("pm_ra_name", "pm_dec_name")
        assertAllOrNone("pm_ra_err_name", "pm_dec_err_name")
        assertAllOrNone("parallax_name", "parallax_err_name")
        if self.pm_ra_err_name and not self.pm_ra_name:
            raise ValueError('"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
        if (self.pm_ra_name or self.parallax_name) and not self.epoch_name:
            raise ValueError(
                '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')

        # Need all the error field names set if we are including covariances.
        if self.full_position_information:
            # Since full_position_information is True, this will only pass for
            # the "All" case.
            assertAllOrNone("full_position_information",
                            "ra_err_name", "dec_err_name", "coord_err_unit",
                            "epoch_name", "epoch_format", "epoch_scale",
                            "pm_ra_name", "pm_dec_name",
                            "pm_ra_err_name", "pm_dec_err_name",
                            "parallax_name", "parallax_err_name"
                            )

437 

438 

class ConvertReferenceCatalogTask(lsst.pipe.base.Task):
    """Class for producing HTM-indexed reference catalogs from external
    catalog data.

    This implements an indexing scheme based on hierarchical triangular
    mesh (HTM). The term index really means breaking the catalog into
    localized chunks called shards. In this case each shard contains
    the entries from the catalog in a single HTM trixel.

    For producing catalogs this task makes the following assumptions
    about the input catalogs:

    - RA, Dec are in decimal degrees.
    - Epoch is available in a column, in a format supported by astropy.time.Time.
    - There are either no off-diagonal covariance terms, or there are all the
      five-dimensional covariance terms (between RA, Dec, proper motion, and
      parallax). In the latter case, a custom ``ConvertRefcatManager`` must
      exist to handle the covariance terms.

    Parameters
    ----------
    output_dir : `str`
        The path to write the output files to, in a subdirectory defined by
        ``DatasetConfig.ref_dataset_name``.

    Raises
    ------
    RuntimeError
        Raised if ``output_dir`` is not supplied.
    """
    # This task writes files directly; parallelism is handled internally by
    # the manager via ``config.n_processes``, not by the task framework.
    canMultiprocess = False
    ConfigClass = ConvertReferenceCatalogConfig
    _DefaultName = 'ConvertReferenceCatalogTask'

    def __init__(self, *, output_dir=None, **kwargs):
        super().__init__(**kwargs)
        if output_dir is None:
            raise RuntimeError("Must specify output_dir.")
        # Top-level output path; shards go in a subdirectory named after the
        # reference dataset, while the ingest helper table goes at the top.
        self.base_dir = output_dir
        self.output_dir = os.path.join(output_dir, self.config.dataset_config.ref_dataset_name)
        # Table consumed by ``butler ingest-files`` after this task completes.
        self.ingest_table_file = os.path.join(self.base_dir, "filename_to_htm.ecsv")
        self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
            self.config.dataset_config.indexer.active)
        self.makeSubtask('file_reader')

    def run(self, inputFiles):
        """Index a set of files comprising a reference catalog.

        Outputs are persisted in the butler repository.

        Parameters
        ----------
        inputFiles : `list`
            A list of file paths to read.
        """
        # Create the output path, if it doesn't exist; fail if the path exists:
        # we don't want to accidentally append to existing files.
        pathlib.Path(self.output_dir).mkdir(exist_ok=False)

        # The first input file determines the dtype of the extra columns.
        schema, key_map = self._writeMasterSchema(inputFiles[0])
        # create an HTM we can interrogate about pixel ids
        htm = lsst.sphgeom.HtmPixelization(self.indexer.htm.get_depth())
        filenames = self._getOutputFilenames(htm)
        # The manager performs the per-file conversion (using
        # ``config.n_processes`` python processes).
        worker = self.config.manager.target(filenames,
                                            self.config,
                                            self.file_reader,
                                            self.indexer,
                                            schema,
                                            key_map,
                                            htm.universe()[0],
                                            addRefCatMetadata,
                                            self.log)
        result = worker.run(inputFiles)

        self._writeConfig()
        self._writeIngestHelperFile(result)

    def _writeIngestHelperFile(self, result):
        """Write the astropy table containing the htm->filename relationship,
        used for the ``butler ingest-files`` command after this task completes.

        Parameters
        ----------
        result : `dict`
            Mapping of HTM pixel id to the filename of the shard written for
            that pixel, as returned by the manager's ``run``.
        """
        dimension = f"htm{self.config.dataset_config.indexer.active.depth}"
        table = astropy.table.Table(names=("filename", dimension), dtype=('str', 'int'))
        for key in result:
            table.add_row((result[key], key))
        table.write(self.ingest_table_file)

    def _writeConfig(self):
        """Write the config that was used to generate the refcat."""
        filename = os.path.join(self.output_dir, "config.py")
        with open(filename, 'w') as file:
            self.config.dataset_config.saveToStream(file)

    def _getOutputFilenames(self, htm):
        """Get filenames from the butler for each output htm pixel.

        Parameters
        ----------
        htm : `lsst.sphgeom.HtmPixelization`
            The HTM pixelization scheme to be used to build filenames.

        Returns
        -------
        filenames : `dict` [`int`, `str`]
            Map of HTM pixel id to the filename to write that pixel's shard
            to.
        """
        filenames = {}
        start, end = htm.universe()[0]
        # Build a "%d.fits"-style template in the output directory, then fill
        # in each pixel id in the pixelization's universe.
        path = os.path.join(self.output_dir, f"{self.indexer.htm}.fits")
        base = os.path.join(os.path.dirname(path), "%d"+os.path.splitext(path)[1])
        for pixelId in range(start, end):
            filenames[pixelId] = base % pixelId

        return filenames

    def makeSchema(self, dtype):
        """Make the schema to use in constructing the persisted catalogs.

        Parameters
        ----------
        dtype : `numpy.dtype`
            Data type describing each entry in ``config.extra_col_names``
            for the catalogs being ingested.

        Returns
        -------
        schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`)
            A tuple containing two items:
            - The schema for the output source catalog.
            - A map of catalog keys to use in filling the record
        """
        # make a schema with the standard fields
        schema = _makeSchema(
            filterNameList=self.config.mag_column_list,
            addCentroid=False,
            addIsPhotometric=bool(self.config.is_photometric_name),
            addIsResolved=bool(self.config.is_resolved_name),
            addIsVariable=bool(self.config.is_variable_name),
            fullPositionInformation=self.config.full_position_information,
        )
        # These fields are not filled from input catalog columns, so keep
        # them out of the key map.
        keysToSkip = set(("id", "centroid_x", "centroid_y", "hasCentroid"))
        key_map = {fieldName: schema[fieldName].asKey() for fieldName in schema.getOrderedNames()
                   if fieldName not in keysToSkip}

        def addField(name):
            # Add one extra column to the schema, preserving its input dtype.
            if dtype[name].kind == 'U':
                # dealing with a string like thing. Need to get type and size.
                at_size = dtype[name].itemsize
                return schema.addField(name, type=str, size=at_size)
            else:
                at_type = dtype[name].type
                return schema.addField(name, at_type)

        for col in self.config.extra_col_names:
            key_map[col] = addField(col)
        return schema, key_map

    def _writeMasterSchema(self, inputfile):
        """Generate and save the master catalog schema.

        Parameters
        ----------
        inputfile : `str`
            An input file to read to get the input dtype.

        Returns
        -------
        schema : `lsst.afw.table.Schema`
            The schema for the output source catalogs.
        key_map : `dict`
            A map of catalog keys to use in filling the record.
        """
        arr = self.file_reader.run(inputfile)
        schema, key_map = self.makeSchema(arr.dtype)

        # Persist an empty catalog: just the schema plus the
        # REFCAT_FORMAT_VERSION metadata, no records.
        catalog = lsst.afw.table.SimpleCatalog(schema)
        addRefCatMetadata(catalog)
        outputfile = os.path.join(self.output_dir, "master_schema.fits")
        catalog.writeFits(outputfile)
        return schema, key_map

    def _reduce_kwargs(self):
        # Need to be able to pickle this class to use the multiprocess manager.
        # __init__ re-derives output_dir and ingest_table_file from base_dir.
        kwargs = super()._reduce_kwargs()
        kwargs['output_dir'] = self.base_dir
        return kwargs

613 

614 

def build_argparser():
    """Construct an argument parser for the ``convertReferenceCatalog`` script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``convertReferenceCatalog``
        command-line interface.
    """
    argparser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.',
    )
    argparser.add_argument(
        "outputDir",
        help="Path to write the output shard files, configs, and `ingest-files` table to.",
    )
    argparser.add_argument(
        "configFile",
        help="File containing the ConvertReferenceCatalogConfig fields.",
    )
    # Use a "+"-list here, so we can produce a more useful error if the user
    # uses an unquoted glob that gets shell expanded.
    argparser.add_argument(
        "fileglob",
        nargs="+",
        help="Quoted glob for the files to be read in and converted."
             " Example (note required quotes to prevent shell expansion):"
             ' "gaia_source/csv/GaiaSource*"',
    )
    return argparser

640 

641 

def run_convert(outputDir, configFile, fileglob):
    """Run `ConvertReferenceCatalogTask` on the input arguments.

    Parameters
    ----------
    outputDir : `str`
        Path to write the output files to.
    configFile : `str`
        File specifying the ``ConvertReferenceCatalogConfig`` fields.
    fileglob : `str`
        Quoted glob for the files to be read in and converted.
    """
    # We have to initialize the logger manually when running from the commandline.
    logging.basicConfig(level=logging.INFO, format="{name} {levelname}: {message}", style="{")

    config = ConvertReferenceCatalogTask.ConfigClass()
    config.load(configFile)
    converter = ConvertReferenceCatalogTask(output_dir=outputDir, config=config)
    files = glob.glob(fileglob)
    converter.run(files)
    # Save the full task config alongside the outputs, for provenance.
    with open(os.path.join(outputDir, "convertReferenceCatalogConfig.py"), "w") as outfile:
        converter.config.saveToStream(outfile)
    # Use the configured dataset name and HTM depth in the printed commands
    # (previously the ingest-files line hardcoded "gaia_dr2" and the
    # register-dataset-type line hardcoded "htm7", which was wrong for any
    # other catalog or depth).
    dimension = f"htm{config.dataset_config.indexer.active.depth}"
    msg = ("Completed refcat conversion.\n\n"
           "Ingest the resulting files with the following commands, substituting the path\n"
           "to your butler repo for `REPO`, and the ticket number you are tracking this\n"
           "ingest on for `DM-NNNNN`:\n"
           f"\n    butler register-dataset-type REPO {config.dataset_config.ref_dataset_name} "
           f"SimpleCatalog {dimension}"
           f"\n    butler ingest-files -t direct REPO {config.dataset_config.ref_dataset_name} "
           "refcats/DM-NNNNN "
           f"{converter.ingest_table_file}"
           "\n    butler collection-chain REPO --mode extend refcats refcats/DM-NNNNN")
    print(msg)

674 

675 

def main():
    """Entry point: parse the command line and run the conversion."""
    parsed = build_argparser().parse_args()
    # Fileglob comes out as a length=1 list when the user quoted it; more
    # than one element means the shell expanded an unquoted glob.
    if len(parsed.fileglob) > 1:
        raise RuntimeError("Final argument must be a quoted file glob, not a shell-expanded list of files.")
    run_convert(parsed.outputDir, parsed.configFile, parsed.fileglob[0])