Coverage for python/lsst/meas/algorithms/convertReferenceCatalog.py: 65%

62 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-20 02:33 -0700

1# This file is part of meas_algorithms. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22""" 

23Convert an external reference catalog into the hierarchical triangular mesh 

24(HTM) sharded LSST-style format, to be ingested into the butler. 

25""" 

26 

# Names exported by a star-import of this module. Only the task class is
# listed; the command-line helper functions defined in this file are not.
__all__ = ["ConvertReferenceCatalogTask"]

28 

29import argparse 

30import glob 

31import os 

32import pathlib 

33import logging 

34 

35import astropy 

36 

37from . import ConvertReferenceCatalogBase 

38 

39 

class ConvertReferenceCatalogTask(ConvertReferenceCatalogBase):
    """Class for producing HTM-indexed reference catalogs from external
    catalog data.

    Parameters
    ----------
    output_dir : `str`
        The path to write the output files to, in a subdirectory defined by
        ``DatasetConfig.ref_dataset_name``.
    **kwargs
        Additional keyword arguments, forwarded unchanged to the base class
        constructor.

    Raises
    ------
    RuntimeError
        Raised if ``output_dir`` is not specified.
    """
    _DefaultName = 'ConvertReferenceCatalogTask'

    def __init__(self, *, output_dir=None, **kwargs):
        super().__init__(**kwargs)
        if output_dir is None:
            raise RuntimeError("Must specify output_dir.")
        # Keep the directory exactly as supplied: it is handed back to the
        # constructor by `_reduce_kwargs`, and the ingest table is written
        # directly into it rather than into the per-dataset subdirectory.
        self.base_dir = output_dir
        self.output_dir = os.path.join(output_dir, self.config.dataset_config.ref_dataset_name)
        self.ingest_table_file = os.path.join(self.base_dir, "filename_to_htm.ecsv")

    def _preRun(self):
        """Create the per-dataset output directory.

        Raises
        ------
        FileExistsError
            Raised if the output directory already exists.
        """
        # Create the output path, if it doesn't exist; fail if the path exists:
        # we don't want to accidentally append to existing files.
        pathlib.Path(self.output_dir).mkdir(exist_ok=False)

    def _postRun(self, result):
        """Write the astropy table containing the htm->filename relationship.

        Parameters
        ----------
        result : mapping
            Iterated by key; each key is stored in the ``htmN`` column and
            ``result[key]`` in the ``filename`` column. Presumably this maps
            HTM pixel index to output file path; confirm against the base
            class.
        """
        # A bare ``import astropy`` does not load the ``astropy.table``
        # subpackage, so import it explicitly here instead of relying on some
        # other module having imported it first.
        import astropy.table

        dimension = f"htm{self.config.dataset_config.indexer.active.depth}"
        table = astropy.table.Table(names=("filename", dimension), dtype=('str', 'int'))
        for key in result:
            table.add_row((result[key], key))
        table.write(self.ingest_table_file)

    def _persistConfig(self):
        """Save the dataset configuration to ``config.py`` in the output
        directory.
        """
        filename = os.path.join(self.output_dir, "config.py")
        with open(filename, 'w') as file:
            self.config.dataset_config.saveToStream(file)

    def _getOnePixelFilename(self, start):
        """Return the path of one file inside the output directory.

        Parameters
        ----------
        start : `object`
            Not used by this implementation.

        Returns
        -------
        filename : `str`
            ``<output_dir>/<self.indexer.htm>.fits``. ``self.indexer`` is not
            set in this class; presumably the base class provides it.
        """
        return os.path.join(self.output_dir, f"{self.indexer.htm}.fits")

    def _writeMasterSchema(self, catalog):
        """Write ``catalog`` to ``master_schema.fits`` in the output directory.

        Parameters
        ----------
        catalog : `object`
            Object providing a ``writeFits(filename)`` method.
        """
        filename = os.path.join(self.output_dir, "master_schema.fits")
        catalog.writeFits(filename)

    def _reduce_kwargs(self):
        """Return the constructor keyword arguments, including ``output_dir``.

        Returns
        -------
        kwargs : `dict`
            The base class's keyword arguments plus ``output_dir``.
        """
        # Need to be able to pickle this class to use the multiprocess manager.
        kwargs = super()._reduce_kwargs()
        kwargs['output_dir'] = self.base_dir
        return kwargs

90 

91 

def build_argparser():
    """Create the command-line parser for the ``convertReferenceCatalog``
    script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        Parser with three positional arguments: ``outputDir``,
        ``configFile`` and ``fileglob`` (one or more values).
    """
    output_help = "Path to write the output shard files, configs, and `ingest-files` table to."
    config_help = "File containing the ConvertReferenceCatalogConfig fields."
    glob_help = ("Quoted glob for the files to be read in and converted."
                 " Example (note required quotes to prevent shell expansion):"
                 ' "gaia_source/csv/GaiaSource*"')

    argparser = argparse.ArgumentParser(
        epilog='More information is available at https://pipelines.lsst.io.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    argparser.add_argument("outputDir", help=output_help)
    argparser.add_argument("configFile", help=config_help)
    # Accept one-or-more values so that a glob the shell has already expanded
    # still parses; the caller can then report a more useful error.
    argparser.add_argument("fileglob", nargs="+", help=glob_help)
    return argparser

117 

118 

def run_convert(outputDir, configFile, fileglob):
    """Run `ConvertReferenceCatalogTask` on the input arguments.

    Parameters
    ----------
    outputDir : `str`
        Path to write the output files to.
    configFile : `str`
        File specifying the ``ConvertReferenceCatalogConfig`` fields.
    fileglob : `str`
        Quoted glob for the files to be read in and converted.

    Raises
    ------
    RuntimeError
        Raised if ``fileglob`` does not match any files.
    """
    # We have to initialize the logger manually when running from the commandline.
    logging.basicConfig(level=logging.INFO, format="{name} {levelname}: {message}", style="{")

    # Expand the glob first: it is cheap, and a pattern that matches nothing
    # should fail before any output is written, not produce an empty
    # conversion that is then reported as a success.
    files = glob.glob(fileglob)
    if not files:
        raise RuntimeError(f"No files found matching glob: {fileglob!r}")

    config = ConvertReferenceCatalogTask.ConfigClass()
    config.load(configFile)
    config.validate()
    converter = ConvertReferenceCatalogTask(output_dir=outputDir, config=config)
    converter.run(files)
    with open(os.path.join(outputDir, "convertReferenceCatalogConfig.py"), "w") as outfile:
        converter.config.saveToStream(outfile)

    # Build the suggested butler commands from the configuration that was
    # actually used, so the dataset type name and HTM dimension match the
    # files that were just written.
    ref_dataset_name = config.dataset_config.ref_dataset_name
    dimension = f"htm{config.dataset_config.indexer.active.depth}"
    msg = ("Completed refcat conversion.\n\n"
           "Ingest the resulting files with the following commands, substituting the path\n"
           "to your butler repo for `REPO`, and the ticket number you are tracking this\n"
           "ingest on for `DM-NNNNN`:\n"
           f"\n butler register-dataset-type REPO {ref_dataset_name} "
           f"SimpleCatalog {dimension}"
           f"\n butler ingest-files -t direct REPO {ref_dataset_name} refcats/DM-NNNNN "
           f"{converter.ingest_table_file}"
           "\n butler collection-chain REPO --mode extend refcats refcats/DM-NNNNN")
    print(msg)

152 

153 

def main():
    """Parse the command line and run the reference catalog conversion.

    Raises
    ------
    RuntimeError
        Raised if more than one ``fileglob`` value was supplied.
    """
    parsed = build_argparser().parse_args()
    # ``fileglob`` is always parsed as a list; more than one entry means the
    # glob was not passed through as a single quoted argument.
    patterns = parsed.fileglob
    if len(patterns) > 1:
        raise RuntimeError("Final argument must be a quoted file glob, not a shell-expanded list of files.")
    run_convert(parsed.outputDir, parsed.configFile, patterns[0])