Coverage report for tests/nopytest_ingestIndexReferenceCatalog.py: 98% of 124 statements covered.
(Generated by coverage.py v7.1.0 at 2023-02-05 18:11 -0800.)

# This file is part of meas_algorithms.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# This file is excluded from running through pytest due to concerns about the
# interaction between multiprocessing as invoked by this code, and the process
# pool used by pytest.
#
# Note that it is invoked independently by SCons, so the tests are still run
# as part of the build.

import os.path
import tempfile
import unittest
import unittest.mock

import numpy as np

# Explicit imports for submodules referenced below (lsst.afw.table.SimpleCatalog,
# lsst.log.Log, lsst.sphgeom.HtmPixelization, lsst.utils.tests.TestCase); the
# original relied on these being importable as side effects of other imports —
# importing them directly makes the dependency explicit.
import lsst.afw.table
import lsst.daf.butler
import lsst.log
import lsst.sphgeom
import lsst.utils
import lsst.utils.tests
from lsst.daf.butler import DatasetType, DeferredDatasetHandle
from lsst.daf.butler.script import ingest_files
from lsst.meas.algorithms import (ConvertReferenceCatalogTask, ReferenceObjectLoader,
                                  LoadIndexedReferenceObjectsConfig)
from lsst.meas.algorithms.htmIndexer import HtmIndexer
from lsst.meas.algorithms.ingestIndexReferenceTask import addRefCatMetadata
from lsst.meas.algorithms.convertRefcatManager import ConvertRefcatManager
from lsst.meas.algorithms.readTextCatalogTask import ReadTextCatalogTask

import ingestIndexTestBase

48 

49 

class TestConvertReferenceCatalogParallel(ingestIndexTestBase.ConvertReferenceCatalogTestBase,
                                          lsst.utils.tests.TestCase):
    """Test converting a refcat with multiprocessing turned on."""
    def testIngestTwoFilesTwoCores(self):
        """Convert two input catalogs on two cores, ingest the result into a
        butler repo, and check that every input row can be loaded back.

        Runs once with ra/dec errors in the schema and once without.
        """
        def runTest(withRaDecErr):
            # Generate a second catalog, with different ids
            inPath1 = tempfile.mkdtemp()
            skyCatalogFile1, _, skyCatalog1 = self.makeSkyCatalog(inPath1, idStart=25, seed=123)
            inPath2 = tempfile.mkdtemp()
            skyCatalogFile2, _, skyCatalog2 = self.makeSkyCatalog(inPath2, idStart=5432, seed=11)
            # override some field names, and use multiple cores
            config = ingestIndexTestBase.makeConvertConfig(withRaDecErr=withRaDecErr, withMagErr=True,
                                                           withPm=True, withPmErr=True)
            # use a very small HTM pixelization depth to ensure there will be collisions when
            # ingesting the files in parallel
            depth = 2
            config.dataset_config.indexer.active.depth = depth
            # np.savetxt prepends '# ' to the header lines, so use a reader that understands that
            config.file_reader.format = 'ascii.commented_header'
            config.n_processes = 2  # use multiple cores for this test only
            config.id_name = 'id'  # Use the ids from the generated catalogs
            repoPath = os.path.join(self.outPath, "output_multifile_parallel",
                                    "_withRaDecErr" if withRaDecErr else "_noRaDecErr")

            # Convert the input data files to our HTM indexed format.
            dataPath = tempfile.mkdtemp()
            converter = ConvertReferenceCatalogTask(output_dir=dataPath, config=config)
            converter.run([skyCatalogFile1, skyCatalogFile2])

            # Make a temporary butler to ingest them into.
            butler = self.makeTemporaryRepo(repoPath, config.dataset_config.indexer.active.depth)
            dimensions = [f"htm{depth}"]
            datasetType = DatasetType(config.dataset_config.ref_dataset_name,
                                      dimensions,
                                      "SimpleCatalog",
                                      universe=butler.registry.dimensions,
                                      isCalibration=False)
            butler.registry.registerDatasetType(datasetType)

            # Ingest the files into the new butler.
            run = "testingRun"
            htmTableFile = os.path.join(dataPath, "filename_to_htm.ecsv")
            ingest_files(repoPath,
                         config.dataset_config.ref_dataset_name,
                         run,
                         htmTableFile,
                         transfer="auto")

            # Test if we can get back the catalogs, with a new butler.
            butler = lsst.daf.butler.Butler(repoPath)
            datasetRefs = list(butler.registry.queryDatasets(config.dataset_config.ref_dataset_name,
                                                             collections=[run]).expanded())
            handlers = []
            for dataRef in datasetRefs:
                handlers.append(DeferredDatasetHandle(butler=butler, ref=dataRef, parameters=None))
            loaderConfig = LoadIndexedReferenceObjectsConfig()
            # ReferenceObjectLoader is not a Task, so needs a log object
            # (otherwise it logs to `root`); only show WARN logs because each
            # loadRegion (called once per source) in the check below will log
            # twice to INFO.
            log = lsst.log.Log.getLogger('ReferenceObjectLoader')
            log.setLevel(lsst.log.WARN)
            loader = ReferenceObjectLoader([dataRef.dataId for dataRef in datasetRefs],
                                           handlers,
                                           loaderConfig,
                                           log=log)
            self.checkAllRowsInRefcat(loader, skyCatalog1, config)
            self.checkAllRowsInRefcat(loader, skyCatalog2, config)

        runTest(withRaDecErr=True)
        runTest(withRaDecErr=False)

121 

122 

class TestConvertRefcatManager(ingestIndexTestBase.ConvertReferenceCatalogTestBase,
                               lsst.utils.tests.TestCase):
    """Unittests of various methods of ConvertRefcatManager.

    Uses mocks to force particular behavior regarding e.g. catalogs.
    """
    def setUp(self):
        np.random.seed(10)

        tempPath = tempfile.mkdtemp()
        self.log = lsst.log.Log.getLogger("TestIngestIndexManager")
        self.config = ingestIndexTestBase.makeConvertConfig(withRaDecErr=True)
        self.config.id_name = 'id'
        self.depth = 2  # very small depth, for as few pixels as possible.
        self.indexer = HtmIndexer(self.depth)
        self.htm = lsst.sphgeom.HtmPixelization(self.depth)
        ingester = ConvertReferenceCatalogTask(output_dir=tempPath, config=self.config)
        dtype = [('id', '<f8'), ('ra', '<f8'), ('dec', '<f8'), ('ra_err', '<f8'), ('dec_err', '<f8'),
                 ('a', '<f8'), ('a_err', '<f8')]
        self.schema, self.key_map = ingester.makeSchema(dtype)
        self.fileReader = ReadTextCatalogTask()

        # 5 fake sources, assigned to 3 distinct HTM pixels below.
        self.fakeInput = self.makeSkyCatalog(outPath=None, size=5, idStart=6543)
        self.matchedPixels = np.array([1, 1, 2, 2, 3])
        self.path = tempfile.mkdtemp()
        self.filenames = {x: os.path.join(self.path, "%d.fits" % x) for x in set(self.matchedPixels)}

        self.worker = ConvertRefcatManager(self.filenames,
                                           self.config,
                                           self.fileReader,
                                           self.indexer,
                                           self.schema,
                                           self.key_map,
                                           self.htm.universe()[0],
                                           addRefCatMetadata,
                                           self.log)

    def _createFakeCatalog(self, nOld=5, nNew=0, idStart=42):
        """Create a fake output SimpleCatalog, populated with nOld+nNew elements.

        Parameters
        ----------
        nOld : `int`, optional
            The number of filled in sources to put in the catalog.
        nNew : `int`, optional
            The number of empty sources to put in the catalog.
        idStart : `int`, optional
            The start id of the ``nOld`` sources.

        Returns
        -------
        catalog : `lsst.afw.table.SimpleCatalog`
            A catalog populated with random data and contiguous ids.
        """
        catalog = lsst.afw.table.SimpleCatalog(self.schema)
        catalog.resize(nOld)
        for x in self.schema:
            catalog[x.key] = np.random.random(nOld)
        # do the ids separately, so there are no duplicates
        catalog['id'] = np.arange(idStart, idStart + nOld)
        catalog.resize(nOld + nNew)  # make space for the elements we will add
        # deep copy so the returned catalog is contiguous in memory
        return catalog.copy(deep=True)

    def test_doOnePixelNewData(self):
        """Test that we can add new data to an existing catalog."""
        pixelId = 1  # the pixel we are going to test

        nOld = 5
        nNew = sum(self.matchedPixels == pixelId)
        catalog = self._createFakeCatalog(nOld=nOld, nNew=nNew)
        self.worker.getCatalog = unittest.mock.Mock(self.worker.getCatalog, return_value=catalog)

        self.worker._doOnePixel(self.fakeInput, self.matchedPixels, pixelId, {}, {})
        newcat = lsst.afw.table.SimpleCatalog.readFits(self.filenames[pixelId])

        # check that the "pre" catalog is unchanged, exactly
        np.testing.assert_equal(newcat[:nOld]['id'], catalog[:nOld]['id'])
        self.assertFloatsEqual(newcat[:nOld]['coord_ra'], catalog[:nOld]['coord_ra'])
        self.assertFloatsEqual(newcat[:nOld]['coord_dec'], catalog[:nOld]['coord_dec'])

        # check that the new catalog elements are set correctly
        newElements = self.fakeInput[self.matchedPixels == pixelId]
        np.testing.assert_equal(newcat[nOld:]['id'], newElements['id'])
        # input coordinates are degrees; coord_ra/coord_dec are stored in radians
        self.assertFloatsAlmostEqual(newcat[nOld:]['coord_ra'], newElements['ra_icrs']*np.pi/180)
        self.assertFloatsAlmostEqual(newcat[nOld:]['coord_dec'], newElements['dec_icrs']*np.pi/180)

    def test_doOnePixelNoData(self):
        """Test that we can put new data into an empty catalog."""
        pixelId = 2

        nOld = 0
        nNew = sum(self.matchedPixels == pixelId)
        catalog = self._createFakeCatalog(nOld=nOld, nNew=nNew)
        self.worker.getCatalog = unittest.mock.Mock(self.worker.getCatalog, return_value=catalog)

        self.worker._doOnePixel(self.fakeInput, self.matchedPixels, pixelId, {}, {})
        newcat = lsst.afw.table.SimpleCatalog.readFits(self.filenames[pixelId])

        # check that the new catalog elements are set correctly
        newElements = self.fakeInput[self.matchedPixels == pixelId]
        np.testing.assert_equal(newcat['id'], newElements['id'])
        # input coordinates are degrees; coord_ra/coord_dec are stored in radians
        self.assertFloatsAlmostEqual(newcat['coord_ra'], newElements['ra_icrs']*np.pi/180)
        self.assertFloatsAlmostEqual(newcat['coord_dec'], newElements['dec_icrs']*np.pi/180)

    def test_getCatalog(self):
        """Test that getCatalog returns a properly expanded new catalog."""
        pixelId = 3
        nOld = 10
        nNewElements = 5
        # save a catalog to disk that we can check against the getCatalog()'s return
        catalog = self._createFakeCatalog(nOld=nOld, nNew=0)
        catalog.writeFits(self.filenames[pixelId])
        newcat = self.worker.getCatalog(pixelId, self.schema, nNewElements)

        self.assertEqual(len(newcat), nOld + nNewElements)

        # the on-disk rows must come back unchanged, followed by the new space
        np.testing.assert_equal(newcat[:len(catalog)]['id'], catalog['id'])
        self.assertFloatsEqual(newcat[:len(catalog)]['coord_ra'], catalog['coord_ra'])
        self.assertFloatsEqual(newcat[:len(catalog)]['coord_dec'], catalog['coord_dec'])

242 

243 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Check for resource/memory leaks (standard LSST test boilerplate)."""
    pass

246 

247 

def setup_module(module):
    """Initialize the LSST test framework before this module's tests run."""
    lsst.utils.tests.init()

250 

251 

if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()