Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 41%

144 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 11:14 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Explicit public API of this module; everything else is an implementation
# detail of the mocking machinery.
__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

33 

34from collections.abc import Callable, Iterable, Mapping 

35from typing import Any, cast 

36 

37try: 

38 import pydantic.v1 as pydantic 

39except ModuleNotFoundError: 

40 import pydantic # type: ignore 

41from lsst.daf.butler import ( 

42 DatasetComponent, 

43 Formatter, 

44 FormatterFactory, 

45 LookupKey, 

46 SerializedDataCoordinate, 

47 SerializedDatasetRef, 

48 SerializedDatasetType, 

49 StorageClass, 

50 StorageClassDelegate, 

51 StorageClassFactory, 

52) 

53from lsst.daf.butler.formatters.json import JsonFormatter 

54from lsst.utils.introspection import get_full_type_name 

55 

# Prefix shared by the names of all mock storage classes, dataset types, and
# task labels.
_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Construct the mock counterpart's name for the given original storage
    class, dataset type, or task label name.
    """
    return f"{_NAME_PREFIX}{original}"

64 

65 

def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        Mock name, as produced by `get_mock_name`.

    Returns
    -------
    original : `str`
        ``mock`` with the mock prefix removed.

    Raises
    ------
    ValueError
        Raised if ``mock`` does not start with the mock prefix.
    """
    # An ``assert`` here would be stripped under ``python -O`` and the
    # function would then silently return the input unchanged; raise an
    # explicit exception instead.
    if not mock.startswith(_NAME_PREFIX):
        raise ValueError(f"{mock!r} is not a mock name (expected prefix {_NAME_PREFIX!r}).")
    return mock.removeprefix(_NAME_PREFIX)

72 

73 

def is_mock_name(name: str) -> bool:
    """Report whether ``name`` follows the naming convention used for mock
    storage classes, dataset types, and task labels.
    """
    return name[: len(_NAME_PREFIX)] == _NAME_PREFIX

79 

80 

81# Tests for this module are in the ci_middleware package, where we have easy 

82# access to complex real storage classes (and their pytypes) to test against. 

83 

84 

class MockDataset(pydantic.BaseModel):
    """In-memory Python type backing every `MockStorageClass`."""

    ref: SerializedDatasetRef
    """Reference used to read and write this dataset.

    A `~lsst.daf.butler.SerializedDatasetRef` is stored rather than a "real"
    one for two reasons:

    - a mock dataset may have to be read from disk in a context where no
      `~lsst.daf.butler.DimensionUniverse` is available;
    - it avoids the complexity of a separate ``SerializedMockDataset``.

    The cost is that a few fairly trivial DatasetType/DatasetRef operations
    (overriding storage classes, extracting components) end up effectively
    reimplemented here and in `MockStorageClass` and
    `MockStorageClassDelegate`.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset.
    """

    output_connection_name: str | None = None
    """Name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """The `MockDataset` this one was produced from via a storage class
    conversion, if any.
    """

    parent: MockDataset | None = None
    """The composite `MockDataset` this one was extracted from as a
    component, if any.
    """

    parameters: dict[str, str] | None = None
    """`repr` of each parameter applied when reading this dataset."""

    @property
    def dataset_type(self) -> SerializedDatasetType:
        # The cast narrows the Optional field; mock refs are expected to
        # always carry a full dataset type — TODO confirm at creation sites.
        return cast(SerializedDatasetType, self.ref.datasetType)

    @property
    def storage_class(self) -> str:
        # Same narrowing as ``dataset_type``: mock dataset types are expected
        # to always name their storage class.
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new `MockDataset` representing the result of applying a
        storage class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
        """
        # Split off the keyword arguments that belong to the dataset type.
        type_updates = {}
        for field_name in list(kwargs):
            if field_name in SerializedDatasetType.__fields__:
                type_updates[field_name] = kwargs.pop(field_name)
        new_dataset_type = self.dataset_type.copy(update=type_updates)
        new_ref = self.ref.copy(update={"datasetType": new_dataset_type})
        # The fields below describe how a dataset was read rather than the
        # intrinsic on-disk thing, so they must not propagate to the derived
        # dataset.
        for transient_field in ("converted_from", "parent", "parameters"):
            kwargs.setdefault(transient_field, None)
        # setdefault lets a caller override the ref directly; that is rare
        # enough that we don't bother skipping the work above when it happens.
        kwargs.setdefault("ref", new_ref)
        return self.copy(update=kwargs)

157 

158 

class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset."""

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: SerializedDataCoordinate
    """Data ID for the quantum."""

    # NOTE(review): the mapping keys look like input connection names, with
    # one list entry per input dataset — confirm against the producing code.
    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum."""

170 

171 

# Resolve the forward references in MockDataset's annotations (it refers to
# itself and to MockDatasetQuantum, which is defined after it).
MockDataset.update_forward_refs()

173 

174 

class MockStorageClassDelegate(StorageClassDelegate):
    """StorageClassDelegate implementation for mock datasets.

    Assembly and disassembly are left unimplemented only because nothing
    needs them yet; they could be supported later with extra tracking
    attributes on `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        # Extraction is modeled as a derived dataset whose type is the
        # component's and whose ``parent`` points back at the composite.
        component_storage_class = self.storageClass.allComponents()[componentName]
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=component_storage_class.name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if parameters:
            # Record the applied parameters by ``repr`` so the result stays
            # trivially serializable.
            return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
        return inMemoryDataset

213 

214 

class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        """Construct a mock counterpart of ``original``.

        Parameters
        ----------
        original : `~lsst.daf.butler.StorageClass`
            The real storage class being mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Factory used to look up and register mocks of ``original``'s
            component storage classes; a new (singleton) instance is created
            if not provided.
        """
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            # All mock storage classes share the same pytype; the class being
            # mocked is tracked separately via ``self.original`` below.
            pytype=MockDataset,
            # (Derived) components are themselves mocks of the original's
            # (derived) components, registered on demand.
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            # Already registered; reuse the existing instance.
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  The cast narrows the return type: every
        # component of a mock storage class is itself a mock (see __init__).
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  Same narrowing as ``allComponents``.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  Same narrowing as ``allComponents``.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            # Mocks can only be converted from other mocks.
            return False
        # Delegate the real compatibility check to the mocked originals.
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            # Already the requested storage class; nothing to do.
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        # Record the conversion by chaining a new MockDataset to the input.
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)

320 

321 

def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before or
    after that import.
    """
    # Keep a reference to the original so the patched version can delegate to
    # it first and only handle the mock-name fallback.
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            # Unknown name: if it follows the mock convention, build and
            # register the mock storage class on the fly.
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    # Remove the local name so only the patched attribute holds the function.
    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # Same mock-name fallback as getFormatterClassWithMatch, but
                # returning a formatter instance rather than its class.
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match

384 

385 

# Import-time side effect: patch daf_butler so mock storage classes and
# formatters are recognized everywhere (see _monkeypatch_daf_butler).
_monkeypatch_daf_butler()