Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 40% (141 statements)


# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

import pydantic
from lsst.daf.butler import (
    DatasetComponent,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDataCoordinate,
    SerializedDatasetRef,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.
    """
    return name.startswith(_NAME_PREFIX)
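
# A minimal round-trip sketch of the naming helpers above (the storage class
# name is only an illustration):
#
#     get_mock_name("StructuredDataDict")            # -> "_mock_StructuredDataDict"
#     get_original_name("_mock_StructuredDataDict")  # -> "StructuredDataDict"
#     is_mock_name("_mock_StructuredDataDict")       # -> True
#     is_mock_name("StructuredDataDict")             # -> False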

# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    ref: SerializedDatasetRef
    """Reference used to read and write this dataset.

    This is a `SerializedDatasetRef` instead of a "real" one for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataset``.

    The downside of this is that we end up effectively reimplementing a few
    fairly trivial DatasetType/DatasetRef methods that override storage
    classes and extract components (in `MockStorageClass` and
    `MockStorageClassDelegate`).
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def dataset_type(self) -> SerializedDatasetType:
        return cast(SerializedDatasetType, self.ref.datasetType)

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new `MockDataset` that represents applying some storage
        class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `SerializedDatasetType` to override in the result.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.__fields__
        }
        derived_dataset_type = self.dataset_type.copy(update=dataset_type_updates)
        derived_ref = self.ref.copy(update=dict(datasetType=derived_dataset_type))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case caller wants to override that
        # directly, but this is expected to be rare enough that it's not worth
        # it to try to optimize out the work above to make derived_ref.
        kwargs.setdefault("ref", derived_ref)
        return self.copy(update=kwargs)
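
    # Illustration only (hypothetical names): deriving a component dataset
    # from an existing MockDataset `mock` whose dataset type is
    # "_mock_calexp" might look like:
    #
    #     derived = mock.make_derived(
    #         name="_mock_calexp.wcs",    # popped into SerializedDatasetType
    #         storageClass="_mock_Wcs",   # popped into SerializedDatasetType
    #         parent=mock,                # remains a MockDataset field
    #     )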


class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset."""

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: SerializedDataCoordinate
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum."""


MockDataset.update_forward_refs()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly just because it's
    not needed right now.  That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )
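
    # Illustrative sketch (hypothetical variable names, not executed here):
    # extracting the "wcs" component of a mock of an exposure-like composite
    # yields a derived MockDataset, not a real WCS object:
    #
    #     delegate = MockStorageClassDelegate(mock_exposure_sc)
    #     wcs_mock = delegate.getComponent(composite_mock, "wcs")
    #     assert wcs_mock.dataset_type.name.endswith(".wcs")
    #     assert wcs_mock.parent is composite_mock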

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
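
    # Sketch (illustrative names): reading with parameters does not subset
    # anything; the request is merely recorded so tests can assert on it:
    #
    #     derived = delegate.handleParameters(mock, {"bbox": bbox})
    #     assert derived.parameters == {"bbox": repr(bbox)}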


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions.  The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since
            # they all have the same pytype: we use the original storage class
            # being mocked to see if we can convert, then just make a new
            # MockDataset that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
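
    # Typical use (sketch; "StructuredDataDict" is just one example of a real
    # daf_butler storage class name):
    #
    #     mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     assert mock_sc.name == "_mock_StructuredDataDict"
    #     assert mock_sc.pytype is MockDataset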

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
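
    # Conversion sketch (illustrative names): coercion never transforms data;
    # it returns a derived MockDataset whose `converted_from` field records
    # the chain of conversions:
    #
    #     converted = mock_sc.coerce_type(some_mock)
    #     assert converted.storage_class == mock_sc.name
    #     assert converted.converted_from is some_mock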


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and
    FormatterFactory classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
    """

    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class
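
    # Net effect (sketch): once this module has been imported, any mock name
    # resolves in any StorageClassFactory, registering the mock on first use:
    #
    #     factory = StorageClassFactory()
    #     sc = factory.getStorageClass("_mock_StructuredDataDict")
    #     assert isinstance(sc, MockStorageClass)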

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks.  The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
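
# Note: importing this module is the entire setup step; test code just needs
# `import lsst.pipe.base.tests.mocks` (or anything that imports it) before
# using mocked storage classes with a butler.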