Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 43%

141 statements  

coverage.py v7.2.7, created at 2023-07-23 08:14 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

from lsst.daf.butler import (
    DatasetComponent,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDataCoordinate,
    SerializedDatasetRef,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler._compat import _BaseModelCompat
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.
    """
    return name.startswith(_NAME_PREFIX)
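

# Illustrative round trip for the helpers above ("StructuredDataDict" is just
# an example name, not anything this module depends on):
#
#     get_mock_name("StructuredDataDict")            -> "_mock_StructuredDataDict"
#     get_original_name("_mock_StructuredDataDict")  -> "StructuredDataDict"
#     is_mock_name("_mock_StructuredDataDict")       -> True
#     is_mock_name("StructuredDataDict")             -> False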

# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(_BaseModelCompat):
    """The in-memory dataset type used by `MockStorageClass`."""

    ref: SerializedDatasetRef
    """Reference used to read and write this dataset.

    This is a `~lsst.daf.butler.SerializedDatasetRef` instead of a "real" one
    for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataset``.

    The downside of this is that we end up effectively reimplementing a few
    fairly trivial DatasetType/DatasetRef methods that override storage classes
    and extract components (in `MockStorageClass` and
    `MockStorageClassDelegate`).
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def dataset_type(self) -> SerializedDatasetType:
        return cast(SerializedDatasetType, self.ref.datasetType)

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage class
        operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields  # type: ignore
        }
        derived_dataset_type = self.dataset_type.copy(update=dataset_type_updates)
        derived_ref = self.ref.copy(update=dict(datasetType=derived_dataset_type))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case caller wants to override that
        # directly, but this is expected to be rare enough that it's not worth
        # it to try to optimize out the work above to make derived_ref.
        kwargs.setdefault("ref", derived_ref)
        return self.copy(update=kwargs)
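
    # For example (hypothetical field values), a storage class override that
    # keeps conversion provenance might look like:
    #
    #     derived = original.make_derived(
    #         storageClass="_mock_ArrowTable",
    #         converted_from=original,
    #     )
    #
    # Here ``storageClass`` is routed into the nested SerializedDatasetType,
    # while ``converted_from`` is a MockDataset field; the other derived-only
    # fields (``parent``, ``parameters``) default back to None.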


class MockDatasetQuantum(_BaseModelCompat):
    """Description of the quantum that produced a mock dataset."""

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: SerializedDataCoordinate
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum."""


MockDataset.update_forward_refs()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly just because it's
    not needed right now. That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
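
    # Illustration (hypothetical component name): reading the "summary"
    # component of a mock composite never touches real pixel/table data;
    #
    #     component = delegate.getComponent(composite, "summary")
    #
    # just derives a new MockDataset whose dataset type name is
    # "<composite name>.summary" and whose ``parent`` is ``composite``.
    # Parameters are likewise only recorded (as reprs) on the derived
    # dataset, never actually applied.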


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore
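
    # Note that component storage classes are mocked recursively: if the
    # original class has a component "image" with storage class "ImageX"
    # (a hypothetical name), the mock's "image" component gets storage class
    # "_mock_ImageX", registered on demand via get_or_register_mock below.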

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
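
    # Typical usage, as a sketch ("StructuredDataDict" is an example of a
    # storage class name assumed to already be registered):
    #
    #     mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     assert mock_sc.name == "_mock_StructuredDataDict"
    #     assert mock_sc.pytype is MockDataset
    #
    # Repeated calls with the same name return the already-registered instance
    # from the StorageClassFactory singleton rather than constructing a new one.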

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
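
    # Sketch of a conversion (hypothetical names): if "_mock_Y" can convert
    # from "_mock_X" because the original Y can convert from X, then
    #
    #     converted = mock_y.coerce_type(dataset_x)
    #
    # returns a new MockDataset whose storage class is "_mock_Y" and whose
    # ``converted_from`` field points back at ``dataset_x``, recording the
    # conversion for later test assertions.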


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
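
# A quick sanity check of the patched behavior (hypothetical name; requires a
# daf_butler environment where the original storage class is registered):
#
#     factory = StorageClassFactory()
#     sc = factory.getStorageClass("_mock_StructuredDataDict")
#     assert isinstance(sc, MockStorageClass)
#
# Unknown "_mock_*" names are registered on first lookup instead of raising
# KeyError, and mock dataset types are read and written with JsonFormatter.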