Coverage for python/lsst/daf/butler/registry/managers.py: 34%

133 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-15 10:02 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "RegistryManagerInstances", 

26 "RegistryManagerTypes", 

27) 

28 

29import dataclasses 

30import logging 

31from collections.abc import Mapping 

32from typing import Any, Dict, Generic, Optional, Type, TypeVar 

33 

34import sqlalchemy 

35from lsst.utils import doImportType 

36 

37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl 

38from ._config import RegistryConfig 

39from .interfaces import ( 

40 ButlerAttributeManager, 

41 CollectionManager, 

42 Database, 

43 DatasetRecordStorageManager, 

44 DatastoreRegistryBridgeManager, 

45 DimensionRecordStorageManager, 

46 ObsCoreTableManager, 

47 OpaqueTableStorageManager, 

48 StaticTablesContext, 

49) 

50from .versions import ButlerVersionsManager 

51 

# Type variables, one per manager slot of the generic struct defined below.
# Each slot is parametrized independently so that the same field layout can be
# specialized for manager classes or for manager instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Attributes-table key under which the dimensions configuration (JSON) is
# stored.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Attributes-table key under which the obscore configuration (JSON) is stored.
_OBSCORE_ATTR = "config:obscore.json"

68 

69 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic base struct holding one entry per manager that backs a
    `Registry`.

    The entries are either manager types or manager instances, depending on
    how the type parameters are bound.  Use the concrete subclasses
    `RegistryManagerTypes` and `RegistryManagerInstances` rather than this
    class directly.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions.
    """

    dimensions: _Dimensions
    """Manager for dimensions.
    """

    collections: _Collections
    """Manager for collections.
    """

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`.
    """

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`.
    """

    obscore: Optional[_ObsCore]
    """Manager for `ObsCore` table(s); may be `None`.
    """

108 

109 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """Struct holding the classes (not instances) of the managers that back a
    `Registry`.
    """

    manager_configs: Dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct of manager types by importing the classes named in
        configuration.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section.  Each entry is
            either a fully-qualified class name or a sub-configuration with a
            required "cls" key and an optional "config" key.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a sub-configuration has no "cls" key, or if an entry is
            neither a string, a `Config`, nor `None`.
        ValueError
            Raised if a sub-configuration has keys other than "cls" and
            "config".
        """
        # Only names declared as fields of this class are looked up; unknown
        # keys in the "managers" section are not checked.
        # TODO: Maybe we need to check keys for unknown names/typos?
        manager_names = {f.name for f in dataclasses.fields(cls)}
        manager_names.discard("manager_configs")

        managers_section = config["managers"]
        # Values of the "config" sub-key, if any, indexed by manager name.
        extra_configs: Dict[str, Mapping] = {}
        imported: Dict[str, Optional[Type]] = {}
        for name in manager_names:
            entry = managers_section.get(name)
            if entry is None:
                # Some managers may be optional.
                continue
            if isinstance(entry, str):
                class_name = entry
            elif isinstance(entry, Config):
                # Expect "cls" and optional "config" sub-keys.
                options = entry.toDict()
                try:
                    class_name = options.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {name!r} manager configuration") from None
                nested = options.pop("config", None)
                if nested is not None:
                    extra_configs[name] = nested
                if options:
                    raise ValueError(
                        f"{name!r} manager configuration has unexpected keys: {set(options)}"
                    )
            else:
                raise KeyError(f"Unexpected type of {name!r} manager configuration: {entry!r}")
            imported[name] = doImportType(class_name)

        # obscore is the only field that may be None and it has no default
        # value, so it has to be filled in explicitly when not configured.
        imported.setdefault("obscore", None)
        return cls(**imported, manager_configs=extra_configs)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository and return the manager instances that point to it.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager reports `sqlalchemy.BigInteger`
            dataset IDs, or if the dimension configuration cannot be
            serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()

        # Record the managers and their versions in the attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()

        # Store the universe configuration as JSON (faster than YAML).
        serialized = dimensionConfig.dump(format="json")
        if serialized is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, serialized)

        obscore_manager = instances.obscore
        if obscore_manager is not None:
            instances.attributes.set(_OBSCORE_ATTR, obscore_manager.config_json())
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.

        Notes
        -----
        When an obscore manager type is set and an obscore configuration is
        found in the attributes table, that configuration is written into
        ``self.manager_configs["obscore"]``, replacing any existing value.
        """
        # The attributes manager is created on its own first, because it is
        # needed to read the embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # Verify that configured versions are compatible with schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # The dimensions configuration is stored as a JSON string.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        universe = DimensionUniverse(dimensionConfig)

        if self.obscore is not None:
            # ObsCore configuration comes from the attributes table and
            # silently overrides whatever may come from the config file.  The
            # idea is that we do not want to carry around the whole thing, and
            # butler config will have empty obscore configuration after
            # initialization.  When configuration is missing from the
            # attributes table, the obscore table does not exist, and we do
            # not instantiate the obscore manager.
            obscoreString = attributes.get(_OBSCORE_ATTR)
            if obscoreString is not None:
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # Verify that configured versions are compatible with schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Load content from database that we try to keep in-memory.
            instances.refresh()
        return instances

278 

279 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct holding the manager instances that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Instantiate every manager from its type using an existing database
        connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.  Its ``obscore`` entry is
            `None` unless ``types.obscore`` is set and ``types.manager_configs``
            has an "obscore" entry.
        """
        # An empty table spec, used only as the target of the foreign-key
        # helper calls that produce the dataset ID and run key field specs.
        placeholder = ddl.TableSpec(fields=())
        timespan_repr = database.getTimespanRepresentation()
        dataset_id_spec = types.datasets.addDatasetForeignKey(
            placeholder,
            primaryKey=False,
            nullable=False,
        )
        run_key_spec = types.collections.addRunForeignKey(placeholder, primaryKey=False, nullable=False)
        column_types = ColumnTypeInfo(
            timespan_repr,
            universe,
            dataset_id_spec=dataset_id_spec,
            run_key_spec=run_key_spec,
        )

        # Managers are created in this order because later ones take earlier
        # ones as arguments.
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(database, context, dimensions=dimensions)
        datasets = types.datasets.initialize(
            database, context, collections=collections, dimensions=dimensions
        )
        opaque = types.opaque.initialize(database, context)
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
        )

        obscore = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions,
            )

        return cls(
            column_types=column_types,
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            obscore=obscore,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # dataclasses.asdict is avoided here: it recurses with deep copies to
        # find nested dataclasses, which does not work on some manager objects
        # (and they are not supposed to be deep-copied anyway).
        managers = {
            field.name: getattr(self, field.name)
            for field in dataclasses.fields(self)
            if field.name != "column_types"
        }
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        self.dimensions.clearCaches()
        for manager in (self.dimensions, self.collections, self.datasets):
            manager.refresh()

397 self.datasets.refresh()