Coverage for python/lsst/daf/butler/registry/managers.py: 34%

135 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-30 02:19 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "RegistryManagerInstances", 

26 "RegistryManagerTypes", 

27) 

28 

29import dataclasses 

30import logging 

31import warnings 

32from collections.abc import Mapping 

33from typing import Any, Dict, Generic, Optional, Type, TypeVar 

34 

35import sqlalchemy 

36from lsst.utils import doImportType 

37 

38from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl 

39from ._config import RegistryConfig 

40from .interfaces import ( 

41 ButlerAttributeManager, 

42 CollectionManager, 

43 Database, 

44 DatasetRecordStorageManager, 

45 DatastoreRegistryBridgeManager, 

46 DimensionRecordStorageManager, 

47 ObsCoreTableManager, 

48 OpaqueTableStorageManager, 

49 StaticTablesContext, 

50) 

51from .versions import ButlerVersionsManager 

52 

# One type variable per manager slot of `_GenericRegistryManagers`; the two
# concrete subclasses bind them either to manager classes or to manager
# instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Module-level logger named after this module.
_LOG = logging.getLogger(__name__)

# Attributes-table key under which the dimensions configuration (JSON) is
# stored.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Attributes-table key under which the obscore configuration (JSON) is stored.
_OBSCORE_ATTR = "config:obscore.json"

69 

70 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic struct bundling the managers (instances or types) that back a
    `Registry`.

    Do not use this class directly; use its non-generic specializations,
    `RegistryManagerInstances` and `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

    obscore: Optional[_ObsCore]
    """Manager for `ObsCore` table(s); the only slot that may be `None`."""

109 

110 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """Struct holding the manager classes (not instances) that back a
    `Registry`.
    """

    manager_configs: Dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods,
    indexed by manager name.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct by importing the manager classes named in a
        registry configuration.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section.  Each manager
            entry is either a fully-qualified class name or a
            sub-configuration with a "cls" key and an optional "config" key.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager sub-configuration has no "cls" key, or if a
            manager entry is neither a string nor a `Config`.
        ValueError
            Raised if a manager sub-configuration has keys other than "cls"
            and "config".
        """
        # Only names that are fields of this struct are looked up.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers = {field.name for field in dataclasses.fields(cls) if field.name != "manager_configs"}
        # Values of the "config" sub-key, if any, indexed by manager name.
        per_manager_options: Dict[str, Mapping] = {}
        imported: Dict[str, Optional[Type]] = {}
        for manager in managers:
            manager_config = config["managers"].get(manager)
            if manager_config is None:
                # Some managers may be optional.
                continue
            if isinstance(manager_config, str):
                # A bare string is the class name itself.
                type_name = manager_config
            elif isinstance(manager_config, Config):
                # Expect "cls" and optional "config" sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    type_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                options = manager_config_dict.pop("config", None)
                if options is not None:
                    per_manager_options[manager] = options
                # Anything left over after removing the two known keys is an
                # error.
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            imported[manager] = doImportType(type_name)

        # obscore is the only manager that may be absent, and its field has no
        # default value, so it has to be passed explicitly as None.
        imported.setdefault("obscore", None)
        return cls(**imported, manager_configs=per_manager_options)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository and return the manager instances that point to it.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` could not be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # Record the managers and their versions in the attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # Dump universe config as JSON into attributes (faster than YAML).
        serialized_dimensions = dimensionConfig.dump(format="json")
        if serialized_dimensions is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, serialized_dimensions)
        if instances.obscore is not None:
            instances.attributes.set(_OBSCORE_ATTR, instances.obscore.config_json())
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration, or the obscore
            configuration when an obscore manager type is set, is missing
            from the attributes table.

        Notes
        -----
        When an obscore manager type is set, the "obscore" entry of
        ``self.manager_configs`` is replaced in place by the configuration
        read from the database.
        """
        # Create the attributes manager on its own first, so it can be used to
        # load the embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            attribute_versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # Verify that configured versions are compatible with the schema.
            attribute_versions.checkManagersConfig()
            attribute_versions.checkManagersVersions(database.isWriteable())
            # The dimensions configuration is stored as a JSON string.
            dimensions_json = attributes.get(_DIMENSIONS_ATTR)
            if dimensions_json is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            universe = DimensionUniverse(DimensionConfig(Config.fromString(dimensions_json, format="json")))
            if self.obscore is not None:
                # Get ObsCore configuration from attributes table, this
                # silently overrides whatever may come from config file. Idea
                # is that we do not want to carry around the whole thing, and
                # butler config will have empty obscore configuration after
                # initialization.
                obscore_json = attributes.get(_OBSCORE_ATTR)
                if obscore_json is None:
                    raise LookupError(f"Registry attribute {_OBSCORE_ATTR} is missing from database")
                self.manager_configs["obscore"] = Config.fromString(obscore_json, format="json")
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            all_versions = instances.getVersions()
            # Verify that configured versions are compatible with the schema.
            all_versions.checkManagersConfig()
            all_versions.checkManagersVersions(database.isWriteable())
            # Load content from database that we try to keep in-memory.
            instances.refresh()
        return instances

280 

281 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.

        Raises
        ------
        KeyError
            Raised if ``types.obscore`` is set but ``types.manager_configs``
            has no "obscore" entry.
        """
        # A table spec with no fields, used only so the foreign-key helpers
        # can report the column specs they would add.
        dummy_table = ddl.TableSpec(fields=())
        kwargs: Dict[str, Any] = {}
        kwargs["column_types"] = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
        )
        # Managers are created in dependency order: later ones receive the
        # instances created before them.
        kwargs["attributes"] = types.attributes.initialize(database, context)
        kwargs["dimensions"] = types.dimensions.initialize(database, context, universe=universe)
        kwargs["collections"] = types.collections.initialize(
            database,
            context,
            dimensions=kwargs["dimensions"],
        )
        kwargs["datasets"] = types.datasets.initialize(
            database, context, collections=kwargs["collections"], dimensions=kwargs["dimensions"]
        )
        kwargs["opaque"] = types.opaque.initialize(database, context)
        kwargs["datastores"] = types.datastores.initialize(
            database,
            context,
            opaque=kwargs["opaque"],
            datasets=types.datasets,
            universe=universe,
        )
        if types.obscore is not None:
            # The "config" sub-key is optional when manager types are read
            # from configuration, so the entry may be absent here; report that
            # explicitly instead of failing with a bare KeyError('obscore').
            try:
                obscore_config = types.manager_configs["obscore"]
            except KeyError:
                raise KeyError(
                    "An 'obscore' manager type is set but no configuration is available for it; "
                    "provide a 'config' sub-key in the 'obscore' manager configuration"
                ) from None
            kwargs["obscore"] = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=obscore_config,
                datasets=types.datasets,
                dimensions=kwargs["dimensions"],
            )
        else:
            kwargs["obscore"] = None
        return cls(**kwargs)

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.  It is given every field of this
            struct except ``column_types``, keyed by field name; the
            ``obscore`` entry may be `None`.
        """
        # Can't use dataclasses.asdict here, because it tries to do some
        # deepcopy stuff (?!) in order to find dataclasses recursively, and
        # that doesn't work on some manager objects that definitely aren't
        # supposed to be deep-copied anyway.
        managers = {
            field.name: getattr(self, field.name)
            for field in dataclasses.fields(self)
            if field.name != "column_types"
        }
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        Clears the dimensions manager caches, then refreshes the dimensions,
        collections and datasets managers, in that order.
        """
        self.dimensions.clearCaches()
        self.dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()