Coverage for python/lsst/daf/butler/registry/managers.py: 34%
133 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-15 10:02 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-15 10:02 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "RegistryManagerInstances",
26 "RegistryManagerTypes",
27)
29import dataclasses
30import logging
31from collections.abc import Mapping
32from typing import Any, Dict, Generic, Optional, Type, TypeVar
34import sqlalchemy
35from lsst.utils import doImportType
37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl
38from ._config import RegistryConfig
39from .interfaces import (
40 ButlerAttributeManager,
41 CollectionManager,
42 Database,
43 DatasetRecordStorageManager,
44 DatastoreRegistryBridgeManager,
45 DimensionRecordStorageManager,
46 ObsCoreTableManager,
47 OpaqueTableStorageManager,
48 StaticTablesContext,
49)
50from .versions import ButlerVersionsManager
# Type variables used to parameterize `_GenericRegistryManagers`, one for each
# of its manager fields.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Module-level logger.
_LOG = logging.getLogger(__name__)

# Attributes-table key under which the dimensions configuration is stored.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Attributes-table key under which the obscore configuration is stored.
_OBSCORE_ATTR = "config:obscore.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic struct with one slot per manager that backs a `Registry`.

    The type parameters decide whether the slots hold manager instances or
    manager types.  Do not use this class directly; use one of its
    non-generic subclasses, `RegistryManagerInstances` or
    `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

    obscore: Optional[_ObsCore]
    """Manager for `ObsCore` table(s); may be `None`."""
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """Struct holding the *types* of the manager objects that back a
    `Registry`.
    """

    manager_configs: Dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options, keyed by manager name, passed to
    the managers' initialize methods.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct of manager types by importing the classes named in
        a registry configuration.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section.  Each entry is
            either a fully-qualified class name, or a nested configuration
            with a required "cls" key (the class name) and an optional
            "config" key (options for that manager).  Managers without an
            entry are skipped.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing the imported type objects; ``obscore``
            is `None` if it was not configured.

        Raises
        ------
        KeyError
            Raised if a nested entry has no "cls" key, or if an entry is
            neither a string nor a `Config`.
        ValueError
            Raised if a nested entry has keys other than "cls" and "config".
        """
        # Only manager names that are fields of this class are looked up.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers = {field.name for field in dataclasses.fields(cls)} - {"manager_configs"}
        manager_types: Dict[str, Type] = {}
        # Values of the optional "config" sub-key, indexed by manager name.
        configs: Dict[str, Mapping] = {}
        for manager in managers:
            manager_config = config["managers"].get(manager)
            if manager_config is None:
                # Nothing configured for this name; some managers may be
                # optional.
                continue
            if isinstance(manager_config, str):
                # Short form: the entry is the class name itself.
                class_name = manager_config
            elif isinstance(manager_config, Config):
                # Long form: "cls" plus an optional "config" sub-key.
                manager_config_dict = manager_config.toDict()
                if "cls" not in manager_config_dict:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration")
                class_name = manager_config_dict.pop("cls")
                mgr_config = manager_config_dict.pop("config", None)
                if mgr_config is not None:
                    configs[manager] = mgr_config
                # Whatever is left after removing the known keys is an error.
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore is the only manager that can be None, and the field has no
        # default value, so it is always passed explicitly.
        obscore = manager_types.pop("obscore", None)
        return cls(**manager_types, obscore=obscore, manager_configs=configs)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create the persistent `Registry` state for a new, empty data
        repository and return the manager instances that use it.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the dataset manager type reports an integer
            (`sqlalchemy.BigInteger`) ID column, or if the dimension
            configuration cannot be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            id_column_type = self.datasets.getIdColumnType()
            if id_column_type == sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # Record the managers and their versions in the attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # Save the universe configuration as JSON in the attributes table
        # (faster than YAML).
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)
        # Save the obscore configuration too, when an obscore manager exists.
        if instances.obscore is not None:
            obscore_json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, obscore_json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances for an existing data repository.

        When an obscore manager type is configured and an obscore
        configuration is stored in the repository, the stored configuration
        replaces the "obscore" entry of ``self.manager_configs``.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration is missing from the
            attributes table.
        """
        # First pass: set up only the attributes manager, which is needed to
        # read the dimensions configuration embedded in the repository.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # Verify that configured versions are compatible with the schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # The dimensions configuration is stored as a JSON string.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
            if self.obscore is not None:
                # The ObsCore configuration stored in the attributes table
                # silently overrides whatever came from the config file: we
                # do not want to carry the whole thing around, and the butler
                # config has an empty obscore configuration after
                # initialization.  If nothing is stored, the obscore table
                # does not exist and no obscore manager is instantiated.
                obscoreString = attributes.get(_OBSCORE_ATTR)
                if obscoreString is not None:
                    self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")
        # Second pass: set up the full set of managers.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # Verify that configured versions are compatible with the schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Load the content from the database that we keep in memory.
            instances.refresh()
        return instances
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct holding the manager *instances* that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Instantiate every manager from its type, using an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances; ``obscore`` is `None` unless
            both an obscore manager type and an "obscore" entry in
            ``types.manager_configs`` are present.
        """
        # The key specs are obtained by letting the manager types add their
        # foreign key fields to a throwaway table spec.
        dummy_table = ddl.TableSpec(fields=())
        timespan_representation = database.getTimespanRepresentation()
        dataset_id_spec = types.datasets.addDatasetForeignKey(
            dummy_table,
            primaryKey=False,
            nullable=False,
        )
        run_key_spec = types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False)
        column_types = ColumnTypeInfo(
            timespan_representation,
            universe,
            dataset_id_spec=dataset_id_spec,
            run_key_spec=run_key_spec,
        )

        # Managers are created in dependency order: later ones receive the
        # instances created before them.
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(database, context, dimensions=dimensions)
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        opaque = types.opaque.initialize(database, context)
        # Note that ``datasets`` here is the dataset manager *type*
        # (``types.datasets``), not the instance created above.
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
        )

        # The obscore manager exists only when both its type and its
        # configuration are available.
        obscore: Optional[ObsCoreTableManager] = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions,
            )

        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            obscore=obscore,
            column_types=column_types,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # dataclasses.asdict is not usable here: it tries to deep-copy the
        # field values while looking for nested dataclasses, which does not
        # work on some manager objects (and they should not be deep-copied
        # anyway).  Collect the fields by hand instead, leaving out
        # ``column_types``.
        managers: Dict[str, Any] = {}
        for field in dataclasses.fields(self):
            if field.name == "column_types":
                continue
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Reload all in-memory state from the database."""
        dimensions = self.dimensions
        dimensions.clearCaches()
        dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()