Coverage for python/lsst/daf/butler/registry/managers.py: 31%
148 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-06 09:33 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-06 09:33 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Names exported by ``from ... import *`` for this module.
__all__ = (
    "RegistryManagerInstances",
    "RegistryManagerTypes",
)
29import dataclasses
30import logging
31from collections.abc import Mapping
32from typing import Any, Generic, Optional, Type, TypeVar
34import sqlalchemy
35from lsst.utils import doImportType
37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl
38from ._config import RegistryConfig
39from .interfaces import (
40 ButlerAttributeManager,
41 CollectionManager,
42 Database,
43 DatasetRecordStorageManager,
44 DatastoreRegistryBridgeManager,
45 DimensionRecordStorageManager,
46 ObsCoreTableManager,
47 OpaqueTableStorageManager,
48 StaticTablesContext,
49 VersionedExtension,
50 VersionTuple,
51)
52from .versions import ButlerVersionsManager
# Type variables used to parametrize `_GenericRegistryManagers`, one for each
# kind of manager it holds.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Module-level logger.
_LOG = logging.getLogger(__name__)

# Key under which the dimensions configuration (serialized as JSON) is stored
# in the attributes table.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Key under which the obscore configuration (serialized as JSON) is stored in
# the attributes table.
_OBSCORE_ATTR = "config:obscore.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic struct bundling the managers (either their types or their
    instances) that back a `Registry`.

    Do not use this class directly; use one of its non-generic subclasses,
    `RegistryManagerInstances` or `RegistryManagerTypes`, instead.
    """

    attributes: _Attributes
    """Manager of flat key-value pairs; versions are stored there too.
    """

    dimensions: _Dimensions
    """Manager responsible for dimensions.
    """

    collections: _Collections
    """Manager responsible for collections.
    """

    datasets: _Datasets
    """Manager responsible for datasets, dataset types, and collection
    summaries.
    """

    opaque: _Opaque
    """Manager of tables that are opaque to the Registry.
    """

    datastores: _Datastores
    """Manager of the interface between `Registry` and `Datastore`.
    """

    obscore: Optional[_ObsCore]
    """Manager of the `ObsCore` table(s); may be `None`.
    """
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """Struct that carries the types (classes) of the manager objects backing
    a `Registry`.
    """

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Configuration options for individual managers, keyed by manager name;
    these are handed to the managers' initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Optional schema versions for individual managers, keyed by manager
    name, as defined by configuration."""

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct of manager types by reading class names from
        configuration and importing them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration with a "managers" section holding fully-qualified
            class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            Newly constructed struct of type objects.

        Raises
        ------
        KeyError
            Raised if a manager's configuration section has no "cls" key, or
            if a manager's configuration entry has an unexpected type.
        ValueError
            Raised if a manager's configuration section has keys other than
            "cls", "config" and "schema_version".
        """
        # Only names that are dataclass fields of this class are looked up.
        # TODO: Maybe we need to check keys for unknown names/typos?
        non_managers = {"manager_configs", "schema_versions"}
        managers = {field.name for field in dataclasses.fields(cls)} - non_managers

        # Content of the "config" sub-key (when present), by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, Optional[Type]] = {}
        for manager in managers:
            manager_config = config["managers"].get(manager)
            if manager_config is None:
                # Not every manager has to be configured.
                continue
            if isinstance(manager_config, str):
                # Short form: the value is the class name itself.
                class_name = manager_config
            elif isinstance(manager_config, Config):
                # Long form: required "cls" plus optional "config" and
                # "schema_version" sub-keys.
                options = manager_config.toDict()
                try:
                    class_name = options.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                mgr_config = options.pop("config", None)
                if mgr_config is not None:
                    configs[manager] = mgr_config
                mgr_version = options.pop("schema_version", None)
                if mgr_version is not None:
                    # Versions coming from config are not checked for
                    # compatibility here; versions read from the registry
                    # may override them later.
                    schema_versions[manager] = VersionTuple.fromString(mgr_version)
                if options:
                    # Anything still left after popping the known keys.
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(options)}"
                    )
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore is the only manager allowed to be None and its field has no
        # default value, so it has to be passed explicitly when it was not
        # configured.
        manager_types.setdefault("obscore", None)
        return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create every piece of persistent `Registry` state needed by a new,
        empty data repository and return the manager instances for it.

        Parameters
        ----------
        database : `Database`
            Object representing a connection to the SQL database that will
            back the data repository. It must point to an empty namespace,
            or at least to one that has no tables or other entities whose
            names might clash with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration defining a `DimensionUniverse`; it is written into
            the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct with instances of the types held by ``self``, pointing to
            the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses integer dataset IDs, or if
            the dimension configuration cannot be serialized to JSON.
        """
        # Schema versions given in configuration must be compatible with the
        # manager classes they are attached to.
        manager_classes = self.as_dict()
        for manager_type, schema_version in self.schema_versions.items():
            manager_classes[manager_type].checkNewSchemaVersion(schema_version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Record the managers and their versions in the attributes table.
        ButlerVersionsManager(instances.attributes).storeManagersConfig(instances.as_dict())

        # The universe configuration is saved in attributes as JSON (faster
        # than YAML).
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)

        if instances.obscore is not None:
            instances.attributes.set(_OBSCORE_ATTR, instances.obscore.config_json())
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Build manager instances for a data repository that already exists.

        Parameters
        ----------
        database : `Database`
            Object representing a connection to the SQL database that backs
            the data repository. It must point to a namespace that already
            holds all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct with instances of the types held by ``self``, pointing to
            the repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration is missing from the
            attributes table.

        Notes
        -----
        This method updates the ``schema_versions`` and ``manager_configs``
        dictionaries of ``self`` in place with values read from the database.
        """
        # The attributes manager is created on its own first because it is
        # needed to read the embedded dimensions configuration. Its version
        # is not checked before initialization; that manager is supposed to
        # be completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Configured classes have to be compatible with what is stored in the
        # registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Take schema versions from the registry, then validate them.
        self.schema_versions.update(versions.managerVersions())
        for manager_type, manager_class in self.as_dict().items():
            schema_version = self.schema_versions.get(manager_type)
            if schema_version is None:
                continue
            manager_class.checkCompatibility(schema_version, database.isWriteable())

        # Dimensions configuration is kept in the database as a JSON string.
        dimensions_json = attributes.get(_DIMENSIONS_ATTR)
        if dimensions_json is None:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        universe = DimensionUniverse(DimensionConfig(Config.fromString(dimensions_json, format="json")))

        if self.obscore is not None:
            # ObsCore configuration is read from the attributes table and
            # silently replaces whatever the config file provided. The idea
            # is that the whole thing should not be carried around, and
            # butler config will have empty obscore configuration after
            # initialization. If the attributes table has no such
            # configuration then the obscore table does not exist and the
            # obscore manager is not instantiated.
            obscore_json = attributes.get(_OBSCORE_ATTR)
            if obscore_json is not None:
                self.manager_configs["obscore"] = Config.fromString(obscore_json, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Pull in the database content that is kept in memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return the contained managers as a dictionary keyed by manager
        type name.

        Returns
        -------
        extensions : `Mapping` [`str`, `VersionedExtension`]
            Mapping from manager type name (e.g. "datasets") to the matching
            manager class. Managers that are `None` are left out.
        """
        result: dict[str, type[VersionedExtension]] = {}
        for field in dataclasses.fields(self):
            # These two fields hold per-manager options, not managers.
            if field.name in ("manager_configs", "schema_versions"):
                continue
            manager_class = getattr(self, field.name)
            if manager_class is not None:
                result[field.name] = manager_class
        return result
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct that carries the manager instances backing a `Registry`."""

    column_types: ColumnTypeInfo
    """Description of the column types that may vary between data
    repositories and registry instances; includes the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build manager instances given their types and an existing database
        connection.

        Parameters
        ----------
        database : `Database`
            Object representing a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used for creating tables in ``database``.
        types : `RegistryManagerTypes`
            Struct with the type objects of the manager instances to build.
        universe : `DimensionUniverse`
            Object describing all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct holding the manager instances.
        """
        dummy_table = ddl.TableSpec(fields=())
        versions = types.schema_versions

        # Managers are initialized in a fixed order because later ones take
        # earlier ones as arguments.
        attributes_manager = types.attributes.initialize(
            database, context, registry_schema_version=versions.get("attributes")
        )
        dimensions_manager = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=versions.get("dimensions")
        )
        collections_manager = types.collections.initialize(
            database,
            context,
            dimensions=dimensions_manager,
            registry_schema_version=versions.get("collections"),
        )
        datasets_manager = types.datasets.initialize(
            database,
            context,
            collections=collections_manager,
            dimensions=dimensions_manager,
            registry_schema_version=versions.get("datasets"),
        )
        opaque_manager = types.opaque.initialize(
            database, context, registry_schema_version=versions.get("opaque")
        )
        # The datastores manager is given the datasets manager *type*, not
        # the instance created above.
        datastores_manager = types.datastores.initialize(
            database,
            context,
            opaque=opaque_manager,
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=versions.get("datastores"),
        )

        # An obscore manager is built only if both its type and its
        # configuration are available.
        obscore_manager = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore_manager = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions_manager,
                registry_schema_version=versions.get("obscore"),
            )

        column_types = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
            ingest_date_dtype=datasets_manager.ingest_date_dtype(),
        )
        return cls(
            attributes=attributes_manager,
            dimensions=dimensions_manager,
            collections=collections_manager,
            datasets=datasets_manager,
            opaque=opaque_manager,
            datastores=datastores_manager,
            obscore=obscore_manager,
            column_types=column_types,
        )

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return the contained managers as a dictionary keyed by manager
        type name.

        Returns
        -------
        extensions : `Mapping` [`str`, `VersionedExtension`]
            Mapping from manager type name (e.g. "datasets") to the matching
            manager instance. Managers that are `None` are left out.
        """
        result: dict[str, VersionedExtension] = {}
        for field in dataclasses.fields(self):
            # ``column_types`` is not a manager.
            if field.name == "column_types":
                continue
            manager = getattr(self, field.name)
            if manager is not None:
                result[field.name] = manager
        return result

    def refresh(self) -> None:
        """Bring all in-memory state up to date, either by querying the
        database or by clearing caches."""
        self.dimensions.clearCaches()
        self.collections.refresh()
        self.datasets.refresh()
455 self.datasets.refresh()