Coverage for python/lsst/daf/butler/registry/managers.py: 33%
157 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 10:53 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 10:53 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from .. import ddl
32__all__ = (
33 "RegistryManagerInstances",
34 "RegistryManagerTypes",
35)
37import dataclasses
38import logging
39from collections.abc import Mapping
40from typing import Any, Generic, TypeVar
42import sqlalchemy
43from lsst.utils import doImportType
45from .._column_type_info import ColumnTypeInfo
46from .._config import Config
47from ..dimensions import DimensionConfig, DimensionUniverse
48from ._caching_context import CachingContext
49from ._config import RegistryConfig
50from .interfaces import (
51 ButlerAttributeManager,
52 CollectionManager,
53 Database,
54 DatasetRecordStorageManager,
55 DatastoreRegistryBridgeManager,
56 DimensionRecordStorageManager,
57 ObsCoreTableManager,
58 OpaqueTableStorageManager,
59 StaticTablesContext,
60 VersionedExtension,
61 VersionTuple,
62)
63from .versions import ButlerVersionsManager
# Type variables, one per manager slot, used to parametrize
# `_GenericRegistryManagers` with either manager types or manager instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


_LOG = logging.getLogger(__name__)

# Key for dimensions configuration in attributes table.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Key for obscore configuration in attributes table.
_OBSCORE_ATTR = "config:obscore.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Base struct used to pass around the manager instances or types that back
    a `Registry`.

    This class should only be used via its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions.
    """

    dimensions: _Dimensions
    """Manager for dimensions.
    """

    collections: _Collections
    """Manager for collections.
    """

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables.
    """

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`.
    """

    obscore: _ObsCore | None
    """Manager for `ObsCore` table(s); the only manager slot that may be
    `None`.
    """
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        type[ButlerAttributeManager],
        type[DimensionRecordStorageManager],
        type[CollectionManager],
        type[DatasetRecordStorageManager],
        type[OpaqueTableStorageManager],
        type[DatastoreRegistryBridgeManager],
        type[ObsCoreTableManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Per-manager schema versions defined by configuration, optional."""

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager configuration section has no "cls" key, or if
            a manager configuration is neither a `Config`, a `str`, nor
            `None`.
        ValueError
            Raised if a manager configuration section has keys other than
            "cls", "config" and "schema_version".
        """
        # We only check for manager names defined in class attributes.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers = {field.name for field in dataclasses.fields(cls)} - {"manager_configs", "schema_versions"}
        managers_section = config["managers"]
        # Values of "config" sub-key, if any, indexed by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, type] = {}
        for manager in managers:
            manager_config = managers_section.get(manager)
            if isinstance(manager_config, Config):
                # Expect "cls" and optional "config" and "schema_version"
                # sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    class_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                if (mgr_config := manager_config_dict.pop("config", None)) is not None:
                    configs[manager] = mgr_config
                if (mgr_version := manager_config_dict.pop("schema_version", None)) is not None:
                    # Note that we do not check versions that come from config
                    # for compatibility, they may be overridden later by
                    # versions from registry.
                    schema_versions[manager] = VersionTuple.fromString(mgr_version)
                # Anything left over after popping the known keys is a
                # configuration error.
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            elif isinstance(manager_config, str):
                class_name = manager_config
            elif manager_config is None:
                # Some managers may be optional.
                continue
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore need special care because it's the only manager which can be
        # None, and we cannot define default value for it.
        if "obscore" in manager_types:
            return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)
        else:
            return cls(
                **manager_types, obscore=None, manager_configs=configs, schema_versions=schema_versions
            )

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses `sqlalchemy.BigInteger`
            dataset IDs, or if the dimension configuration cannot be
            serialized to JSON.
        """
        # If schema versions were specified in the config, check that they are
        # compatible with their managers.
        managers = self.as_dict()
        for manager_type, schema_version in self.schema_versions.items():
            manager_class = managers[manager_type]
            manager_class.checkNewSchemaVersion(schema_version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() is sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Store managers and their versions in attributes table.
        versions = ButlerVersionsManager(instances.attributes)
        versions.storeManagersConfig(instances.as_dict())

        # Dump universe config as json into attributes (faster than YAML).
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)
        if instances.obscore is not None:
            obscore_json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, obscore_json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.

        Notes
        -----
        This method updates ``self.schema_versions`` in place with the
        versions stored in the registry and, when an obscore configuration is
        stored in the attributes table, replaces
        ``self.manager_configs["obscore"]`` with it.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration. Note that we do not check this
        # manager version before initializing it, it is supposed to be
        # completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Verify that configured classes are compatible with the ones stored
        # in registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Read schema versions from registry and validate them.
        self.schema_versions.update(versions.managerVersions())
        for manager_type, manager_class in self.as_dict().items():
            schema_version = self.schema_versions.get(manager_type)
            if schema_version is not None:
                manager_class.checkCompatibility(schema_version, database.isWriteable())

        # Get dimensions configuration serialized as a string from database.
        dimensionsString = attributes.get(_DIMENSIONS_ATTR)
        if dimensionsString is None:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        universe = DimensionUniverse(dimensionConfig)
        if self.obscore is not None:
            # Get ObsCore configuration from attributes table, this silently
            # overrides whatever may come from config file. Idea is that we do
            # not want to carry around the whole thing, and butler config will
            # have empty obscore configuration after initialization. When
            # configuration is missing from attributes table, the obscore table
            # does not exist, and we do not instantiate obscore manager.
            obscoreString = attributes.get(_OBSCORE_ATTR)
            if obscoreString is not None:
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager class. Only existing managers are returned.
        """
        # These two fields hold per-manager options, not managers.
        extras = {"manager_configs", "schema_versions"}
        managers = {f.name: getattr(self, f.name) for f in dataclasses.fields(self) if f.name not in extras}
        # Drop manager slots that are not set (value is `None`).
        return {key: value for key, value in managers.items() if value is not None}
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    caching_context: CachingContext
    """Object containing caches for various information generated by
    managers.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
        caching_context: CachingContext | None = None,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.
        caching_context : `CachingContext` or `None`, optional
            Caching context passed to the collections and datasets managers
            and stored on the returned struct.  If `None` (default) a new
            `CachingContext` is created.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        if caching_context is None:
            caching_context = CachingContext()
        # Empty table specification, used only as a target for the foreign key
        # helpers below so that they return the specs of the fields they add.
        dummy_table = ddl.TableSpec(fields=())
        kwargs: dict[str, Any] = {}
        schema_versions = types.schema_versions
        kwargs["attributes"] = types.attributes.initialize(
            database, context, registry_schema_version=schema_versions.get("attributes")
        )
        kwargs["dimensions"] = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=schema_versions.get("dimensions")
        )
        kwargs["collections"] = types.collections.initialize(
            database,
            context,
            dimensions=kwargs["dimensions"],
            caching_context=caching_context,
            registry_schema_version=schema_versions.get("collections"),
        )
        datasets = types.datasets.initialize(
            database,
            context,
            collections=kwargs["collections"],
            dimensions=kwargs["dimensions"],
            registry_schema_version=schema_versions.get("datasets"),
            caching_context=caching_context,
        )
        kwargs["datasets"] = datasets
        kwargs["opaque"] = types.opaque.initialize(
            database, context, registry_schema_version=schema_versions.get("opaque")
        )
        # Note that the datasets manager *type*, not the instance, is passed
        # to the datastores (and obscore) managers.
        kwargs["datastores"] = types.datastores.initialize(
            database,
            context,
            opaque=kwargs["opaque"],
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=schema_versions.get("datastores"),
        )
        # The obscore manager is only instantiated when both its type and its
        # configuration are available.
        if types.obscore is not None and "obscore" in types.manager_configs:
            kwargs["obscore"] = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=kwargs["dimensions"],
                registry_schema_version=schema_versions.get("obscore"),
            )
        else:
            kwargs["obscore"] = None
        kwargs["column_types"] = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
            ingest_date_dtype=datasets.ingest_date_dtype(),
        )
        kwargs["caching_context"] = caching_context
        return cls(**kwargs)

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager instance. Only existing managers are returned.
        """
        # These two fields are not managers and are excluded from the result.
        instances = {
            f.name: getattr(self, f.name)
            for f in dataclasses.fields(self)
            if f.name not in ("column_types", "caching_context")
        }
        # Drop manager slots that are not set (value is `None`).
        return {key: value for key, value in instances.items() if value is not None}

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database or clearing
        caches.
        """
        self.dimensions.clearCaches()
        self.collections.refresh()
        self.datasets.refresh()