Coverage for python/lsst/daf/butler/registry/managers.py: 33%
173 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-15 02:03 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-15 02:03 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from .. import ddl
# Public names re-exported by this module.
__all__ = ("RegistryManagerInstances", "RegistryManagerTypes")
37import dataclasses
38import logging
39from collections.abc import Iterator, Mapping
40from contextlib import contextmanager
41from typing import Any, Generic, TypeVar
43import sqlalchemy
44from lsst.utils import doImportType
46from .._column_type_info import ColumnTypeInfo
47from .._config import Config
48from ..dimensions import DimensionConfig, DimensionUniverse
49from ._caching_context import CachingContext
50from ._config import RegistryConfig
51from .interfaces import (
52 ButlerAttributeManager,
53 CollectionManager,
54 Database,
55 DatasetRecordStorageManager,
56 DatastoreRegistryBridgeManager,
57 DimensionRecordStorageManager,
58 ObsCoreTableManager,
59 OpaqueTableStorageManager,
60 StaticTablesContext,
61 VersionedExtension,
62 VersionTuple,
63)
64from .versions import ButlerVersionsManager
# Type variables, one per manager slot of the generic manager struct defined
# in this module.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Module-level logger.
_LOG = logging.getLogger(__name__)

# Attributes-table key under which the dimensions configuration is stored.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Attributes-table key under which the obscore configuration is stored.
_OBSCORE_ATTR = "config:obscore.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic base struct bundling the managers that back a `Registry`.

    Each type parameter fixes what is stored in the field of the matching
    name, which lets one layout hold either manager types or manager
    instances.

    Do not use this class directly; go through one of its non-generic
    subclasses, `RegistryManagerInstances` or `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

    obscore: _ObsCore | None
    """Manager for `ObsCore` table(s); `None` when there is no such manager."""
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        type[ButlerAttributeManager],
        type[DimensionRecordStorageManager],
        type[CollectionManager],
        type[DatasetRecordStorageManager],
        type[OpaqueTableStorageManager],
        type[DatastoreRegistryBridgeManager],
        type[ObsCoreTableManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    manager_configs: dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """

    schema_versions: dict[str, VersionTuple] = dataclasses.field(default_factory=dict)
    """Per-manager schema versions defined by configuration, optional."""

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types. Each entry is
            either a class name string, or a sub-configuration with a
            mandatory "cls" key and optional "config" and "schema_version"
            keys.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager sub-configuration has no "cls" key, or if a
            manager entry is neither a string, a `Config`, nor `None`.
        ValueError
            Raised if a manager sub-configuration contains keys other than
            "cls", "config" and "schema_version".
        """
        # We only check for manager names defined in class attributes, and we
        # walk them in field-declaration order (not via an unordered set) so
        # that imports and error reporting are the same on every run.
        # TODO: Maybe we need to check keys for unknown names/typos?
        non_manager_fields = {"manager_configs", "schema_versions"}
        manager_names = [
            field.name for field in dataclasses.fields(cls) if field.name not in non_manager_fields
        ]
        # Values of "config" sub-key, if any, indexed by manager name.
        configs: dict[str, Mapping] = {}
        schema_versions: dict[str, VersionTuple] = {}
        manager_types: dict[str, type | None] = {}
        managers_section = config["managers"]
        for manager in manager_names:
            manager_config = managers_section.get(manager)
            if manager_config is None:
                # Some managers may be optional.
                continue
            if isinstance(manager_config, Config):
                # Expect "cls" and optional "config" and "schema_version"
                # sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    class_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                if (mgr_config := manager_config_dict.pop("config", None)) is not None:
                    configs[manager] = mgr_config
                if (mgr_version := manager_config_dict.pop("schema_version", None)) is not None:
                    # Note that we do not check versions that come from config
                    # for compatibility, they may be overridden later by
                    # versions from registry.
                    schema_versions[manager] = VersionTuple.fromString(mgr_version)
                if manager_config_dict:
                    # Anything left after popping the known keys is a mistake.
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            elif isinstance(manager_config, str):
                class_name = manager_config
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore needs special care because it is the only manager which can
        # be None, and we cannot define a default value for its field, so pass
        # None explicitly when it was not configured.
        manager_types.setdefault("obscore", None)
        return cls(**manager_types, manager_configs=configs, schema_versions=schema_versions)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository. Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if the datasets manager uses `sqlalchemy.BigInteger`
            (autoincrement integer) dataset IDs, or if ``dimensionConfig``
            cannot be serialized to JSON.
        """
        # If schema versions were specified in the config, check that they are
        # compatible with their managers.
        managers = self.as_dict()
        for manager_type, schema_version in self.schema_versions.items():
            manager_class = managers[manager_type]
            manager_class.checkNewSchemaVersion(schema_version)

        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() is sqlalchemy.BigInteger:
                raise RuntimeError(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs.",
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # store managers and their versions in attributes table
        versions = ButlerVersionsManager(instances.attributes)
        versions.storeManagersConfig(instances.as_dict())

        # dump universe config as json into attributes (faster than YAML)
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)
        if instances.obscore is not None:
            # Persist the obscore configuration too, so that loadRepo can read
            # it back from the attributes table.
            obscore_json = instances.obscore.config_json()
            instances.attributes.set(_OBSCORE_ATTR, obscore_json)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository. Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration is missing from the
            attributes table.

        Notes
        -----
        This method updates ``self.schema_versions`` in place with the
        versions read from the registry and, when an obscore configuration is
        stored in the attributes table, replaces
        ``self.manager_configs["obscore"]`` with it.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration. Note that we do not check this
        # manager version before initializing it, it is supposed to be
        # completely backward- and forward-compatible.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)

        # Verify that configured classes are compatible with the ones stored
        # in registry.
        versions = ButlerVersionsManager(attributes)
        versions.checkManagersConfig(self.as_dict())

        # Read schema versions from registry and validate them.
        self.schema_versions.update(versions.managerVersions())
        for manager_type, manager_class in self.as_dict().items():
            schema_version = self.schema_versions.get(manager_type)
            if schema_version is not None:
                manager_class.checkCompatibility(schema_version, database.isWriteable())

        # get serialized as a string from database
        dimensionsString = attributes.get(_DIMENSIONS_ATTR)
        if dimensionsString is None:
            raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
        dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        universe = DimensionUniverse(dimensionConfig)
        if self.obscore is not None:
            # Get ObsCore configuration from attributes table, this silently
            # overrides whatever may come from config file. Idea is that we do
            # not want to carry around the whole thing, and butler config will
            # have empty obscore configuration after initialization. When
            # configuration is missing from attributes table, the obscore table
            # does not exist, and we do not instantiate obscore manager.
            obscoreString = attributes.get(_OBSCORE_ATTR)
            if obscoreString is not None:
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")

        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)

        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

    def as_dict(self) -> Mapping[str, type[VersionedExtension]]:
        """Return contained managers as a dictionary with manager type name as
        a key.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `type`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager class. Only existing managers are returned, i.e. fields
            whose value is `None` are left out.
        """
        # These two fields hold per-manager settings, not managers.
        extras = {"manager_configs", "schema_versions"}
        managers = {f.name: getattr(self, f.name) for f in dataclasses.fields(self) if f.name not in extras}
        return {key: value for key, value in managers.items() if value is not None}
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """Struct bundling the manager instances that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    caching_context: CachingContext
    """Object containing caches for various information generated by
    managers.
    """

    @contextmanager
    def caching_context_manager(self) -> Iterator[None]:
        """Return a context manager that enables caching while it is active.

        Caching is switched on through ``caching_context`` on entry and
        switched off again on exit, including when the body raises.

        Calls to this method may be nested and the returned context managers
        may even be closed out of order. Only the first context manager
        entered and the last one exited are expected to have any effect; that
        bookkeeping is presumably done by `CachingContext`, which is not
        defined in this module.
        """
        cache = self.caching_context
        cache._enable()
        try:
            yield
        finally:
            cache._disable()

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
        caching_context: CachingContext | None = None,
    ) -> RegistryManagerInstances:
        """Build every manager instance from its type and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.
        caching_context : `CachingContext` or `None`, optional
            Caching context to use. A new one is created when this is `None`.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        if caching_context is None:
            caching_context = CachingContext()
        # Throwaway table spec, used below only to obtain the foreign-key
        # field specs for dataset IDs and run collections.
        dummy_table = ddl.TableSpec(fields=())
        versions = types.schema_versions

        # Managers are created in dependency order: later ones receive the
        # earlier instances as arguments.
        attributes = types.attributes.initialize(
            database, context, registry_schema_version=versions.get("attributes")
        )
        dimensions = types.dimensions.initialize(
            database, context, universe=universe, registry_schema_version=versions.get("dimensions")
        )
        collections = types.collections.initialize(
            database,
            context,
            caching_context=caching_context,
            registry_schema_version=versions.get("collections"),
        )
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
            registry_schema_version=versions.get("datasets"),
            caching_context=caching_context,
        )
        opaque = types.opaque.initialize(database, context, registry_schema_version=versions.get("opaque"))
        # Note that the datasets manager *type*, not the instance, is passed.
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
            registry_schema_version=versions.get("datastores"),
        )

        # The obscore manager is created only when both its type and its
        # configuration are available.
        obscore: ObsCoreTableManager | None = None
        if types.obscore is not None and "obscore" in types.manager_configs:
            obscore = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=types.manager_configs["obscore"],
                datasets=types.datasets,
                dimensions=dimensions,
                registry_schema_version=versions.get("obscore"),
            )

        column_types = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
            ingest_date_dtype=datasets.ingest_date_dtype(),
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            obscore=obscore,
            column_types=column_types,
            caching_context=caching_context,
        )

    def clone(
        self,
        db: Database,
    ) -> RegistryManagerInstances:
        """Make an independent copy of the manager instances, bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating managers.

        Returns
        -------
        instances : `RegistryManagerInstances`
            New manager instances with the same configuration as this instance,
            but bound to a new Database object. The copy gets a fresh
            `CachingContext` and reuses this instance's ``column_types``.
        """
        fresh_cache = CachingContext()
        dimensions = self.dimensions.clone(db)
        collections = self.collections.clone(db, fresh_cache)
        opaque = self.opaque.clone(db)
        datasets = self.datasets.clone(
            db=db, collections=collections, dimensions=dimensions, caching_context=fresh_cache
        )
        obscore = self.obscore.clone(db=db, dimensions=dimensions) if self.obscore is not None else None
        attributes = self.attributes.clone(db)
        datastores = self.datastores.clone(db=db, opaque=opaque)
        return RegistryManagerInstances(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            obscore=obscore,
            column_types=self.column_types,
            caching_context=fresh_cache,
        )

    def as_dict(self) -> Mapping[str, VersionedExtension]:
        """Return contained managers as a dictionary keyed by manager type
        name.

        Returns
        -------
        extensions : `~collections.abc.Mapping` [`str`, `VersionedExtension`]
            Maps manager type name (e.g. "datasets") to its corresponding
            manager instance. Only existing managers are returned, i.e.
            fields whose value is `None` are left out.
        """
        # These two fields are supporting state, not managers.
        not_managers = ("column_types", "caching_context")
        result: dict[str, VersionedExtension] = {}
        for field in dataclasses.fields(self):
            if field.name in not_managers:
                continue
            manager = getattr(self, field.name)
            if manager is not None:
                result[field.name] = manager
        return result

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database or clearing
        caches.

        This delegates to the ``refresh`` methods of the collections and
        datasets managers, in that order.
        """
        self.collections.refresh()
        self.datasets.refresh()