Coverage for python/lsst/daf/butler/registry/managers.py: 34%
135 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-07 09:47 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-07 09:47 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Public names exported by this module.
__all__ = ("RegistryManagerInstances", "RegistryManagerTypes")
29import dataclasses
30import logging
31import warnings
32from collections.abc import Mapping
33from typing import Any, Dict, Generic, Optional, Type, TypeVar
35import sqlalchemy
36from lsst.utils import doImportType
38from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl
39from ._config import RegistryConfig
40from .interfaces import (
41 ButlerAttributeManager,
42 CollectionManager,
43 Database,
44 DatasetRecordStorageManager,
45 DatastoreRegistryBridgeManager,
46 DimensionRecordStorageManager,
47 ObsCoreTableManager,
48 OpaqueTableStorageManager,
49 StaticTablesContext,
50)
51from .versions import ButlerVersionsManager
# Type parameters of `_GenericRegistryManagers`, one per manager slot.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")
_ObsCore = TypeVar("_ObsCore")


# Logger named after this module.
_LOG = logging.getLogger(__name__)

# Key in the attributes table that holds the dimensions configuration.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Key in the attributes table that holds the obscore configuration.
_OBSCORE_ATTR = "config:obscore.json"


@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores, _ObsCore]
):
    """Generic base struct bundling the managers that back a `Registry`.

    Each type parameter fixes what one field holds, which lets the same
    layout carry either manager instances or manager types.  Do not use this
    class directly; use its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

    obscore: Optional[_ObsCore]
    """Manager for `ObsCore` table(s); the only manager that may be `None`."""


@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
        Type[ObsCoreTableManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.  Each entry is
            either a class name string, or a sub-configuration with a "cls"
            key and an optional "config" key.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.

        Raises
        ------
        KeyError
            Raised if a manager sub-configuration has no "cls" key, or if a
            manager entry is neither a string nor a `Config`.
        ValueError
            Raised if a manager sub-configuration has keys other than "cls"
            and "config".
        """
        # We only check for manager names defined in class attributes.
        # Walk them in field declaration order (not as a set) so that the
        # import order and the first error reported are the same on every run.
        # TODO: Maybe we need to check keys for unknown names/typos?
        managers = [field.name for field in dataclasses.fields(cls) if field.name != "manager_configs"]
        # Values of "config" sub-key, if any, indexed by manager name.
        configs: Dict[str, Mapping] = {}
        manager_types: Dict[str, Type] = {}
        for manager in managers:
            manager_config = config["managers"].get(manager)
            if isinstance(manager_config, Config):
                # Expect "cls" and optional "config" sub-keys.
                manager_config_dict = manager_config.toDict()
                try:
                    class_name = manager_config_dict.pop("cls")
                except KeyError:
                    raise KeyError(f"'cls' key is not defined in {manager!r} manager configuration") from None
                if (mgr_config := manager_config_dict.pop("config", None)) is not None:
                    configs[manager] = mgr_config
                # Anything left after popping the known keys is unexpected.
                if manager_config_dict:
                    raise ValueError(
                        f"{manager!r} manager configuration has unexpected keys: {set(manager_config_dict)}"
                    )
            elif isinstance(manager_config, str):
                class_name = manager_config
            elif manager_config is None:
                # Some managers may be optional.
                continue
            else:
                raise KeyError(f"Unexpected type of {manager!r} manager configuration: {manager_config!r}")
            manager_types[manager] = doImportType(class_name)

        # obscore need special care because it's the only manager which can be
        # None, and we cannot define default value for it.
        if "obscore" in manager_types:
            return cls(**manager_types, manager_configs=configs)
        else:
            return cls(**manager_types, obscore=None, manager_configs=configs)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` cannot be serialized to JSON.

        Warns
        -----
        FutureWarning
            Issued if the datasets manager uses integer dataset IDs.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
        # store managers and their versions in attributes table
        versions = instances.getVersions()
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        dimensions_json = dimensionConfig.dump(format="json")
        if dimensions_json is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensions_json)
        # Persist the obscore configuration too, so that loadRepo can read it
        # back from the attributes table.
        if instances.obscore is not None:
            instances.attributes.set(_OBSCORE_ATTR, instances.obscore.config_json())
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration is missing from the
            attributes table, or if an obscore manager is configured and the
            obscore configuration is missing from it.

        Notes
        -----
        When an obscore manager is configured, this method replaces
        ``self.manager_configs["obscore"]`` in place with the configuration
        stored in the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # get serialized as a string from database
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
            if self.obscore is not None:
                # Get ObsCore configuration from attributes table, this silently
                # overrides whatever may come from config file. Idea is that we do
                # not want to carry around the whole thing, and butler config will
                # have empty obscore configuration after initialization.
                obscoreString = attributes.get(_OBSCORE_ATTR)
                if obscoreString is None:
                    raise LookupError(f"Registry attribute {_OBSCORE_ATTR} is missing from database")
                # The dataclass is frozen, but the dict it holds is mutable.
                self.manager_configs["obscore"] = Config.fromString(obscoreString, format="json")
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Load content from database that we try to keep in-memory.
            instances.refresh()
        return instances

    manager_configs: Dict[str, Mapping] = dataclasses.field(default_factory=dict)
    """Per-manager configuration options passed to their initialize methods.
    """


@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.

        Raises
        ------
        KeyError
            Raised if ``types.obscore`` is set but ``types.manager_configs``
            has no "obscore" entry.
        """
        # The foreign-key helpers need a table spec to add their fields to;
        # this empty one is used only to obtain the resulting field specs.
        dummy_table = ddl.TableSpec(fields=())
        kwargs: Dict[str, Any] = {}
        kwargs["column_types"] = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table,
                primaryKey=False,
                nullable=False,
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
        )
        # Managers are built in dependency order: later ones receive the
        # instances created before them.
        kwargs["attributes"] = types.attributes.initialize(database, context)
        kwargs["dimensions"] = types.dimensions.initialize(database, context, universe=universe)
        kwargs["collections"] = types.collections.initialize(
            database,
            context,
            dimensions=kwargs["dimensions"],
        )
        kwargs["datasets"] = types.datasets.initialize(
            database, context, collections=kwargs["collections"], dimensions=kwargs["dimensions"]
        )
        kwargs["opaque"] = types.opaque.initialize(database, context)
        kwargs["datastores"] = types.datastores.initialize(
            database,
            context,
            opaque=kwargs["opaque"],
            datasets=types.datasets,
            universe=universe,
        )
        if types.obscore is not None:
            # The obscore manager cannot be built without its configuration;
            # report that explicitly rather than as a bare KeyError('obscore').
            try:
                obscore_config = types.manager_configs["obscore"]
            except KeyError:
                raise KeyError("'config' key is not defined in 'obscore' manager configuration") from None
            kwargs["obscore"] = types.obscore.initialize(
                database,
                context,
                universe=universe,
                config=obscore_config,
                datasets=types.datasets,
                dimensions=kwargs["dimensions"],
            )
        else:
            kwargs["obscore"] = None
        return cls(**kwargs)

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # Can't use dataclasses.asdict here, because it tries to do some
        # deepcopy stuff (?!) in order to find dataclasses recursively, and
        # that doesn't work on some manager objects that definitely aren't
        # supposed to be deep-copied anyway.
        managers = {
            field.name: getattr(self, field.name)
            for field in dataclasses.fields(self)
            if field.name != "column_types"  # not a manager
        }
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        Clears the dimension caches, then refreshes the dimensions,
        collections, and datasets managers in that order.
        """
        self.dimensions.clearCaches()
        self.dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()