Coverage for python/lsst/daf/butler/registry/managers.py: 46%
93 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 02:00 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 02:00 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Public API of this module: the two concrete manager structs.
__all__ = ("RegistryManagerInstances", "RegistryManagerTypes")
29import dataclasses
30import logging
31import warnings
32from typing import Any, Dict, Generic, Type, TypeVar
34import sqlalchemy
35from lsst.utils import doImportType
37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl
38from ._config import RegistryConfig
39from .interfaces import (
40 ButlerAttributeManager,
41 CollectionManager,
42 Database,
43 DatasetRecordStorageManager,
44 DatastoreRegistryBridgeManager,
45 DimensionRecordStorageManager,
46 OpaqueTableStorageManager,
47 StaticTablesContext,
48)
49from .versions import ButlerVersionsManager
# Type parameters of `_GenericRegistryManagers`, one per manager field, so
# the same struct layout can hold either manager types or manager instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# key for dimensions configuration in attributes table
_DIMENSIONS_ATTR = "config:dimensions.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Generic struct bundling the manager instances or manager types that
    back a `Registry`.

    Do not use this class directly; use its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`, which bind the
    type parameters to concrete manager interfaces.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """Struct holding the *types* of the manager objects that back a
    `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct of manager types by importing the classes named in
        configuration.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # One config entry is looked up per dataclass field, keyed by the
        # field's own name under "managers".
        manager_types = {}
        for field in dataclasses.fields(cls):
            manager_types[field.name] = doImportType(config["managers", field.name])
        return cls(**manager_types)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository and return the manager instances that point to it.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` could not be serialized to JSON.

        Warns
        -----
        FutureWarning
            Issued when the configured datasets manager reports
            `sqlalchemy.BigInteger` as its dataset ID column type.
        """
        universe = DimensionUniverse(dimensionConfig)
        # NOTE(review): the extent of this ``with`` block was reconstructed
        # from a listing that had lost its indentation -- confirm that the
        # attribute writes below are meant to happen after the block exits.
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # store managers and their versions in attributes table
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        serialized = dimensionConfig.dump(format="json")
        if serialized is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, serialized)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # NOTE(review): the extents of both ``with`` blocks were reconstructed
        # from a listing that had lost its indentation -- confirm them.
        #
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # get serialized as a string from database
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: now that the universe is known, build every manager.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Load content from database that we try to keep in-memory.
            instances.refresh()
        return instances
@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Struct holding the manager *instances* that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Information about column types that can differ between data repositories
    and registry instances, including the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Throwaway table spec: the add*ForeignKey calls below are used only
        # for the field specs they return.
        dummy_table = ddl.TableSpec(fields=())
        column_types = ColumnTypeInfo(
            database.getTimespanRepresentation(),
            universe,
            dataset_id_spec=types.datasets.addDatasetForeignKey(
                dummy_table, primaryKey=False, nullable=False
            ),
            run_key_spec=types.collections.addRunForeignKey(dummy_table, primaryKey=False, nullable=False),
        )
        # Managers are built in dependency order: collections need dimensions,
        # datasets need both, and datastores need the opaque manager.
        attributes_mgr = types.attributes.initialize(database, context)
        dimensions_mgr = types.dimensions.initialize(database, context, universe=universe)
        collections_mgr = types.collections.initialize(database, context, dimensions=dimensions_mgr)
        datasets_mgr = types.datasets.initialize(
            database,
            context,
            collections=collections_mgr,
            dimensions=dimensions_mgr,
        )
        opaque_mgr = types.opaque.initialize(database, context)
        # The datastores manager receives the datasets manager *type*, not the
        # instance constructed above.
        datastores_mgr = types.datastores.initialize(
            database,
            context,
            opaque=opaque_mgr,
            datasets=types.datasets,
            universe=universe,
        )
        managers: Dict[str, Any] = {
            "column_types": column_types,
            "attributes": attributes_mgr,
            "dimensions": dimensions_mgr,
            "collections": collections_mgr,
            "datasets": datasets_mgr,
            "opaque": opaque_mgr,
            "datastores": datastores_mgr,
        }
        return cls(**managers)

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # Can't use dataclasses.asdict here, because it tries to do some
        # deepcopy stuff (?!) in order to find dataclasses recursively, and
        # that doesn't work on some manager objects that definitely aren't
        # supposed to be deep-copied anyway.
        managers = {}
        for field in dataclasses.fields(self):
            # ``column_types`` is not a manager, so it is left out.
            if field.name == "column_types":
                continue
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        # Dimension caches are cleared before the dimensions manager is
        # refreshed; the remaining managers are refreshed afterwards.
        self.dimensions.clearCaches()
        self.dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()