Coverage for python/lsst/daf/butler/registry/managers.py: 45%
88 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-07-03 01:08 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Public API of this module: the two concrete (non-generic) manager structs.
__all__ = (
    "RegistryManagerInstances",
    "RegistryManagerTypes",
)

import dataclasses
import logging
import warnings
from typing import Any, Dict, Generic, Type, TypeVar

import sqlalchemy
from lsst.utils import doImportType

from ..core import Config, DimensionConfig, DimensionUniverse
from ._config import RegistryConfig
from .interfaces import (
    ButlerAttributeManager,
    CollectionManager,
    Database,
    DatasetRecordStorageManager,
    DatastoreRegistryBridgeManager,
    DimensionRecordStorageManager,
    OpaqueTableStorageManager,
    StaticTablesContext,
)
from .versions import ButlerVersionsManager
# Type variables parameterizing `_GenericRegistryManagers`, one per manager
# slot; they are bound either to manager classes or to manager instances by
# the concrete subclasses.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# key for dimensions configuration in attributes table
_DIMENSIONS_ATTR = "config:dimensions.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Base struct used to pass around the manager instances or types that back
    a `Registry`.

    This class should only be used via its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.

    Notes
    -----
    The struct is frozen, and ``eq=False`` means no ``__eq__`` is generated,
    so instances compare by identity.  The field names double as the keys of
    the "managers" configuration section and of the mapping handed to
    `ButlerVersionsManager`, so they must not be renamed or reordered
    casually.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions.
    """

    dimensions: _Dimensions
    """Manager for dimensions.
    """

    collections: _Collections
    """Manager for collections.
    """

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables.
    """

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`.
    """
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # Each dataclass field name is also the key of the corresponding
        # entry in the "managers" configuration section.
        return cls(**{f.name: doImportType(config["managers", f.name]) for f in dataclasses.fields(cls)})

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` cannot be serialized to JSON.

        Warns
        -----
        FutureWarning
            Issued if the datasets manager uses `sqlalchemy.BigInteger`
            dataset IDs.
        """
        universe = DimensionUniverse(dimensionConfig)
        # NOTE(review): the extent of this ``with`` block was reconstructed;
        # the writes below are placed after it on the assumption that static
        # tables only exist once the context has exited -- confirm.
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # store managers and their versions in attributes table
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        # NOTE(review): the extent of both ``with`` blocks in this method was
        # reconstructed -- confirm.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # get serialized as a string from database
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: now that the universe is known, build every manager
        # and repeat the version checks against the full set.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Load content from database that we try to keep in-memory.
            instances.refresh()
        return instances
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Managers are built in dependency order: later ones receive the
        # earlier ones as arguments.
        kwargs: Dict[str, Any] = {}
        kwargs["attributes"] = types.attributes.initialize(database, context)
        kwargs["dimensions"] = types.dimensions.initialize(database, context, universe=universe)
        kwargs["collections"] = types.collections.initialize(
            database,
            context,
            dimensions=kwargs["dimensions"],
        )
        kwargs["datasets"] = types.datasets.initialize(
            database,
            context,
            collections=kwargs["collections"],
            dimensions=kwargs["dimensions"],
        )
        kwargs["opaque"] = types.opaque.initialize(database, context)
        kwargs["datastores"] = types.datastores.initialize(
            database,
            context,
            opaque=kwargs["opaque"],
            # The datasets manager *type* is passed here, not the instance
            # created above.
            datasets=types.datasets,
            universe=universe,
        )
        return cls(**kwargs)

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        return ButlerVersionsManager(
            self.attributes,
            # Can't use dataclasses.asdict here, because it tries to do some
            # deepcopy stuff (?!) in order to find dataclasses recursively, and
            # that doesn't work on some manager objects that definitely aren't
            # supposed to be deep-copied anyway.
            {f.name: getattr(self, f.name) for f in dataclasses.fields(self)},
        )

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        Only the dimensions, collections and datasets managers are refreshed
        here; the dimensions manager's caches are cleared first.
        """
        self.dimensions.clearCaches()
        self.dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()