Coverage for python/lsst/daf/butler/registry/managers.py: 44%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "RegistryManagerInstances",
26 "RegistryManagerTypes",
27)
29import dataclasses
30import logging
31from typing import Any, Dict, Generic, Type, TypeVar
33from lsst.utils import doImportType
35from ..core import Config, DimensionConfig, DimensionUniverse
36from ._config import RegistryConfig
37from .interfaces import (
38 ButlerAttributeManager,
39 CollectionManager,
40 Database,
41 DatasetRecordStorageManager,
42 DatastoreRegistryBridgeManager,
43 DimensionRecordStorageManager,
44 OpaqueTableStorageManager,
45 StaticTablesContext,
46)
47from .versions import ButlerVersionsManager, DigestMismatchError
# Type variables for the six fields of `_GenericRegistryManagers`, one per
# manager slot.  The concrete subclasses in this module parameterize them with
# either manager classes (`RegistryManagerTypes`) or manager instances
# (`RegistryManagerInstances`).
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# key for dimensions configuration in attributes table
_DIMENSIONS_ATTR = "config:dimensions.json"
@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Base struct used to pass around the manager instances or types that back
    a `Registry`.

    This class should only be used via its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.

    Notes
    -----
    The dataclass is declared ``frozen``, so fields cannot be reassigned after
    construction, and with ``eq=False``, so no field-wise ``__eq__`` is
    generated.  The field names double as lookup keys: other code in this
    module iterates over `dataclasses.fields` to read the ``"managers"``
    configuration section and to build the name-to-manager mapping handed to
    `ButlerVersionsManager`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions.
    """

    dimensions: _Dimensions
    """Manager for dimensions.
    """

    collections: _Collections
    """Manager for collections.
    """

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries.
    """

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables.
    """

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`.
    """
class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # One configuration entry is looked up per dataclass field, so the
        # keys of the "managers" section must match the field names.
        return cls(**{f.name: doImportType(config["managers", f.name]) for f in dataclasses.fields(cls)})

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` cannot be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # NOTE(review): the writes below are placed after the static-table
        # context has closed; confirm against the original block layout.
        # store managers and their versions in attributes table
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.

        Notes
        -----
        A `DigestMismatchError` raised while checking schema digests is logged
        as a warning rather than propagated.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # get serialized as a string from database
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: now that the universe is known, build every manager.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            try:
                versions.checkManagersDigests()
            except DigestMismatchError as exc:
                # potentially digest mismatch is a serious error but during
                # development it could be benign, treat this as warning for
                # now.
                _LOG.warning("Registry schema digest mismatch: %s", exc)
        # NOTE(review): the refresh is placed after the static-table context
        # has closed; confirm against the original block layout.
        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Struct bundling the manager instances that back a `Registry`."""

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build every manager instance from its type, using an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Managers are created in dependency order: later ones receive the
        # earlier ones as arguments.
        attributesManager = types.attributes.initialize(database, context)
        dimensionsManager = types.dimensions.initialize(database, context, universe=universe)
        collectionsManager = types.collections.initialize(
            database,
            context,
            dimensions=dimensionsManager,
        )
        datasetsManager = types.datasets.initialize(
            database,
            context,
            collections=collectionsManager,
            dimensions=dimensionsManager,
        )
        opaqueManager = types.opaque.initialize(database, context)
        datastoresManager = types.datastores.initialize(
            database,
            context,
            opaque=opaqueManager,
            # The dataset manager *type* is passed here, not the instance
            # created above.
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributesManager,
            dimensions=dimensionsManager,
            collections=collectionsManager,
            datasets=datasetsManager,
            opaque=opaqueManager,
            datastores=datastoresManager,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # Can't use dataclasses.asdict here, because it tries to do some
        # deepcopy stuff (?!) in order to find dataclasses recursively, and
        # that doesn't work on some manager objects that definitely aren't
        # supposed to be deep-copied anyway.
        managers: Dict[str, Any] = {}
        for field in dataclasses.fields(self):
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        # Dimension caches are cleared before anything is refreshed.
        self.dimensions.clearCaches()
        for manager in (self.dimensions, self.collections, self.datasets):
            manager.refresh()