Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py: 94%

from __future__ import annotations

__all__ = (
    "ByDimensionsDatasetRecordStorageManager",
    "ByDimensionsDatasetRecordStorageManagerUUID",
)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    Type,
    TYPE_CHECKING,
)

import copy
import sqlalchemy

from lsst.daf.butler import (
    DatasetId,
    DatasetRef,
    DatasetType,
    ddl,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError, OrphanedRecordError
from lsst.daf.butler.registry.interfaces import (
    DatasetRecordStorage,
    DatasetRecordStorageManager,
    VersionTuple,
)

from .tables import (
    addDatasetForeignKey,
    makeCalibTableName,
    makeCalibTableSpec,
    makeStaticTableSpecs,
    makeTagTableName,
    makeTagTableSpec,
)
from .summaries import CollectionSummaryManager
from ._storage import (
    ByDimensionsDatasetRecordStorage,
    ByDimensionsDatasetRecordStorageInt,
    ByDimensionsDatasetRecordStorageUUID,
)
from ...summaries import CollectionSummary


if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        CollectionRecord,
        Database,
        DimensionRecordStorageManager,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


# These have to be updated on every schema change.
_VERSION_INT = VersionTuple(1, 0, 0)
_VERSION_UUID = VersionTuple(1, 0, 0)


class ByDimensionsDatasetRecordStorageManagerBase(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of other
    design choices that would have been cumbersome (to say the least) to try
    to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    This class provides a complete implementation of the manager logic, but it
    is parametrized by a few class attributes that must be defined by
    subclasses.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    dimensions : `DimensionRecordStorageManager`
        Manager object for the dimensions in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    summaries : `CollectionSummaryManager`
        Structure containing tables that summarize the contents of
        collections.
    """

    def __init__(
        self, *,
        db: Database,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
        static: StaticDatasetTablesTuple,
        summaries: CollectionSummaryManager,
    ):
        self._db = db
        self._collections = collections
        self._dimensions = dimensions
        self._static = static
        self._summaries = summaries
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[DatasetId, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = cls.makeStaticTableSpecs(type(collections), universe=dimensions.universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        summaries = CollectionSummaryManager.initialize(
            db,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        return cls(db=db, collections=collections, dimensions=dimensions, static=static,
                   summaries=summaries)

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return cls._version

    @classmethod
    def makeStaticTableSpecs(cls, collections: Type[CollectionManager],
                             universe: DimensionUniverse) -> StaticDatasetTablesTuple:
148 """Construct all static tables used by the classes in this package.
150 Static tables are those that are present in all Registries and do not
151 depend on what DatasetTypes have been registered.
153 Parameters
154 ----------
155 collections: `CollectionManager`
156 Manager object for the collections in this `Registry`.
157 universe : `DimensionUniverse`
158 Universe graph containing all dimensions known to this `Registry`.
160 Returns
161 -------
162 specs : `StaticDatasetTablesTuple`
163 A named tuple containing `ddl.TableSpec` instances.
164 """
        return makeStaticTableSpecs(collections, universe=universe,
                                    dtype=cls.getIdColumnType(), autoincrement=cls._autoincrement)

    @classmethod
    def getIdColumnType(cls) -> type:
        # Docstring inherited from base class.
        return cls._idColumnType

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, cls.getIdColumnType(), name=name, onDelete=onDelete,
                                    constraint=constraint, **kwargs)

    def refresh(self) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
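        # Rebuild the in-memory caches from scratch: scan the dataset_type
        # table, reconstruct each DatasetType, and re-attach the dynamic
        # tag/calib tables recorded for it.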
        byName = {}
        byId: Dict[DatasetId, ByDimensionsDatasetRecordStorage] = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = self._dimensions.loadDimensionGraph(row[c.dimensions_key])
            calibTableName = row[c.calibration_association_table]
            datasetType = DatasetType(name, dimensions, row[c.storage_class],
                                      isCalibration=(calibTableName is not None))
            tags = self._db.getExistingTable(
                row[c.tag_association_table],
                makeTagTableSpec(datasetType, type(self._collections), self.getIdColumnType()))
            if calibTableName is not None:
                calibs = self._db.getExistingTable(row[c.calibration_association_table],
                                                   makeCalibTableSpec(datasetType, type(self._collections),
                                                                      self._db.getTimespanRepresentation(),
                                                                      self.getIdColumnType()))
            else:
                calibs = None
            storage = self._recordStorageType(db=self._db, datasetType=datasetType,
                                              static=self._static, summaries=self._summaries,
                                              tags=tags, calibs=calibs,
                                              dataset_type_id=row["id"],
                                              collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId
        self._summaries.refresh(lambda dataset_type_id: self._byId[dataset_type_id].datasetType)

    def remove(self, name: str) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        if componentName is not None:
            raise ValueError(f"Cannot delete a dataset type for a component of a composite "
                             f"(given {name})")

        # Delete the row
        try:
            self._db.delete(self._static.dataset_type, ["name"], {"name": name})
        except sqlalchemy.exc.IntegrityError as e:
            raise OrphanedRecordError(f"Dataset type {name} cannot be removed."
                                      " It is associated with datasets that must be removed first.") from e

        # Now refresh everything -- removal is rare enough that this does
        # not need to be fast.
        self.refresh()

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
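        # Component dataset types have no storage of their own: return a
        # shallow copy of the parent composite's storage with the component
        # dataset type swapped in.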
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        storage = self._byName.get(compositeName)
        if storage is not None and componentName is not None:
            componentStorage = copy.copy(storage)
            componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
            return componentStorage
        else:
            return storage

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():
            raise ValueError("Component dataset types cannot be stored in registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is None:
            dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions)
            tagTableName = makeTagTableName(datasetType, dimensionsKey)
            calibTableName = (makeCalibTableName(datasetType, dimensionsKey)
                              if datasetType.isCalibration() else None)
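            # ``Database.sync`` inserts the row if it does not already exist;
            # ``compared`` columns are checked for consistency against an
            # existing row, while ``extra`` columns are only written on
            # insert.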
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_key": dimensionsKey,
                    "storage_class": datasetType.storageClass.name,
                },
                extra={
                    "tag_association_table": tagTableName,
                    "calibration_association_table": calibTableName,
                },
                returning=["id", "tag_association_table"],
            )
            assert row is not None
            tags = self._db.ensureTableExists(
                tagTableName,
                makeTagTableSpec(datasetType, type(self._collections), self.getIdColumnType()),
            )
            if calibTableName is not None:
                calibs = self._db.ensureTableExists(
                    calibTableName,
                    makeCalibTableSpec(datasetType, type(self._collections),
                                       self._db.getTimespanRepresentation(), self.getIdColumnType()),
                )
            else:
                calibs = None
            storage = self._recordStorageType(db=self._db, datasetType=datasetType,
                                              static=self._static, summaries=self._summaries,
                                              tags=tags, calibs=calibs,
                                              dataset_type_id=row["id"],
                                              collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: DatasetId) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
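        # Look up the dataset type and run for this ID first, then delegate
        # reconstruction of the data ID to the per-dataset-type storage.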
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            self.refresh()
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )

    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._summaries.get(collection)

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._static, self._db.dialect)

    _version: VersionTuple
    """Schema version for this class."""

    _recordStorageType: Type[ByDimensionsDatasetRecordStorage]
    """Type of the storage class returned by this manager."""

    _autoincrement: bool
    """If `True`, the primary key column of the dataset table is
    auto-incremented."""

    _idColumnType: type
    """Type of the column used to store the dataset ID."""


class ByDimensionsDatasetRecordStorageManager(ByDimensionsDatasetRecordStorageManagerBase):
    """Implementation of `ByDimensionsDatasetRecordStorageManagerBase` that
    uses an auto-increment integer for the dataset primary key.
    """
    _version: VersionTuple = _VERSION_INT
    _recordStorageType: Type[ByDimensionsDatasetRecordStorage] = ByDimensionsDatasetRecordStorageInt
    _autoincrement: bool = True
    _idColumnType: type = sqlalchemy.BigInteger


class ByDimensionsDatasetRecordStorageManagerUUID(ByDimensionsDatasetRecordStorageManagerBase):
    """Implementation of `ByDimensionsDatasetRecordStorageManagerBase` that
    uses a UUID for the dataset primary key.
    """
    _version: VersionTuple = _VERSION_UUID
    _recordStorageType: Type[ByDimensionsDatasetRecordStorage] = ByDimensionsDatasetRecordStorageUUID
    _autoincrement: bool = False
    _idColumnType: type = ddl.GUID