Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 93%

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import copy
import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
)
from lsst.daf.butler.registry import ConflictingDefinitionError, OrphanedRecordError
from lsst.daf.butler.registry.interfaces import (
    DatasetRecordStorage,
    DatasetRecordStorageManager,
    VersionTuple,
)

from .tables import (
    addDatasetForeignKey,
    makeCalibTableName,
    makeCalibTableSpec,
    makeStaticTableSpecs,
    makeTagTableName,
    makeTagTableSpec,
)
from .summaries import CollectionSummaryManager
from ._storage import ByDimensionsDatasetRecordStorage
from ...summaries import CollectionSummary

# Imports needed only for type annotations; TYPE_CHECKING is False at
# runtime, so coverage never enters this block.
if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        CollectionRecord,
        Database,
        DimensionRecordStorageManager,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple

# This has to be updated on every schema change.
_VERSION = VersionTuple(1, 0, 0)


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table
    for each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least)
    to try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign
      key directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever
      forms of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    dimensions : `DimensionRecordStorageManager`
        Manager object for the dimensions in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    summaries : `CollectionSummaryManager`
        Structure containing tables that summarize the contents of
        collections.
    """

    def __init__(
        self, *,
        db: Database,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
        static: StaticDatasetTablesTuple,
        summaries: CollectionSummaryManager,
    ):
        self._db = db
        self._collections = collections
        self._dimensions = dimensions
        self._static = static
        self._summaries = summaries
        # In-memory caches of per-dataset-type storage objects, keyed by
        # dataset type name and by surrogate id; populated by `refresh`.
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=dimensions.universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        summaries = CollectionSummaryManager.initialize(
            db,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        return cls(db=db, collections=collections, dimensions=dimensions, static=static,
                   summaries=summaries)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint,
                                    **kwargs)

    def refresh(self) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        # Rebuild the in-memory caches from scratch by reading every row of
        # the static dataset_type table.
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = self._dimensions.loadDimensionGraph(row[c.dimensions_key])
            calibTableName = row[c.calibration_association_table]
            datasetType = DatasetType(name, dimensions, row[c.storage_class],
                                      isCalibration=(calibTableName is not None))
            tags = self._db.getExistingTable(row[c.tag_association_table],
                                             makeTagTableSpec(datasetType, type(self._collections)))
            if calibTableName is not None:
                calibs = self._db.getExistingTable(row[c.calibration_association_table],
                                                   makeCalibTableSpec(datasetType, type(self._collections),
                                                                      self._db.getTimespanRepresentation()))
            else:
                calibs = None
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, summaries=self._summaries,
                                                       tags=tags, calibs=calibs,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        # Swap in the new caches, then refresh the collection summaries
        # using the new id -> DatasetType mapping.
        self._byName = byName
        self._byId = byId
        self._summaries.refresh(lambda dataset_type_id: self._byId[dataset_type_id].datasetType)

    def remove(self, name: str) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        if componentName is not None:
            raise ValueError(f"Cannot delete a dataset type for a component of a composite "
                             f"(given {name}).")

        # Delete the row.
        try:
            self._db.delete(self._static.dataset_type, ["name"], {"name": name})
        except sqlalchemy.exc.IntegrityError as e:
            raise OrphanedRecordError(f"Dataset type {name} cannot be removed."
                                      " It is associated with datasets that must be removed first.") from e

        # Now refresh everything -- removal is rare enough that this does
        # not need to be fast.
        self.refresh()

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        storage = self._byName.get(compositeName)
        if storage is not None and componentName is not None:
            # Component dataset types (e.g. "calexp.psf") are not stored in
            # the registry; derive one on the fly from the parent's storage.
            componentStorage = copy.copy(storage)
            componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
            return componentStorage
        else:
            return storage

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():  # coverage: branch never taken in tests
            raise ValueError("Component dataset types cannot be stored in the registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is None:
            dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions)
            tagTableName = makeTagTableName(datasetType, dimensionsKey)
            calibTableName = (makeCalibTableName(datasetType, dimensionsKey)
                              if datasetType.isCalibration() else None)
            # Insert the dataset type row if it does not exist, or verify
            # that an existing row is consistent with this definition.
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_key": dimensionsKey,
                    "storage_class": datasetType.storageClass.name,
                },
                extra={
                    "tag_association_table": tagTableName,
                    "calibration_association_table": calibTableName,
                },
                returning=["id", "tag_association_table"],
            )
            assert row is not None
            # Create the dynamic tag (and, for calibrations, calib) tables
            # shared by all dataset types with these dimensions, if they do
            # not already exist.
            tags = self._db.ensureTableExists(
                tagTableName,
                makeTagTableSpec(datasetType, type(self._collections)),
            )
            if calibTableName is not None:
                calibs = self._db.ensureTableExists(
                    calibTableName,
                    makeCalibTableSpec(datasetType, type(self._collections),
                                       self._db.getTimespanRepresentation()),
                )
            else:
                calibs = None
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, summaries=self._summaries,
                                                       tags=tags, calibs=calibs,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:  # coverage: branch never taken in tests
            # The dataset type must have been registered by another client
            # since our last refresh; reload the caches and try again.
            self.refresh()
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )

    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._summaries.get(collection)

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._static, self._db.dialect)
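

# A minimal sketch of how this manager is typically wired up during schema
# construction (hypothetical setup code; assumes ``db`` is a concrete
# `Database` implementation and ``collections``/``dimensions`` are manager
# instances initialized in the same `StaticTablesContext`):
#
#     with db.declareStaticTables(create=True) as context:
#         datasets = ByDimensionsDatasetRecordStorageManager.initialize(
#             db, context,
#             collections=collections,
#             dimensions=dimensions,
#         )
#     datasets.refresh()  # populate the in-memory caches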