Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 93%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3__all__ = ("ByDimensionsDatasetRecordStorageManager",)
5from typing import (
6 Any,
7 Dict,
8 Iterator,
9 Optional,
10 Tuple,
11 TYPE_CHECKING,
12)
14import copy
15import sqlalchemy
17from lsst.daf.butler import (
18 DatasetRef,
19 DatasetType,
20 ddl,
21)
22from lsst.daf.butler.registry import ConflictingDefinitionError, OrphanedRecordError
23from lsst.daf.butler.registry.interfaces import (
24 DatasetRecordStorage,
25 DatasetRecordStorageManager,
26 VersionTuple
27)
29from .tables import (
30 addDatasetForeignKey,
31 CollectionSummaryTables,
32 makeCalibTableName,
33 makeCalibTableSpec,
34 makeStaticTableSpecs,
35 makeTagTableName,
36 makeTagTableSpec,
37)
38from ._storage import ByDimensionsDatasetRecordStorage
40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true
41 from lsst.daf.butler.registry.interfaces import (
42 CollectionManager,
43 Database,
44 DimensionRecordStorageManager,
45 StaticTablesContext,
46 )
47 from .tables import StaticDatasetTablesTuple
# Schema version for this manager; this has to be updated on every schema
# change.
_VERSION = VersionTuple(1, 0, 0)
class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A dataset manager that uses a single dataset-collection table for
    each group of dataset types sharing the same dimensions.

    Beyond that table organization, this implementation makes several other
    design choices that would have been cumbersome (to say the least) to
    encode in its name:

    - Dataset types are identified by a private surrogate autoincrement
      integer field rather than using the type name directly as the primary
      and foreign key.

    - All DatasetTypes are aggressively loaded into memory up front, instead
      of being fetched from the database on demand or cached in some more
      clever fashion.

    Alternative implementations that vary these choices while keeping the
    same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    dimensions : `DimensionRecordStorageManager`
        Manager object for the dimensions in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    summaries : `CollectionSummaryTables`
        Structure containing tables that summarize the contents of
        collections.
    """
    def __init__(
        self, *,
        db: Database,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
        static: StaticDatasetTablesTuple,
        summaries: CollectionSummaryTables,
    ):
        # Collaborating managers and table structures.
        self._db = db
        self._collections = collections
        self._dimensions = dimensions
        self._static = static
        self._summaries = summaries
        # In-memory caches of per-dataset-type storage objects, keyed by
        # dataset type name and by surrogate id; rebuilt by refresh().
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}
103 @classmethod
104 def initialize(
105 cls,
106 db: Database,
107 context: StaticTablesContext, *,
108 collections: CollectionManager,
109 dimensions: DimensionRecordStorageManager,
110 ) -> DatasetRecordStorageManager:
111 # Docstring inherited from DatasetRecordStorageManager.
112 specs = makeStaticTableSpecs(type(collections), universe=dimensions.universe)
113 static: StaticDatasetTablesTuple = context.addTableTuple(specs) # type: ignore
114 summaries = CollectionSummaryTables.initialize(
115 db,
116 context,
117 collections=collections,
118 dimensions=dimensions,
119 )
120 return cls(db=db, collections=collections, dimensions=dimensions, static=static, summaries=summaries)
122 @classmethod
123 def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
124 constraint: bool = True, onDelete: Optional[str] = None,
125 **kwargs: Any) -> ddl.FieldSpec:
126 # Docstring inherited from DatasetRecordStorageManager.
127 return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)
129 def refresh(self) -> None:
130 # Docstring inherited from DatasetRecordStorageManager.
131 byName = {}
132 byId = {}
133 c = self._static.dataset_type.columns
134 for row in self._db.query(self._static.dataset_type.select()).fetchall():
135 name = row[c.name]
136 dimensions = self._dimensions.loadDimensionGraph(row[c.dimensions_key])
137 calibTableName = row[c.calibration_association_table]
138 datasetType = DatasetType(name, dimensions, row[c.storage_class],
139 isCalibration=(calibTableName is not None))
140 tags = self._db.getExistingTable(row[c.tag_association_table],
141 makeTagTableSpec(datasetType, type(self._collections)))
142 if calibTableName is not None:
143 calibs = self._db.getExistingTable(row[c.calibration_association_table],
144 makeCalibTableSpec(datasetType, type(self._collections),
145 self._db.getTimespanRepresentation()))
146 else:
147 calibs = None
148 storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
149 static=self._static, summaries=self._summaries,
150 tags=tags, calibs=calibs,
151 dataset_type_id=row["id"],
152 collections=self._collections)
153 byName[datasetType.name] = storage
154 byId[storage._dataset_type_id] = storage
155 self._byName = byName
156 self._byId = byId
158 def remove(self, name: str) -> None:
159 # Docstring inherited from DatasetRecordStorageManager.
160 compositeName, componentName = DatasetType.splitDatasetTypeName(name)
161 if componentName is not None:
162 raise ValueError(f"Cannot delete a dataset type of a component of a composite (given {name})")
164 # Delete the row
165 try:
166 self._db.delete(self._static.dataset_type, ["name"], {"name": name})
167 except sqlalchemy.exc.IntegrityError as e:
168 raise OrphanedRecordError(f"Dataset type {name} can not be removed."
169 " It is associated with datasets that must be removed first.") from e
171 # Now refresh everything -- removal is rare enough that this does
172 # not need to be fast.
173 self.refresh()
175 def find(self, name: str) -> Optional[DatasetRecordStorage]:
176 # Docstring inherited from DatasetRecordStorageManager.
177 compositeName, componentName = DatasetType.splitDatasetTypeName(name)
178 storage = self._byName.get(compositeName)
179 if storage is not None and componentName is not None:
180 componentStorage = copy.copy(storage)
181 componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
182 return componentStorage
183 else:
184 return storage
186 def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
187 # Docstring inherited from DatasetRecordStorageManager.
188 if datasetType.isComponent(): 188 ↛ 189line 188 didn't jump to line 189, because the condition on line 188 was never true
189 raise ValueError("Component dataset types can not be stored in registry."
190 f" Rejecting {datasetType.name}")
191 storage = self._byName.get(datasetType.name)
192 if storage is None:
193 dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions)
194 tagTableName = makeTagTableName(datasetType, dimensionsKey)
195 calibTableName = (makeCalibTableName(datasetType, dimensionsKey)
196 if datasetType.isCalibration() else None)
197 row, inserted = self._db.sync(
198 self._static.dataset_type,
199 keys={"name": datasetType.name},
200 compared={
201 "dimensions_key": dimensionsKey,
202 "storage_class": datasetType.storageClass.name,
203 },
204 extra={
205 "tag_association_table": tagTableName,
206 "calibration_association_table": calibTableName,
207 },
208 returning=["id", "tag_association_table"],
209 )
210 assert row is not None
211 tags = self._db.ensureTableExists(
212 tagTableName,
213 makeTagTableSpec(datasetType, type(self._collections)),
214 )
215 if calibTableName is not None:
216 calibs = self._db.ensureTableExists(
217 calibTableName,
218 makeCalibTableSpec(datasetType, type(self._collections),
219 self._db.getTimespanRepresentation()),
220 )
221 else:
222 calibs = None
223 storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
224 static=self._static, summaries=self._summaries,
225 tags=tags, calibs=calibs,
226 dataset_type_id=row["id"],
227 collections=self._collections)
228 self._byName[datasetType.name] = storage
229 self._byId[storage._dataset_type_id] = storage
230 else:
231 if datasetType != storage.datasetType:
232 raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
233 f"with database definition {storage.datasetType}.")
234 inserted = False
235 return storage, inserted
237 def __iter__(self) -> Iterator[DatasetType]:
238 for storage in self._byName.values():
239 yield storage.datasetType
241 def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
242 # Docstring inherited from DatasetRecordStorageManager.
243 sql = sqlalchemy.sql.select(
244 [
245 self._static.dataset.columns.dataset_type_id,
246 self._static.dataset.columns[self._collections.getRunForeignKeyName()],
247 ]
248 ).select_from(
249 self._static.dataset
250 ).where(
251 self._static.dataset.columns.id == id
252 )
253 row = self._db.query(sql).fetchone()
254 if row is None:
255 return None
256 recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
257 if recordsForType is None: 257 ↛ 258line 257 didn't jump to line 258, because the condition on line 257 was never true
258 self.refresh()
259 recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
260 assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
261 return DatasetRef(
262 recordsForType.datasetType,
263 dataId=recordsForType.getDataId(id=id),
264 id=id,
265 run=self._collections[row[self._collections.getRunForeignKeyName()]].name
266 )
    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        # Returns the module-level constant, which is bumped on every schema
        # change.
        return _VERSION
    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # Delegate to the inherited default digest over this manager's static
        # tables for the current database dialect.
        return self._defaultSchemaDigest(self._static, self._db.dialect)