Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 93%

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import copy
import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError, OrphanedRecordError
from lsst.daf.butler.registry.interfaces import (
    DatasetRecordStorage,
    DatasetRecordStorageManager,
    VersionTuple,
)

from .tables import (
    makeStaticTableSpecs,
    addDatasetForeignKey,
    makeCalibTableName,
    makeCalibTableSpec,
    makeTagTableName,
    makeTagTableSpec,
)
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


# This has to be updated on every schema change.
_VERSION = VersionTuple(0, 3, 0)
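# (On load, this value is compared by the registry's versioning machinery
# against the version recorded in the database; the VersionedExtension
# methods at the bottom of this class are the hooks used for that check.)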


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
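
    Examples
    --------
    A minimal construction sketch, assuming ``db``, ``collections``,
    ``universe``, and ``datasetType`` already exist and that
    ``db.declareStaticTables`` yields the `StaticTablesContext` this manager
    expects (illustrative wiring, not a prescribed recipe)::

        with db.declareStaticTables(create=True) as context:
            manager = ByDimensionsDatasetRecordStorageManager.initialize(
                db, context, collections=collections, universe=universe)
        storage, inserted = manager.register(datasetType)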
82 """
83 def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
84 self._db = db
85 self._collections = collections
86 self._static = static
87 self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
88 self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}
90 @classmethod
91 def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
92 universe: DimensionUniverse) -> DatasetRecordStorageManager:
93 # Docstring inherited from DatasetRecordStorageManager.
94 specs = makeStaticTableSpecs(type(collections), universe=universe)
95 static: StaticDatasetTablesTuple = context.addTableTuple(specs) # type: ignore
96 return cls(db=db, collections=collections, static=static)
98 @classmethod
99 def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
100 constraint: bool = True, onDelete: Optional[str] = None,
101 **kwargs: Any) -> ddl.FieldSpec:
102 # Docstring inherited from DatasetRecordStorageManager.
103 return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)
105 def refresh(self, *, universe: DimensionUniverse) -> None:
106 # Docstring inherited from DatasetRecordStorageManager.
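        # Rebuild both caches from scratch: scan the static dataset_type
        # table, then look up (never create) the per-dataset-type tag and
        # calib tables recorded in each row.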
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            calibTableName = row[c.calibration_association_table]
            datasetType = DatasetType(name, dimensions, row[c.storage_class],
                                      isCalibration=(calibTableName is not None))
            tags = self._db.getExistingTable(row[c.tag_association_table],
                                             makeTagTableSpec(datasetType, type(self._collections)))
            if calibTableName is not None:
                calibs = self._db.getExistingTable(row[c.calibration_association_table],
                                                   makeCalibTableSpec(datasetType, type(self._collections),
                                                                      self._db.getTimespanRepresentation()))
            else:
                calibs = None
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, tags=tags, calibs=calibs,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId

    def remove(self, name: str, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        if componentName is not None:
            raise ValueError(f"Cannot delete a dataset type of a component of a composite (given {name})")

        # Delete the row
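        # (foreign keys from tables that reference dataset_type make this
        # fail, below, if any datasets of this type still exist).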
        try:
            self._db.delete(self._static.dataset_type, ["name"], {"name": name})
        except sqlalchemy.exc.IntegrityError as e:
            raise OrphanedRecordError(f"Dataset type {name} cannot be removed."
                                      " It is associated with datasets that must be removed first.") from e

        # Now refresh everything -- removal is rare enough that this does
        # not need to be fast.
        self.refresh(universe=universe)

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
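        # Component dataset types are never stored directly; look up the
        # parent (composite) type and derive a component view of its storage.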
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        storage = self._byName.get(compositeName)
        if storage is not None and componentName is not None:
            componentStorage = copy.copy(storage)
            componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
            return componentStorage
        else:
            return storage

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():
            raise ValueError("Component dataset types cannot be stored in the registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is None:
            tagTableName = makeTagTableName(datasetType)
            calibTableName = makeCalibTableName(datasetType) if datasetType.isCalibration() else None
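            # Database.sync is a concurrency-safe get-or-create: it inserts
            # the row if it is missing, and otherwise verifies that the
            # "compared" fields match what is already there.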
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                extra={
                    "tag_association_table": tagTableName,
                    "calibration_association_table": calibTableName,
                },
                returning=["id", "tag_association_table"],
            )
            assert row is not None
            tags = self._db.ensureTableExists(
                tagTableName,
                makeTagTableSpec(datasetType, type(self._collections)),
            )
            if calibTableName is not None:
                calibs = self._db.ensureTableExists(
                    calibTableName,
                    makeCalibTableSpec(datasetType, type(self._collections),
                                       self._db.getTimespanRepresentation()),
                )
            else:
                calibs = None
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, tags=tags, calibs=calibs,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
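        # Fetch only the dataset type ID and run key here; the data ID is
        # reconstructed afterwards by the per-dataset-type storage object.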
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
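        # A miss here means some other process registered a new dataset type
        # since our last refresh; reload the cache and try again.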
        if recordsForType is None:
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._static, self._db.dialect)