Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 91%

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import copy
import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import (
    DatasetRecordStorage,
    DatasetRecordStorageManager,
    VersionTuple,
)

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:  # coverage: partial branch; never true at runtime
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


# This must be updated on every schema change.
_VERSION = VersionTuple(0, 1, 0)


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    """

    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
        self._db = db
        self._collections = collections
        self._static = static
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        return cls(db=db, collections=collections, static=static)
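
    # A sketch of how ``initialize`` is typically driven, assuming a
    # ``Database`` instance and its ``declareStaticTables`` context manager
    # (the surrounding variable names are illustrative):
    #
    #     with db.declareStaticTables(create=True) as context:
    #         manager = ByDimensionsDatasetRecordStorageManager.initialize(
    #             db, context, collections=collections, universe=universe,
    #         )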

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)
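
    # For example, another table spec that needs to reference datasets could
    # be extended like this (a hedged sketch; the empty spec is hypothetical,
    # only ``addDatasetForeignKey`` comes from this class):
    #
    #     spec = ddl.TableSpec(fields=[])
    #     ByDimensionsDatasetRecordStorageManager.addDatasetForeignKey(
    #         spec, name="dataset", onDelete="CASCADE")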

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId
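
    # ``refresh`` rebuilds both in-memory maps wholesale, so it is the hook to
    # call after another client may have registered new dataset types; a
    # sketch (``universe`` is a hypothetical ``DimensionUniverse``):
    #
    #     manager.refresh(universe=universe)
    #     for datasetType in manager:   # __iter__ walks the refreshed cache
    #         print(datasetType.name)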

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        storage = self._byName.get(compositeName)
        if storage is not None and componentName is not None:
            componentStorage = copy.copy(storage)
            componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
            return componentStorage
        else:
            return storage
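
    # Component lookups reuse the parent composite's storage; e.g., assuming a
    # registered composite named "calexp" (illustrative only):
    #
    #     parent = manager.find("calexp")     # storage for the composite
    #     wcs = manager.find("calexp.wcs")    # shallow copy with a component
    #                                         # DatasetType attached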

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():  # coverage: branch never taken in tests
            raise ValueError("Component dataset types can not be stored in registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is None:
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            assert row is not None
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        return storage, inserted
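
    # Registration is idempotent via ``Database.sync``; a sketch of the
    # expected behavior (``datasetType`` is a hypothetical, fully-built
    # ``DatasetType``):
    #
    #     storage, inserted = manager.register(datasetType)  # inserted == True
    #     storage, inserted = manager.register(datasetType)  # inserted == False
    #
    # Re-registering the same name with different dimensions or storage class
    # raises ``ConflictingDefinitionError`` instead.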

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:  # coverage: branch never taken in tests
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )
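
    # A sketch of resolving an integer dataset id back to a full ``DatasetRef``
    # (the id value is illustrative):
    #
    #     ref = manager.getDatasetRef(42, universe=universe)
    #     if ref is not None:
    #         print(ref.datasetType.name, ref.dataId, ref.run)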

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._static)