# python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Dict,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table
    for each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least)
    to try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign
      key directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever
      forms of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.
    A rough sketch of the resulting schema appears in the comment below.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`, used to decode dataset type
        dimensions when refreshing from the database.
    """
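
    # Illustrative schema sketch (assumption: the authoritative table specs
    # are built in ``.tables``; the columns listed here are only those this
    # module actually touches):
    #
    #   dataset_type(id, name, dimensions_encoded, storage_class)
    #   dataset(id, dataset_type_id, <run foreign key>, ...)
    #   dataset_composition(parent_dataset_id, component_name,
    #                       component_dataset_id, simple)
    #   one dynamic dataset-collection table per dimensions group, named by
    #   ``makeDynamicTableName(datasetType)``
    #
    # ``dataset_type.id`` is the surrogate key described in the docstring
    # above; it is cached in ``self._byId`` and stored on each per-type
    # storage object.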

    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple,
                 universe: DimensionUniverse):
        self._db = db
        self._collections = collections
        self._static = static
        # Retained so ``getDatasetRef`` can pass it to ``refresh`` when it
        # encounters a dataset type registered by another client.
        self._universe = universe
        # In-memory caches of per-dataset-type storage, keyed by dataset
        # type name and by surrogate dataset type ID, respectively.
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static = context.addTableTuple(specs)
        return cls(db=db, collections=collections, static=static, universe=universe)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        # Swap in the new caches only after the full query succeeds, so a
        # failure partway through leaves the previous caches intact.
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Insert the definition if it is new; otherwise fetch the
            # existing row, with ``sync`` checking that the compared fields
            # agree with the definition we were given.
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        if inserted and datasetType.isComposite:
            # Register component dataset types recursively so newly inserted
            # composites can always be disassembled.
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted
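
    # Illustrative only: ``register`` is idempotent, so registering the same
    # definition twice returns the cached storage with ``inserted=False``:
    #
    #     storage, inserted = manager.register(datasetType)   # may insert
    #     again, inserted2 = manager.register(datasetType)    # cache hit
    #     assert again is storage and not inserted2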

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Another client may have registered this dataset type since our
            # last refresh; reload the caches and try again.
            self.refresh(universe=self._universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )

    def attachComponents(self, composites: Iterable[Tuple[DatasetRef, Dict[str, DatasetRef]]]
                         ) -> Iterator[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        rows = []
        results = []
        for parentRef, components in composites:
            rows.extend(
                {
                    "component_name": componentName,
                    "component_dataset_id": componentRef.getCheckedId(),
                    "parent_dataset_id": parentRef.getCheckedId(),
                    # A "simple" component shares its parent's data ID and
                    # run, so it can later be rebuilt without another query.
                    "simple": (parentRef.dataId == componentRef.dataId
                               and parentRef.run == componentRef.run),
                }
                for componentName, componentRef in components.items()
            )
            results.append(parentRef.resolved(parentRef.id, parentRef.run, components=components))
        self._db.insert(self._static.dataset_composition, *rows)
        yield from results
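
    # Round-trip note: rows written with ``simple=True`` above let
    # ``fetchComponents`` rebuild component refs from the parent alone,
    # avoiding an extra ``getDatasetRef`` query per component.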

    def fetchComponents(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset_composition.columns.component_dataset_id.label("id"),
                self._static.dataset_composition.columns.component_name.label("name"),
                self._static.dataset_composition.columns.simple.label("simple"),
            ]
        ).select_from(
            self._static.dataset_composition
        ).where(
            self._static.dataset_composition.columns.parent_dataset_id == ref.getCheckedId()
        )
        components = {}
        for row in self._db.query(sql).fetchall():
            if row["simple"]:
                # Simple components share the parent's data ID and run, so
                # the ref can be built directly from the parent's fields.
                datasetType = ref.datasetType.makeComponentDatasetType(row["name"])
                components[row["name"]] = DatasetRef(datasetType, ref.dataId, id=row["id"], run=ref.run)
            else:
                # Otherwise fall back to a full lookup by dataset ID.
                components[row["name"]] = self.getDatasetRef(row["id"])
        return ref.resolved(id=ref.id, run=ref.run, components=components)
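

# Minimal usage sketch (illustrative; assumes ``db``, ``context``,
# ``collections``, and ``universe`` objects obtained elsewhere from the
# Registry machinery):
#
#     manager = ByDimensionsDatasetRecordStorageManager.initialize(
#         db, context, collections=collections, universe=universe)
#     manager.refresh(universe=universe)
#     storage, inserted = manager.register(datasetType)
#     ref = manager.getDatasetRef(someDatasetId)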