Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple
class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    """

    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
        self._db = db
        self._collections = collections
        self._static = static
        # In-memory caches of per-dataset-type storage objects, keyed by
        # dataset type name and by its surrogate integer ID, respectively.
        # Populated by refresh() and register().
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        return cls(db=db, collections=collections, static=static)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint,
                                    **kwargs)

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        # Build replacement caches first and swap them in atomically at the
        # end, so a failure part-way through leaves the previous (consistent)
        # caches in place.
        byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            # The dynamic (per-dimensions-group) table must already exist if
            # a row for this dataset type does; look it up rather than
            # attempting to create it.
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType,
                                                                     type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Not in the cache: insert-or-verify the definition row in the
            # database, keyed by name and compared on dimensions and storage
            # class (db.sync raises on a conflicting existing definition).
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            assert row is not None
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            # Cached: just verify the given definition matches the one we
            # already have; a mismatch is a conflicting redefinition.
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        if inserted and datasetType.isComposite:
            # Newly-registered composites also register each of their
            # component dataset types (recursively, via this method).
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        """Iterate over all dataset types known to this manager."""
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Another client may have registered this dataset type since our
            # last refresh; reload the caches and try once more.
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )