Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 91%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import copy
import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    # Imported only for type annotations; avoids import cycles at runtime.
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple
class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    """
    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
        self._db = db
        self._collections = collections
        self._static = static
        # In-memory caches of per-dataset-type storage objects, keyed by
        # dataset type name and by its surrogate integer ID, respectively.
        # Populated by `refresh` and `register`.
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        return cls(db=db, collections=collections, static=static)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        # Rebuild both caches from scratch in local variables, then swap them
        # in at the end, so a failure partway through cannot leave the caches
        # in a partially-updated state.
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            # Each group of dataset types with the same dimensions shares a
            # dynamically-created table; look it up (it must already exist).
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                               makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        compositeName, componentName = DatasetType.splitDatasetTypeName(name)
        storage = self._byName.get(compositeName)
        if storage is not None and componentName is not None:
            # Component dataset types share the parent's records; return a
            # shallow copy of the parent's storage with the component's
            # dataset type substituted in.
            componentStorage = copy.copy(storage)
            componentStorage.datasetType = storage.datasetType.makeComponentDatasetType(componentName)
            return componentStorage
        else:
            return storage

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():
            raise ValueError("Component dataset types can not be stored in registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Not in the cache; insert-or-compare against the database.
            # `sync` raises if an existing row disagrees with `compared`.
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            assert row is not None
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            # Cached definition must match the one being registered.
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                f"with database definition {storage.datasetType}.")
            inserted = False
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        """Iterate over all dataset types currently known to this manager."""
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Cache may be stale (e.g. another client registered this dataset
            # type after our last refresh); refresh and retry once.
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )