Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py : 92%

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table
    for each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least)
    to try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign
      key directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever
      forms of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`; needed to decode the
        encoded dimensions stored with each dataset type.
    """
    def __init__(self, *, db: Database, collections: CollectionManager,
                 static: StaticDatasetTablesTuple, universe: DimensionUniverse):
        self._db = db
        self._collections = collections
        self._static = static
        # Retained so internal calls to ``refresh`` (see ``getDatasetRef``)
        # can supply the universe without involving the caller.
        self._universe = universe
        # In-memory caches of DatasetRecordStorage objects, keyed by dataset
        # type name and by surrogate dataset type id; populated eagerly by
        # ``refresh`` and incrementally by ``register``.
        self._byName = {}
        self._byId = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static = context.addTableTuple(specs)
        return cls(db=db, collections=collections, static=static, universe=universe)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)
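
    # ``refresh`` below implements the aggressive-caching choice described
    # in the class docstring: a single query fetches every dataset type
    # row, and both lookup dictionaries are rebuilt from scratch.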
    def refresh(self, *, universe: DimensionUniverse):
        # Docstring inherited from DatasetRecordStorageManager.
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            # Reconstruct the DatasetType from its stored name, encoded
            # dimensions, and storage class name.
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            # All dataset types with the same dimensions share one dynamic
            # dataset-collection table; look up the one for this type.
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        # Swap in the new caches only after the full reload succeeds.
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)
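
    # Note that ``find`` consults only the in-memory cache: a dataset type
    # registered by another client after the last reload will not be found
    # until ``refresh`` is called again.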

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Insert the dataset type row if it does not already exist,
            # comparing dimensions and storage class against any existing
            # definition with the same name.
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            # Create the shared dynamic table for this set of dimensions if
            # no dataset type with the same dimensions came before.
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        if inserted and datasetType.isComposite:
            # Recursively register component dataset types for composites.
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Cache miss: another client may have registered this dataset
            # type since our last reload, so refresh and try again.  (The
            # universe saved at construction is needed to decode dimensions.)
            self.refresh(universe=self._universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )
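
# Illustrative sketch (not part of the module): how a Registry-like caller
# might drive this manager.  ``db``, ``context``, ``collections``, and
# ``universe`` are assumed to come from the surrounding Registry setup; the
# method signatures are the ones defined above.
#
#     manager = ByDimensionsDatasetRecordStorageManager.initialize(
#         db, context, collections=collections, universe=universe)
#     manager.refresh(universe=universe)        # load all dataset types
#     storage, inserted = manager.register(datasetType)
#     ref = manager.getDatasetRef(42)           # None if no such dataset id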