Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py: 93%

from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorage",)

from typing import (
    Iterable,
    Iterator,
    Optional,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    CollectionType,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ExpandedDataCoordinate,
    Quantum,
)
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage
from lsst.daf.butler.registry.simpleQuery import SimpleQuery, Select

if TYPE_CHECKING:
    from ...interfaces import CollectionManager, CollectionRecord, Database, RunRecord
    from .tables import StaticDatasetTablesTuple


class ByDimensionsDatasetRecordStorage(DatasetRecordStorage):
    """Dataset record storage implementation paired with
    `ByDimensionsDatasetRecordStorageManager`; see that class for more
    information.

    Instances of this class should never be constructed directly; use
    `DatasetRecordStorageManager.register` instead.
    """
    def __init__(self, *, datasetType: DatasetType,
                 db: Database,
                 dataset_type_id: int,
                 collections: CollectionManager,
                 static: StaticDatasetTablesTuple,
                 dynamic: sqlalchemy.sql.Table):
        super().__init__(datasetType=datasetType)
        self._dataset_type_id = dataset_type_id
        self._db = db
        self._collections = collections
        self._static = static
        self._dynamic = dynamic
        self._runKeyColumn = collections.getRunForeignKeyName()
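
    # ``static`` bundles the tables shared by all dataset types (see
    # ``.tables`` for the definitions), while ``dynamic`` is this storage's
    # own tags table, whose columns include one per required dimension of
    # the dataset type, as the ``insert`` and ``select`` methods below rely
    # on.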

    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        # Docstring inherited from DatasetRecordStorage.
        staticRow = {
            "dataset_type_id": self._dataset_type_id,
            self._runKeyColumn: run.key,
            "quantum_id": quantum.id if quantum is not None else None,
        }
        dataIds = list(dataIds)
        # Insert into the static dataset table, generating autoincrement
        # dataset_id values.
        with self._db.transaction():
            datasetIds = self._db.insert(self._static.dataset, *([staticRow]*len(dataIds)),
                                         returnIds=True)
            # Combine the generated dataset_id values and data ID fields to
            # form rows to be inserted into the dynamic table.
            protoDynamicRow = {
                "dataset_type_id": self._dataset_type_id,
                self._collections.getCollectionForeignKeyName(): run.key,
            }
            dynamicRows = [
                dict(protoDynamicRow, dataset_id=dataset_id, **dataId.byName())
                for dataId, dataset_id in zip(dataIds, datasetIds)
            ]
            # Insert those rows into the dynamic table.  This is where we'll
            # get any unique constraint violations.
            self._db.insert(self._dynamic, *dynamicRows)
        for dataId, datasetId in zip(dataIds, datasetIds):
            yield DatasetRef(
                datasetType=self.datasetType,
                dataId=dataId,
                id=datasetId,
                run=run.name,
            )
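
    # Note that ``insert`` is a generator: the database writes above run
    # only when the returned iterator is consumed, so callers that need the
    # inserts to happen eagerly should drain it (e.g. with ``list``).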

    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorage.
        assert dataId.graph == self.datasetType.dimensions
        sql = self.select(collection=collection, dataId=dataId, id=Select, run=Select).combine()
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        return DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=row["id"],
            run=self._collections[row[self._runKeyColumn]].name
        )

    def delete(self, datasets: Iterable[DatasetRef]):
        # Docstring inherited from DatasetRecordStorage.
        # Only delete from common dataset table; ON DELETE foreign key clauses
        # will handle the rest.
        self._db.delete(
            self._static.dataset,
            ["id"],
            *[{"id": dataset.getCheckedId()} for dataset in datasets],
        )

    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]):
        # Docstring inherited from DatasetRecordStorage.
        if collection.type is not CollectionType.TAGGED:
            raise TypeError(f"Cannot associate into collection '{collection}' "
                            f"of type {collection.type.name}; must be TAGGED.")
        protoRow = {
            self._collections.getCollectionForeignKeyName(): collection.key,
            "dataset_type_id": self._dataset_type_id,
        }
        rows = []
        for dataset in datasets:
            row = dict(protoRow, dataset_id=dataset.getCheckedId())
            for dimension, value in dataset.dataId.items():
                row[dimension.name] = value
            rows.append(row)
        self._db.replace(self._dynamic, *rows)

    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]):
        # Docstring inherited from DatasetRecordStorage.
        if collection.type is not CollectionType.TAGGED:
            raise TypeError(f"Cannot disassociate from collection '{collection}' "
                            f"of type {collection.type.name}; must be TAGGED.")
        rows = [
            {
                "dataset_id": dataset.getCheckedId(),
                self._collections.getCollectionForeignKeyName(): collection.key
            }
            for dataset in datasets
        ]
        self._db.delete(self._dynamic, ["dataset_id", self._collections.getCollectionForeignKeyName()],
                        *rows)

    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        # Docstring inherited from DatasetRecordStorage.
        assert collection.type is not CollectionType.CHAINED
        query = SimpleQuery()
        # We always include the _static.dataset table, and we can always get
        # the id and run fields from that; passing them as kwargs here tells
        # SimpleQuery to handle them whether they're constraints or results.
        # We always constrain the dataset_type_id here as well.
        query.join(
            self._static.dataset,
            id=id,
            dataset_type_id=self._dataset_type_id,
            **{self._runKeyColumn: run}
        )
        # If and only if the collection is a RUN, we constrain it in the
        # static table (and also in the dynamic table below).
        if collection.type is CollectionType.RUN:
            query.where.append(self._static.dataset.columns[self._runKeyColumn]
                               == collection.key)
        # We get or constrain the data ID from the dynamic table, but that's
        # multiple columns, not one, so we need to transform the one Select.Or
        # argument into a dictionary of them.
        if dataId is Select:
            kwargs = {dim.name: Select for dim in self.datasetType.dimensions.required}
        else:
            kwargs = dict(dataId.byName())
        # We always constrain (never retrieve) the collection from the dynamic
        # table.
        kwargs[self._collections.getCollectionForeignKeyName()] = collection.key
        # And now we finally join in the dynamic table.
        query.join(
            self._dynamic,
            onclause=self._static.dataset.columns.id == self._dynamic.columns.dataset_id,
            **kwargs
        )
        return query
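
    # The ``Select`` sentinel convention above is what ``find`` relies on:
    # passing ``id=Select, run=Select`` retrieves those columns, while
    # passing a concrete ``dataId`` (and ``collection``) constrains the
    # query instead of selecting from it.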

    def getDataId(self, id: int) -> DataCoordinate:
        # Docstring inherited from DatasetRecordStorage.
        # This query could return multiple rows (one for each tagged
        # collection the dataset is in, plus one for its run collection),
        # and we don't care which of those we get.
        sql = self._dynamic.select().where(
            sqlalchemy.sql.and_(
                self._dynamic.columns.dataset_id == id,
                self._dynamic.columns.dataset_type_id == self._dataset_type_id
            )
        ).limit(1)
        row = self._db.query(sql).fetchone()
        assert row is not None, "Should be guaranteed by caller and foreign key constraints."
        return DataCoordinate.standardize(
            {dimension: row[dimension.name] for dimension in self.datasetType.dimensions.required},
            graph=self.datasetType.dimensions
        )
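

# Usage sketch (illustrative only): per the class docstring, instances come
# from the manager rather than direct construction.  ``manager`` below is a
# hypothetical, already-configured ``ByDimensionsDatasetRecordStorageManager``,
# and the exact ``register`` signature may differ between daf_butler versions.
#
#     storage = manager.register(datasetType)
#     refs = list(storage.insert(runRecord, dataIds))  # drain the generator
#     found = storage.find(runRecord, refs[0].dataId)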