Coverage for python/lsst/daf/butler/registry/bridge/monolithic.py : 84%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("MonolithicDatastoreRegistryBridgeManager", "MonolithicDatastoreRegistryBridge")
25from collections import namedtuple
26from contextlib import contextmanager
27import copy
28from typing import cast, Dict, Iterable, Iterator, List, Optional, Type, TYPE_CHECKING
30import sqlalchemy
32from lsst.daf.butler import DatasetRef, ddl, NamedValueSet
33from lsst.daf.butler.registry.interfaces import (
34 DatasetIdRef,
35 DatastoreRegistryBridge,
36 DatastoreRegistryBridgeManager,
37 FakeDatasetRef,
38 VersionTuple,
39)
40from lsst.daf.butler.registry.bridge.ephemeral import EphemeralDatastoreRegistryBridge
if TYPE_CHECKING:
43 from lsst.daf.butler import DimensionUniverse
44 from lsst.daf.butler.registry.interfaces import (
45 Database,
46 DatasetRecordStorageManager,
47 OpaqueTableStorageManager,
48 StaticTablesContext,
49 )
52_TablesTuple = namedtuple(
53 "_TablesTuple",
54 [
55 "dataset_location",
56 "dataset_location_trash",
57 ]
58)
# Schema version of the bridge tables; this has to be updated on every
# schema change.
_VERSION = VersionTuple(0, 2, 0)
def _makeTableSpecs(datasets: Type[DatasetRecordStorageManager]) -> _TablesTuple:
    """Construct specifications for tables used by the monolithic datastore
    bridge classes.

    Parameters
    ----------
    datasets : subclass of `DatasetRecordStorageManager`
        Manager class for datasets; used only to create foreign key fields.

    Returns
    -------
    specs : `_TablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # We want the dataset_location and dataset_location_trash tables
    # to have the same definition, aside from the behavior of their link
    # to the dataset table: the trash table has no foreign key constraint.
    dataset_location_spec = ddl.TableSpec(
        doc=(
            "A table that provides information on whether a dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the dataset is present in that "
            "Datastore. "
        ),
        fields=NamedValueSet([
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ]),
    )
    # Both tables share the spec above; only the dataset foreign key differs.
    dataset_location = copy.deepcopy(dataset_location_spec)
    datasets.addDatasetForeignKey(dataset_location, primaryKey=True)
    dataset_location_trash = copy.deepcopy(dataset_location_spec)
    # constraint=False: trash rows may reference datasets that no longer exist.
    datasets.addDatasetForeignKey(dataset_location_trash, primaryKey=True, constraint=False)
    return _TablesTuple(
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )
class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
    """An implementation of `DatastoreRegistryBridge` that uses the same two
    tables for all non-ephemeral datastores.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    db : `Database`
        Object providing a database connection and generic abstractions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    """
    def __init__(self, datastoreName: str, *, db: Database, tables: _TablesTuple):
        super().__init__(datastoreName)
        self._db = db
        self._tables = tables

    def _refsToRows(self, refs: Iterable[DatasetIdRef]) -> List[dict]:
        """Transform an iterable of `DatasetRef` or `FakeDatasetRef` objects to
        a list of dictionaries that match the schema of the tables used by this
        class.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` or `FakeDatasetRef` ]
            Datasets to transform.

        Returns
        -------
        rows : `list` [ `dict` ]
            List of dictionaries, with "datastore_name" and "dataset_id" keys.
        """
        name = self.datastoreName
        return [{"datastore_name": name, "dataset_id": ref.getCheckedId()} for ref in refs]

    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        rows = self._refsToRows(refs)
        self._db.insert(self._tables.dataset_location, *rows)

    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        # Only rows that actually exist for this datastore are deleted.
        present = self._refsToRows(self.check(refs))
        self._db.delete(self._tables.dataset_location, ["datastore_name", "dataset_id"], *present)

    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        # TODO: avoid self.check() call via queries like
        #     INSERT INTO dataset_location_trash
        #         SELECT datastore_name, dataset_id FROM dataset_location
        #             WHERE datastore_name=? AND dataset_id IN (?);
        #     DELETE FROM dataset_location
        #         WHERE datastore_name=? AND dataset_id IN (?);
        # ...but the Database interface doesn't support those kinds of queries
        # right now.
        present = self._refsToRows(self.check(refs))
        # Delete-then-insert must be atomic so a failure cannot lose rows.
        with self._db.transaction():
            self._db.delete(self._tables.dataset_location, ["datastore_name", "dataset_id"], *present)
            self._db.insert(self._tables.dataset_location_trash, *present)

    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        # Docstring inherited from DatastoreRegistryBridge
        table = self._tables.dataset_location
        byId = {ref.getCheckedId(): ref for ref in refs}
        condition = sqlalchemy.sql.and_(
            table.columns.datastore_name == self.datastoreName,
            table.columns.dataset_id.in_(byId.keys())
        )
        sql = sqlalchemy.sql.select([table.columns.dataset_id]).select_from(table).where(condition)
        # Yield the caller's original ref objects, not new ones.
        for row in self._db.query(sql).fetchall():
            yield byId[row["dataset_id"]]

    @contextmanager
    def emptyTrash(self) -> Iterator[Iterable[DatasetIdRef]]:
        # Docstring inherited from DatastoreRegistryBridge
        trash = self._tables.dataset_location_trash
        sql = sqlalchemy.sql.select(
            [trash.columns.dataset_id]
        ).select_from(trash).where(trash.columns.datastore_name == self.datastoreName)
        # Materialize the query results up front as rows we can later pass
        # back to delete().
        rows = [
            {"dataset_id": row["dataset_id"], "datastore_name": self.datastoreName}
            for row in self._db.query(sql).fetchall()
        ]
        # Hand the caller a generator over the trashed datasets.
        yield (FakeDatasetRef(row["dataset_id"]) for row in rows)
        # We only get here if the caller's block raised no exception; purge
        # those rows from the trash table.
        self._db.delete(trash, ["dataset_id", "datastore_name"], *rows)
class MonolithicDatastoreRegistryBridgeManager(DatastoreRegistryBridgeManager):
    """An implementation of `DatastoreRegistryBridgeManager` that uses the same
    two tables for all non-ephemeral datastores.

    Parameters
    ----------
    db : `Database`
        Object providing a database connection and generic abstractions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    """
    def __init__(self, *, db: Database, tables: _TablesTuple,
                 opaque: OpaqueTableStorageManager, universe: DimensionUniverse,
                 datasetIdColumnType: type):
        super().__init__(opaque=opaque, universe=universe, datasetIdColumnType=datasetIdColumnType)
        self._db = db
        self._tables = tables
        # Ephemeral bridges live purely in memory, keyed by datastore name.
        self._ephemeral: Dict[str, EphemeralDatastoreRegistryBridge] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        # Docstring inherited from DatastoreRegistryBridge
        specs = _makeTableSpecs(datasets)
        tables = cast(_TablesTuple, context.addTableTuple(specs))
        return cls(db=db, tables=tables, opaque=opaque, universe=universe,
                   datasetIdColumnType=datasets.getIdColumnType())

    def refresh(self) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        # This implementation has no in-Python state that depends on which
        # datastores exist, so there's nothing to do.
        pass

    def register(self, name: str, *, ephemeral: bool = False) -> DatastoreRegistryBridge:
        # Docstring inherited from DatastoreRegistryBridge
        if not ephemeral:
            return MonolithicDatastoreRegistryBridge(name, db=self._db, tables=self._tables)
        # Reuse an existing ephemeral bridge for this name if one was made.
        return self._ephemeral.setdefault(name, EphemeralDatastoreRegistryBridge(name))

    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from DatastoreRegistryBridge
        table = self._tables.dataset_location
        nameColumn = table.columns.datastore_name
        sql = sqlalchemy.sql.select([nameColumn]).select_from(table).where(
            table.columns.dataset_id == ref.getCheckedId()
        )
        for row in self._db.query(sql).fetchall():
            yield row[nameColumn]
        # Ephemeral bridges are not in SQL; ask each one in memory.
        for name, bridge in self._ephemeral.items():
            if ref in bridge:
                yield name

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._tables, self._db.dialect)