Coverage for python/lsst/daf/butler/registry/bridge/monolithic.py : 82%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("MonolithicDatastoreRegistryBridgeManager", "MonolithicDatastoreRegistryBridge")
25from collections import namedtuple
26from contextlib import contextmanager
27import copy
28from typing import cast, Dict, Iterable, Iterator, List, Type, TYPE_CHECKING
30import sqlalchemy
32from lsst.daf.butler import DatasetRef, ddl, NamedValueSet
33from lsst.daf.butler.registry.interfaces import (
34 DatasetIdRef,
35 DatastoreRegistryBridge,
36 DatastoreRegistryBridgeManager,
37 FakeDatasetRef,
38)
39from lsst.daf.butler.registry.bridge.ephemeral import EphemeralDatastoreRegistryBridge
41if TYPE_CHECKING:
42 from lsst.daf.butler import DimensionUniverse
43 from lsst.daf.butler.registry.interfaces import (
44 Database,
45 DatasetRecordStorageManager,
46 OpaqueTableStorageManager,
47 StaticTablesContext,
48 )
51_TablesTuple = namedtuple(
52 "_TablesTuple",
53 [
54 "dataset_location",
55 "dataset_location_trash",
56 ]
57)
def _makeTableSpecs(datasets: Type[DatasetRecordStorageManager]) -> _TablesTuple:
    """Construct specifications for tables used by the monolithic datastore
    bridge classes.

    Parameters
    ----------
    datasets : subclass of `DatasetRecordStorageManager`
        Manager class for datasets; used only to create foreign key fields.

    Returns
    -------
    specs : `_TablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # We want the dataset_location and dataset_location_trash tables
    # to have the same definition, aside from the behavior of their link
    # to the dataset table: the trash table has no foreign key constraint.
    dataset_location_spec = ddl.TableSpec(
        doc=(
            "A table that provides information on whether a dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the dataset is present in that "
            "Datastore. "
        ),
        fields=NamedValueSet([
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ]),
    )
    # Live table: dataset_id is a real foreign key, so rows are tied to an
    # existing dataset record.
    dataset_location = copy.deepcopy(dataset_location_spec)
    datasets.addDatasetForeignKey(dataset_location, primaryKey=True)
    # Trash table: same column, but no constraint, so trashed rows can
    # outlive the dataset record itself.
    dataset_location_trash = copy.deepcopy(dataset_location_spec)
    datasets.addDatasetForeignKey(dataset_location_trash, primaryKey=True, constraint=False)
    return _TablesTuple(
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )
class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
    """An implementation of `DatastoreRegistryBridge` backed by a single pair
    of tables shared by all non-ephemeral datastores.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    db : `Database`
        Object providing a database connection and generic helpers for
        queries and transactions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    """
    def __init__(self, datastoreName: str, *, db: Database, tables: _TablesTuple):
        super().__init__(datastoreName)
        self._db = db
        self._tables = tables

    def _refsToRows(self, refs: Iterable[DatasetIdRef]) -> List[dict]:
        """Convert dataset references into row dictionaries for the location
        tables.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` or `FakeDatasetRef` ]
            Datasets to transform.

        Returns
        -------
        rows : `list` [ `dict` ]
            One dictionary per ref, with "datastore_name" and "dataset_id"
            keys.
        """
        name = self.datastoreName
        return [{"datastore_name": name, "dataset_id": ref.getCheckedId()} for ref in refs]

    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        rows = self._refsToRows(refs)
        self._db.insert(self._tables.dataset_location, *rows)

    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        # TODO: the self.check() round-trip could be avoided with
        # INSERT ... SELECT / DELETE ... WHERE IN statements, but the
        # Database interface doesn't support those kinds of queries yet.
        rows = self._refsToRows(self.check(refs))
        # Delete-then-insert must happen atomically so a failure can't
        # lose track of a dataset.
        with self._db.transaction():
            self._db.delete(self._tables.dataset_location, ["datastore_name", "dataset_id"], *rows)
            self._db.insert(self._tables.dataset_location_trash, *rows)

    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        # Docstring inherited from DatastoreRegistryBridge
        refsById = {ref.getCheckedId(): ref for ref in refs}
        location = self._tables.dataset_location
        condition = sqlalchemy.sql.and_(
            location.columns.datastore_name == self.datastoreName,
            location.columns.dataset_id.in_(refsById.keys())
        )
        sql = sqlalchemy.sql.select(
            [location.columns.dataset_id]
        ).select_from(location).where(condition)
        # Yield only the refs actually present in this datastore's table.
        for row in self._db.query(sql).fetchall():
            yield refsById[row["dataset_id"]]

    @contextmanager
    def emptyTrash(self) -> Iterator[Iterable[DatasetIdRef]]:
        # Docstring inherited from DatastoreRegistryBridge
        trash = self._tables.dataset_location_trash
        sql = sqlalchemy.sql.select(
            [trash.columns.dataset_id]
        ).select_from(trash).where(trash.columns.datastore_name == self.datastoreName)
        # Materialize the query results as row dicts up front so the same
        # list can drive both the caller's iteration and the final delete.
        rows = [
            {"dataset_id": row["dataset_id"], "datastore_name": self.datastoreName}
            for row in self._db.query(sql).fetchall()
        ]
        yield (FakeDatasetRef(row["dataset_id"]) for row in rows)
        # Reached only if the caller's block exited without an exception:
        # purge those rows from the trash table.
        self._db.delete(trash, ["dataset_id", "datastore_name"], *rows)
class MonolithicDatastoreRegistryBridgeManager(DatastoreRegistryBridgeManager):
    """An implementation of `DatastoreRegistryBridgeManager` backed by a
    single pair of tables shared by all non-ephemeral datastores.

    Parameters
    ----------
    db : `Database`
        Object providing a database connection and generic helpers for
        queries and transactions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    """
    def __init__(self, *, db: Database, tables: _TablesTuple,
                 opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        super().__init__(opaque=opaque, universe=universe)
        self._db = db
        self._tables = tables
        # Ephemeral bridges are purely in-memory; keyed by datastore name.
        self._ephemeral: Dict[str, EphemeralDatastoreRegistryBridge] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        # Docstring inherited from DatastoreRegistryBridgeManager
        specs = _makeTableSpecs(datasets)
        tables = context.addTableTuple(specs)
        return cls(db=db, tables=cast(_TablesTuple, tables), opaque=opaque, universe=universe)

    def refresh(self) -> None:
        # Docstring inherited from DatastoreRegistryBridgeManager
        # This implementation has no in-Python state that depends on which
        # datastores exist, so there's nothing to do.
        pass

    def register(self, name: str, *, ephemeral: bool = False) -> DatastoreRegistryBridge:
        # Docstring inherited from DatastoreRegistryBridgeManager
        if not ephemeral:
            return MonolithicDatastoreRegistryBridge(name, db=self._db, tables=self._tables)
        # Reuse an existing in-memory bridge for this name if we have one.
        return self._ephemeral.setdefault(name, EphemeralDatastoreRegistryBridge(name))

    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from DatastoreRegistryBridgeManager
        location = self._tables.dataset_location
        sql = sqlalchemy.sql.select(
            [location.columns.datastore_name]
        ).select_from(location).where(
            location.columns.dataset_id == ref.getCheckedId()
        )
        for row in self._db.query(sql).fetchall():
            yield row[location.columns.datastore_name]
        # Ephemeral datastores are tracked in memory, not in the table.
        for name, bridge in self._ephemeral.items():
            if ref in bridge:
                yield name