Coverage for python/lsst/daf/butler/registry/collections/nameKey.py : 73%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["AggressiveNameKeyCollectionManager"]
25from collections import namedtuple
26from datetime import datetime
27from typing import (
28 Any,
29 Iterator,
30 Optional,
31 TYPE_CHECKING,
32)
34import sqlalchemy
36from ...core import ddl
37from ...core.timespan import Timespan, TIMESPAN_FIELD_SPECS
38from .._collectionType import CollectionType
39from ..interfaces import (
40 ChainedCollectionRecord,
41 CollectionManager,
42 CollectionRecord,
43 MissingCollectionError,
44 RunRecord,
45)
46from ..wildcards import CollectionSearch
48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 from .database import Database, StaticTablesContext
# Named tuple with one slot per static table managed by this module; used both
# for the table specifications below and for the SQLAlchemy table objects
# handed to the manager.
_TablesTuple = namedtuple("CollectionTablesTuple", "collection run collection_chain")

_TABLES_SPEC = _TablesTuple(
    # One row per collection of any type, keyed by its name.
    collection=ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                "name",
                dtype=sqlalchemy.String,
                length=64,
                primaryKey=True,
            ),
            ddl.FieldSpec(
                "type",
                dtype=sqlalchemy.SmallInteger,
                nullable=False,
            ),
        ],
    ),
    # Extra per-run columns (timespan and host); rows are removed together
    # with the corresponding ``collection`` row (ON DELETE CASCADE).
    run=ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                "name",
                dtype=sqlalchemy.String,
                length=64,
                primaryKey=True,
            ),
            TIMESPAN_FIELD_SPECS.begin,
            TIMESPAN_FIELD_SPECS.end,
            ddl.FieldSpec(
                "host",
                dtype=sqlalchemy.String,
                length=128,
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                "collection",
                source=("name",),
                target=("name",),
                onDelete="CASCADE",
            ),
        ],
    ),
    # Ordered children of chained collections; the primary key is the
    # (parent, index) pair.  ``dataset_type_name`` optionally restricts a
    # child to one dataset type; the record class in this module writes an
    # empty string there when there is no restriction.
    collection_chain=ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                "parent",
                dtype=sqlalchemy.String,
                length=64,
                primaryKey=True,
            ),
            ddl.FieldSpec(
                "index",
                dtype=sqlalchemy.SmallInteger,
                primaryKey=True,
            ),
            ddl.FieldSpec(
                "child",
                dtype=sqlalchemy.String,
                length=64,
                nullable=False,
            ),
            ddl.FieldSpec(
                "dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=True,
            ),
        ],
        foreignKeys=[
            # Deleting the parent collection removes its chain rows.
            ddl.ForeignKeySpec(
                "collection",
                source=("parent",),
                target=("name",),
                onDelete="CASCADE",
            ),
            # No cascade on the child side.
            ddl.ForeignKeySpec(
                "collection",
                source=("child",),
                target=("name",),
            ),
        ],
    ),
)
class NameKeyCollectionRecord(CollectionRecord):
    """A `CollectionRecord` whose primary/foreign key is simply the
    collection's string name.
    """

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        # The name doubles as the database key for this implementation.
        return self.name
class NameKeyRunRecord(RunRecord):
    """A `RunRecord` whose primary/foreign key is simply the run's string
    name.

    Parameters
    ----------
    db : `Database`
        Database interface used to persist changes made through `update`.
    name : `str`
        Name of the run collection.
    table : `sqlalchemy.schema.Table`
        Table object passed to ``db.update`` when the run is modified.
    host : `str`, optional
        Initial value of the `host` property.
    timespan : `Timespan`, optional
        Initial value of the `timespan` property.  If `None`, a `Timespan`
        with both bounds set to `None` is used.
    """

    def __init__(self, db: Database, name: str, *, table: sqlalchemy.schema.Table,
                 host: Optional[str] = None, timespan: Optional[Timespan[Optional[datetime]]] = None):
        super().__init__(name=name, type=CollectionType.RUN)
        self._db = db
        self._table = table
        self._host = host
        self._timespan = timespan if timespan is not None else Timespan(begin=None, end=None)

    def update(self, host: Optional[str] = None, timespan: Optional[Timespan[Optional[datetime]]] = None):
        # Docstring inherited from RunRecord.
        new_timespan = Timespan(begin=None, end=None) if timespan is None else timespan
        values = {
            "name": self.name,
            TIMESPAN_FIELD_SPECS.begin.name: new_timespan.begin,
            TIMESPAN_FIELD_SPECS.end.name: new_timespan.end,
            "host": host,
        }
        # NOTE(review): the "where" argument is passed as {"name": self.name};
        # confirm against Database.update whether the mapping value is meant
        # to be the column value or the key to look up in ``values``.
        n_updated = self._db.update(self._table, {"name": self.name}, values)
        if n_updated != 1:
            raise RuntimeError(f"Run update affected {n_updated} records; expected exactly one.")
        # Only touch the in-memory state once the database write is known to
        # have affected exactly one row.
        self._host = host
        self._timespan = new_timespan

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        return self.name

    @property
    def host(self) -> Optional[str]:
        # Docstring inherited from RunRecord.
        return self._host

    @property
    def timespan(self) -> Timespan[Optional[datetime]]:
        # Docstring inherited from RunRecord.
        return self._timespan
class NameKeyChainedCollectionRecord(ChainedCollectionRecord):
    """A `ChainedCollectionRecord` whose primary/foreign key is simply the
    collection's string name.

    Parameters
    ----------
    db : `Database`
        Database interface used to read and write the chain definition.
    name : `str`
        Name of the chained collection.
    table : `sqlalchemy.schema.Table`
        Table object that holds the (parent, index, child, dataset type)
        rows for collection chains.
    """

    def __init__(self, db: Database, name: str, *, table: sqlalchemy.schema.Table):
        super().__init__(name=name)
        self._db = db
        self._table = table

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        return self.name

    def _update(self, manager: CollectionManager, children: CollectionSearch):
        # Docstring inherited from ChainedCollectionRecord.
        # First flatten the search into (child key, dataset type name) pairs:
        # an unrestricted child contributes a single pair with "" as the
        # dataset type name, a restricted child one pair per dataset type.
        pairs = []
        for child, restriction in children.iter(manager, withRestrictions=True, flattenChains=False):
            if restriction.names is ...:
                pairs.append((child.key, ""))
            else:
                pairs.extend((child.key, name) for name in restriction.names)
        # The position in the flattened sequence becomes the "index" column.
        rows = [
            {"parent": self.key, "child": child_key, "index": position,
             "dataset_type_name": dataset_type_name}
            for position, (child_key, dataset_type_name) in enumerate(pairs)
        ]
        # Replace the old definition of this chain with the new rows inside
        # a single transaction.
        with self._db.transaction():
            self._db.delete(self._table, ["parent"], {"parent": self.key})
            self._db.insert(self._table, *rows)

    def _load(self, manager: CollectionManager) -> CollectionSearch:
        # Docstring inherited from ChainedCollectionRecord.
        columns = self._table.columns
        query = sqlalchemy.sql.select(
            [columns.child, columns.dataset_type_name]
        ).select_from(
            self._table
        ).where(
            columns.parent == self.key
        ).order_by(
            columns.index
        )
        # Consecutive rows may repeat a collection name with different
        # dataset type names; CollectionSearch groups those for us.
        expression = []
        for row in self._db.query(query):
            dataset_type_name = row[columns.dataset_type_name]
            # "No restriction" (...) is stored as "" in the database.
            restriction = dataset_type_name if dataset_type_name else ...
            child_record = manager[row[columns.child]]
            expression.append((child_record.name, restriction))
        return CollectionSearch.fromExpression(expression)
class AggressiveNameKeyCollectionManager(CollectionManager):
    """A `CollectionManager` implementation that uses collection names for
    primary/foreign keys and aggressively loads all collection/run records in
    the database into memory.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    tables : `_TablesTuple`
        Named tuple of SQLAlchemy table objects.
    """
    def __init__(self, db: Database, tables: _TablesTuple):
        self._db = db
        self._tables = tables
        # In-memory cache of every known record, keyed by collection name.
        self._records = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        # Docstring inherited from CollectionManager.
        return cls(db, tables=context.addTableTuple(_TABLES_SPEC))

    @classmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        if prefix is None:
            prefix = "collection"
        original = _TABLES_SPEC.collection.fields["name"]
        # The referencing column must have the same type as the column it
        # points at, including the string length; an explicit ``length``
        # supplied by the caller still takes precedence.
        kwds.setdefault("length", original.length)
        copy = ddl.FieldSpec(cls.getCollectionForeignKeyName(prefix), dtype=original.dtype, **kwds)
        tableSpec.fields.add(copy)
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("collection", source=(copy.name,),
                                                        target=(original.name,), onDelete=onDelete))
        return copy

    @classmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        if prefix is None:
            prefix = "run"
        original = _TABLES_SPEC.run.fields["name"]
        # Keep the referencing column's string length in sync with the
        # referenced ``run.name`` column unless the caller overrides it.
        kwds.setdefault("length", original.length)
        copy = ddl.FieldSpec(cls.getRunForeignKeyName(prefix), dtype=original.dtype, **kwds)
        tableSpec.fields.add(copy)
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("run", source=(copy.name,),
                                                        target=(original.name,), onDelete=onDelete))
        return copy

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Name of the column created by `addCollectionForeignKey` for the
        # given prefix.
        return f"{prefix}_name"

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Name of the column created by `addRunForeignKey` for the given
        # prefix.
        return f"{prefix}_name"

    def refresh(self):
        # Docstring inherited from CollectionManager.
        # A single outer join fetches every collection together with its run
        # columns (which are NULL for non-run collections).
        sql = sqlalchemy.sql.select(
            self._tables.collection.columns + self._tables.run.columns
        ).select_from(
            self._tables.collection.join(self._tables.run, isouter=True)
        )
        # Put found records into a temporary instead of updating self._records
        # in place, for exception safety.
        records = {}
        chains = []
        for row in self._db.query(sql).fetchall():
            name = row[self._tables.collection.columns.name]
            collection_type = CollectionType(row["type"])
            if collection_type is CollectionType.RUN:
                record = NameKeyRunRecord(
                    name=name,
                    db=self._db,
                    table=self._tables.run,
                    host=row[self._tables.run.columns.host],
                    timespan=Timespan(
                        begin=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.begin.name]],
                        end=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.end.name]],
                    )
                )
            elif collection_type is CollectionType.CHAINED:
                record = NameKeyChainedCollectionRecord(db=self._db, table=self._tables.collection_chain,
                                                        name=name)
                chains.append(record)
            else:
                record = NameKeyCollectionRecord(type=collection_type, name=name)
            records[record.name] = record
        self._records = records
        # Chains are refreshed only after the full record cache is installed,
        # because they are handed this manager to look up their children.
        for chain in chains:
            chain.refresh(self)

    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        # A record already present in the in-memory cache is returned as-is;
        # ``type`` is only compared against the database on a cache miss.
        record = self._records.get(name)
        if record is None:
            kwds = {"name": name}
            self._db.sync(
                self._tables.collection,
                keys=kwds,
                compared={"type": int(type)},
            )
            if type is CollectionType.RUN:
                # Runs have an extra row in the run table; read back its
                # host/timespan values to build the record.
                row, _ = self._db.sync(
                    self._tables.run,
                    keys=kwds,
                    returning={"host", TIMESPAN_FIELD_SPECS.begin.name, TIMESPAN_FIELD_SPECS.end.name},
                )
                record = NameKeyRunRecord(
                    db=self._db,
                    table=self._tables.run,
                    host=row["host"],
                    timespan=Timespan(
                        row[TIMESPAN_FIELD_SPECS.begin.name],
                        row[TIMESPAN_FIELD_SPECS.end.name]
                    ),
                    **kwds
                )
            elif type is CollectionType.CHAINED:
                record = NameKeyChainedCollectionRecord(db=self._db, table=self._tables.collection_chain,
                                                        **kwds)
            else:
                record = NameKeyCollectionRecord(type=type, **kwds)
            self._records[record.name] = record
        return record

    def find(self, name: str) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        # Only the in-memory cache is consulted; the database is not queried.
        result = self._records.get(name)
        if result is None:
            raise MissingCollectionError(f"No collection with name '{name}' found.")
        return result

    def __getitem__(self, key: Any) -> Optional[CollectionRecord]:
        # Docstring inherited from CollectionManager.
        try:
            return self._records[key]
        except KeyError as err:
            # Interpolate the key itself: str(KeyError) is the repr of the
            # key, which would wrap the name in a second pair of quotes.
            raise MissingCollectionError(f"Collection with key '{key}' not found.") from err

    def __iter__(self) -> Iterator[CollectionRecord]:
        # Iterate over all cached records (of every collection type).
        yield from self._records.values()