Coverage for python/lsst/daf/butler/registry/collections/nameKey.py: 99%
110 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 03:15 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 03:15 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["NameKeyCollectionManager"]
31import logging
32from collections.abc import Iterable, Mapping
33from typing import TYPE_CHECKING, Any
35import sqlalchemy
37from ... import ddl
38from ...column_spec import COLLECTION_NAME_MAX_LENGTH
39from ...timespan_database_representation import TimespanDatabaseRepresentation
40from .._collection_type import CollectionType
41from ..interfaces import ChainedCollectionRecord, CollectionRecord, RunRecord, VersionTuple
42from ._base import (
43 CollectionTablesTuple,
44 DefaultCollectionManager,
45 makeCollectionChainTableSpec,
46 makeRunTableSpec,
47)
49if TYPE_CHECKING:
50 from .._caching_context import CachingContext
51 from ..interfaces import Database, StaticTablesContext
# Field specification for the string "name" column, declared with
# primaryKey=True.  The foreign-key helpers in this module copy its dtype
# and length when they add referencing columns to other tables.
_KEY_FIELD_SPEC = ddl.FieldSpec(
    "name",
    dtype=sqlalchemy.String,
    length=COLLECTION_NAME_MAX_LENGTH,
    primaryKey=True,
)


# This has to be updated on every schema change
_VERSION = VersionTuple(2, 0, 0)


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
def _makeTableSpecs(
    TimespanReprClass: type[TimespanDatabaseRepresentation],
) -> CollectionTablesTuple[ddl.TableSpec]:
    """Build the table specifications used by the name-keyed manager.

    Parameters
    ----------
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Timespan representation class; it is forwarded unchanged to
        `makeRunTableSpec`.

    Returns
    -------
    specs : `CollectionTablesTuple` [ `ddl.TableSpec` ]
        Specifications for the ``collection``, ``run`` and
        ``collection_chain`` tables, all keyed on a string ``name`` column.
    """
    # The collection table carries the key column plus a non-nullable type
    # code and a nullable documentation string.
    collection_fields = [
        _KEY_FIELD_SPEC,
        ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
        ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
    ]
    collection_spec = ddl.TableSpec(fields=collection_fields)
    run_spec = makeRunTableSpec("name", sqlalchemy.String, TimespanReprClass)
    chain_spec = makeCollectionChainTableSpec("name", sqlalchemy.String)
    return CollectionTablesTuple(
        collection=collection_spec,
        run=run_spec,
        collection_chain=chain_spec,
    )
class NameKeyCollectionManager(DefaultCollectionManager[str]):
    """A `CollectionManager` implementation keyed on collection names.

    Collection names serve directly as primary and foreign keys, and all
    collection/run records in the database are aggressively loaded into
    memory.

    Most of the logic, including the caching policy, lives in the base
    class; this class only supplies the pieces that depend on this
    particular table schema.
    """

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> NameKeyCollectionManager:
        # Docstring inherited from CollectionManager.
        specs = _makeTableSpecs(db.getTimespanRepresentation())
        tables = context.addTableTuple(specs)  # type: ignore
        return cls(
            db,
            tables=tables,
            collectionIdName="name",
            caching_context=caching_context,
            registry_schema_version=registry_schema_version,
        )

    def clone(self, db: Database, caching_context: CachingContext) -> NameKeyCollectionManager:
        """Return a new manager bound to ``db`` and ``caching_context``.

        The table objects, key column name and registry schema version are
        taken from this instance.
        """
        return NameKeyCollectionManager(
            db,
            tables=self._tables,
            collectionIdName=self._collectionIdName,
            caching_context=caching_context,
            registry_schema_version=self._registry_schema_version,
        )

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        # The new column mirrors the dtype and length of the key column.
        field_spec = ddl.FieldSpec(
            cls.getCollectionForeignKeyName(prefix),
            dtype=_KEY_FIELD_SPEC.dtype,
            length=_KEY_FIELD_SPEC.length,
            **kwargs,
        )
        tableSpec.fields.add(field_spec)
        if not constraint:
            return field_spec
        foreign_key = ddl.ForeignKeySpec(
            "collection",
            source=(field_spec.name,),
            target=(_KEY_FIELD_SPEC.name,),
            onDelete=onDelete,
        )
        tableSpec.foreignKeys.append(foreign_key)
        return field_spec

    @classmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        # The new column mirrors the dtype and length of the key column.
        field_spec = ddl.FieldSpec(
            cls.getRunForeignKeyName(prefix),
            dtype=_KEY_FIELD_SPEC.dtype,
            length=_KEY_FIELD_SPEC.length,
            **kwargs,
        )
        tableSpec.fields.add(field_spec)
        if not constraint:
            return field_spec
        foreign_key = ddl.ForeignKeySpec(
            "run",
            source=(field_spec.name,),
            target=(_KEY_FIELD_SPEC.name,),
            onDelete=onDelete,
        )
        tableSpec.foreignKeys.append(foreign_key)
        return field_spec

    def getParentChains(self, key: str) -> set[str]:
        # Docstring inherited from CollectionManager.
        chain_table = self._tables.collection_chain
        query = (
            sqlalchemy.sql.select(chain_table.columns["parent"])
            .select_from(chain_table)
            .where(chain_table.columns["child"] == key)
        )
        with self._db.query(query) as result:
            return set(result.scalars().all())

    def lookup_name_sql(
        self, sql_key: sqlalchemy.ColumnElement[str], sql_from_clause: sqlalchemy.FromClause
    ) -> tuple[sqlalchemy.ColumnElement[str], sqlalchemy.FromClause]:
        # Docstring inherited.
        # Both arguments are handed back unchanged.
        return (sql_key, sql_from_clause)

    def _fetch_by_name(self, names: Iterable[str]) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        # Delegates directly to the key-based lookup.
        return self._fetch_by_key(names)

    def _fetch_by_key(self, collection_ids: Iterable[str] | None) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        _LOG.debug("Fetching collection records using names %s.", collection_ids)
        tables = self._tables
        chain_columns = tables.collection_chain.columns

        # Collection rows outer-joined to run rows.
        sql = sqlalchemy.sql.select(*tables.collection.columns, *tables.run.columns).select_from(
            tables.collection.join(tables.run, isouter=True)
        )
        chain_sql = sqlalchemy.sql.select(
            chain_columns["parent"],
            chain_columns["position"],
            chain_columns["child"],
        )

        # We want to keep transactions as short as possible.
        if collection_ids is None:
            # Fetching everything: pull both result sets into memory, finish
            # the transaction, and only then convert rows to records.
            with self._db.transaction():
                with self._db.query(sql) as sql_result:
                    sql_rows = sql_result.mappings().fetchall()
                with self._db.query(chain_sql) as sql_result:
                    chain_rows = sql_result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(sql_rows)
            records += self._rows_to_chains(chain_rows, chained_ids)
            return records

        # Fetching a subset: the result of the first query has to be
        # processed before the second query can be constructed.
        sql = sql.where(tables.collection.columns[self._collectionIdName].in_(collection_ids))
        with self._db.transaction():
            with self._db.query(sql) as sql_result:
                sql_rows = sql_result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(sql_rows)

            if chained_ids:
                # Retrieve chained collection compositions
                chain_sql = chain_sql.where(chain_columns["parent"].in_(chained_ids))
                with self._db.query(chain_sql) as sql_result:
                    chain_rows = sql_result.mappings().fetchall()

                records += self._rows_to_chains(chain_rows, chained_ids)

        return records

    def _rows_to_records(self, rows: Iterable[Mapping]) -> tuple[list[CollectionRecord[str]], list[str]]:
        """Convert rows of the collection query into records.

        Returns a list of records for every collection that is not CHAINED,
        together with a list of the names of the CHAINED collections found
        in ``rows``.
        """
        name_column = self._tables.collection.columns.name
        host_column = self._tables.run.columns.host
        TimespanReprClass = self._db.getTimespanRepresentation()

        records: list[CollectionRecord[str]] = []
        chained_ids: list[str] = []
        for row in rows:
            name = row[name_column]
            collection_type = CollectionType(row["type"])
            if collection_type is CollectionType.CHAINED:
                # Chained records can only be built once the names of their
                # children have been fetched, so just remember the name.
                chained_ids.append(name)
                continue
            record: CollectionRecord[str]
            if collection_type is CollectionType.RUN:
                record = RunRecord[str](
                    key=name,
                    name=name,
                    host=row[host_column],
                    timespan=TimespanReprClass.extract(row),
                )
            else:
                record = CollectionRecord[str](key=name, name=name, type=collection_type)
            records.append(record)

        return records, chained_ids

    def _rows_to_chains(self, rows: Iterable[Mapping], chained_ids: list[str]) -> list[CollectionRecord[str]]:
        """Convert rows of the collection chain query into chained records.

        One record is produced per entry of ``chained_ids``, including
        chains for which ``rows`` holds no children.
        """
        # Seed with every requested chain so that empty chains get a record.
        children_by_parent: dict[str, list[tuple[int, str]]] = {
            chain_id: [] for chain_id in chained_ids
        }
        for row in rows:
            children_by_parent[row["parent"]].append((row["position"], row["child"]))

        # Sorting the (position, child) pairs orders children by position.
        records: list[CollectionRecord[str]] = [
            ChainedCollectionRecord[str](
                key=parent,
                name=parent,
                children=[child for _, child in sorted(entries)],
            )
            for parent, entries in children_by_parent.items()
        ]
        return records

    def _select_pkey_by_name(self, collection_name: str) -> sqlalchemy.Select:
        """Return a SELECT of the name (labelled ``key``) and type columns
        for the row whose name equals ``collection_name``.
        """
        collection = self._tables.collection
        query = sqlalchemy.select(collection.c.name.label("key"), collection.c.type)
        return query.where(collection.c.name == collection_name)

    @classmethod
    def currentVersions(cls) -> list[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return [_VERSION]