Coverage for python/lsst/daf/butler/registry/collections/nameKey.py: 99%
108 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-05 02:52 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["NameKeyCollectionManager"]
31import logging
32from collections.abc import Iterable, Mapping
33from typing import TYPE_CHECKING, Any
35import sqlalchemy
37from ... import ddl
38from ...column_spec import COLLECTION_NAME_MAX_LENGTH
39from ...timespan_database_representation import TimespanDatabaseRepresentation
40from .._collection_type import CollectionType
41from ..interfaces import ChainedCollectionRecord, CollectionRecord, RunRecord, VersionTuple
42from ._base import (
43 CollectionTablesTuple,
44 DefaultCollectionManager,
45 makeCollectionChainTableSpec,
46 makeRunTableSpec,
47)
49if TYPE_CHECKING:
50 from .._caching_context import CachingContext
51 from ..interfaces import Database, StaticTablesContext
# Specification of the key column of the collection table: a string column
# called "name", bounded by COLLECTION_NAME_MAX_LENGTH, used as primary key.
# The foreign-key helpers of the manager below copy its dtype and length.
_KEY_FIELD_SPEC = ddl.FieldSpec(
    "name",
    dtype=sqlalchemy.String,
    length=COLLECTION_NAME_MAX_LENGTH,
    primaryKey=True,
)


# This has to be updated on every schema change
_VERSION = VersionTuple(2, 0, 0)


# Module-level logger.
_LOG = logging.getLogger(__name__)
def _makeTableSpecs(
    TimespanReprClass: type[TimespanDatabaseRepresentation],
) -> CollectionTablesTuple[ddl.TableSpec]:
    """Build the specifications of the tables used by the name-keyed
    collection manager.

    Parameters
    ----------
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Timespan representation class; passed through to
        `makeRunTableSpec`.

    Returns
    -------
    specs : `CollectionTablesTuple` [ `ddl.TableSpec` ]
        Specifications for the ``collection``, ``run`` and
        ``collection_chain`` tables, all keyed by a string ``name``.
    """
    collection_fields = [
        _KEY_FIELD_SPEC,
        ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
        ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
    ]
    collection_spec = ddl.TableSpec(fields=collection_fields)
    run_spec = makeRunTableSpec("name", sqlalchemy.String, TimespanReprClass)
    chain_spec = makeCollectionChainTableSpec("name", sqlalchemy.String)
    return CollectionTablesTuple(
        collection=collection_spec,
        run=run_spec,
        collection_chain=chain_spec,
    )
class NameKeyCollectionManager(DefaultCollectionManager[str]):
    """A `CollectionManager` implementation in which the collection name
    itself serves as the primary key and as the foreign key in other tables.

    Only the parts that depend on this particular table schema are defined
    here; the rest of the logic, including the caching policy, lives in the
    base class.
    """

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> NameKeyCollectionManager:
        # Docstring inherited from CollectionManager.
        specs = _makeTableSpecs(db.getTimespanRepresentation())
        tables = context.addTableTuple(specs)  # type: ignore
        return cls(
            db,
            tables=tables,
            collectionIdName="name",
            caching_context=caching_context,
            registry_schema_version=registry_schema_version,
        )

    def clone(self, db: Database, caching_context: CachingContext) -> NameKeyCollectionManager:
        # The new manager shares this one's table objects, key column name
        # and schema version; only the database and the caching context are
        # replaced by the arguments.
        return NameKeyCollectionManager(
            db,
            tables=self._tables,
            collectionIdName=self._collectionIdName,
            caching_context=caching_context,
            registry_schema_version=self._registry_schema_version,
        )

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Docstring inherited from CollectionManager.
        fk_name = f"{prefix}_name"
        return fk_name

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Docstring inherited from CollectionManager.
        fk_name = f"{prefix}_name"
        return fk_name

    @classmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        # The new column mirrors the dtype and length of the key column of
        # the collection table.
        fk_field = ddl.FieldSpec(
            cls.getCollectionForeignKeyName(prefix),
            dtype=_KEY_FIELD_SPEC.dtype,
            length=_KEY_FIELD_SPEC.length,
            **kwargs,
        )
        tableSpec.fields.add(fk_field)
        if not constraint:
            # Caller asked for the column only, without a constraint.
            return fk_field
        fk_spec = ddl.ForeignKeySpec(
            "collection",
            source=(fk_field.name,),
            target=(_KEY_FIELD_SPEC.name,),
            onDelete=onDelete,
        )
        tableSpec.foreignKeys.append(fk_spec)
        return fk_field

    @classmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        # The new column mirrors the dtype and length of the key column of
        # the collection table.
        fk_field = ddl.FieldSpec(
            cls.getRunForeignKeyName(prefix),
            dtype=_KEY_FIELD_SPEC.dtype,
            length=_KEY_FIELD_SPEC.length,
            **kwargs,
        )
        tableSpec.fields.add(fk_field)
        if not constraint:
            # Caller asked for the column only, without a constraint.
            return fk_field
        fk_spec = ddl.ForeignKeySpec(
            "run",
            source=(fk_field.name,),
            target=(_KEY_FIELD_SPEC.name,),
            onDelete=onDelete,
        )
        tableSpec.foreignKeys.append(fk_spec)
        return fk_field

    def getParentChains(self, key: str) -> set[str]:
        # Docstring inherited from CollectionManager.
        chain_table = self._tables.collection_chain
        parent_column = chain_table.columns["parent"]
        child_column = chain_table.columns["child"]
        query = sqlalchemy.sql.select(parent_column).select_from(chain_table).where(child_column == key)
        with self._db.query(query) as result:
            # Materialize the names while the result is still open.
            return set(result.scalars().all())

    def _fetch_by_name(self, names: Iterable[str]) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        # Names are the keys in this schema, so this is the same lookup.
        return self._fetch_by_key(names)

    def _fetch_by_key(self, collection_ids: Iterable[str] | None) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        _LOG.debug("Fetching collection records using names %s.", collection_ids)
        collection_table = self._tables.collection
        run_table = self._tables.run
        chain_columns = self._tables.collection_chain.columns

        # Collection rows outer-joined with their run rows.
        sql = sqlalchemy.sql.select(*collection_table.columns, *run_table.columns).select_from(
            collection_table.join(run_table, isouter=True)
        )
        # Membership rows of chained collections.
        chain_sql = sqlalchemy.sql.select(
            chain_columns["parent"],
            chain_columns["position"],
            chain_columns["child"],
        )

        # Transactions are kept as short as possible.
        if collection_ids is None:
            # Fetching everything: pull the rows of both queries into memory
            # right away and only convert them to records once the
            # transaction is finished.
            with self._db.transaction():
                with self._db.query(sql) as sql_result:
                    sql_rows = sql_result.mappings().fetchall()
                with self._db.query(chain_sql) as sql_result:
                    chain_rows = sql_result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(sql_rows)
            records += self._rows_to_chains(chain_rows, chained_ids)
            return records

        # Fetching selected collections: the result of the first query has
        # to be processed before the second query can be constructed.
        sql = sql.where(collection_table.columns[self._collectionIdName].in_(collection_ids))
        with self._db.transaction():
            with self._db.query(sql) as sql_result:
                sql_rows = sql_result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(sql_rows)

            if chained_ids:
                # Retrieve the composition of the chained collections found
                # by the first query.
                chain_sql = chain_sql.where(chain_columns["parent"].in_(chained_ids))
                with self._db.query(chain_sql) as sql_result:
                    chain_rows = sql_result.mappings().fetchall()

                records += self._rows_to_chains(chain_rows, chained_ids)

        return records

    def _rows_to_records(self, rows: Iterable[Mapping]) -> tuple[list[CollectionRecord[str]], list[str]]:
        """Convert rows of the collection query into records.

        Parameters
        ----------
        rows : `~collections.abc.Iterable` [ `~collections.abc.Mapping` ]
            Rows returned by the collection/run query.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Records for every row that is not a chained collection.
        chained_ids : `list` [ `str` ]
            Names of the chained collections found in ``rows``; their
            records are not built here.
        """
        timespan_repr = self._db.getTimespanRepresentation()
        records: list[CollectionRecord[str]] = []
        chained_ids: list[str] = []
        for row in rows:
            name = row[self._tables.collection.columns.name]
            collection_type = CollectionType(row["type"])
            if collection_type is CollectionType.CHAINED:
                # Chained collections can only be constructed once the names
                # of their children are known, so just remember the name.
                chained_ids.append(name)
                continue
            record: CollectionRecord[str]
            if collection_type is CollectionType.RUN:
                record = RunRecord[str](
                    key=name,
                    name=name,
                    host=row[self._tables.run.columns.host],
                    timespan=timespan_repr.extract(row),
                )
            else:
                record = CollectionRecord[str](key=name, name=name, type=collection_type)
            records.append(record)

        return records, chained_ids

    def _rows_to_chains(self, rows: Iterable[Mapping], chained_ids: list[str]) -> list[CollectionRecord[str]]:
        """Convert rows of the collection chain query into records.

        Parameters
        ----------
        rows : `~collections.abc.Iterable` [ `~collections.abc.Mapping` ]
            Rows with ``parent``, ``position`` and ``child`` keys.
        chained_ids : `list` [ `str` ]
            Names of the chained collections to build records for; a name
            with no matching rows yields a record without children.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            One `ChainedCollectionRecord` per name in ``chained_ids``, with
            children ordered by position.
        """
        positioned: dict[str, list[tuple[int, str]]] = {chain_id: [] for chain_id in chained_ids}
        for row in rows:
            positioned[row["parent"]].append((row["position"], row["child"]))

        return [
            ChainedCollectionRecord[str](
                key=parent,
                name=parent,
                children=[child for _, child in sorted(members)],
            )
            for parent, members in positioned.items()
        ]

    def _select_pkey_by_name(self, collection_name: str) -> sqlalchemy.Select:
        # Select the name (labelled "key") and the type of the collection
        # with the given name.
        collection = self._tables.collection
        query = sqlalchemy.select(collection.c.name.label("key"), collection.c.type)
        return query.where(collection.c.name == collection_name)

    @classmethod
    def currentVersions(cls) -> list[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return [_VERSION]