Coverage for python/lsst/daf/butler/datastore/stored_file_info.py: 42%
98 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-05 11:07 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")
32import inspect
33from collections.abc import Iterable, Mapping
34from dataclasses import dataclass
35from typing import TYPE_CHECKING, Any
37from lsst.resources import ResourcePath
38from lsst.utils import doImportType
39from lsst.utils.introspection import get_full_type_name
41from .._formatter import Formatter, FormatterParameter
42from .._location import Location, LocationFactory
43from .._storage_class import StorageClass, StorageClassFactory
45if TYPE_CHECKING:
46 from .._dataset_ref import DatasetRef
48# String to use when a Python None is encountered
49NULLSTR = "__NULL_STRING__"
class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Parameters
        ----------
        **kwargs
            Additional items to add to returned record.
        """
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new class with everything retained apart from the
        specified values.
        """
        raise NotImplementedError()

    @classmethod
    def to_records(
        cls, records: Iterable[StoredDatastoreItemInfo], **kwargs: Any
    ) -> tuple[str, Iterable[Mapping[str, Any]]]:
        """Convert a collection of records to dictionaries.

        Parameters
        ----------
        records : `~collections.abc.Iterable` [ `StoredDatastoreItemInfo` ]
            A collection of records, all records must be of the same type.
        **kwargs
            Additional items to add to each returned record.

        Returns
        -------
        class_name : `str`
            Name of the record class.
        records : `list` [ `dict` ]
            Records in their dictionary representation.
        """
        # Materialize the iterable up front: the emptiness check and the two
        # passes below (class scan, then to_record conversion) would silently
        # misbehave for a one-shot iterator, which would be exhausted by the
        # first pass and yield an empty record list.
        records = list(records)
        if not records:
            return "", []
        classes = {record.__class__ for record in records}
        assert len(classes) == 1, f"Records have to be of the same class: {classes}"
        return get_full_type_name(classes.pop()), [record.to_record(**kwargs) for record in records]

    @classmethod
    def from_records(
        cls, class_name: str, records: Iterable[Mapping[str, Any]]
    ) -> list[StoredDatastoreItemInfo]:
        """Convert collection of dictionaries to records.

        Parameters
        ----------
        class_name : `str`
            Name of the record class.
        records : `~collections.abc.Iterable` [ `dict` ]
            Records in their dictionary representation.

        Returns
        -------
        infos : `list` [`StoredDatastoreItemInfo`]
            Sequence of records converted to typed representation.

        Raises
        ------
        TypeError
            Raised if ``class_name`` is not a sub-class of
            `StoredDatastoreItemInfo`.
        """
        try:
            klass = doImportType(class_name)
        except ImportError:
            # Prior to DM-41043 we were embedding a lsst.daf.butler.core
            # path in the serialized form, which we never wanted; fix this
            # one case.
            if class_name == "lsst.daf.butler.core.storedFileInfo.StoredFileInfo":
                klass = StoredFileInfo
            else:
                raise
        if not issubclass(klass, StoredDatastoreItemInfo):
            raise TypeError(f"Class {class_name} is not a subclass of StoredDatastoreItemInfo")
        return [klass.from_record(record) for record in records]
@dataclass(frozen=True, slots=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    # Class-level factory (deliberately unannotated so the dataclass
    # machinery does not treat it as a field); used by `from_record` to map
    # a storage class name back to a StorageClass instance.
    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
    ):
        # Use these shenanigans to allow us to use a frozen dataclass:
        # direct assignment would raise FrozenInstanceError, so bypass the
        # frozen __setattr__ via object.__setattr__.
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)

        # Normalize the formatter to a fully-qualified name string so the
        # stored attribute is always a plain str regardless of whether a
        # name, a Formatter class, or a Formatter instance was supplied.
        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef whose dataset type provides the component name for
            the new returned record; everything else is copied from ``self``.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # take component from the ref, rest comes from self
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        return self.update(component=component)

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert the supplied ref to a database record."""
        component = self.component
        if component is None:
            # Use empty string since we want this to be part of the
            # primary key.
            component = NULLSTR
        return dict(
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
            **kwargs,
        )

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            # Absolute URI: the artifact lives outside the datastore root.
            location = Location(None, uriInStore)
        else:
            location = factory.fromPath(uriInStore)
        return location

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        # NULLSTR (and any falsy value) in the component column stands in for
        # "no component" -- see to_record.
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None
        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
        )
        return info

    def update(self, **kwargs: Any) -> StoredFileInfo:
        # Build constructor arguments from current field values, overridden
        # by any supplied keywords; reject unknown names.
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        # Pickle via the record dict so unpickling goes through from_record
        # (re-resolving the StorageClass by name via the shared factory).
        return (self.from_record, (self.to_record(),))