Coverage for python/lsst/daf/butler/datastore/stored_file_info.py: 48% (119 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo", "SerializedStoredFileInfo")

import inspect
from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import pydantic
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

from .._formatter import Formatter, FormatterParameter
from .._location import Location, LocationFactory
from .._storage_class import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from .._dataset_ref import DatasetRef

# String to use when a Python None is encountered
NULLSTR = "__NULL_STRING__"


class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Parameters
        ----------
        **kwargs
            Additional items to add to returned record.
        """
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new instance with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~collections.abc.Mapping`
            Values to override.

        Returns
        -------
        updated : `StoredDatastoreItemInfo`
            A new instance of the object with updated values.
        """
        raise NotImplementedError()

    @classmethod
    def to_records(
        cls, records: Iterable[StoredDatastoreItemInfo], **kwargs: Any
    ) -> tuple[str, Iterable[Mapping[str, Any]]]:
        """Convert a collection of records to dictionaries.

        Parameters
        ----------
        records : `~collections.abc.Iterable` [ `StoredDatastoreItemInfo` ]
            A collection of records; all records must be of the same type.
        **kwargs
            Additional items to add to each returned record.

        Returns
        -------
        class_name : `str`
            Name of the record class.
        records : `list` [ `dict` ]
            Records in their dictionary representation.
        """
        if not records:
            return "", []
        classes = {record.__class__ for record in records}
        assert len(classes) == 1, f"Records have to be of the same class: {classes}"
        return get_full_type_name(classes.pop()), [record.to_record(**kwargs) for record in records]

    @classmethod
    def from_records(
        cls, class_name: str, records: Iterable[Mapping[str, Any]]
    ) -> list[StoredDatastoreItemInfo]:
        """Convert a collection of dictionaries to records.

        Parameters
        ----------
        class_name : `str`
            Name of the record class.
        records : `~collections.abc.Iterable` [ `dict` ]
            Records in their dictionary representation.

        Returns
        -------
        infos : `list` [`StoredDatastoreItemInfo`]
            Sequence of records converted to typed representation.

        Raises
        ------
        TypeError
            Raised if ``class_name`` is not a sub-class of
            `StoredDatastoreItemInfo`.
        """
        try:
            klass = doImportType(class_name)
        except ImportError:
            # Prior to DM-41043 we were embedding a lsst.daf.butler.core
            # path in the serialized form, which we never wanted; fix this
            # one case.
            if class_name == "lsst.daf.butler.core.storedFileInfo.StoredFileInfo":
                klass = StoredFileInfo
            else:
                raise
        if not issubclass(klass, StoredDatastoreItemInfo):
            raise TypeError(f"Class {class_name} is not a subclass of StoredDatastoreItemInfo")
        return [klass.from_record(record) for record in records]
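
# A minimal round-trip sketch (illustrative only, not part of this module):
# ``to_records`` collapses a homogeneous collection of records into a class
# name plus plain dictionaries, and ``from_records`` re-imports that class and
# rebuilds the typed records. ``infos`` below is an assumed, hypothetical list
# of `StoredFileInfo` instances.
#
#     class_name, dicts = StoredDatastoreItemInfo.to_records(infos)
#     restored = StoredDatastoreItemInfo.from_records(class_name, list(dicts))
#     assert restored == infos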


@dataclass(frozen=True, slots=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file.

    Parameters
    ----------
    formatter : `Formatter` or `str`
        The formatter to use for this dataset.
    path : `str`
        Path to the artifact associated with this dataset.
    storageClass : `StorageClass`
        The storage class associated with this dataset.
    component : `str` or `None`, optional
        The component if disassembled.
    checksum : `str` or `None`, optional
        The checksum of the artifact.
    file_size : `int`
        The size of the file in bytes. -1 indicates the size is not known.
    """

    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
    ):
        # Use these shenanigans to allow us to use a frozen dataclass
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be `None` if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Take the component from the ref; the rest comes from self.
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        return self.update(component=component)

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert the record contents to a database record dictionary.

        Parameters
        ----------
        **kwargs : `typing.Any`
            Additional information to be added to the record.
        """
        component = self.component
        if component is None:
            # Use the null-string sentinel since we want this to be part of
            # the primary key.
            component = NULLSTR
        return dict(
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
            **kwargs,
        )

    def to_simple(self) -> SerializedStoredFileInfo:
        record = self.to_record()
        # We allow None on the model but the record contains a "null string"
        # instead.
        record["component"] = self.component
        return SerializedStoredFileInfo.model_validate(record)

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False, forceDirectory=False)
        if uriInStore.isabs():
            location = Location(None, uriInStore)
        else:
            location = factory.from_uri(uriInStore, trusted_path=True)
        return location
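
        # A minimal usage sketch (illustrative only; the datastore root and the
        # construction of the factory from it are assumptions, and ``info`` is
        # an assumed existing `StoredFileInfo` instance):
        #
        #     factory = LocationFactory("/path/to/datastore/root")
        #     location = info.file_location(factory)
        #     uri = location.uri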

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None
        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
        )
        return info

    @classmethod
    def from_simple(cls: type[StoredFileInfo], model: SerializedStoredFileInfo) -> StoredFileInfo:
        return cls.from_record(dict(model))

    def update(self, **kwargs: Any) -> StoredFileInfo:
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        return (self.from_record, (self.to_record(),))
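
# A minimal construction sketch (illustrative only; the formatter name, path,
# and storage class name below are assumed example values, not taken from this
# module):
#
#     sc = StoredFileInfo.storageClassFactory.getStorageClass("StructuredDataDict")
#     info = StoredFileInfo(
#         formatter="lsst.daf.butler.formatters.json.JsonFormatter",
#         path="relative/path/to/file.json",
#         storageClass=sc,
#         component=None,
#         checksum=None,
#         file_size=-1,
#     )
#     # ``to_record``/``from_record`` round-trip through the plain dictionary
#     # form used for database storage; a `None` component becomes NULLSTR.
#     assert StoredFileInfo.from_record(info.to_record()) == info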


class SerializedStoredFileInfo(pydantic.BaseModel):
    """Serialized representation of `StoredFileInfo` properties."""

    formatter: str
    """Fully-qualified name of Formatter."""

    path: str
    """Path to dataset within Datastore."""

    storage_class: str
    """Name of the StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be `None` if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""