Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 50% (85 statements)
« prev ^ index » next — coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")
32import inspect
33from collections.abc import Mapping
34from dataclasses import dataclass
35from typing import TYPE_CHECKING, Any
37from lsst.resources import ResourcePath
39from .formatter import Formatter, FormatterParameter
40from .location import Location, LocationFactory
41from .storageClass import StorageClass, StorageClassFactory
43if TYPE_CHECKING:
44 from .datasets import DatasetId, DatasetRef
# Sentinel string stored in the database in place of a Python None.
# Used for the ``component`` column, which participates in the primary
# key and therefore cannot hold NULL (see StoredFileInfo.to_record /
# from_record).
NULLSTR = "__NULL_STRING__"
class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        # Subclasses must know how their artifacts are laid out.
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self) -> dict[str, Any]:
        """Convert record contents to a dictionary."""
        raise NotImplementedError()

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`)."""
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new class with everything retained apart from the
        specified values.
        """
        raise NotImplementedError()
@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    # Shared factory used to map stored StorageClass names back to instances.
    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
        dataset_id: DatasetId,
    ):
        # The dataclass is frozen, so all attribute assignment must go
        # through object.__setattr__ rather than plain assignment.
        assign = object.__setattr__
        assign(self, "path", path)
        assign(self, "storageClass", storageClass)
        assign(self, "component", component)
        assign(self, "checksum", checksum)
        assign(self, "file_size", file_size)
        assign(self, "dataset_id", dataset_id)

        # Normalize whatever form of formatter we were given to its
        # fully-qualified name.
        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter.
            formatter_name = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatter_name = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        assign(self, "formatter", formatter_name)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Component and dataset_id come from the ref; every other field is
        # copied from self.  A ref without a component keeps our component.
        ref_component = ref.datasetType.component()
        return self.update(
            dataset_id=ref.id,
            component=self.component if ref_component is None else ref_component,
        )

    def to_record(self) -> dict[str, Any]:
        """Convert the supplied ref to a database record."""
        # A None component is stored as the sentinel string because the
        # component column is part of the primary key and cannot be NULL.
        return {
            "dataset_id": self.dataset_id,
            "formatter": self.formatter,
            "path": self.path,
            "storage_class": self.storageClass.name,
            "component": NULLSTR if self.component is None else self.component,
            "checksum": self.checksum,
            "file_size": self.file_size,
        }

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uri = ResourcePath(self.path, forceAbsolute=False)
        if not uri.isabs():
            # Relative paths are resolved against the datastore root.
            return factory.fromPath(uri)
        # An absolute URI stands on its own with no datastore root.
        return Location(None, uri)

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert the stored StorageClass name back to an instance.
        storage_class = cls.storageClassFactory.getStorageClass(record["storage_class"])
        # Undo the NULLSTR encoding applied by to_record (empty values also
        # map to None).
        component = record["component"]
        if not component or component == NULLSTR:
            component = None
        return cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storage_class,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Create a new record with the given fields replaced and all other
        fields retained from this record.
        """
        # Start from the current field values, overriding any that the
        # caller supplied; anything left over in kwargs is unknown.
        merged = {field: kwargs.pop(field, getattr(self, field)) for field in self.__slots__}
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**merged)

    def __reduce__(self) -> str | tuple[Any, ...]:
        # Pickle by round-tripping through the database-record form.
        return (self.from_record, (self.to_record(),))