Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 50%
85 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")
26import inspect
27from collections.abc import Mapping
28from dataclasses import dataclass
29from typing import TYPE_CHECKING, Any
31from lsst.resources import ResourcePath
33from .formatter import Formatter, FormatterParameter
34from .location import Location, LocationFactory
35from .storageClass import StorageClass, StorageClassFactory
37if TYPE_CHECKING:
38 from .datasets import DatasetId, DatasetRef
# Sentinel string written to a record in place of a Python None (used for
# the component name, which must have a value in the stored record).
NULLSTR = "__NULL_STRING__"
class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses. Every method defined here raises
    `NotImplementedError`.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.

        Raises
        ------
        NotImplementedError
            Always raised by the base class.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.

        Raises
        ------
        NotImplementedError
            Always raised by the base class.
        """
        raise NotImplementedError("The base class does not know how to create an item from a record.")

    def to_record(self) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Raises
        ------
        NotImplementedError
            Always raised by the base class.
        """
        raise NotImplementedError("The base class does not know how to convert an item to a record.")

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`)."""
        raise NotImplementedError("The base class does not have a dataset ID.")

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new instance with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Values to replace in the returned item.

        Raises
        ------
        NotImplementedError
            Always raised by the base class.
        """
        raise NotImplementedError("The base class does not know how to update an item.")
@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    # Shared factory used by `from_record` to turn a storage class name back
    # into a `StorageClass` instance.
    storageClassFactory = StorageClassFactory()

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
        dataset_id: DatasetId,
    ):
        """Construct the record, normalizing ``formatter`` to its name.

        Raises
        ------
        TypeError
            Raised if ``formatter`` is neither a string nor a `Formatter`
            class or instance.
        """
        # The dataclass is frozen, so normal attribute assignment is
        # blocked; go through object.__setattr__ to populate the slots.
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)
        object.__setattr__(self, "dataset_id", dataset_id)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Take component and dataset_id from the ref, rest comes from self.
        # If the ref has no component, keep the one already on this record.
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        return self.update(dataset_id=ref.id, component=component)

    def to_record(self) -> dict[str, Any]:
        """Convert this item to a database record.

        Returns
        -------
        record : `dict`
            Dictionary with keys ``dataset_id``, ``formatter``, ``path``,
            ``storage_class`` (the storage class name), ``component``,
            ``checksum`` and ``file_size``.
        """
        # Use the NULLSTR sentinel rather than None since we want the
        # component to be part of the primary key.
        component = NULLSTR if self.component is None else self.component
        return {
            "dataset_id": self.dataset_id,
            "formatter": self.formatter,
            "path": self.path,
            "storage_class": self.storageClass.name,
            "component": component,
            "checksum": self.checksum,
            "file_size": self.file_size,
        }

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            # An absolute URI is used as-is, without going through the
            # factory.
            return Location(None, uriInStore)
        return factory.fromPath(uriInStore)

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])

        # A falsy value or the NULLSTR sentinel both mean "no component".
        component = record["component"]
        if not component or component == NULLSTR:
            component = None

        return cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Create a new instance with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Replacement values keyed by attribute name. Only names listed
            in ``__slots__`` are accepted.

        Returns
        -------
        info : `StoredFileInfo`
            New instance of the same type as ``self``.

        Raises
        ------
        ValueError
            Raised if a keyword does not match a known attribute.
        """
        new_args = {}
        for k in self.__slots__:
            # Consume overrides as they are used so that anything left in
            # kwargs afterwards is known to be unrecognized.
            new_args[k] = kwargs.pop(k) if k in kwargs else getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        """Support pickling by round-tripping through the record form."""
        return (self.from_record, (self.to_record(),))