Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 46% of 82 statements (coverage.py v7.2.5, created at 2023-05-17 02:31 -0700)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")

import inspect
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Type

from lsst.resources import ResourcePath

from .formatter import Formatter, FormatterParameter
from .location import Location, LocationFactory
from .storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from .datasets import DatasetId, DatasetRef

# String to use when a Python None is encountered
NULLSTR = "__NULL_STRING__"

class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: Type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create an instance from a database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self) -> Dict[str, Any]:
        """Convert the record contents to a dictionary."""
        raise NotImplementedError()

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`)."""
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new instance with everything retained apart from the
        specified values."""
        raise NotImplementedError()

@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: Optional[str],
        checksum: Optional[str],
        file_size: int,
        dataset_id: DatasetId,
    ):
        # Use these shenanigans to allow us to use a frozen dataclass
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)
        object.__setattr__(self, "dataset_id", dataset_id)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: Optional[str]
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: Optional[str]
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # take component and dataset_id from the ref, rest comes from self
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        dataset_id = ref.getCheckedId()
        return self.update(dataset_id=dataset_id, component=component)
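
    # Illustrative use (names here are hypothetical): when a composite has
    # been stored as a single file and a component is requested, the parent's
    # record can be rebased onto the component ref:
    #
    #     component_info = parent_info.rebase(component_ref)
    #
    # The copy keeps the parent's path, formatter, and storage class, but
    # takes the component name and dataset ID from ``component_ref``.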

    def to_record(self) -> Dict[str, Any]:
        """Convert the record contents to a dictionary."""
        component = self.component
        if component is None:
            # Use a sentinel string rather than NULL since we want this
            # column to be usable as part of the primary key.
            component = NULLSTR
        return dict(
            dataset_id=self.dataset_id,
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
        )
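
    # The keys of the returned dictionary are the column names used by the
    # datastore's internal record table; an illustrative example with
    # hypothetical values:
    #
    #     {"dataset_id": UUID("..."), "path": "u/someone/run/dataset.json",
    #      "formatter": "lsst.daf.butler.formatters.json.JsonFormatter",
    #      "storage_class": "StructuredDataDict", "component": "__NULL_STRING__",
    #      "checksum": None, "file_size": 1024}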

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            location = Location(None, uriInStore)
        else:
            location = factory.fromPath(uriInStore)
        return location
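
    # Illustrative behaviour (hypothetical paths): a relative ``path`` such as
    # "a/b/c.json" is joined to the datastore root held by the factory, e.g.
    #
    #     LocationFactory("/datastore/root") -> Location for
    #     /datastore/root/a/b/c.json
    #
    # while an absolute URI such as "s3://bucket/c.json" is returned as-is,
    # with no datastore root attached.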

    @classmethod
    def from_record(cls: Type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create an instance from a database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert the name of the StorageClass to an instance.
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None

        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )
        return info

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Create a new instance with everything retained apart from the
        specified values."""
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)
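

if __name__ == "__main__":
    # Minimal round-trip sketch, illustrative only and not part of the module
    # API. The storage class name, formatter string, path, and sizes below are
    # hypothetical, and DatasetId is assumed to accept a UUID. Run with
    # ``python -m lsst.daf.butler.core.storedFileInfo`` if desired.
    import uuid

    example_sc = StorageClass("ExampleStorageClass")
    StoredFileInfo.storageClassFactory.registerStorageClass(example_sc)

    info = StoredFileInfo(
        formatter="lsst.daf.butler.formatters.json.JsonFormatter",
        path="relative/path/to/dataset.json",
        storageClass=example_sc,
        component=None,
        checksum=None,
        file_size=1024,
        dataset_id=uuid.uuid4(),
    )

    # to_record() replaces a None component with NULLSTR so the column can be
    # part of a primary key; from_record() converts it back and looks the
    # storage class up by name in the factory.
    record = info.to_record()
    restored = StoredFileInfo.from_record(record)
    assert restored.path == info.path and restored.component is None

    # update() copies every field except those explicitly overridden.
    bigger = info.update(file_size=2048)
    assert bigger.file_size == 2048 and bigger.path == info.path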