Coverage for python/lsst/daf/butler/datastore/stored_file_info.py: 48%

119 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo", "SerializedStoredFileInfo") 

31 

32import inspect 

33from collections.abc import Iterable, Mapping 

34from dataclasses import dataclass 

35from typing import TYPE_CHECKING, Any 

36 

37import pydantic 

38from lsst.resources import ResourcePath 

39from lsst.utils import doImportType 

40from lsst.utils.introspection import get_full_type_name 

41 

42from .._formatter import Formatter, FormatterParameter 

43from .._location import Location, LocationFactory 

44from .._storage_class import StorageClass, StorageClassFactory 

45 

46if TYPE_CHECKING: 

47 from .._dataset_ref import DatasetRef 

48 

49# String to use when a Python None is encountered 

50NULLSTR = "__NULL_STRING__" 

51 

52 

class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Parameters
        ----------
        **kwargs
            Additional items to add to returned record.
        """
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new class with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~collections.abc.Mapping`
            Values to override.

        Returns
        -------
        updated : `StoredDatastoreItemInfo`
            A new instance of the object with updated values.
        """
        raise NotImplementedError()

    @classmethod
    def to_records(
        cls, records: Iterable[StoredDatastoreItemInfo], **kwargs: Any
    ) -> tuple[str, Iterable[Mapping[str, Any]]]:
        """Convert a collection of records to dictionaries.

        Parameters
        ----------
        records : `~collections.abc.Iterable` [ `StoredDatastoreItemInfo` ]
            A collection of records, all records must be of the same type.
        **kwargs
            Additional items to add to each returned record.

        Returns
        -------
        class_name : `str`
            Name of the record class.
        records : `list` [ `dict` ]
            Records in their dictionary representation.
        """
        # Materialize up front: a single-pass iterable (e.g. a generator)
        # would be truthy even when empty and would be exhausted by the
        # class-set pass below, yielding an empty record list.
        records = list(records)
        if not records:
            return "", []
        classes = {record.__class__ for record in records}
        assert len(classes) == 1, f"Records have to be of the same class: {classes}"
        return get_full_type_name(classes.pop()), [record.to_record(**kwargs) for record in records]

    @classmethod
    def from_records(
        cls, class_name: str, records: Iterable[Mapping[str, Any]]
    ) -> list[StoredDatastoreItemInfo]:
        """Convert collection of dictionaries to records.

        Parameters
        ----------
        class_name : `str`
            Name of the record class.
        records : `~collections.abc.Iterable` [ `dict` ]
            Records in their dictionary representation.

        Returns
        -------
        infos : `list` [`StoredDatastoreItemInfo`]
            Sequence of records converted to typed representation.

        Raises
        ------
        TypeError
            Raised if ``class_name`` is not a sub-class of
            `StoredDatastoreItemInfo`.
        """
        try:
            klass = doImportType(class_name)
        except ImportError:
            # Prior to DM-41043 we were embedding a lsst.daf.butler.core
            # path in the serialized form, which we never wanted; fix this
            # one case.
            if class_name == "lsst.daf.butler.core.storedFileInfo.StoredFileInfo":
                klass = StoredFileInfo
            else:
                raise
        if not issubclass(klass, StoredDatastoreItemInfo):
            raise TypeError(f"Class {class_name} is not a subclass of StoredDatastoreItemInfo")
        return [klass.from_record(record) for record in records]

182 

183 

@dataclass(frozen=True, slots=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file.

    Parameters
    ----------
    formatter : `Formatter` or `str`
        The formatter to use for this dataset. A class, an instance, or a
        fully-qualified name is accepted; only the name string is stored.
    path : `str`
        Path to the artifact associated with this dataset.
    storageClass : `StorageClass`
        The storage class associated with this dataset.
    component : `str` or `None`
        The component if disassembled.
    checksum : `str` or `None`
        The checksum of the artifact.
    file_size : `int`
        The size of the file in bytes. -1 indicates the size is not known.
    """

    # Shared factory used by from_record() to turn a storage class name
    # from a database record back into a StorageClass instance.
    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
    ):
        # Use these shenanigans to allow us to use a frozen dataclass:
        # direct assignment would raise FrozenInstanceError, so we go
        # through object.__setattr__.
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be `None` if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # take component from the ref, rest comes from self
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        return self.update(component=component)

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert the supplied ref to a database record.

        Parameters
        ----------
        **kwargs : `typing.Any`
            Additional information to be added to the record.
        """
        component = self.component
        if component is None:
            # Use empty string since we want this to be part of the
            # primary key.
            component = NULLSTR
        return dict(
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
        **kwargs,
        )

    def to_simple(self) -> SerializedStoredFileInfo:
        """Return a serializable form of this record as a pydantic model."""
        record = self.to_record()
        # We allow None on the model but the record contains a "null string"
        # instead
        record["component"] = self.component
        return SerializedStoredFileInfo.model_validate(record)

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False, forceDirectory=False)
        if uriInStore.isabs():
            # Absolute URIs stand on their own and do not go through the
            # datastore root.
            location = Location(None, uriInStore)
        else:
            location = factory.from_uri(uriInStore, trusted_path=True)
        return location

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        # Both the NULLSTR sentinel (database form) and empty/None (model
        # form) map back to a None component.
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None
        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
        )
        return info

    @classmethod
    def from_simple(cls: type[StoredFileInfo], model: SerializedStoredFileInfo) -> StoredFileInfo:
        """Construct an instance from the serialized pydantic form."""
        return cls.from_record(dict(model))

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Return a new instance with the given fields replaced.

        Raises `ValueError` if a keyword does not match a known field.
        """
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        # Pickle via the record dictionary and rebuild with from_record so
        # that the storage class travels by name rather than by instance.
        return (self.from_record, (self.to_record(),))

367 

368 

class SerializedStoredFileInfo(pydantic.BaseModel):
    """Serialized representation of `StoredFileInfo` properties.

    Unlike the database record form, ``component`` may be `None` here
    rather than the internal null-string sentinel.
    """

    formatter: str
    """Fully-qualified name of Formatter."""

    path: str
    """Path to dataset within Datastore."""

    storage_class: str
    """Name of the StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be `None` if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""