Coverage for python/lsst/daf/butler/datastore/stored_file_info.py: 42%

98 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-01 11:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo") 

31 

32import inspect 

33from collections.abc import Iterable, Mapping 

34from dataclasses import dataclass 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.resources import ResourcePath 

38from lsst.utils import doImportType 

39from lsst.utils.introspection import get_full_type_name 

40 

41from .._formatter import Formatter, FormatterParameter 

42from .._location import Location, LocationFactory 

43from .._storage_class import StorageClass, StorageClassFactory 

44 

45if TYPE_CHECKING: 

46 from .._dataset_ref import DatasetRef 

47 

48# String to use when a Python None is encountered 

49NULLSTR = "__NULL_STRING__" 

50 

51 

class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Parameters
        ----------
        **kwargs
            Additional items to add to returned record.
        """
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new class with everything retained apart from the
        specified values.
        """
        raise NotImplementedError()

    @classmethod
    def to_records(
        cls, records: Iterable[StoredDatastoreItemInfo], **kwargs: Any
    ) -> tuple[str, Iterable[Mapping[str, Any]]]:
        """Convert a collection of records to dictionaries.

        Parameters
        ----------
        records : `~collections.abc.Iterable` [ `StoredDatastoreItemInfo` ]
            A collection of records, all records must be of the same type.
            May be any iterable, including a one-shot iterator.
        **kwargs
            Additional items to add to each returned record.

        Returns
        -------
        class_name : `str`
            Name of the record class.  Empty string if no records given.
        records : `list` [ `dict` ]
            Records in their dictionary representation.
        """
        # Materialize up front: ``records`` may be a one-shot iterator
        # (e.g. a generator).  Iterators are always truthy, so the
        # emptiness check below would be wrong for them, and the two
        # passes over ``records`` (class check, then conversion) would
        # exhaust the iterator on the first pass.
        records = list(records)
        if not records:
            return "", []
        classes = {record.__class__ for record in records}
        assert len(classes) == 1, f"Records have to be of the same class: {classes}"
        return get_full_type_name(classes.pop()), [record.to_record(**kwargs) for record in records]

    @classmethod
    def from_records(
        cls, class_name: str, records: Iterable[Mapping[str, Any]]
    ) -> list[StoredDatastoreItemInfo]:
        """Convert collection of dictionaries to records.

        Parameters
        ----------
        class_name : `str`
            Name of the record class.
        records : `~collections.abc.Iterable` [ `dict` ]
            Records in their dictionary representation.

        Returns
        -------
        infos : `list` [`StoredDatastoreItemInfo`]
            Sequence of records converted to typed representation.

        Raises
        ------
        TypeError
            Raised if ``class_name`` is not a sub-class of
            `StoredDatastoreItemInfo`.
        """
        try:
            klass = doImportType(class_name)
        except ImportError:
            # Prior to DM-41043 we were embedding a lsst.daf.butler.core
            # path in the serialized form, which we never wanted; fix this
            # one case.
            if class_name == "lsst.daf.butler.core.storedFileInfo.StoredFileInfo":
                klass = StoredFileInfo
            else:
                raise
        if not issubclass(klass, StoredDatastoreItemInfo):
            raise TypeError(f"Class {class_name} is not a subclass of StoredDatastoreItemInfo")
        return [klass.from_record(record) for record in records]

171 

172 

@dataclass(frozen=True, slots=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    # Shared factory used to resolve storage class names back to instances.
    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
    ):
        # The dataclass is frozen, so plain attribute assignment is blocked;
        # route all writes through object.__setattr__ instead.
        for attr, value in (
            ("path", path),
            ("storageClass", storageClass),
            ("component", component),
            ("checksum", checksum),
            ("file_size", file_size),
        ):
            object.__setattr__(self, attr, value)

        # Normalize the formatter argument to a fully-qualified name string.
        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter.
            formatter_name = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatter_name = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatter_name)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Prefer the component named by the ref; fall back to our own.
        ref_component = ref.datasetType.component()
        return self.update(component=ref_component if ref_component is not None else self.component)

    def to_record(self, **kwargs: Any) -> dict[str, Any]:
        """Convert the record contents to a database record dictionary.

        Parameters
        ----------
        **kwargs
            Additional items to add to the returned record.
        """
        # The component column participates in the primary key, so a Python
        # None is stored as the NULLSTR sentinel rather than a real NULL.
        return dict(
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=self.component if self.component is not None else NULLSTR,
            checksum=self.checksum,
            file_size=self.file_size,
            **kwargs,
        )

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uri = ResourcePath(self.path, forceAbsolute=False)
        # Absolute URIs stand on their own; relative paths are resolved
        # against the datastore root via the factory.
        if uri.isabs():
            return Location(None, uri)
        return factory.fromPath(uri)

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Map the NULLSTR sentinel (or an empty/falsy value) back to None.
        stored_component = record["component"]
        if not stored_component or stored_component == NULLSTR:
            stored_component = None
        # Convert name of StorageClass back to an instance via the factory.
        return cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=cls.storageClassFactory.getStorageClass(record["storage_class"]),
            component=stored_component,
            checksum=record["checksum"],
            file_size=record["file_size"],
        )

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Return a copy with the specified fields replaced and everything
        else retained.
        """
        # Take each field from kwargs when supplied, otherwise from self;
        # anything left over in kwargs is not a known field.
        merged = {field: kwargs.pop(field, getattr(self, field)) for field in self.__slots__}
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**merged)

    def __reduce__(self) -> str | tuple[Any, ...]:
        # Pickle by round-tripping through the dict record representation.
        return (self.from_record, (self.to_record(),))