Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 50%

85 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo") 

31 

32import inspect 

33from collections.abc import Mapping 

34from dataclasses import dataclass 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.resources import ResourcePath 

38 

39from .formatter import Formatter, FormatterParameter 

40from .location import Location, LocationFactory 

41from .storageClass import StorageClass, StorageClassFactory 

42 

43if TYPE_CHECKING: 

44 from .datasets import DatasetId, DatasetRef 

45 

46# String to use when a Python None is encountered 

47NULLSTR = "__NULL_STRING__" 

48 

49 

class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses. Every member defined here raises
    `NotImplementedError` and exists only to document the interface that
    subclasses provide.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `~collections.abc.Mapping` [`str`, `~typing.Any`]
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError(f"{cls.__name__} does not implement from_record().")

    def to_record(self) -> dict[str, Any]:
        """Convert record contents to a dictionary.

        Returns
        -------
        record : `dict` [`str`, `~typing.Any`]
            Dictionary form of this item.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError(f"{type(self).__name__} does not implement to_record().")

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`).

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError(f"{type(self).__name__} does not implement dataset_id.")

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new instance with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Replacement values, keyed by the name of the item to replace.

        Returns
        -------
        updated : `StoredDatastoreItemInfo`
            The new instance.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError(f"{type(self).__name__} does not implement update().")

104 

105 

@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    # Shared by all instances; used by from_record() to turn a storage class
    # name back into a StorageClass instance.
    storageClassFactory = StorageClassFactory()

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
        dataset_id: DatasetId,
    ):
        """Construct the record, normalizing ``formatter`` to its name.

        Parameters
        ----------
        formatter : `str`, `Formatter` or `type` [`Formatter`]
            Formatter used for the file. A string is stored unchanged;
            for a `Formatter` instance or subclass the result of its
            ``name()`` method is stored.
        path : `str`
            Path to dataset within Datastore.
        storageClass : `StorageClass`
            StorageClass associated with Dataset.
        component : `str` or `None`
            Component associated with this file, if any.
        checksum : `str` or `None`
            Checksum of the serialized dataset.
        file_size : `int`
            Size of the serialized dataset in bytes.
        dataset_id : `DatasetId`
            DatasetId associated with this record.

        Raises
        ------
        TypeError
            Raised if ``formatter`` is neither a string nor a `Formatter`
            instance or subclass.
        """
        # The dataclass is frozen, so normal attribute assignment is
        # blocked; go through object.__setattr__ to populate the slots.
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)
        object.__setattr__(self, "dataset_id", dataset_id)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Take component and dataset_id from the ref, rest comes from self.
        # If the ref's dataset type reports no component, keep our own.
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        return self.update(dataset_id=ref.id, component=component)

    def to_record(self) -> dict[str, Any]:
        """Convert this record to a dictionary suitable for a database row.

        Returns
        -------
        record : `dict` [`str`, `~typing.Any`]
            Dictionary with keys ``dataset_id``, ``formatter``, ``path``,
            ``storage_class`` (the storage class name), ``component``,
            ``checksum`` and ``file_size``. A `None` component is replaced
            by the ``NULLSTR`` sentinel.
        """
        component = self.component
        if component is None:
            # Use the NULLSTR sentinel rather than None since we want this
            # to be part of the primary key.
            component = NULLSTR
        return {
            "dataset_id": self.dataset_id,
            "formatter": self.formatter,
            "path": self.path,
            "storage_class": self.storageClass.name,
            "component": component,
            "checksum": self.checksum,
            "file_size": self.file_size,
        }

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            # An absolute URI is used as-is and does not involve the factory.
            return Location(None, uriInStore)
        return factory.fromPath(uriInStore)

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `~collections.abc.Mapping` [`str`, `~typing.Any`]
            The record associated with this item. It is read using the
            keys ``formatter``, ``path``, ``storage_class``, ``component``,
            ``checksum``, ``file_size`` and ``dataset_id``.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])

        # Any falsy component, as well as the NULLSTR sentinel written by
        # to_record(), means "no component".
        component = record["component"]
        if not component or component == NULLSTR:
            component = None

        return cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Create a new instance with everything retained apart from the
        specified values.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Replacement values keyed by attribute name. Only names listed
            in ``__slots__`` are accepted.

        Returns
        -------
        updated : `StoredFileInfo`
            A new instance of the same type as ``self``.

        Raises
        ------
        ValueError
            Raised if a keyword does not match any attribute name.
        """
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        # Anything still left in kwargs did not match a known attribute.
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        """Support pickling by round-tripping through the record form.

        The instance is rebuilt by calling `from_record` on the dictionary
        returned by `to_record`.
        """
        return (self.from_record, (self.to_record(),))