Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 47%

85 statements

coverage.py v7.2.7, created at 2023-06-23 09:30 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo") 

25 

26import inspect 

27from collections.abc import Mapping 

28from dataclasses import dataclass 

29from typing import TYPE_CHECKING, Any 

30 

31from lsst.resources import ResourcePath 

32 

33from .formatter import Formatter, FormatterParameter 

34from .location import Location, LocationFactory 

35from .storageClass import StorageClass, StorageClassFactory 

36 

37if TYPE_CHECKING: 

38 from .datasets import DatasetId, DatasetRef 

39 

# Sentinel string stored in place of a Python None when a real string is
# required in the database record (see StoredFileInfo.to_record, where the
# component column must be usable as part of a primary key).
NULLSTR = "__NULL_STRING__"

42 

43 

class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class.  Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`)."""
        raise NotImplementedError

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the stored artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Construct an item from its database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError

    def to_record(self) -> dict[str, Any]:
        """Serialize this item's contents to a dictionary."""
        raise NotImplementedError

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Return a new instance with the specified values replaced and
        everything else retained.
        """
        raise NotImplementedError

98 

99 

@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    # NOTE(review): declared as a set literal, so iteration order is
    # arbitrary.  update() iterates it but builds keyword arguments, so the
    # ordering does not matter in practice.
    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    # Shared factory used by from_record() to turn a storage class name from
    # the database back into a StorageClass instance.
    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: str | None,
        checksum: str | None,
        file_size: int,
        dataset_id: DatasetId,
    ) -> None:
        # Use these shenanigans to allow us to use a frozen dataclass:
        # direct attribute assignment is blocked on frozen instances, so all
        # fields are set through object.__setattr__.
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)
        object.__setattr__(self, "dataset_id", dataset_id)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            # Either a Formatter instance or a Formatter class: record its
            # fully-qualified name rather than the object itself.
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: str | None
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: str | None
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # take component and dataset_id from the ref, rest comes from self;
        # if the ref has no component of its own, retain this record's.
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        dataset_id = ref.id
        return self.update(dataset_id=dataset_id, component=component)

    def to_record(self) -> dict[str, Any]:
        """Convert this record's contents to a database record dictionary.

        Returns
        -------
        record : `dict`
            Keys match the database columns consumed by `from_record`.
        """
        component = self.component
        if component is None:
            # Use empty string since we want this to be part of the
            # primary key.
            component = NULLSTR
        return dict(
            dataset_id=self.dataset_id,
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
        )

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        # An absolute URI stands on its own; a relative path is resolved
        # against the datastore root via the supplied factory.
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            location = Location(None, uriInStore)
        else:
            location = factory.fromPath(uriInStore)
        return location

    @classmethod
    def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        # Both the NULLSTR sentinel and an empty string map back to None.
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None

        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )
        return info

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Return a copy of this record with the named attributes replaced.

        Parameters
        ----------
        **kwargs
            New values for any of the attributes listed in ``__slots__``.

        Returns
        -------
        updated : `StoredFileInfo`
            New record combining this record's values with the overrides.

        Raises
        ------
        ValueError
            Raised if a keyword does not correspond to a known attribute.
        """
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)

    def __reduce__(self) -> str | tuple[Any, ...]:
        # Pickle by round-tripping through the database record form:
        # unpickling calls from_record(to_record()).
        return (self.from_record, (self.to_record(),))