Coverage for python/lsst/daf/butler/core/storedFileInfo.py: 46%

82 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")

import inspect
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Type

from lsst.resources import ResourcePath

from .formatter import Formatter, FormatterParameter
from .location import Location, LocationFactory
from .storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from .datasets import DatasetId, DatasetRef

# String to use when a Python None is encountered
NULLSTR = "__NULL_STRING__"


class StoredDatastoreItemInfo:
    """Internal information associated with a stored dataset in a `Datastore`.

    This is an empty base class. Datastore implementations are expected to
    write their own subclasses.
    """

    __slots__ = ()

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        raise NotImplementedError("The base class does not know how to locate an item in a datastore.")

    @classmethod
    def from_record(cls: Type[StoredDatastoreItemInfo], record: Mapping[str, Any]) -> StoredDatastoreItemInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : instance of the relevant type.
            The newly-constructed item corresponding to the record.
        """
        raise NotImplementedError()

    def to_record(self) -> Dict[str, Any]:
        """Convert record contents to a dictionary."""
        raise NotImplementedError()

    @property
    def dataset_id(self) -> DatasetId:
        """Dataset ID associated with this record (`DatasetId`)."""
        raise NotImplementedError()

    def update(self, **kwargs: Any) -> StoredDatastoreItemInfo:
        """Create a new instance with everything retained apart from the
        specified values."""
        raise NotImplementedError()
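# Illustrative sketch, not part of the original module: one way a datastore
# implementation might satisfy the interface above. The class name and the
# ``note`` field are invented for this example; real implementations define
# their own record types (see ``StoredFileInfo`` below).
#
#     class ExampleItemInfo(StoredDatastoreItemInfo):
#         __slots__ = ("_dataset_id", "note")
#
#         def __init__(self, dataset_id: DatasetId, note: str):
#             self._dataset_id = dataset_id
#             self.note = note
#
#         @property
#         def dataset_id(self) -> DatasetId:
#             return self._dataset_id
#
#         def to_record(self) -> Dict[str, Any]:
#             return dict(dataset_id=self._dataset_id, note=self.note)
#
#         @classmethod
#         def from_record(cls, record: Mapping[str, Any]) -> ExampleItemInfo:
#             return cls(dataset_id=record["dataset_id"], note=record["note"])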

@dataclass(frozen=True)
class StoredFileInfo(StoredDatastoreItemInfo):
    """Datastore-private metadata associated with a Datastore file."""

    __slots__ = {"formatter", "path", "storageClass", "component", "checksum", "file_size", "dataset_id"}

    storageClassFactory = StorageClassFactory()

    def __init__(
        self,
        formatter: FormatterParameter,
        path: str,
        storageClass: StorageClass,
        component: Optional[str],
        checksum: Optional[str],
        file_size: int,
        dataset_id: DatasetId,
    ):
        # Use these shenanigans to allow us to use a frozen dataclass
        object.__setattr__(self, "path", path)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "component", component)
        object.__setattr__(self, "checksum", checksum)
        object.__setattr__(self, "file_size", file_size)
        object.__setattr__(self, "dataset_id", dataset_id)

        if isinstance(formatter, str):
            # We trust that this string refers to a Formatter
            formatterStr = formatter
        elif isinstance(formatter, Formatter) or (
            inspect.isclass(formatter) and issubclass(formatter, Formatter)
        ):
            formatterStr = formatter.name()
        else:
            raise TypeError(f"Supplied formatter '{formatter}' is not a Formatter")
        object.__setattr__(self, "formatter", formatterStr)
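    # Illustrative sketch, not part of the original module: the constructor
    # accepts the formatter as a fully-qualified name, a Formatter subclass,
    # or a Formatter instance and normalises it to a string. The formatter
    # name, storage class name and sizes below are placeholders chosen for
    # the example; ``dataset_id`` is assumed to be an existing `DatasetId`.
    #
    #     factory = StorageClassFactory()  # assumes standard definitions are loaded
    #     storageClass = factory.getStorageClass("StructuredDataDict")
    #     info = StoredFileInfo(
    #         formatter="lsst.daf.butler.formatters.json.JsonFormatter",
    #         path="a/b/dataset.json",
    #         storageClass=storageClass,
    #         component=None,
    #         checksum=None,
    #         file_size=1234,
    #         dataset_id=dataset_id,
    #     )
    #     info.formatter   # the fully-qualified formatter name as a plain string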

    formatter: str
    """Fully-qualified name of Formatter. If a Formatter class or instance
    is given, the name will be extracted."""

    path: str
    """Path to dataset within Datastore."""

    storageClass: StorageClass
    """StorageClass associated with Dataset."""

    component: Optional[str]
    """Component associated with this file. Can be None if the file does
    not refer to a component of a composite."""

    checksum: Optional[str]
    """Checksum of the serialized dataset."""

    file_size: int
    """Size of the serialized dataset in bytes."""

    dataset_id: DatasetId
    """DatasetId associated with this record."""

    def rebase(self, ref: DatasetRef) -> StoredFileInfo:
        """Return a copy of the record suitable for a specified reference.

        Parameters
        ----------
        ref : `DatasetRef`
            DatasetRef which provides component name and dataset ID for the
            new returned record.

        Returns
        -------
        record : `StoredFileInfo`
            New record instance.
        """
        # Take component and dataset_id from the ref; the rest comes from self.
        component = ref.datasetType.component()
        if component is None:
            component = self.component
        dataset_id = ref.getCheckedId()
        return self.update(dataset_id=dataset_id, component=component)
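    # Illustrative sketch, not part of the original module: given a
    # `DatasetRef` named ``component_ref`` whose dataset type is a component
    # (e.g. "metadata") of the dataset this record describes, ``rebase``
    # keeps the file details but takes the component name and dataset ID
    # from the reference.
    #
    #     new_info = info.rebase(component_ref)
    #     new_info.path == info.path                            # file details retained
    #     new_info.component                                    # component from the ref
    #     new_info.dataset_id == component_ref.getCheckedId()   # ID from the ref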

    def to_record(self) -> Dict[str, Any]:
        """Convert the contents of this record to a database record."""
        component = self.component
        if component is None:
            # Use the sentinel string since we want this to be part of the
            # primary key.
            component = NULLSTR
        return dict(
            dataset_id=self.dataset_id,
            formatter=self.formatter,
            path=self.path,
            storage_class=self.storageClass.name,
            component=component,
            checksum=self.checksum,
            file_size=self.file_size,
        )
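    # Illustrative sketch, not part of the original module: ``to_record``
    # flattens the record into plain values suitable for a database row,
    # substituting the ``NULLSTR`` sentinel for a missing component so the
    # column can still take part in a primary key.
    #
    #     row = info.to_record()
    #     row["storage_class"]   # the storage class *name*, not the instance
    #     row["component"]       # "__NULL_STRING__" when info.component is None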

    def file_location(self, factory: LocationFactory) -> Location:
        """Return the location of the artifact.

        Parameters
        ----------
        factory : `LocationFactory`
            Factory relevant to the datastore represented by this item.

        Returns
        -------
        location : `Location`
            The location of the item within this datastore.
        """
        uriInStore = ResourcePath(self.path, forceAbsolute=False)
        if uriInStore.isabs():
            location = Location(None, uriInStore)
        else:
            location = factory.fromPath(uriInStore)
        return location
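    # Illustrative sketch, not part of the original module: a relative path
    # is resolved against the root known to the supplied `LocationFactory`,
    # whereas an absolute path (e.g. an ``s3://`` URI) stands on its own and
    # bypasses the factory. The root below is a placeholder.
    #
    #     factory = LocationFactory("/datastore/root")
    #     loc = info.file_location(factory)
    #     loc.uri   # the datastore root joined with the relative info.path;
    #               # an absolute info.path would be returned unchanged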

    @classmethod
    def from_record(cls: Type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
        """Create instance from database record.

        Parameters
        ----------
        record : `dict`
            The record associated with this item.

        Returns
        -------
        info : `StoredFileInfo`
            The newly-constructed item corresponding to the record.
        """
        # Convert name of StorageClass to instance
        storageClass = cls.storageClassFactory.getStorageClass(record["storage_class"])
        component = record["component"] if (record["component"] and record["component"] != NULLSTR) else None

        info = cls(
            formatter=record["formatter"],
            path=record["path"],
            storageClass=storageClass,
            component=component,
            checksum=record["checksum"],
            file_size=record["file_size"],
            dataset_id=record["dataset_id"],
        )
        return info
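    # Illustrative sketch, not part of the original module: ``from_record``
    # inverts ``to_record``, looking the storage class up by name in the
    # shared `StorageClassFactory`, so a record survives a round trip
    # through its database representation.
    #
    #     restored = StoredFileInfo.from_record(info.to_record())
    #     restored.to_record() == info.to_record()   # True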

    def update(self, **kwargs: Any) -> StoredFileInfo:
        """Return a copy of this record with the specified fields replaced."""
        new_args = {}
        for k in self.__slots__:
            if k in kwargs:
                new_args[k] = kwargs.pop(k)
            else:
                new_args[k] = getattr(self, k)
        if kwargs:
            raise ValueError(f"Unexpected keyword arguments for update: {', '.join(kwargs)}")
        return type(self)(**new_args)
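    # Illustrative sketch, not part of the original module: because the
    # dataclass is frozen, ``update`` returns a modified copy rather than
    # mutating in place, and unknown keyword arguments are rejected.
    #
    #     bigger = info.update(file_size=4321)
    #     bigger.file_size               # 4321
    #     bigger.path == info.path       # True: everything else retained
    #     info.update(no_such_field=1)   # raises ValueError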