# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore", )

import logging
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
)

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef],
                          infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemInfo(self, ref: DatasetRef) -> Any:
        """Retrieve information associated with a file stored in this
        `Datastore`.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        info : `StoredDatastoreItemInfo`
            Stored information about this file and its formatter.

        Raises
        ------
        KeyError
            Dataset with that id cannot be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and formatters associated
            with this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`, `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: List[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about registry ID so if we have
        # disassembled in datastore we have to deduplicate. Since they
        # will have different datasetTypes we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)

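    # Illustrative sketch (not part of the original module): a concrete
    # subclass's ``put`` typically builds one internal record (some
    # ``StoredDatastoreItemInfo`` subclass) per artifact it writes and
    # registers the pairs in a single call, e.g.::
    #
    #     self._register_datasets([(ref, itemInfo) for ref, itemInfo in written])
    #
    # where ``written`` is a hypothetical list of (ref, record) pairs for
    # the artifacts just written (more than one when disassembling).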

    def _move_to_trash_in_registry(self, ref: DatasetRef) -> None:
        """Tell registry that this dataset and associated components
        are to be trashed.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to mark for removal from registry.

        Notes
        -----
        Dataset is not removed from internal stored item info table.
        """
        # Note that a ref can point to component dataset refs that
        # have been deleted already from registry but are still in
        # the python object. moveToTrash will deal with that.
        self.bridge.moveToTrash([ref])

    def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass,
                          assemblerParams: Optional[Mapping[str, Any]] = None,
                          isComponent: bool = False) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.
        """
        # Process any left over parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset),
                                                                                        pytype))

        return inMemoryDataset

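    # Illustrative sketch (not part of the original module): a subclass's
    # ``get`` implementation would typically obtain the raw Python object
    # from storage and then pass it through ``_post_process_get`` so that
    # any remaining parameters are applied and the Python type is checked.
    # ``_read_artifact`` and ``assemblerParams`` below are hypothetical::
    #
    #     inMemoryDataset = self._read_artifact(ref)
    #     return self._post_process_get(inMemoryDataset, ref.datasetType.storageClass,
    #                                   assemblerParams=assemblerParams)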

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset),
                                                                 storageClass.pytype))

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        return

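    # Illustrative sketch (not part of the original module): a subclass's
    # ``put`` would normally call ``_validate_put_parameters`` up front so
    # that a type mismatch or a constraints rejection is reported before
    # any artifact is written::
    #
    #     self._validate_put_parameters(inMemoryDataset, ref)
    #     # ... then serialise the dataset and call self._register_datasets(...)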

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
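
# Illustrative sketch (not part of the original module): ``GenericBaseDatastore``
# is meant to be subclassed. A concrete datastore built on it supplies the
# record-keeping hooks declared above plus the actual storage I/O, roughly as
# below. Names such as ``_write_artifact``/``_read_artifact`` are hypothetical
# and other `Datastore` abstract methods (trash, exists, ...) are omitted::
#
#     class MyDatastore(GenericBaseDatastore):
#
#         @property
#         def bridge(self) -> DatastoreRegistryBridge:
#             return self._bridge                      # bridge obtained from the registry
#
#         def addStoredItemInfo(self, refs, infos):    # persist per-dataset records
#             ...
#
#         def getStoredItemInfo(self, ref):
#             ...
#
#         def getStoredItemsInfo(self, ref):
#             ...
#
#         def removeStoredItemInfo(self, ref):
#             ...
#
#         def put(self, inMemoryDataset, ref):
#             self._validate_put_parameters(inMemoryDataset, ref)
#             info = self._write_artifact(inMemoryDataset, ref)
#             self._register_datasets([(ref, info)])
#
#         def get(self, ref, parameters=None):
#             inMemoryDataset = self._read_artifact(ref, parameters)
#             return self._post_process_get(inMemoryDataset,
#                                           ref.datasetType.storageClass,
#                                           assemblerParams=parameters)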