Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore", )

import logging
from abc import abstractmethod

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

# Module-level logger following the standard getLogger(__name__) convention.
log = logging.getLogger(__name__)

33 

34 

class GenericBaseDatastore(Datastore):
    """Common behaviour shared by most concrete `Datastore` implementations.

    This class must always be sub-classed: several key abstract methods
    dealing with internal record keeping are left unimplemented.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object mediating between this `Datastore` and the `Registry`
        (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs, infos):
        """Record internal storage information for one or more datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemInfo(self, ref):
        """Return the stored information for a single file held in this
        `Datastore`.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        info : `StoredDatastoreItemInfo`
            Stored information about this file and its formatter.

        Raises
        ------
        KeyError
            Dataset with that id can not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref):
        """Return stored information for every file this `Datastore`
        associates with the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters.
            A single entry is returned when the dataset has not been
            disassembled; the list can be empty if no matching datasets
            can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref):
        """Forget the internal file information recorded for this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos):
        """Tell the registry that one or more datasets are now stored here.

        Parameters
        ----------
        refsAndInfos : sequence `tuple` [`DatasetRef`,
                                         `StoredDatastoreItemInfo`]
            Datasets to register along with the internal datastore metadata
            associated with each of them.
        """
        allRefs = []
        allInfos = []

        for ref, itemInfo in refsAndInfos:
            # Record the parent dataset together with its components.
            allRefs.extend(ref.flatten([ref]))
            # One metadata entry for the parent ref plus one per
            # registered component.
            allInfos.extend(itemInfo for _ in range(len(ref.components) + 1))

        # The registry location table only cares about dataset IDs, so a
        # dataset disassembled in this datastore must be deduplicated by
        # ID.  The refs carry differing datasetTypes, which rules out a
        # plain set; keep the last ref seen for each ID instead.
        byId = {}
        for oneRef in allRefs:
            byId[oneRef.id] = oneRef
        self.bridge.insert(byId.values())
        self.addStoredItemInfo(allRefs, allInfos)

    def _move_to_trash_in_registry(self, ref):
        """Mark this dataset and its associated components as trashed in
        the registry.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to mark for removal from registry.

        Notes
        -----
        The internal stored item info table is left untouched.
        """
        # The ref may still point at component refs that registry has
        # already deleted even though the python object retains them;
        # moveToTrash copes with that situation.
        self.bridge.moveToTrash(ref.flatten([ref]))

    def _post_process_get(self, inMemoryDataset, readStorageClass, assemblerParams=None,
                          isComponent=False):
        """Apply any remaining parameters to a freshly-read dataset and
        verify that its Python type matches expectations.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass: `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.
        """
        # Hand any leftover parameters to the storage class assembler.
        if assemblerParams:
            inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset,
                                                                            assemblerParams)

        # Work out which Python types the returned object may legally have.
        pytype = readStorageClass.pytype
        acceptable = [pytype] if pytype else []

        # Components are a special case: they are permitted to be None.
        if isComponent:
            acceptable.append(type(None))

        if acceptable and not isinstance(inMemoryDataset, tuple(acceptable)):
            raise TypeError("Got Python type {} from datastore but expected {}".format(
                type(inMemoryDataset), pytype))

        return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset, ref):
        """Check that the arguments supplied to a put are usable.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check: the object must match its declared storage class.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset),
                                                                 storageClass.pytype))

        # Reject datasets this datastore is configured not to accept.
        # An exception is raised rather than returning a boolean.
        if not self.constraints.isAcceptable(ref):
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

    def remove(self, ref):
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and can not be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore, ref):
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        # Internal invariant: transferring to oneself makes no sense
        # (unless we want it for renames?).
        assert inputDatastore is not self
        payload = inputDatastore.get(ref)
        return self.put(payload, ref)