
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore",)

import logging
from abc import abstractmethod

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @abstractmethod
    def addStoredItemInfo(self, refs, infos):
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemInfo(self, ref):
        """Retrieve information associated with a single file stored in
        this `Datastore`.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        info : `StoredDatastoreItemInfo`
            Stored information about this file and its formatter.

        Raises
        ------
        KeyError
            Raised if a dataset with that ID cannot be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref):
        """Retrieve information associated with all the files stored in
        this `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and formatters associated
            with this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can be an empty list if no
            matching datasets are found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref):
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()
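
    # Illustrative only: a minimal in-memory implementation of the four
    # record-keeping methods above, assuming a hypothetical
    # ``self._records`` dict created in the subclass constructor. Real
    # subclasses normally persist this information in a registry-backed
    # table instead.
    #
    #     def addStoredItemInfo(self, refs, infos):
    #         for ref, info in zip(refs, infos):
    #             self._records.setdefault(ref.id, []).append(info)
    #
    #     def getStoredItemInfo(self, ref):
    #         # Raises KeyError for an unknown dataset ID, as documented.
    #         return self._records[ref.id][0]
    #
    #     def getStoredItemsInfo(self, ref):
    #         return list(self._records.get(ref.id, []))
    #
    #     def removeStoredItemInfo(self, ref):
    #         self._records.pop(ref.id, None)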

    def _register_datasets(self, refsAndInfos):
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` of (`DatasetRef`, `StoredDatastoreItemInfo`)
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            # Need the main dataset and the components.
            expandedRefs.extend(ref.flatten([ref]))

            # Need one entry for the main ref and then one for each
            # registered component.
            expandedItemInfos.extend([itemInfo] * (len(ref.components) + 1))

        # Dataset location only cares about the registry ID, so if we have
        # disassembled in the datastore we have to deduplicate. Since the
        # refs will have different dataset types we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.registry.insertDatasetLocations(self.name, registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)
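
    # Note on the deduplication above: after disassembly, refs sharing a
    # registry ID still differ in dataset type, so DatasetRef equality
    # (and hence a set) would not collapse them; keying a dict on
    # ``r.id`` does.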

    def _move_to_trash_in_registry(self, ref):
        """Tell registry that this dataset and associated components
        are to be trashed.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to mark for removal from registry.

        Notes
        -----
        The dataset is not removed from the internal stored-item-info
        table.
        """

        # Note that a ref can point to component dataset refs that
        # have already been deleted from registry but are still present
        # in the Python object. moveDatasetLocationToTrash will deal
        # with that.
        self.registry.moveDatasetLocationToTrash(self.name, list(ref.flatten([ref])))

    def _post_process_get(self, inMemoryDataset, readStorageClass, assemblerParams=None,
                          isComponent=False):
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the Python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset after any assembler parameters have been applied.
        """
        # Process any leftover parameters.
        if assemblerParams:
            inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset,
                                                                            assemblerParams)

        # Validate that the returned data type matches the expected
        # data type.
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None.
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            raise TypeError("Got Python type {} from datastore but expected {}".format(
                type(inMemoryDataset), pytype))

        return inMemoryDataset
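
    # Sketch of how a concrete ``get`` might use this helper; ``readRaw``
    # is a hypothetical subclass method that reads the artifact back:
    #
    #     raw = self.readRaw(ref)
    #     return self._post_process_get(raw, ref.datasetType.storageClass,
    #                                   assemblerParams=parameters)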

    def _validate_put_parameters(self, inMemoryDataset, ref):
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Raised if the supplied object does not match the Python type
            expected by the storage class.
        DatasetTypeNotSupportedError
            Raised if this datastore's constraints reject the dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset),
                                                                 storageClass.pytype))

        # Confirm that we can accept this dataset.
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        return
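
    # Sketch of a concrete ``put`` calling this validation before any
    # artifact is written; ``writeArtifact`` and the ``info`` it returns
    # are hypothetical:
    #
    #     def put(self, inMemoryDataset, ref):
    #         self._validate_put_parameters(inMemoryDataset, ref)
    #         info = self.writeArtifact(inMemoryDataset, ref)
    #         self._register_datasets([(ref, info)])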

    def remove(self, ref):
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when attempting to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore, ref):
        """Retrieve a dataset from an input `Datastore`, and store the
        result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
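
    # Example usage (sketch): copy a dataset from a source datastore into
    # this one, assuming ``ref`` is present in ``srcDatastore`` and both
    # datastores share a registry:
    #
    #     destDatastore.transfer(srcDatastore, ref)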