Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 86%

59 statements  

coverage.py v6.5.0, created at 2022-12-01 19:54 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Generic datastore code useful for most datastores.""" 

25 

26__all__ = ("GenericBaseDatastore", ) 

27 

28import logging 

29from abc import abstractmethod 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Iterable, 

34 List, 

35 Mapping, 

36 Optional, 

37 Sequence, 

38 Tuple, 

39) 

40 

41from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError 

42from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge 

43 

44if TYPE_CHECKING:    [44 ↛ 45: the condition on line 44 was never true]

45 from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo 

46 

47log = logging.getLogger(__name__) 

48 

49 

50class GenericBaseDatastore(Datastore): 

51 """Methods useful for most implementations of a `Datastore`. 

52 

53 Should always be subclassed, since key abstract methods are missing.

54 """ 

55 

56 @property 

57 @abstractmethod 

58 def bridge(self) -> DatastoreRegistryBridge: 

59 """Object that manages the interface between this `Datastore` and the 

60 `Registry` (`DatastoreRegistryBridge`). 

61 """ 

62 raise NotImplementedError() 

63 

64 @abstractmethod 

65 def addStoredItemInfo(self, refs: Iterable[DatasetRef], 

66 infos: Iterable[Any]) -> None: 

67 """Record internal storage information associated with one or more 

68 datasets. 

69 

70 Parameters 

71 ---------- 

72 refs : sequence of `DatasetRef` 

73 The datasets that have been stored. 

74 infos : sequence of `StoredDatastoreItemInfo` 

75 Metadata associated with the stored datasets. 

76 """ 

77 raise NotImplementedError() 

78 

79 @abstractmethod 

80 def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]: 

81 """Retrieve information associated with files stored in this 

82 `Datastore` associated with this dataset ref. 

83 

84 Parameters 

85 ---------- 

86 ref : `DatasetRef` 

87 The dataset that is to be queried. 

88 

89 Returns 

90 ------- 

91 items : `list` [`StoredDatastoreItemInfo`] 

92 Stored information about the files and their associated

93 formatters for this dataset. Only one file will be returned

94 if the dataset has not been disassembled. An empty list

95 is returned if no matching datasets can be found.

96 """ 

97 raise NotImplementedError() 

98 

99 @abstractmethod 

100 def removeStoredItemInfo(self, ref: DatasetRef) -> None: 

101 """Remove information about the file associated with this dataset. 

102 

103 Parameters 

104 ---------- 

105 ref : `DatasetRef` 

106 The dataset that has been removed. 

107 """ 

108 raise NotImplementedError() 

109 
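These three bookkeeping hooks are what a concrete subclass must supply. Below is a minimal, self-contained sketch of the shape such an implementation might take, using plain dataset IDs and Python dicts in place of real `DatasetRef` and `StoredDatastoreItemInfo` objects; everything beyond the three method names is illustrative:

    from collections import defaultdict

    class InMemoryRecords:
        """Toy record-keeper shaped like the three abstract methods above."""

        def __init__(self):
            self._records = defaultdict(list)

        def addStoredItemInfo(self, refs, infos):
            # One info entry per ref; a disassembled dataset contributes
            # several (ref, info) pairs that share the same dataset ID.
            for ref_id, info in zip(refs, infos):
                self._records[ref_id].append(info)

        def getStoredItemsInfo(self, ref_id):
            # Empty list when nothing matches, mirroring the documented contract.
            return list(self._records.get(ref_id, []))

        def removeStoredItemInfo(self, ref_id):
            self._records.pop(ref_id, None)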

110 def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None: 

111 """Update registry to indicate that one or more datasets have been 

112 stored. 

113 

114 Parameters 

115 ---------- 

116 refsAndInfos : sequence of `tuple` [`DatasetRef`,

117 `StoredDatastoreItemInfo`] 

118 Datasets to register and the internal datastore metadata associated 

119 with them. 

120 """ 

121 expandedRefs: List[DatasetRef] = [] 

122 expandedItemInfos = [] 

123 

124 for ref, itemInfo in refsAndInfos: 

125 expandedRefs.append(ref) 

126 expandedItemInfos.append(itemInfo) 

127 

128 # Dataset location only cares about the registry ID, so if we have

129 # disassembled the dataset in the datastore we have to deduplicate.

130 # The refs will have different datasetTypes, so we can't use a set.

131 registryRefs = {r.id: r for r in expandedRefs} 

132 self.bridge.insert(registryRefs.values()) 

133 self.addStoredItemInfo(expandedRefs, expandedItemInfos) 

134 
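The deduplication step at the end of `_register_datasets` keys the refs on their registry ID. A standalone sketch of that pattern, with a hypothetical `FakeRef` standing in for `DatasetRef`:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class FakeRef:
        # Hypothetical stand-in for DatasetRef; components of a disassembled
        # dataset share an id but differ in datasetType.
        id: int
        datasetType: str

    refs = [FakeRef(1, "exp.image"), FakeRef(1, "exp.mask"), FakeRef(2, "exp.image")]

    # Keying by id collapses the two components of dataset 1; a set would
    # keep all three entries because the dataclasses compare unequal.
    registryRefs = {r.id: r for r in refs}
    assert len(registryRefs) == 2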

135 def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass, 

136 assemblerParams: Optional[Mapping[str, Any]] = None, 

137 isComponent: bool = False) -> Any: 

138 """Given the Python object read from the datastore, manipulate 

139 it based on the supplied parameters and ensure the Python 

140 type is correct. 

141 

142 Parameters 

143 ---------- 

144 inMemoryDataset : `object` 

145 Dataset to check. 

146 readStorageClass : `StorageClass`

147 The `StorageClass` used to obtain the assembler and to 

148 check the python type. 

149 assemblerParams : `dict`, optional 

150 Parameters to pass to the assembler. Can be `None`. 

151 isComponent : `bool`, optional 

152 If this is a component, allow the inMemoryDataset to be `None`. 

153 """ 

154 # Process any leftover parameters

155 if assemblerParams: 

156 inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams) 

157 

158 # Validate the returned data type matches the expected data type 

159 pytype = readStorageClass.pytype 

160 

161 allowedTypes = [] 

162 if pytype:    [162 ↛ 166: the condition on line 162 was never false]

163 allowedTypes.append(pytype) 

164 

165 # Special case components to allow them to be None 

166 if isComponent: 

167 allowedTypes.append(type(None)) 

168 

169 if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):    [169 ↛ 170: the condition on line 169 was never true]

170 raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset), 

171 pytype)) 

172 

173 return inMemoryDataset 

174 
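The allowed-types check in `_post_process_get` can be exercised on its own. A minimal sketch, with `pytype` and `isComponent` as stand-ins for the values derived from the real `StorageClass`:

    pytype = dict        # stand-in for readStorageClass.pytype
    isComponent = True   # components may legitimately be None
    inMemoryDataset = None

    allowedTypes = []
    if pytype:
        allowedTypes.append(pytype)
    if isComponent:
        allowedTypes.append(type(None))

    if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
        raise TypeError(f"Got Python type {type(inMemoryDataset)} but expected {pytype}")
    # No exception here: None is accepted only because isComponent is True.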

175 def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

176 """Validate the supplied arguments for put. 

177 

178 Parameters 

179 ---------- 

180 inMemoryDataset : `object` 

181 The dataset to store. 

182 ref : `DatasetRef` 

183 Reference to the associated Dataset. 

184 """ 

185 storageClass = ref.datasetType.storageClass 

186 

187 # Sanity check 

188 if not isinstance(inMemoryDataset, storageClass.pytype):    [188 ↛ 189: the condition on line 188 was never true]

189 raise TypeError("Inconsistency between supplied object ({}) " 

190 "and storage class type ({})".format(type(inMemoryDataset), 

191 storageClass.pytype)) 

192 

193 # Confirm that we can accept this dataset 

194 if not self.constraints.isAcceptable(ref): 

195 # Raise rather than use boolean return value. 

196 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via" 

197 " configuration.") 

198 

199 return 

200 
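The isinstance sanity check in `_validate_put_parameters` is easy to demonstrate in isolation. A hedged sketch, with a hypothetical `FakeStorageClass` exposing only the `pytype` attribute the check uses:

    class FakeStorageClass:
        # Hypothetical stand-in exposing only the pytype attribute.
        pytype = list

    inMemoryDataset = {"not": "a list"}
    try:
        if not isinstance(inMemoryDataset, FakeStorageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) and "
                            "storage class type ({})".format(type(inMemoryDataset),
                                                             FakeStorageClass.pytype))
    except TypeError as exc:
        print(exc)  # the mismatch is reported rather than silently stored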

201 def remove(self, ref: DatasetRef) -> None: 

202 """Indicate to the Datastore that a dataset can be removed. 

203 

204 .. warning:: 

205 

206 This method deletes the artifact associated with this 

207 dataset and can not be reversed. 

208 

209 Parameters 

210 ---------- 

211 ref : `DatasetRef` 

212 Reference to the required Dataset. 

213 

214 Raises 

215 ------ 

216 FileNotFoundError 

217 Attempt to remove a dataset that does not exist. 

218 

219 Notes 

220 ----- 

221 This method is used for immediate removal of a dataset and is 

222 generally reserved for internal testing of datastore APIs. 

223 It is implemented by calling `trash()` and then immediately calling 

224 `emptyTrash()`. This call is meant to be immediate so errors 

225 encountered during removal are not ignored. 

226 """ 

227 self.trash(ref, ignore_errors=False) 

228 self.emptyTrash(ignore_errors=False) 

229 
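The trash-then-empty contract described in the notes can be modelled with a toy store; the class below is purely illustrative and shares only the method names with the real API:

    class DemoStore:
        """Toy model of the trash-then-empty contract used by remove()."""

        def __init__(self, contents):
            self._stored = set(contents)
            self._trash = set()

        def trash(self, ref, ignore_errors=True):
            if ref not in self._stored:
                if not ignore_errors:
                    raise FileNotFoundError(ref)
                return
            self._stored.discard(ref)
            self._trash.add(ref)

        def emptyTrash(self, ignore_errors=True):
            self._trash.clear()

        def remove(self, ref):
            # Immediate removal: errors must surface, so nothing is ignored.
            self.trash(ref, ignore_errors=False)
            self.emptyTrash(ignore_errors=False)

    store = DemoStore({"ref1"})
    store.remove("ref1")        # succeeds
    # store.remove("ref2")      # would raise FileNotFoundError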

230 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None: 

231 """Retrieve a dataset from an input `Datastore`, 

232 and store the result in this `Datastore`. 

233 

234 Parameters 

235 ---------- 

236 inputDatastore : `Datastore` 

237 The external `Datastore` from which to retrieve the Dataset.

238 ref : `DatasetRef` 

239 Reference to the required dataset in the input data store. 

240 

241 """ 

242 assert inputDatastore is not self # unless we want it for renames? 

243 inMemoryDataset = inputDatastore.get(ref) 

244 return self.put(inMemoryDataset, ref)
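
The transfer above is a plain get-then-put between two stores. A toy sketch of that pattern, with a hypothetical `DictStore` providing just the `get`/`put` surface it relies on:

    class DictStore:
        # Hypothetical minimal store with the get/put surface used by transfer().
        def __init__(self, data=None):
            self._data = dict(data or {})

        def get(self, ref):
            return self._data[ref]

        def put(self, obj, ref):
            self._data[ref] = obj

    def transfer(inputStore, outputStore, ref):
        # Mirrors the guard above: no self-transfer.
        assert inputStore is not outputStore
        outputStore.put(inputStore.get(ref), ref)

    src = DictStore({"refA": b"payload"})
    dst = DictStore()
    transfer(src, dst, "refA")
    assert dst.get("refA") == b"payload"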