Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 86%

60 statements  

coverage.py v6.4.4, created at 2022-08-26 02:22 -0700

  1  # This file is part of daf_butler.
  2  #
  3  # Developed for the LSST Data Management System.
  4  # This product includes software developed by the LSST Project
  5  # (http://www.lsst.org).
  6  # See the COPYRIGHT file at the top-level directory of this distribution
  7  # for details of code ownership.
  8  #
  9  # This program is free software: you can redistribute it and/or modify
 10  # it under the terms of the GNU General Public License as published by
 11  # the Free Software Foundation, either version 3 of the License, or
 12  # (at your option) any later version.
 13  #
 14  # This program is distributed in the hope that it will be useful,
 15  # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17  # GNU General Public License for more details.
 18  #
 19  # You should have received a copy of the GNU General Public License
 20  # along with this program. If not, see <http://www.gnu.org/licenses/>.
 21
 22  from __future__ import annotations
 23
 24  """Generic datastore code useful for most datastores."""
 25
 26  __all__ = ("GenericBaseDatastore",)
 27
 28  import logging
 29  from abc import abstractmethod
 30  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Sequence, Tuple
 31
 32  from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
 33  from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge
 34
 35  if TYPE_CHECKING:  # coverage: 35 ↛ 36 (condition on line 35 was never true)
 36      from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo
 37
 38  log = logging.getLogger(__name__)
 39
 40
 41  class GenericBaseDatastore(Datastore):
 42      """Methods useful for most implementations of a `Datastore`.
 43
 44      Should always be sub-classed since key abstract methods are missing.
 45      """
 46
 47      @property
 48      @abstractmethod
 49      def bridge(self) -> DatastoreRegistryBridge:
 50          """Object that manages the interface between this `Datastore` and the
 51          `Registry` (`DatastoreRegistryBridge`).
 52          """
 53          raise NotImplementedError()
 54
 55      @abstractmethod
 56      def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[Any]) -> None:
 57          """Record internal storage information associated with one or more
 58          datasets.
 59
 60          Parameters
 61          ----------
 62          refs : sequence of `DatasetRef`
 63              The datasets that have been stored.
 64          infos : sequence of `StoredDatastoreItemInfo`
 65              Metadata associated with the stored datasets.
 66          """
 67          raise NotImplementedError()
 68
 69      @abstractmethod
 70      def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
 71          """Retrieve information about files stored in this `Datastore`
 72          that are associated with this dataset ref.
 73
 74          Parameters
 75          ----------
 76          ref : `DatasetRef`
 77              The dataset that is to be queried.
 78
 79          Returns
 80          -------
 81          items : `list` [`StoredDatastoreItemInfo`]
 82              Stored information about the files and associated formatters
 83              for this dataset. Only one file will be returned
 84              if the dataset has not been disassembled. Can return an empty
 85              list if no matching datasets can be found.
 86          """
 87          raise NotImplementedError()
 88
 89      @abstractmethod
 90      def removeStoredItemInfo(self, ref: DatasetRef) -> None:
 91          """Remove information about the file associated with this dataset.
 92
 93          Parameters
 94          ----------
 95          ref : `DatasetRef`
 96              The dataset that has been removed.
 97          """
 98          raise NotImplementedError()
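# --- Editor's sketch (not part of the measured source): one plausible way a
# subclass could back the three abstract record-keeping hooks above with an
# in-memory dict keyed by dataset ID. The class name and the `_records`
# attribute are invented for illustration; real datastores persist these
# records (e.g. in registry-managed tables) rather than in memory, and this
# class still inherits other abstract methods, so it cannot be instantiated.

from collections import defaultdict


class InMemoryRecordsDatastore(GenericBaseDatastore):
    """Hypothetical subclass illustrating the record-keeping contract."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Maps dataset ID -> list of item infos (one per stored file).
        self._records = defaultdict(list)

    def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[Any]) -> None:
        # Parallel iterables: a disassembled dataset contributes one
        # (ref, info) pair per component file.
        for ref, info in zip(refs, infos):
            self._records[ref.id].append(info)

    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        # An empty list means nothing is known about this dataset.
        return list(self._records.get(ref.id, ()))

    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        self._records.pop(ref.id, None)

# --- End of editor's sketch.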

 99
100      def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
101          """Update registry to indicate that one or more datasets have been
102          stored.
103
104          Parameters
105          ----------
106          refsAndInfos : sequence of `tuple` [`DatasetRef`,
107              `StoredDatastoreItemInfo`]
108              Datasets to register and the internal datastore metadata
109              associated with them.
110          """
111          expandedRefs: List[DatasetRef] = []
112          expandedItemInfos = []
113
114          for ref, itemInfo in refsAndInfos:
115              expandedRefs.append(ref)
116              expandedItemInfos.append(itemInfo)
117
118          # Dataset location only cares about the registry ID, so if we have
119          # disassembled the dataset in the datastore we have to deduplicate.
120          # Since the refs will have different datasetTypes, we can't use a set.
121          registryRefs = {r.id: r for r in expandedRefs}
122          self.bridge.insert(registryRefs.values())
123          self.addStoredItemInfo(expandedRefs, expandedItemInfos)
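# --- Editor's sketch (not from the source): the deduplication step above in
# isolation. Component refs from a disassembled dataset share one dataset ID
# but carry different dataset types, so keying a dict by ID yields exactly one
# ref per dataset for the registry, while every (ref, info) pair is still
# recorded in the datastore. `FakeRef` is a stand-in invented for illustration.

from typing import NamedTuple


class FakeRef(NamedTuple):
    id: int
    datasetType: str


expandedRefs = [FakeRef(1, "exposure.image"), FakeRef(1, "exposure.mask")]
registryRefs = {r.id: r for r in expandedRefs}
assert len(registryRefs) == 1  # one registry entry per dataset ID
assert len(expandedRefs) == 2  # but two stored-item records

# --- End of editor's sketch.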

124
125      def _post_process_get(
126          self,
127          inMemoryDataset: Any,
128          readStorageClass: StorageClass,
129          assemblerParams: Optional[Mapping[str, Any]] = None,
130          isComponent: bool = False,
131      ) -> Any:
132          """Given the Python object read from the datastore, manipulate
133          it based on the supplied parameters and ensure the Python
134          type is correct.
135
136          Parameters
137          ----------
138          inMemoryDataset : `object`
139              Dataset to check.
140          readStorageClass : `StorageClass`
141              The `StorageClass` used to obtain the assembler and to
142              check the python type.
143          assemblerParams : `dict`, optional
144              Parameters to pass to the assembler. Can be `None`.
145          isComponent : `bool`, optional
146              If this is a component, allow the inMemoryDataset to be `None`.
147          """
148          # Process any leftover parameters
149          if assemblerParams:
150              inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)
151
152          # Validate the returned data type matches the expected data type
153          pytype = readStorageClass.pytype
154
155          allowedTypes = []
156          if pytype:  # coverage: 156 ↛ 160 (condition on line 156 was never false)
157              allowedTypes.append(pytype)
158
159          # Special case components to allow them to be None
160          if isComponent:
161              allowedTypes.append(type(None))
162
163          if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):  # coverage: 163 ↛ 164 (condition on line 163 was never true)
164              inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)
165
166          return inMemoryDataset
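# --- Editor's sketch (not from the source): the type-checking logic above,
# reduced to a free function so its behaviour is easy to see. For a component
# read, `None` is an acceptable value and triggers no coercion; otherwise the
# value must be an instance of the storage class's python type before
# `coerce_type` would be attempted. `needs_coercion` is invented here.

def needs_coercion(value: Any, pytype: type, isComponent: bool) -> bool:
    allowed = [pytype] if pytype else []
    if isComponent:
        allowed.append(type(None))  # components may legitimately be None
    return bool(allowed) and not isinstance(value, tuple(allowed))


assert needs_coercion(None, dict, isComponent=False)      # wrong type
assert not needs_coercion(None, dict, isComponent=True)   # None allowed
assert not needs_coercion({}, dict, isComponent=False)    # exact match

# --- End of editor's sketch.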

167
168      def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
169          """Validate the supplied arguments for put.
170
171          Parameters
172          ----------
173          inMemoryDataset : `object`
174              The dataset to store.
175          ref : `DatasetRef`
176              Reference to the associated Dataset.
177          """
178          storageClass = ref.datasetType.storageClass
179
180          # Sanity check
181          if not isinstance(inMemoryDataset, storageClass.pytype):  # coverage: 181 ↛ 182 (condition on line 181 was never true)
182              raise TypeError(
183                  "Inconsistency between supplied object ({}) "
184                  "and storage class type ({})".format(type(inMemoryDataset), storageClass.pytype)
185              )
186
187          # Confirm that we can accept this dataset
188          if not self.constraints.isAcceptable(ref):
189              # Raise rather than use boolean return value.
190              raise DatasetTypeNotSupportedError(
191                  f"Dataset {ref} has been rejected by this datastore via configuration."
192              )
193
194          return
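# --- Editor's sketch (not from the source): the shape of a subclass `put()`
# built on the helpers above. `_write_artifact` is a hypothetical stand-in for
# whatever serialization a concrete datastore performs; only
# `_validate_put_parameters` and `_register_datasets` come from this file.

class SketchDatastore(GenericBaseDatastore):
    """Hypothetical subclass; other abstract methods omitted for brevity."""

    def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        # Raises TypeError or DatasetTypeNotSupportedError on bad input.
        self._validate_put_parameters(inMemoryDataset, ref)
        # Persist the artifact; returns a StoredDatastoreItemInfo-like record.
        info = self._write_artifact(inMemoryDataset, ref)
        # Record the dataset with the registry bridge and item-info tables.
        self._register_datasets([(ref, info)])

    def _write_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> Any:
        # Hypothetical: a real datastore would serialize to a file or
        # object store here and return the resulting item info.
        raise NotImplementedError("illustrative stub only")

# --- End of editor's sketch.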

195
196      def remove(self, ref: DatasetRef) -> None:
197          """Indicate to the Datastore that a dataset can be removed.
198
199          .. warning::
200
201              This method deletes the artifact associated with this
202              dataset and cannot be reversed.
203
204          Parameters
205          ----------
206          ref : `DatasetRef`
207              Reference to the required Dataset.
208
209          Raises
210          ------
211          FileNotFoundError
212              Attempt to remove a dataset that does not exist.
213
214          Notes
215          -----
216          This method is used for immediate removal of a dataset and is
217          generally reserved for internal testing of datastore APIs.
218          It is implemented by calling `trash()` and then immediately calling
219          `emptyTrash()`. This call is meant to be immediate so errors
220          encountered during removal are not ignored.
221          """
222          self.trash(ref, ignore_errors=False)
223          self.emptyTrash(ignore_errors=False)

224
225      def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
226          """Retrieve a dataset from an input `Datastore`
227          and store the result in this `Datastore`.
228
229          Parameters
230          ----------
231          inputDatastore : `Datastore`
232              The external `Datastore` from which to retrieve the Dataset.
233          ref : `DatasetRef`
234              Reference to the required dataset in the input datastore.
235
236          """
237          assert inputDatastore is not self  # unless we want it for renames?
238          inMemoryDataset = inputDatastore.get(ref)
239          return self.put(inMemoryDataset, ref)
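# --- Editor's note (not from the source): `transfer` round-trips through
# memory, so it works between unrelated datastore types at the cost of reading
# and re-serializing the artifact. With `src` and `dst` as two hypothetical
# configured datastores and `ref` a dataset known to `src`:
#
#     dst.transfer(src, ref)  # equivalent to: dst.put(src.get(ref), ref)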