Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 45%

55 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-14 19:21 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Generic datastore code useful for most datastores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("GenericBaseDatastore",) 

27 

28import logging 

29from abc import abstractmethod 

30from collections.abc import Iterable, Mapping, Sequence 

31from typing import TYPE_CHECKING, Any 

32 

33from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore 

34from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge 

35 

36if TYPE_CHECKING: 

37 from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo 

38 

39log = logging.getLogger(__name__) 

40 

41 

class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with files stored in this
        `Datastore` associated with this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            associated with this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence `tuple` [`DatasetRef`,
                                         `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos: list[StoredDatastoreItemInfo] = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about registry ID so if we have
        # disassembled in datastore we have to deduplicate. Since they
        # will have different datasetTypes we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler and coerced
            to the python type associated with ``readStorageClass``.
        """
        # Process any left over parameters.
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type.
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None.
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Raised if the supplied object is not an instance of the
            storage class python type associated with ``ref``.
        DatasetTypeNotSupportedError
            Raised if this datastore's constraints reject the dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset.
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and can not be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        # Declared -> None, so do not propagate put()'s return value.
        self.put(inMemoryDataset, ref)