Coverage for python/lsst/daf/butler/datastore/generic_base.py: 43%

58 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

28"""Generic datastore code useful for most datastores.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ("GenericBaseDatastore",) 

33 

34import logging 

35from abc import abstractmethod 

36from collections.abc import Iterable, Mapping, Sequence 

37from typing import TYPE_CHECKING, Any 

38 

39from .._exceptions import DatasetTypeNotSupportedError 

40from ..registry.interfaces import DatabaseInsertMode, DatastoreRegistryBridge 

41from ._datastore import Datastore 

42 

43if TYPE_CHECKING: 

44 from .._dataset_ref import DatasetRef 

45 from .._storage_class import StorageClass 

46 from .stored_file_info import StoredDatastoreItemInfo 

47 

48log = logging.getLogger(__name__) 

49 

50 

class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
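
    Examples
    --------
    A minimal, illustrative sketch of a concrete subclass; the names are
    hypothetical and a real subclass must also implement the abstract
    methods inherited from `Datastore` itself::

        class MyDatastore(GenericBaseDatastore):
            @property
            def bridge(self) -> DatastoreRegistryBridge:
                return self._bridge  # hypothetical attribute

            def addStoredItemInfo(self, refs, infos, insert_mode=DatabaseInsertMode.INSERT):
                ...  # record metadata for the stored datasets

            def getStoredItemsInfo(self, ref):
                ...  # return a sequence of StoredDatastoreItemInfo

            def removeStoredItemInfo(self, ref):
                ...  # forget the stored metadata for this dataset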

55 """ 

56 

57 @property 

58 @abstractmethod 

59 def bridge(self) -> DatastoreRegistryBridge: 

60 """Object that manages the interface between this `Datastore` and the 

61 `Registry` (`DatastoreRegistryBridge`). 

62 """ 

63 raise NotImplementedError() 

64 

    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information about the files stored in this `Datastore`
        that are associated with this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned if the dataset
            has not been disassembled. Can return an empty list if no
            matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`,
            `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`, optional
            Indicate whether the new records should be new (``INSERT``,
            the default), allowed to exist already (``ENSURE``), or
            replaced if already present (``REPLACE``).

138 """ 

139 expandedRefs: list[DatasetRef] = [] 

140 expandedItemInfos = [] 

141 

142 for ref, itemInfo in refsAndInfos: 

143 expandedRefs.append(ref) 

144 expandedItemInfos.append(itemInfo) 

145 

146 # Dataset location only cares about registry ID so if we have 

147 # disassembled in datastore we have to deduplicate. Since they 

148 # will have different datasetTypes we can't use a set 

149 registryRefs = {r.id: r for r in expandedRefs} 

150 if insert_mode == DatabaseInsertMode.INSERT: 

151 self.bridge.insert(registryRefs.values()) 

152 else: 

153 # There are only two columns and all that matters is the 

154 # dataset ID. 

155 self.bridge.ensure(registryRefs.values()) 

156 self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode) 

157 

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the Python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The in-memory dataset, after any parameter handling and type
            coercion have been applied.
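
        Examples
        --------
        Illustrative only; ``data``, ``ref`` and ``parameters`` are assumed
        to come from the surrounding ``get`` implementation::

            data = self._post_process_get(
                data, ref.datasetType.storageClass, parameters, isComponent=False
            )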

180 """ 

181 # Process any left over parameters 

182 if assemblerParams: 

183 inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams) 

184 

185 # Validate the returned data type matches the expected data type 

186 pytype = readStorageClass.pytype 

187 

188 allowedTypes = [] 

189 if pytype: 

190 allowedTypes.append(pytype) 

191 

192 # Special case components to allow them to be None 

193 if isComponent: 

194 allowedTypes.append(type(None)) 

195 

196 if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)): 

197 inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset) 

198 

199 return inMemoryDataset 

200 

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        return

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
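
        Examples
        --------
        Illustrative only; ``datastore`` is a concrete datastore instance
        that currently holds ``ref``::

            datastore.remove(ref)
            # Equivalent to:
            # datastore.trash(ref, ignore_errors=False)
            # datastore.emptyTrash(ignore_errors=False)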

254 """ 

255 self.trash(ref, ignore_errors=False) 

256 self.emptyTrash(ignore_errors=False) 

257 

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore` and store the
        result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
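
        Examples
        --------
        Illustrative only; ``source`` and ``target`` are two configured
        datastore instances and ``ref`` exists in ``source``::

            target.transfer(source, ref)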

        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)