Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 43%

58 statements  

coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Generic datastore code useful for most datastores."""

from __future__ import annotations

__all__ = ("GenericBaseDatastore",)

import logging
from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence
from typing import TYPE_CHECKING, Any

from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

from ..registry.interfaces import DatabaseInsertMode

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be subclassed since key abstract methods are missing.
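
    Notes
    -----
    A minimal sketch of a concrete subclass (``MyDatastore`` and its
    ``_bridge`` attribute are illustrative, not part of the real API)::

        class MyDatastore(GenericBaseDatastore):
            @property
            def bridge(self) -> DatastoreRegistryBridge:
                # Assumed to have been created by the subclass constructor.
                return self._bridge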

    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
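
        Examples
        --------
        Illustrative call only; ``refs`` and ``infos`` are assumed to be
        matched sequences prepared by the caller::

            datastore.addStoredItemInfo(
                refs, infos, insert_mode=DatabaseInsertMode.REPLACE
            )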

        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information about files stored in this `Datastore`
        that are associated with the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
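
        Examples
        --------
        A sketch of looking up the stored file information for a dataset
        (``datastore`` and ``ref`` are illustrative)::

            infos = datastore.getStoredItemsInfo(ref)
            if not infos:
                print(f"{ref} is not known to this datastore")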

        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`,
                       `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`, optional
            Whether the new records should be inserted as new rows
            (``INSERT``, the default), allowed to already exist
            (``ENSURE``), or replace existing rows (``REPLACE``).
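
        Examples
        --------
        A sketch of how a concrete subclass might call this after writing
        an artifact (``ref`` and ``itemInfo`` are illustrative)::

            self._register_datasets(
                [(ref, itemInfo)], insert_mode=DatabaseInsertMode.INSERT
            )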

        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # dataset has been disassembled in the datastore we have to
        # deduplicate. Since the refs will have different datasetTypes
        # we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        if insert_mode == DatabaseInsertMode.INSERT:
            self.bridge.insert(registryRefs.values())
        else:
            # There are only two columns and all that matters is the
            # dataset ID.
            self.bridge.ensure(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode)

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the Python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, coerced to the expected Python type if required.
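
        Examples
        --------
        A sketch of applying a leftover read parameter after a get (the
        ``"subset"`` parameter name is illustrative and depends on the
        storage class delegate)::

            inMemoryDataset = self._post_process_get(
                inMemoryDataset,
                readStorageClass,
                assemblerParams={"subset": subset},
            )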

        """
        # Process any leftover parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Raised if the supplied object is not an instance of the Python
            type associated with the storage class of ``ref``.
        DatasetTypeNotSupportedError
            Raised if this datastore's constraints reject the dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
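
        Examples
        --------
        Per the implementation below, this is equivalent to trashing the
        dataset and immediately emptying the trash::

            datastore.trash(ref, ignore_errors=False)
            datastore.emptyTrash(ignore_errors=False)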

        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore` and store the
        result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
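
        Examples
        --------
        A sketch of copying a single dataset between datastores
        (``source_datastore`` and ``ref`` are illustrative)::

            datastore.transfer(source_datastore, ref)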

        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)