# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Generic datastore code useful for most datastores."""

from __future__ import annotations

__all__ = ("GenericBaseDatastore",)

import logging
from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence
from typing import TYPE_CHECKING, Any

from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

from ..registry.interfaces import DatabaseInsertMode

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()
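
    # For illustration only: a subclass would typically satisfy ``bridge``
    # with an attribute created in its constructor. The ``self._bridge``
    # name here is an assumption, not part of this base class:
    #
    #     @property
    #     def bridge(self) -> DatastoreRegistryBridge:
    #         return self._bridge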

    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information about the files stored in this `Datastore`
        that are associated with this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and formatters associated
            with this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`,
            `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`, optional
            Indicate whether the new records should be new (``INSERT``,
            the default), allowed to exist already (``ENSURE``), or replace
            existing records (``REPLACE``).
        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # dataset was disassembled in the datastore we have to deduplicate.
        # The refs will have different dataset types (components), so we
        # can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        if insert_mode == DatabaseInsertMode.INSERT:
            self.bridge.insert(registryRefs.values())
        else:
            # There are only two columns and all that matters is the
            # dataset ID.
            self.bridge.ensure(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode)
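
    # For illustration only: a subclass ``put`` implementation would
    # typically end by registering the new artifact. ``info`` stands in
    # for whatever ``StoredDatastoreItemInfo`` subclass the datastore
    # records; its construction is omitted here:
    #
    #     self._register_datasets([(ref, info)])
    #     # Or, when re-ingesting records that may already be present:
    #     self._register_datasets(
    #         [(ref, info)], insert_mode=DatabaseInsertMode.ENSURE
    #     )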

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset after any parameter handling and type coercion.
        """
        # Process any left over parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset
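
    # For illustration only: a sketch of how a subclass ``get`` might
    # finish. ``_read_artifact`` and ``params`` are assumptions standing
    # in for the subclass's own read logic and leftover parameters:
    #
    #     component = ref.datasetType.component()
    #     obj = self._read_artifact(ref)
    #     return self._post_process_get(
    #         obj,
    #         ref.datasetType.storageClass,
    #         assemblerParams=params,
    #         isComponent=component is not None,
    #     )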

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        return
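
    # For illustration only: the start of a typical subclass ``put``.
    # ``_write_artifact`` is a hypothetical helper, not part of this class:
    #
    #     def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
    #         self._validate_put_parameters(inMemoryDataset, ref)
    #         info = self._write_artifact(inMemoryDataset, ref)
    #         self._register_datasets([(ref, info)])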

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
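
    # For illustration only: copying datasets between two datastores with
    # ``transfer``. The datastore instances and ``refs_to_copy`` here are
    # assumed to exist and to share a registry:
    #
    #     for ref in refs_to_copy:
    #         destinationDatastore.transfer(sourceDatastore, ref)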