Coverage for python/lsst/daf/butler/_limited_butler.py: 70%

38 statements

coverage.py v6.5.0, created at 2022-11-12 02:19 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public API of this module: only the abstract butler base class is exported.
__all__ = ("LimitedButler",)

25 

26import logging 

27from abc import ABC, abstractmethod 

28from typing import Any, ClassVar, Dict, Iterable, Optional, Union 

29 

30from ._deferredDatasetHandle import DeferredDatasetHandle 

31from .core import ( 

32 AmbiguousDatasetError, 

33 DatasetRef, 

34 Datastore, 

35 DimensionUniverse, 

36 StorageClass, 

37 StorageClassFactory, 

38) 

39 

# Module-level logger, named after this module so it participates in the
# standard hierarchical logging configuration.
log = logging.getLogger(__name__)

41 

42 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # Delegate straight to the datastore; no collection search is needed
        # because ``ref`` is already resolved.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # Fail early: a deferred handle built from an unresolved reference
        # could never be retrieved later.
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        # Intentionally a no-op: the base class records no provenance.

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """