Coverage for python/lsst/daf/butler/_limited_butler.py: 71%

38 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-30 02:26 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("LimitedButler",) 

25 

26import logging 

27from abc import ABC, abstractmethod 

28from typing import Any, ClassVar, Dict, Iterable, Optional, Union 

29 

30from ._deferredDatasetHandle import DeferredDatasetHandle 

31from .core import AmbiguousDatasetError, DatasetRef, Datastore, DimensionUniverse, StorageClassFactory 

32 

33log = logging.getLogger(__name__) 

34 

35 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any:
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        # Check resolution up front so the documented AmbiguousDatasetError
        # is guaranteed, matching the explicit check in getDirectDeferred,
        # instead of relying on whatever the datastore does with an
        # unresolved reference.
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return self.datastore.get(ref, parameters=parameters)

    def getDirectDeferred(
        self, ref: DatasetRef, *, parameters: Union[dict, None] = None
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used).  This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune.  These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler.  Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from.  Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`.  To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """