Coverage for python/lsst/daf/butler/_limited_butler.py: 70%

33 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-24 02:27 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("LimitedButler",) 

25 

26import logging 

27from abc import ABC, abstractmethod 

28from typing import Any, ClassVar, Dict, Optional, Union 

29 

30from ._deferredDatasetHandle import DeferredDatasetHandle 

31from .core import AmbiguousDatasetError, DatasetRef, Datastore, DimensionUniverse, StorageClassFactory 

32 

33log = logging.getLogger(__name__) 

34 

35 

36class LimitedButler(ABC): 

37 """A minimal butler interface that is sufficient to back 

38 `~lsst.pipe.base.PipelineTask` execution. 

39 

40 Notes 

41 ----- 

42 This ABC currently does not include any support for dataset deletion; it 

43 probably needs some to support clobbering and automatic retries, which are 

44 problems I have not yet considered when prototyping (in the hopes that 

45 addressing them later won't change the big picture). 

46 """ 

47 

48 GENERATION: ClassVar[int] = 3 

49 """This is a Generation 3 Butler. 

50 

51 This attribute may be removed in the future, once the Generation 2 Butler 

52 interface has been fully retired; it should only be used in transitional 

53 code. 

54 """ 

55 

56 @abstractmethod 

57 def isWriteable(self) -> bool: 

58 """Return `True` if this `Butler` supports write operations.""" 

59 raise NotImplementedError() 

60 

61 @abstractmethod 

62 def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef: 

63 """Store a dataset that already has a UUID and ``RUN`` collection. 

64 

65 Parameters 

66 ---------- 

67 obj : `object` 

68 The dataset. 

69 ref : `DatasetRef` 

70 Resolved reference for a not-yet-stored dataset. 

71 

72 Returns 

73 ------- 

74 ref : `DatasetRef` 

75 The same as the given ref, for convenience and symmetry with 

76 `Butler.put`. 

77 

78 Raises 

79 ------ 

80 TypeError 

81 Raised if the butler is read-only. 

82 AmbiguousDatasetError 

83 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

84 

85 Notes 

86 ----- 

87 Whether this method inserts the given dataset into a ``Registry`` is 

88 implementation defined (some `LimitedButler` subclasses do not have a 

89 `Registry`), but it always adds the dataset to a `Datastore`, and the 

90 given ``ref.id`` and ``ref.run`` are always preserved. 

91 """ 

92 raise NotImplementedError() 

93 

94 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

95 """Retrieve a stored dataset. 

96 

97 Unlike `Butler.get`, this method allows datasets outside the Butler's 

98 collection to be read as long as the `DatasetRef` that identifies them 

99 can be obtained separately. 

100 

101 Parameters 

102 ---------- 

103 ref : `DatasetRef` 

104 Resolved reference to an already stored dataset. 

105 parameters : `dict`, optional 

106 Additional StorageClass-defined options to control reading, 

107 typically used to efficiently read only a subset of the dataset. 

108 

109 Returns 

110 ------- 

111 obj : `object` 

112 The dataset. 

113 

114 Raises 

115 ------ 

116 AmbiguousDatasetError 

117 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

118 """ 

119 return self.datastore.get(ref, parameters=parameters) 

120 

121 def getDirectDeferred( 

122 self, ref: DatasetRef, *, parameters: Union[dict, None] = None 

123 ) -> DeferredDatasetHandle: 

124 """Create a `DeferredDatasetHandle` which can later retrieve a dataset 

125 from a resolved `DatasetRef`. 

126 

127 Parameters 

128 ---------- 

129 ref : `DatasetRef` 

130 Resolved reference to an already stored dataset. 

131 parameters : `dict`, optional 

132 Additional StorageClass-defined options to control reading, 

133 typically used to efficiently read only a subset of the dataset. 

134 

135 Returns 

136 ------- 

137 obj : `DeferredDatasetHandle` 

138 A handle which can be used to retrieve a dataset at a later time. 

139 

140 Raises 

141 ------ 

142 AmbiguousDatasetError 

143 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

144 """ 

145 if ref.id is None: 

146 raise AmbiguousDatasetError( 

147 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

148 ) 

149 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

150 

151 def datasetExistsDirect(self, ref: DatasetRef) -> bool: 

152 """Return `True` if a dataset is actually present in the Datastore. 

153 

154 Parameters 

155 ---------- 

156 ref : `DatasetRef` 

157 Resolved reference to a dataset. 

158 

159 Returns 

160 ------- 

161 exists : `bool` 

162 Whether the dataset exists in the Datastore. 

163 """ 

164 return self.datastore.exists(ref) 

165 

166 def markInputUnused(self, ref: DatasetRef) -> None: 

167 """Indicate that a predicted input was not actually used when 

168 processing a `Quantum`. 

169 

170 Parameters 

171 ---------- 

172 ref : `DatasetRef` 

173 Reference to the unused dataset. 

174 

175 Notes 

176 ----- 

177 By default, a dataset is considered "actually used" if it is accessed 

178 via `getDirect` or a handle to it is obtained via `getDirectDeferred` 

179 (even if the handle is not used). This method must be called after one 

180 of those in order to remove the dataset from the actual input list. 

181 

182 This method does nothing for butlers that do not store provenance 

183 information (which is the default implementation provided by the base 

184 class). 

185 """ 

186 pass 

187 

188 @property 

189 @abstractmethod 

190 def dimensions(self) -> DimensionUniverse: 

191 """Structure managing all dimensions recognized by this data 

192 repository (`DimensionUniverse`). 

193 """ 

194 raise NotImplementedError() 

195 

196 datastore: Datastore 

197 """The object that manages actual dataset storage (`Datastore`). 

198 

199 Direct user access to the datastore should rarely be necessary; the primary 

200 exception is the case where a `Datastore` implementation provides extra 

201 functionality beyond what the base class defines. 

202 """ 

203 

204 storageClasses: StorageClassFactory 

205 """An object that maps known storage class names to objects that fully 

206 describe them (`StorageClassFactory`). 

207 """