Coverage for python/lsst/ap/verify/workspace.py: 47%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

93 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

23 

# Names exported by ``from <module> import *``.
__all__ = ["Workspace", "WorkspaceGen3"]

25 

import abc
import os
import pathlib
import re
import stat

import lsst.skymap
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase

35 

36 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    OSError
        Raised if ``location`` or its configuration subdirectory cannot be
        created, e.g. because ``location`` is not readable or not writeable.
        (``EnvironmentError`` is an alias of `OSError`.)
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create. Missing parent directories are created
            as well; nothing happens if the directory already exists.

        Notes
        -----
        The requested permissions only apply to a newly created leaf
        directory, and are combined with the process umask. Parent directories
        are created with default permissions, and the permissions of an
        existing directory are left unchanged.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        if type(self) is not type(other):
            # Let Python try the reflected comparison; it falls back to
            # "not equal" if the other operand does not know this type either.
            return NotImplemented
        return self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

140 

141 

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    OSError
        Raised if ``location`` or one of its subdirectories cannot be
        created, e.g. because ``location`` is not readable or not writeable.
        (``EnvironmentError`` is an alias of `OSError`.)
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        self.mkdir(self.pipelineDir)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def pipelineDir(self):
        """The absolute location of a directory containing custom pipeline
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'pipelines')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).

        This is the file ``association.db`` directly inside the workspace.
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).

        Unlike `repo` and `pipelineDir`, this directory is not created by
        the constructor.
        """
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        # Query by regex rather than by plain name (workaround for DM-25945).
        # The name is escaped so that any regex metacharacters in it are
        # matched literally instead of being interpreted as a pattern.
        matchingCollections = list(registry.queryCollections(re.compile(re.escape(name))))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and cached afterwards.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening or querying `repo`,
            which is reported as `repo` not being a Gen 3 repository.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                registry = queryButler.registry
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    # The raw run is the only input created here if missing.
                    self._ensureCollection(registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945; the name is escaped so
                # that it is matched literally.
                outputPattern = re.compile(re.escape(self.outputName))
                if not list(registry.queryCollections(outputPattern)):
                    registry.registerCollection(self.outputName,
                                                type=dafButler.CollectionType.CHAINED)
                    # NOTE(review): ``inputs`` is an unordered set, so the
                    # order of the chain's children is arbitrary -- confirm
                    # that the search order does not matter here.
                    registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and cached afterwards. It is
        opened non-writeable and searches the `outputName` collection.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening `repo`, which is
            reported as `repo` not being a Gen 3 repository.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler