Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

23 

24import abc 

25import os 

26import pathlib 

27import stat 

28 

29import lsst.daf.persistence as dafPersist 

30import lsst.daf.butler as dafButler 

31import lsst.obs.base as obsBase 

32 

33 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The root must exist before any of its subdirectories are requested.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    def __repr__(self):
        """Return a string of the form ``ClassName('/abs/path')``.

        The concrete class name is used so that subclasses are reported
        under their own name.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Does nothing if ``directory`` already exists. Missing parent
        directories are created as well; per the `pathlib` documentation
        those parents get default permissions rather than the mode used
        for ``directory`` itself.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

126 

127 

class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # NOTE(review): `alertLocation` is not created here, although the
        # base-class docstring asks workspaces to guarantee their
        # subdirectories exist -- confirm the alert writer creates it.
        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        # Templates currently share the science-data repository.
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        # Built on first access and cached for later calls.
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        # Only list the template repo separately when it is a different
        # directory from the data repo, so the same root is not passed twice.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        # Built on first access and cached; opens `outputRepo` in mode "r".
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler

236 

237 

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # NOTE(review): `alertLocation` is not created here, although the
        # base-class docstring asks workspaces to guarantee their
        # subdirectories exist -- confirm the alert writer creates it.
        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening or querying `repo` fails with an `OSError`.
        """
        # Built on first access and cached for later calls.
        if self._workButler is None:
            try:
                # All Gen 3 collection names subject to change; don't hardcode them
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = set(queryButler.registry.queryCollections(
                    collectionType=dafButler.CollectionType.RUN))
                # Also include each registered instrument's default raw-ingest run.
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    inputs.add(instrument.makeDefaultRawIngestRunName())

                # should set run=self.runName, but this breaks quantum graph generation (DM-26246)
                self._workButler = dafButler.Butler(butler=queryButler, collections=inputs)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening `repo` fails with an `OSError`.
        """
        # Built on first access and cached; restricted to the output run and
        # opened non-writeable.
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.runName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler