Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import abc 

25import os 

26import pathlib 

27import re 

28import stat 

29 

30import lsst.daf.persistence as dafPersist 

31import lsst.daf.butler as dafButler 

32import lsst.obs.base as obsBase 

33 

34 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Two workspaces compare equal (and hash equal) when they have exactly the
    same type and the same `workDir`.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and it is not an
        error for ``directory`` to already exist.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match is intentional: a Gen 2 and a Gen 3 workspace
        # rooted at the same directory are not interchangeable.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same key that `__eq__` compares.

        Defining ``__eq__`` alone would implicitly set ``__hash__`` to `None`
        and make workspaces unusable in sets or as dictionary keys.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

138 

139 

class WorkspaceGen2(Workspace):
    """A Gen 2 working directory used by ``ap_verify`` to handle data.

    The workspace holds one (possibly empty) subdirectory per repository.
    Building a WorkspaceGen2 creates those directories but does not
    *initialize* the repositories, as this requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # templateRepo currently aliases dataRepo, so its mkdir is a no-op;
        # it is still listed so the guarantee holds if the alias changes.
        for repoDir in (self.dataRepo, self.calibRepo, self.templateRepo, self.outputRepo):
            self.mkdir(repoDir)

        # Butlers are built on first access rather than up front.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """Absolute path/URI of the Butler repo holding science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """Absolute path/URI of the Butler repo holding calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """Absolute path/URI of the Butler repo holding precomputed templates
        (`str`, read-only).

        This is the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """Absolute path/URI of the Butler repo receiving AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """Default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute location of the output directory for persisted alert
        packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        butler = self._workButler
        if butler is None:
            butler = self._workButler = self._makeButler()
        return butler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Every repository entry shares the same calibration root.
        calibArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": calibArgs}]
        # Only list the template repo separately when it is a distinct
        # directory from the science-data repo.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputRepos.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': calibArgs})

        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": calibArgs}]

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        butler = self._analysisButler
        if butler is None:
            butler = self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return butler

248 

249 

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.

        Returns
        -------
        created : `bool`
            `True` if the collection was registered by this call, `False` if
            a matching collection was already present.
        """
        # Query with a compiled pattern instead of a plain string (workaround
        # for DM-25945). The name is escaped so that regex metacharacters in
        # it (e.g. ".") are matched literally rather than as wildcards.
        pattern = re.compile(re.escape(name))
        if list(registry.queryCollections(pattern)):
            return False
        registry.registerCollection(name, type=collectionType)
        return True

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening or querying the repository fails with an
            `OSError`.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                registry = queryButler.registry
                # NOTE(review): a set has no defined order, so the order of
                # the chain below is arbitrary -- confirm that lookup
                # precedence among these inputs does not matter.
                inputs = {"skymaps", "refcats"}
                for dimension in registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                inputs.update(registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses:
                # the chain is only defined when it is first created, so an
                # existing chain is left untouched.
                if self._ensureCollection(registry, self.outputName, dafButler.CollectionType.CHAINED):
                    registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening the repository fails with an `OSError`.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler