Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import abc 

25import os 

26import pathlib 

27import re 

28import stat 

29 

30import lsst.skymap 

31import lsst.daf.persistence as dafPersist 

32import lsst.daf.butler as dafButler 

33import lsst.obs.base as obsBase 

34 

35 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Workspaces compare equal when they have the same concrete type and the
    same `workDir`; they hash consistently with that comparison, so they can
    be used in sets and as dictionary keys.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root must exist before anything is placed inside it.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        Missing parent directories are created as well, and a directory that
        already exists is left in place without raising.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # The type check comes first, so ``other.workDir`` is only read from
        # objects that are known to be workspaces of the same class.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same fields that `__eq__` compares.

        Defining ``__eq__`` alone would make instances unhashable, so the
        hash is provided explicitly.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

139 

140 

class WorkspaceGen2(Workspace):
    """A Gen 2 ``ap_verify`` workspace.

    The workspace holds one subdirectory per Butler repository (science data,
    calibrations, templates, and pipeline outputs). The directories are
    guaranteed to exist after construction, but they are not *initialized*
    as repositories, since doing so requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # One directory per repository; `templateRepo` may coincide with
        # `dataRepo`, in which case the second call finds it already present.
        for repoDir in (self.dataRepo, self.calibRepo, self.templateRepo, self.outputRepo):
            self.mkdir(repoDir)

        # Butlers are only constructed on first access, then cached here.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """Absolute path/URI of the Butler repo holding science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """Absolute path/URI of the Butler repo holding calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """Absolute path/URI of the Butler repo holding precomputed templates
        (`str`, read-only).

        Currently the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """Absolute path/URI of the Butler repo receiving AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """Default absolute path of the association database file
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute path of the directory for persisted alert packets
        (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is not None:
            return self._workButler

        self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Every repository entry shares the same mapper arguments.
        sharedMapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": sharedMapperArgs}]
        # Only list the template repo separately when it is a distinct
        # location from the science data repo.
        templatesAreSeparate = not os.path.samefile(self.dataRepo, self.templateRepo)
        if templatesAreSeparate:
            inputRepos.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': sharedMapperArgs})

        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": sharedMapperArgs}]

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is not None:
            return self._analysisButler

        # The output repo is opened in read mode only.
        outputArgs = {"root": self.outputRepo, "mode": "r"}
        self._analysisButler = dafPersist.Butler(inputs=outputArgs)
        return self._analysisButler

249 

250 

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output; used as the collection name for both
        # the work butler and the analysis butler.
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the association database file
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of the directory for persisted alert packets
        (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add. The name is
            treated literally, even if it contains characters that are
            special in regular expressions.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        # The existence test goes through a regex query; escape the name so
        # that characters such as "." or "+" cannot match other collections
        # or make the pattern invalid.
        literalName = re.compile(re.escape(name))
        matchingCollections = list(registry.queryCollections(literalName))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        On first access this registers, if missing, the default raw-ingest
        run of every instrument found in the repository and a chained
        collection named ``outputName``, then caches the resulting butler.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening or preparing `repo`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945; the name is escaped so it
                # is matched literally rather than interpreted as a pattern.
                outputPattern = re.compile(re.escape(self.outputName))
                if not list(queryButler.registry.queryCollections(outputPattern)):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    # NOTE(review): ``inputs`` is a set, so the order of the
                    # chain is not fixed -- confirm that search order among
                    # these collections does not matter.
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        The butler is opened non-writeable on the ``outputName`` collection
        and cached after the first access.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening `repo`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler