Coverage for python/lsst/ap/verify/workspace.py: 43%


134 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["Workspace", "WorkspaceGen2", "WorkspaceGen3"]

import abc
import os
import pathlib
import re
import stat

import lsst.skymap
import lsst.daf.persistence as dafPersist
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase


class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        return type(self) == type(other) and self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

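# --- Illustrative sketch (not part of workspace.py) ------------------------
# A concrete Workspace only needs to supply the four abstract properties; the
# base class already resolves the location, creates it, and adds configDir.
# The class and file names below are hypothetical, for illustration only.
#
#     class MyWorkspace(Workspace):
#         @property
#         def dbLocation(self):
#             return os.path.join(self.workDir, "association.db")
#
#         @property
#         def alertLocation(self):
#             return os.path.join(self.workDir, "alerts")
#
#         @property
#         def workButler(self):
#             raise NotImplementedError("no repository layout defined")
#
#         @property
#         def analysisButler(self):
#             raise NotImplementedError("no repository layout defined")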

class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler

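# --- Illustrative sketch (not part of workspace.py) ------------------------
# Typical Gen 2 usage: constructing the workspace creates the directory tree
# immediately, while the butlers are built lazily on first access.  The path
# below is hypothetical, and workButler assumes the repositories have already
# been initialized (e.g. by ap_verify's ingestion step).
#
#     ws = WorkspaceGen2("/tmp/ap_verify_run")
#     ws.dataRepo                    # "/tmp/ap_verify_run/ingested"
#     ws.calibRepo                   # "/tmp/ap_verify_run/calibingested"
#     ws.outputRepo                  # "/tmp/ap_verify_run/output"
#     butler = ws.workButler         # lsst.daf.persistence.Butler (read/write)
#     results = ws.analysisButler    # read-only view of the output repo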

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        matchingCollections = list(registry.queryCollections(re.compile(name)))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler

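# --- Illustrative sketch (not part of workspace.py) ------------------------
# Typical Gen 3 usage: all pipeline I/O goes through a single Butler repo plus
# a CHAINED output collection named by ``outputName``.  The path below is
# hypothetical, and both butlers assume ``ws.repo`` already holds an
# initialized Gen 3 repository with ingested data.
#
#     ws = WorkspaceGen3("/tmp/ap_verify_run")
#     ws.repo                    # "/tmp/ap_verify_run/repo"
#     work = ws.workButler       # writeable, collections="ap_verify-output"
#     read = ws.analysisButler   # read-only, same collection chain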