Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import abc 

25import os 

26import pathlib 

27import stat 

28 

29import lsst.daf.persistence as dafPersist 

30import lsst.daf.butler as dafButler 

31import lsst.obs.base as obsBase 

32 

33 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Workspaces compare equal when they have the same concrete type and the
    same `workDir`; they are hashable consistently with that equality, so
    they can be stored in sets or used as dictionary keys.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and it is not an
        error for ``directory`` to exist already.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match (not isinstance): workspaces of different
        # subclasses lay out the same directory differently.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same fields that `__eq__` compares.

        Defining ``__eq__`` alone would set ``__hash__`` to `None` and make
        workspaces unhashable.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

137 

138 

class WorkspaceGen2(Workspace):
    """A Gen 2 working directory for ``ap_verify``.

    The workspace holds one (possibly empty) subdirectory per Butler
    repository. The constructor only creates the directories; it does not
    *initialize* the repositories, because that requires external
    information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Guarantee that every repository directory exists.
        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Butlers are built on first access and then cached.
        self._workButler = self._analysisButler = None

    @property
    def dataRepo(self):
        """Absolute path/URI of the Butler repo holding science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """Absolute path/URI of the Butler repo holding calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """Absolute path/URI of the Butler repo holding precomputed templates
        (`str`, read-only).

        This is the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """Absolute path/URI of the Butler repo receiving AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """Absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute location of the output directory for persisted alert
        packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        if self._workButler is not None:
            return self._workButler
        self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Every repository entry shares the same mapper arguments.
        sharedArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [dict(root=self.dataRepo, mapperArgs=sharedArgs)]
        outputRepos = [dict(root=self.outputRepo, mode="rw", mapperArgs=sharedArgs)]

        # Only list the template repo separately if it is a distinct
        # location from the data repo.
        templatesAreSeparate = not os.path.samefile(self.dataRepo, self.templateRepo)
        if templatesAreSeparate:
            inputRepos.append(dict(root=self.templateRepo, mode='r', mapperArgs=sharedArgs))

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        if self._analysisButler is not None:
            return self._analysisButler
        outputSpec = {"root": self.outputRepo, "mode": "r"}
        self._analysisButler = dafPersist.Butler(inputs=outputSpec)
        return self._analysisButler

247 

248 

class WorkspaceGen3(Workspace):
    """A Gen 3 working directory for ``ap_verify``.

    The workspace holds a subdirectory for a single Butler repository plus
    locations for non-repository files. The constructor only creates the
    repository directory; it does not *initialize* the repository, because
    that requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Butlers are built on first access and then cached.
        self._workButler = self._analysisButler = None

    @property
    def repo(self):
        """Absolute path/URI of the Butler repo used for AP pipeline
        processing (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """Absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute location of the output directory for persisted alert
        packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and reused afterwards.

        Raises
        ------
        RuntimeError
            Raised if opening `repo` fails with an `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is not None:
            return self._workButler

        try:
            # All Gen 3 collection names subject to change; don't hardcode them
            baseButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
            registry = baseButler.registry
            collections = set(registry.queryCollections(collectionType=dafButler.CollectionType.RUN))
            # Also search the default raw-ingest run of every instrument
            # registered in the repository.
            collections.update(
                obsBase.Instrument.fromName(dataId["instrument"], baseButler.registry)
                .makeDefaultRawIngestRunName()
                for dataId in registry.queryDataIds('instrument')
            )

            # should set run=self.runName, but this breaks quantum graph generation (DM-26246)
            self._workButler = dafButler.Butler(butler=baseButler, collections=collections)
        except OSError as err:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from err
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and reused afterwards.

        Raises
        ------
        RuntimeError
            Raised if opening `repo` fails with an `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is not None:
            return self._analysisButler

        try:
            readOnlyButler = dafButler.Butler(self.repo, collections=self.runName, writeable=False)
            self._analysisButler = readOnlyButler
        except OSError as err:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from err
        return self._analysisButler