Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import abc 

25import os 

26import pathlib 

27import stat 

28 

29import lsst.daf.persistence as dafPersist 

30import lsst.daf.butler as dafButler 

31 

32 

class Workspace(metaclass=abc.ABCMeta):
    """Abstract base for a working directory managed by ``ap_verify``.

    A workspace is a directory on disk that holds the data and outputs of
    an ``ap_verify`` run, organized into (possibly empty) subdirectories.
    Concrete subclasses tailor the layout to a particular workflow; keeping
    those details in separate classes lets each one make guarantees without
    imposing an awkward directory structure on users.

    Every Workspace class must guarantee that the subdirectories it
    advertises exist. Directories that stand for repositories need not be
    *initialized*, because building a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Kept as `str` (not `pathlib.Path`) for backwards compatibility
        # of the public properties.
        root = str(pathlib.Path(location).resolve())
        self._location = root

        # The workspace root first, then the directory for config files.
        self.mkdir(root)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a workspace directory, including any missing parents.

        It is not an error for the directory to exist already. This method
        is meant for use by subclasses only; external code should not
        call it.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        # u+rwx, a+r: full access for the owner, read-only for group/other.
        ownerBits = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
        sharedBits = stat.S_IRGRP | stat.S_IROTH
        target = pathlib.Path(directory)
        target.mkdir(mode=ownerBits | sharedBits, parents=True, exist_ok=True)

    @property
    def workDir(self):
        """The absolute path of the workspace's top-level directory
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute path of the directory holding custom Task config
        files for use with the data (`str`, read-only).
        """
        subdirectory = 'config'
        return os.path.join(self._location, subdirectory)

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association
        database that the pipeline creates or updates (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of
        `Apdb`.
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler able to produce pipeline inputs and outputs
        (read-only). Its concrete type depends on the subclass.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler able to read pipeline outputs (read-only). Its
        concrete type depends on the subclass.

        The Butler should be read-only, if its type supports the
        restriction.
        """

118 

119 

class WorkspaceGen2(Workspace):
    """A workspace laid out for ``lsst.daf.persistence`` repositories.

    An object of this class represents a working directory whose
    (possibly empty) subdirectories serve as repositories. Constructing a
    WorkspaceGen2 creates those directories but does not *initialize* the
    repositories, as this requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Look each repository path up right before creating it, so the
        # directories are made one at a time in this order.
        for repoProperty in ("dataRepo", "calibRepo", "templateRepo", "outputRepo"):
            self.mkdir(getattr(self, repoProperty))

        # Butlers are built on first access rather than up front.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed
        templates (`str`, read-only).

        Currently the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline
        products (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association
        database, a file named ``association.db`` directly inside the
        workspace (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        Built on first access and reused afterwards.
        """
        if self._workButler is not None:
            return self._workButler
        self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo`
            as inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Every repository entry shares the same calibration root.
        calibArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": calibArgs}]
        # List the template repo separately only if it is a different
        # location from the data repo.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            templateEntry = {'root': self.templateRepo, 'mode': 'r', 'mapperArgs': calibArgs}
            inputRepos.append(templateEntry)

        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": calibArgs}]

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        Built on first access and reused afterwards; opens `outputRepo`
        in read mode.
        """
        if self._analysisButler is not None:
            return self._analysisButler
        readOnlyOutput = {"root": self.outputRepo, "mode": "r"}
        self._analysisButler = dafPersist.Butler(inputs=readOnlyOutput)
        return self._analysisButler

224 

225 

class WorkspaceGen3(Workspace):
    """A workspace laid out for a single ``lsst.daf.butler`` repository.

    An object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files.
    Constructing a WorkspaceGen3 creates the repository directory but does
    not *initialize* the repository, as this requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Name of the Gen 3 run that receives pipeline outputs.
        self.runName = "ap_verify-output"

        # Butlers are built on first access rather than up front.
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline
        processing (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association
        database, a file named ``association.db`` directly inside the
        workspace (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        Built on first access and reused afterwards.

        Raises
        ------
        RuntimeError
            Raised if constructing the Butler on `repo` fails with an
            `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is not None:
            return self._workButler
        try:
            self._workButler = dafButler.Butler(self.repo, run=self.runName)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        Built on first access and reused afterwards; constructed
        non-writeable, with the output run as its collection.

        Raises
        ------
        RuntimeError
            Raised if constructing the Butler on `repo` fails with an
            `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is not None:
            return self._analysisButler
        try:
            self._analysisButler = dafButler.Butler(
                self.repo,
                collections=self.runName,
                writeable=False,
            )
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler