Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

23 

24import abc 

25import os 

26import pathlib 

27import stat 

28 

29import lsst.daf.persistence as dafPersist 

30import lsst.daf.butler as dafButler 

31import lsst.obs.base as obsBase 

32 

33 

class Workspace(metaclass=abc.ABCMeta):
    """Abstract working directory in which ``ap_verify`` keeps data and outputs.

    An instance stands for one directory on disk together with the
    (possibly empty) subdirectories that live inside it. Concrete subclasses
    tailor the layout to a particular workflow, which lets each of them make
    its own guarantees without imposing an awkward directory structure on
    users.

    Every Workspace class must ensure that its subdirectories exist.
    Directories that hold repositories need not be initialized, because
    building a valid repository usually depends on external information.

    Parameters
    ----------
    location : `str`
        Where on disk to set up the workspace. It is created if it does not
        exist yet.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Keep a `str` rather than a `pathlib.Path`: the public properties
        # must stay `str` for backwards compatibility.
        resolved = pathlib.Path(location).resolve()
        self._location = str(resolved)

        # The root has to exist before anything nested inside it.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Make sure a workspace directory exists.

        Intended as a helper for subclasses only; external code should not
        call it.

        Parameters
        ----------
        directory : `str`
            The directory to create. Missing parents are created as well,
            and an already existing directory is not an error.
        """
        # u+rwx, and read-only for group and others
        permissions = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH
        target = pathlib.Path(directory)
        target.mkdir(mode=permissions, parents=True, exist_ok=True)

    @property
    def workDir(self):
        """Absolute path of the workspace root (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """Absolute path of the directory holding custom Task config files
        for use with the data (`str`, read-only).
        """
        root = self._location
        return os.path.join(root, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """Default absolute location of the source association database
        that the pipeline creates or updates (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """Butler able to produce pipeline inputs and outputs (read-only).
        Its type depends on the concrete class.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """Butler able to read pipeline outputs (read-only).
        Its type depends on the concrete class.

        The Butler should be read-only, if its type supports the restriction.
        """

119 

120 

class WorkspaceGen2(Workspace):
    """Workspace laid out for ``ap_verify`` runs on Gen 2 Butler repositories.

    An instance stands for a working directory whose (possibly empty)
    subdirectories are meant to hold repositories. Construction only creates
    the directories; it does not *initialize* the repositories, since that
    needs external information.

    Parameters
    ----------
    location : `str`
        Where on disk to set up the workspace. It is created if it does not
        exist yet.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Each property is looked up immediately before its directory is
        # created, in this fixed order.
        for repoProperty in ("dataRepo", "calibRepo", "templateRepo", "outputRepo"):
            self.mkdir(getattr(self, repoProperty))

        # Butlers are built on first access and then reused
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """Absolute path/URI of the Butler repo for science data
        (`str`, read-only).
        """
        root = self._location
        return os.path.join(root, 'ingested')

    @property
    def calibRepo(self):
        """Absolute path/URI of the Butler repo for calibration data
        (`str`, read-only).
        """
        root = self._location
        return os.path.join(root, 'calibingested')

    @property
    def templateRepo(self):
        """Absolute path/URI of the Butler repo for precomputed templates
        (`str`, read-only).

        This is the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """Absolute path/URI of the Butler repo for AP pipeline products
        (`str`, read-only).
        """
        root = self._location
        return os.path.join(root, 'output')

    @property
    def dbLocation(self):
        root = self._location
        return os.path.join(root, 'association.db')

    @property
    def workButler(self):
        """Butler able to produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        Built by `_makeButler` on first access and cached afterwards.
        """
        butler = self._workButler
        if butler is None:
            butler = self._workButler = self._makeButler()
        return butler

    def _makeButler(self):
        """Build a butler that spans the whole workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler taking `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # One mapper-argument dict, shared by every repository entry
        sharedMapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": sharedMapperArgs}]
        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": sharedMapperArgs}]

        # Only list the template repo separately when it is a different
        # directory from the science-data repo.
        templatesAreSeparate = not os.path.samefile(self.dataRepo, self.templateRepo)
        if templatesAreSeparate:
            inputRepos.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': sharedMapperArgs})

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """Butler able to read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        Opened over `outputRepo` in ``"r"`` mode on first access and cached
        afterwards.
        """
        butler = self._analysisButler
        if butler is None:
            outputOnly = {"root": self.outputRepo, "mode": "r"}
            butler = self._analysisButler = dafPersist.Butler(inputs=outputOnly)
        return butler

225 

226 

class WorkspaceGen3(Workspace):
    """Workspace laid out for ``ap_verify`` runs on a Gen 3 Butler repository.

    An instance stands for a working directory with one subdirectory for a
    repository and others for non-repository files. Construction only
    creates the directories; it does not *initialize* the repository, since
    that needs external information.

    Parameters
    ----------
    location : `str`
        Where on disk to set up the workspace. It is created if it does not
        exist yet.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Name of the Gen 3 run that receives pipeline outputs
        self.runName = "ap_verify-output"

        # Butlers are built on first access and then reused
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """Absolute path/URI of the Butler repo used for AP pipeline
        processing (`str`, read-only).
        """
        root = self._location
        return os.path.join(root, 'repo')

    @property
    def dbLocation(self):
        root = self._location
        return os.path.join(root, 'association.db')

    @property
    def workButler(self):
        """Butler able to read and write a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        Its input collections are every RUN-type collection in the registry
        plus the default raw-ingest run of each registered instrument; its
        output run is ``runName``. The butler is cached after the first
        access. An `OSError` while building it is re-raised as
        `RuntimeError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is not None:
            return self._workButler

        try:
            # Gen 3 collection names may change, so they are looked up in
            # the registry rather than hardcoded. The query butler must be
            # writeable because the work butler is derived from it.
            queryButler = dafButler.Butler(self.repo, writeable=True)
            runCollections = set(queryButler.registry.queryCollections(
                collectionType=dafButler.CollectionType.RUN))
            for record in queryButler.registry.queryDimensions('instrument'):
                camera = obsBase.Instrument.fromName(record["instrument"], queryButler.registry)
                runCollections.add(camera.makeDefaultRawIngestRunName())

            self._workButler = dafButler.Butler(butler=queryButler, collections=runCollections,
                                                run=self.runName)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """Butler able to read a Gen 3 repository containing outputs
        (`lsst.daf.butler.Butler`, read-only).

        Opened non-writeable on the ``runName`` collection and cached after
        the first access. An `OSError` while building it is re-raised as
        `RuntimeError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is not None:
            return self._analysisButler

        try:
            self._analysisButler = dafButler.Butler(self.repo, writeable=False,
                                                    collections=self.runName)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler