Coverage for python/lsst/ap/verify/workspace.py: 64%

97 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 04:28 -0700

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24__all__ = ["Workspace", "WorkspaceGen3"] 

25 

26import abc 

27import os 

28import pathlib 

29import re 

30import stat 

31 

32import lsst.daf.butler as dafButler 

33import lsst.obs.base as obsBase 

34 

35 

class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Workspaces compare equal when they have the same concrete type and the
    same `workDir`; they hash consistently with that definition, so they can
    be stored in sets or used as dictionary keys.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    OSError
        Raised if ``location`` or its ``config`` subdirectory cannot be
        created (``EnvironmentError`` is an alias of `OSError`).
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root must exist before any of its subdirectories.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and it is not an
        error if ``directory`` already exists as a directory.

        Parameters
        ----------
        directory : `str`
            The directory to create.

        Raises
        ------
        OSError
            Raised if the directory cannot be created, for example because
            ``directory`` exists but is not a directory.
        """
        # Requested permissions: a+r, u+rwx. As with any `pathlib.Path.mkdir`
        # call, the process umask may further restrict the result.
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match is intentional: a subclass instance is never equal
        # to an instance of a different Workspace class, even at the same path.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same fields used by `__eq__`.

        Defining ``__eq__`` alone would make instances unhashable.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def dbConfigLocation(self):
        """The absolute location of the config file for the source association
        database to be created or updated by the pipeline (`str`, read-only).

        The location is assumed to be a Python (`lsst.pex.config.Config`) file.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """

148 

149 

class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        self.mkdir(self.pipelineDir)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def pipelineDir(self):
        """The absolute location of a directory containing custom pipeline
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'pipelines')

    @property
    def dbLocation(self):
        """The path ``association.db`` directly inside the workspace
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def dbConfigLocation(self):
        """The path ``apdb.py`` directly inside the workspace
        (`str`, read-only).
        """
        return os.path.join(self._location, 'apdb.py')

    @property
    def alertLocation(self):
        """The path ``alerts`` directly inside the workspace
        (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @staticmethod
    def _collectionExists(registry, name):
        """Test whether a repository already has a collection.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to search.
        name : `str`
            The literal name of the collection to look for.

        Returns
        -------
        exists : `bool`
            `True` if the query for ``name`` returned any collection.
        """
        # Querying with a regex instead of a plain string is a workaround for
        # DM-25945. The name is escaped so that any regex metacharacters it
        # contains are matched literally instead of being interpreted.
        pattern = re.compile(re.escape(name))
        return bool(list(registry.queryCollections(pattern)))

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        if not self._collectionExists(registry, name):
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and cached. Creating it also
        registers, if missing, the raw-ingest run of every instrument found
        in the repository and the chained output collection `outputName`.

        Raises
        ------
        RuntimeError
            Raised if setting up the Butler fails with an `OSError`, which
            is taken to mean `repo` is not a Gen 3 repository.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Dataset generation puts all preloaded datasets in <instrument>/defaults.
                # However, this definition excludes raws, which are not preloaded.
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = []
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    defaultName = instrument.makeCollectionName("defaults")
                    inputs.append(defaultName)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.append(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # The chain is only defined when the collection is first
                # created; an existing output collection is left untouched.
                if not self._collectionExists(queryButler.registry, self.outputName):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access, non-writeable and restricted
        to the `outputName` collection, and then cached.

        Raises
        ------
        RuntimeError
            Raised if constructing the Butler fails with an `OSError`, which
            is taken to mean `repo` is not a Gen 3 repository.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler