Coverage for python / lsst / daf / butler / script / queryCollections.py: 7%

123 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from collections.abc import Iterable 

31from fnmatch import fnmatch 

32from typing import Literal 

33 

34from astropy.table import Column, Table, hstack, vstack 

35 

36from .._butler import Butler 

37from .._butler_collections import CollectionInfo 

38from .._collection_type import CollectionType 

39 

40 

41def _parseDatasetTypes(dataset_types: frozenset[str] | list[str] | None) -> list[str]: 

42 """Parse dataset types from a collection info object or a list of strings. 

43 

44 Parameters 

45 ---------- 

46 dataset_types : `frozenset` [`str`] | `list` [`str`] | `None` 

47 The dataset types to parse. If `None`, an empty list is returned. 

48 If a `frozenset` or `list` is provided, it is returned as a list. 

49 

50 Returns 

51 ------- 

52 dataset_types : `list` [`str`] 

53 The parsed dataset types. 

54 """ 

55 return [""] if not dataset_types else list(dataset_types) 

56 

57 

def _getTable(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Run queryCollections and return the results in Table form.

    Only lists the first child (or parent if `inverse` is `True`) in the
    description column.

    Parameters
    ----------
    repo : `str`
        The Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcard to pass to ``queryCollections``.
    collection_type
        Same as `queryCollections`
    inverse : `bool`
        True if parent CHAINED datasets of each dataset should be listed in the
        description column, False if children of CHAINED datasets should be
        listed.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`
    """
    typeCol = "Type"
    # The third column is direction-dependent: parents when inverting the
    # chain relationship, children otherwise.
    descriptionCol = "Parents" if inverse else "Children"
    table = Table(
        names=("Name", typeCol, descriptionCol),
        dtype=(str, str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    with Butler.from_config(repo) as butler:

        def addDatasetTypes(collection_table: Table, collection: str, dataset_types: list[str]) -> Table:
            # Append a "Dataset Types" column (one row per type) to the given
            # one-row-per-collection table.  A single empty string is the
            # sentinel from _parseDatasetTypes meaning "not supplied"; in that
            # case fetch this collection's summary from the butler.
            if dataset_types[0] == "":
                cinfo = butler.collections.get_info(collection, include_summary=True)
                dataset_types = _parseDatasetTypes(cinfo.dataset_types)
            if exclude_dataset_types:
                # Drop every type matching one of the exclusion globs, then
                # re-normalize so a fully filtered list becomes the sentinel
                # again (keeping the table non-empty).
                dataset_types = [
                    dt
                    for dt in dataset_types
                    if not any(fnmatch(dt, pattern) for pattern in exclude_dataset_types)
                ]
                dataset_types = _parseDatasetTypes(dataset_types)
            types_table = Table({"Dataset Types": sorted(dataset_types)}, dtype=(str,))
            # hstack pads the shorter (one-row) table with masked cells;
            # filled("") turns those into blanks.
            collection_table = hstack([collection_table, types_table]).filled("")
            return collection_table

        def addCollection(info: CollectionInfo, relation: str) -> None:
            # relation is "parents", "children", or "self"; "self" is not an
            # attribute of CollectionInfo, so the lookup falls back to an
            # empty relatives list.
            try:
                info_relatives = getattr(info, relation)
            except AttributeError:
                info_relatives = []
            # Parent results can be returned in a non-deterministic order, so
            # sort them to make the output deterministic.
            if relation == "parents":
                info_relatives = sorted(info_relatives)
            if info_relatives:
                collection_table = Table([[info.name], [info.type.name]], names=("Name", typeCol))
                description_table = Table(names=(descriptionCol,), dtype=(str,))
                for info_relative in info_relatives:
                    relative_table = Table([[info_relative]], names=(descriptionCol,))
                    if show_dataset_types:
                        # [""] forces addDatasetTypes to look up the
                        # relative's own dataset-type summary.
                        relative_table = addDatasetTypes(relative_table, info_relative, [""])
                    description_table = vstack([description_table, relative_table])
                # Name/Type fill only the first row; the padding introduced by
                # hstack is blanked by filled("").
                collection_table = hstack([collection_table, description_table]).filled("")
                for row in collection_table:
                    table.add_row(row)
            else:
                # No relatives: emit a single row with a blank description.
                collection_table = Table(
                    [[info.name], [info.type.name], [""]], names=("Name", typeCol, descriptionCol)
                )
                if show_dataset_types:
                    collection_table = addDatasetTypes(collection_table, info.name, [""])
                for row in collection_table:
                    table.add_row(row)

        # Sort the query results so the output ordering is deterministic.
        collections = sorted(
            butler.collections.query_info(
                glob or "*",
                collection_types=frozenset(collection_type),
                include_parents=inverse,
                include_summary=show_dataset_types,
            )
        )
        if inverse:
            for info in collections:
                addCollection(info, "parents")
            # If none of the datasets has a parent dataset then remove the
            # description column.
            if not any(c for c in table[descriptionCol]):
                del table[descriptionCol]
        else:
            for info in collections:
                if info.type == CollectionType.CHAINED:
                    addCollection(info, "children")
                else:
                    addCollection(info, "self")
            # If there aren't any CHAINED datasets in the results then remove
            # the description column.
            if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]):
                del table[descriptionCol]

    return table

176 

177 

def _getTree(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Run queryCollections and return the results in a table representing tree
    form.

    Recursively lists children (or parents if `inverse` is `True`)

    Parameters
    ----------
    repo : `str`
        Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcards to pass to ``queryCollections``.
    collection_type
        Same as `queryCollections`
    inverse : `bool`
        True if parent CHAINED datasets of each dataset should be listed in the
        description column, False if children of CHAINED datasets should be
        listed.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`
    """
    table = Table(
        names=("Name", "Type"),
        dtype=(str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    # No datastore access is needed for a pure registry walk.
    with Butler.from_config(repo, without_datastore=True) as butler:

        def addCollection(info: CollectionInfo, level: int = 0) -> None:
            # Append one (indented) row for this collection, then recurse
            # into its parents (inverse) or children (forward) one level
            # deeper.  Nesting depth is rendered via leading spaces on Name.
            collection_table = Table([[" " * level + info.name], [info.type.name]], names=["Name", "Type"])
            if show_dataset_types:
                if info.type == CollectionType.CHAINED:
                    # CHAINED collections get a blank Dataset Types cell; the
                    # types are shown on their non-chained constituents.
                    collection_table = hstack(
                        [collection_table, Table([[""] * len(collection_table)], names=["Dataset Types"])]
                    )
                else:
                    dataset_types = _parseDatasetTypes(info.dataset_types)
                    if exclude_dataset_types:
                        # Apply the exclusion globs, then re-normalize so an
                        # emptied list becomes the [""] sentinel again.
                        dataset_types = [
                            dt
                            for dt in dataset_types
                            if not any(fnmatch(dt, pattern) for pattern in exclude_dataset_types)
                        ]
                        dataset_types = _parseDatasetTypes(dataset_types)
                    dataset_types_table = Table({"Dataset Types": sorted(dataset_types)}, dtype=(str,))
                    # hstack pads the one-row table; filled("") blanks the
                    # masked cells.
                    collection_table = hstack([collection_table, dataset_types_table]).filled("")
            for row in collection_table:
                table.add_row(row)

            if inverse:
                assert info.parents is not None  # For mypy.
                # Sorted for deterministic output ordering.
                for pname in sorted(info.parents):
                    pinfo = butler.collections.get_info(
                        pname, include_parents=inverse, include_summary=show_dataset_types
                    )
                    addCollection(pinfo, level + 1)
            else:
                if info.type == CollectionType.CHAINED:
                    # Children are listed in chain order, not sorted.
                    for name in info.children:
                        cinfo = butler.collections.get_info(name, include_summary=show_dataset_types)
                        addCollection(cinfo, level + 1)

        collections = butler.collections.query_info(
            glob or "*",
            collection_types=frozenset(collection_type),
            include_parents=inverse,
            include_summary=show_dataset_types,
        )
        # Sort the top-level matches; recursion handles everything below.
        for collection in sorted(collections):
            addCollection(collection)
        return table

266 

267 

def _getList(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    flatten_chains: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Return collection results as a table representing a flat list of
    collections.

    Parameters
    ----------
    repo : `str`
        Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcards to pass to ``queryCollections``.
    collection_type
        Same as `queryCollections`
    flatten_chains : `bool`
        If `True`, flatten the tree of CHAINED datasets.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`
    """
    table = Table(
        names=("Name", "Type"),
        dtype=(str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    with Butler.from_config(repo) as butler:

        def rowsFor(info: CollectionInfo) -> Table:
            # Build the (possibly multi-row) sub-table describing a single
            # collection: one Name/Type pair, plus one row per dataset type
            # when summaries were requested.
            result = Table([[info.name], [info.type.name]], names=["Name", "Type"])
            if not show_dataset_types:
                return result
            type_names = _parseDatasetTypes(info.dataset_types)
            if exclude_dataset_types:
                # Filter by the exclusion globs, then re-normalize so an
                # emptied list collapses back to the [""] sentinel.
                kept = [
                    name
                    for name in type_names
                    if not any(fnmatch(name, pattern) for pattern in exclude_dataset_types)
                ]
                type_names = _parseDatasetTypes(kept)
            types = Table({"Dataset Types": sorted(type_names)}, dtype=(str,))
            # hstack pads the one-row table against the types column;
            # filled("") blanks the masked cells.
            return hstack([result, types]).filled("")

        infos = butler.collections.query_info(
            glob or "*",
            collection_types=frozenset(collection_type),
            flatten_chains=flatten_chains,
            include_summary=show_dataset_types,
        )
        for info in infos:
            for row in rowsFor(info):
                table.add_row(row)

    return table

337 

338 

def queryCollections(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    chains: Literal["INVERSE-TABLE", "TABLE", "TREE", "INVERSE-TREE", "FLATTEN", "NO-CHILDREN"],
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Get the collections whose names match an expression.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : `~collections.abc.Iterable` [`str`]
        A list of glob-style search string that fully or partially identify
        the dataset type names to search for.
    collection_type : `~collections.abc.Iterable` [ `CollectionType` ], \
            optional
        If provided, only return collections of these types.
    chains : `str`
        Selects the output format and whether chains are shown forward,
        inverted, flattened, or omitted; see
        ``cli.commands.query_collections``.
    show_dataset_types : `bool`, optional
        If `True`, include the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        A table containing information about collections.
    """
    # Dispatch on the requested format; the "INVERSE-*" variants walk the
    # chain relationship from child to parent.
    if chains in ("TABLE", "INVERSE-TABLE"):
        return _getTable(
            repo, glob, collection_type, chains == "INVERSE-TABLE", show_dataset_types, exclude_dataset_types
        )
    if chains in ("TREE", "INVERSE-TREE"):
        return _getTree(
            repo, glob, collection_type, chains == "INVERSE-TREE", show_dataset_types, exclude_dataset_types
        )
    if chains in ("FLATTEN", "NO-CHILDREN"):
        # FLATTEN expands chains into their constituents; NO-CHILDREN lists
        # only the matched collections themselves.
        return _getList(
            repo, glob, collection_type, chains == "FLATTEN", show_dataset_types, exclude_dataset_types
        )
    raise RuntimeError(f"Value for --chains not recognized: {chains}")