Coverage for python/lsst/daf/butler/script/queryCollections.py: 8%

68 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-08 10:28 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24from collections.abc import Iterable 

25 

26from astropy.table import Table 

27 

28from .._butler import Butler 

29from ..registry import CollectionType 

30 

31 

def _getTable(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
) -> Table:
    """Run queryCollections and return the results in Table form.

    Each matching collection gets one row holding its name and type.  Every
    related collection (children of a CHAINED collection, or parent chains
    if ``inverse`` is `True`) is listed on its own row in the description
    column; the name and type cells are left empty on all but the first of
    those rows.

    Parameters
    ----------
    repo
    glob
    collection_type
        Same as `queryCollections`
    inverse : `bool`
        `True` if the parent CHAINED collections of each collection should
        be listed in the description column, `False` if the children of
        CHAINED collections should be listed.

    Returns
    -------
    collections : `astropy.table.Table`
        Table with "Name" and "Type" columns plus a "Parents" (inverse) or
        "Children" column.  The last column is removed when it would carry
        no information: in inverse mode when every cell in it is empty,
        otherwise when no row has type CHAINED.
    """
    type_header = "Type"
    related_header = "Parents" if inverse else "Children"
    result = Table(
        names=("Name", type_header, related_header),
        dtype=(str, str, str),
    )
    butler = Butler(repo)
    matched = list(
        butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...)
    )
    matched.sort()

    def emit_rows(collection: str, kind_label: str, related: Iterable[str]) -> None:
        # One row per related collection; only the first row repeats the
        # collection's own name and type.
        if not related:
            result.add_row((collection, kind_label, ""))
            return
        for position, other in enumerate(sorted(related)):
            leading = position == 0
            result.add_row((collection if leading else "", kind_label if leading else "", other))

    for collection in matched:
        kind = butler.registry.getCollectionType(collection)
        if inverse:
            related = butler.registry.getCollectionParentChains(collection)
        elif kind == CollectionType.CHAINED:
            related = butler.registry.getCollectionChain(collection)
        else:
            # Only CHAINED collections have children to list.
            related = ()
        emit_rows(collection, kind.name, related)

    if inverse:
        # Keep the description column only if some collection has a parent.
        keep_related = any(result[related_header])
    else:
        # Keep the description column only if the results include at least
        # one CHAINED collection.
        chained_label = CollectionType.CHAINED.name
        keep_related = any(cell == chained_label for cell in result[type_header])
    if not keep_related:
        del result[related_header]

    return result

104 

105 

def _getTree(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
) -> Table:
    """Run queryCollections and return the results in a table representing
    tree form.

    Recursively lists children (or parents if ``inverse`` is `True`); each
    level of nesting is shown by prefixing the collection name with one more
    copy of the indent string.

    Parameters
    ----------
    repo
    glob
    collection_type
        Same as `queryCollections`
    inverse : `bool`
        `True` if the parent CHAINED collections of each collection should
        be listed beneath it, `False` if the children of CHAINED collections
        should be listed.

    Returns
    -------
    collections : `astropy.table.Table`
        Table with "Name" and "Type" columns, one row per visited
        collection.
    """
    tree = Table(
        names=("Name", "Type"),
        dtype=(str, str),
    )
    butler = Butler(repo)

    def append_subtree(collection: str, depth: int = 0) -> None:
        # Record this collection, then descend into its relatives.
        kind = butler.registry.getCollectionType(collection)
        tree.add_row((" " * depth + collection, kind.name))
        if inverse:
            # Parent chains are visited in sorted order.
            relatives = sorted(butler.registry.getCollectionParentChains(collection))
        elif kind == CollectionType.CHAINED:
            # Children are visited in the order the registry returns them.
            relatives = butler.registry.getCollectionChain(collection)
        else:
            return
        for relative in relatives:
            append_subtree(relative, depth + 1)

    roots = butler.registry.queryCollections(
        collectionTypes=frozenset(collection_type), expression=glob or ...
    )
    for root in sorted(roots):
        append_subtree(root)
    return tree

158 

159 

def _getFlatten(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
) -> Table:
    """Run queryCollections with chains flattened and return the results in
    Table form.

    Parameters
    ----------
    repo
    glob
    collection_type
        Same as `queryCollections`

    Returns
    -------
    collections : `astropy.table.Table`
        Table with "Name" and "Type" columns, one row per collection
        returned by the registry query (in the order it returns them).
    """
    butler = Butler(repo)
    wanted_types = frozenset(collection_type)
    # An empty ``glob`` is replaced by ``...`` as the query expression.
    expression = glob or ...
    matches = butler.registry.queryCollections(
        collectionTypes=wanted_types, flattenChains=True, expression=expression
    )
    names = list(matches)

    type_labels = [butler.registry.getCollectionType(entry).name for entry in names]
    return Table((names, type_labels), names=("Name", "Type"))

174 

175 

def queryCollections(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    chains: str,
) -> Table:
    """Get the collections whose names match an expression.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : `Iterable` [`str`]
        A list of glob-style search strings that fully or partially identify
        the collection names to search for.
    collection_type : `Iterable` [ `CollectionType` ]
        Only return collections of these types.
    chains : `str`
        Must be one of "FLATTEN", "TABLE", "TREE", "INVERSE-TABLE", or
        "INVERSE-TREE" (case sensitive).  Affects contents and formatting of
        results, see ``cli.commands.query_collections``.  The "INVERSE-"
        variants show what CHAINED collections each collection is a member
        of, instead of what collections each CHAINED collection contains.

    Returns
    -------
    collections : `astropy.table.Table`
        A table containing information about collections.

    Raises
    ------
    RuntimeError
        Raised if ``chains`` is not one of the recognized values.
    """
    if chains in ("TABLE", "INVERSE-TABLE"):
        return _getTable(repo, glob, collection_type, chains == "INVERSE-TABLE")
    if chains in ("TREE", "INVERSE-TREE"):
        return _getTree(repo, glob, collection_type, chains == "INVERSE-TREE")
    if chains == "FLATTEN":
        return _getFlatten(repo, glob, collection_type)
    raise RuntimeError(f"Value for --chains not recognized: {chains}")