Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from astropy.table import Table as AstropyTable 

23from collections import defaultdict, namedtuple 

24from numpy import array 

25 

26from .. import Butler 

27from ..core.utils import globToRegex 

28 

# Lightweight, hashable record pairing a dataset ref with the (optional)
# string form of its URI; instances are what ``_Table`` stores as rows.
_RefInfo = namedtuple("RefInfo", ["datasetRef", "uri"])

30 

31 

class _Table:
    """Collect the rows belonging to a single dataset type and render them
    as an astropy table.

    Rows are held in a `set`, so adding the same dataset ref (with the same
    URI) more than once does not produce duplicate rows.
    """

    def __init__(self):
        # Set of ``_RefInfo`` records, one per table row.
        self.datasetRefs = set()

    def add(self, datasetRef, uri=None):
        """Record one dataset ref as a row of the table.

        ``uri`` is optional, but usage must be consistent: either provide
        it on every call made to a given ``_Table`` instance or on none of
        them.

        Parameters
        ----------
        datasetRef : ``DatasetRef``
            The dataset ref to add as a row in the table.
        uri : ``ButlerURI``, optional
            The URI to show as the file location in the table. A truthy
            value is stored in its string form. By default `None`.
        """
        location = str(uri) if uri else uri
        self.datasetRefs.add(_RefInfo(datasetRef, location))

    def getAstropyTable(self, datasetTypeName):
        """Build an astropy table from the accumulated rows.

        The columns are ``type``, ``run``, ``id``, one column per data ID
        key and, when URIs were recorded, ``URI``. Rows appear in the sorted
        order of the stored records.

        Parameters
        ----------
        datasetTypeName : `str`
            The dataset type name to show in the ``type`` column of the
            table.

        Returns
        -------
        table : `astropy.table.Table`
            The table holding one row per stored dataset ref.

        Raises
        ------
        RuntimeError
            Raised if no dataset refs have been added to this table.
        """
        # Should never happen; a _Table is expected to be created only as a
        # consequence of adding a dataset to it.
        if not self.datasetRefs:
            raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}")

        # The header is derived from an arbitrary stored record.
        # NOTE(review): this assumes every record shares the same data ID
        # keys and the same URI presence -- confirm against callers.
        sample = next(iter(self.datasetRefs))
        header = ["type", "run", "id"]
        header.extend(str(key) for key in sample.datasetRef.dataId.keys())
        if sample.uri:
            header.append("URI")

        def makeRow(info):
            # Cell order mirrors the header built above.
            ref = info.datasetRef
            cells = [datasetTypeName, ref.run, ref.id]
            cells.extend(str(value) for value in ref.dataId.values())
            if info.uri:
                cells.append(info.uri)
            return cells

        # Sorting compares the stored records directly, so it relies on the
        # dataset refs being orderable.
        body = [makeRow(info) for info in sorted(self.datasetRefs)]

        return AstropyTable(array(body), names=header)

97 

98 

def queryDatasets(repo, glob, collections, where, find_first, show_uri):
    """Query a repository for datasets and tabulate them by dataset type.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing
        the repo and its location.
    glob : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        dataset type names to search for.
    collections : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        collections to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    find_first : `bool`
        For each result data ID, only yield one DatasetRef of each
        DatasetType, from the first collection in which a dataset of that
        dataset type appears (according to the order of ``collections``
        passed in). If used, ``collections`` must specify at least one
        expression and must not contain wildcards.
    show_uri : `bool`
        If True, include the dataset URI in the output.

    Returns
    -------
    datasetTables : `list` [`astropy.table.Table`]
        A list of astropy tables, one for each dataset type.
    """
    butler = Butler(repo)

    # Fall back to ``...`` when the glob conversion yields nothing.
    # NOTE(review): ``...`` is presumably the registry's "match everything"
    # expression -- confirm against the registry documentation.
    datasetExpr = globToRegex(glob) or ...

    # Collections are only converted to regular expressions when find_first
    # is not requested; with find_first they are passed through untouched.
    if not collections:
        collectionExpr = ...
    elif not find_first:
        collectionExpr = globToRegex(collections)
    else:
        collectionExpr = collections

    refs = butler.registry.queryDatasets(datasetType=datasetExpr,
                                         collections=collectionExpr,
                                         where=where,
                                         findFirst=find_first)

    # One _Table per dataset type name, created on first use.
    tablesByType = defaultdict(_Table)

    for ref in refs:
        if show_uri:
            primaryURI, componentURIs = butler.getURIs(ref, collections=ref.run)
            if primaryURI:
                tablesByType[ref.datasetType.name].add(ref, primaryURI)
            # Each component URI is filed under its component dataset type
            # name, so components end up in tables of their own.
            for component, uri in componentURIs.items():
                tablesByType[ref.datasetType.componentTypeName(component)].add(ref, uri)
        else:
            tablesByType[ref.datasetType.name].add(ref)

    datasetTables = []
    for typeName, table in tablesByType.items():
        datasetTables.append(table.getAstropyTable(typeName))
    return datasetTables