Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from astropy.table import Table as AstropyTable 

23from collections import defaultdict, namedtuple 

24import numpy as np 

25 

26from .. import Butler 

27from ..core.utils import globToRegex 

28from ..cli.utils import sortAstropyTable 

29 

30 

# Hashable record pairing a dataset ref with its URI (a string, or None when
# no URI was supplied); ``_Table`` stores one of these per row.
_RefInfo = namedtuple("RefInfo", ["datasetRef", "uri"])

32 

33 

class _Table:
    """Aggregate the rows for a single dataset type and build an astropy
    table from them.

    Rows are kept in a `set`, so adding the same dataset ref and URI more
    than once yields a single row.
    """

    # Numpy column types used to hint numeric columns. ``np.float64`` is used
    # instead of the ``np.float`` alias, which was deprecated in NumPy 1.20
    # and removed in NumPy 1.24.
    _TYPE_MAP = {float: np.float64, int: np.int64}

    def __init__(self):
        self.datasetRefs = set()

    def add(self, datasetRef, uri=None):
        """Add a row of information to the table.

        ``uri`` is optional but must be used consistently: either provided
        for every call on a given ``_Table`` instance, or for none of them.

        Parameters
        ----------
        datasetRef : ``DatasetRef``
            A dataset ref that will be added as a row in the table.
        uri : ``ButlerURI``, optional
            The URI to show as a file location in the table, by default None.
        """
        # Store the URI as a plain string; a falsy URI is stored unchanged.
        if uri:
            uri = str(uri)
        self.datasetRefs.add(_RefInfo(datasetRef, uri))

    def getAstropyTable(self, datasetTypeName):
        """Get the table as an astropy table.

        Parameters
        ----------
        datasetTypeName : `str`
            The dataset type name to show in the ``type`` column of the table.

        Returns
        -------
        table : `astropy.table.Table`
            The table with one row per added dataset ref, as returned by
            ``sortAstropyTable``.

        Raises
        ------
        RuntimeError
            Raised if no dataset refs have been added to this table.
        """
        # Should never happen; adding a dataset should be the action that
        # causes a _Table to be created.
        if not self.datasetRefs:
            raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}")

        # Use an arbitrary row as the template for the column layout.
        # NOTE(review): this assumes every ref added to the table has the
        # same dimensions - confirm against callers.
        template = next(iter(self.datasetRefs))
        templateDataId = template.datasetRef.dataId.full
        dimensions = list(templateDataId.keys())
        columnNames = ["type", "run", "id",
                       *[str(item) for item in dimensions]]

        # Need to hint the column types for numbers since the per-row
        # constructor of Table does not work this out on its own and sorting
        # will not work properly without.
        columnTypes = [None, None, np.int64,
                       *[self._TYPE_MAP.get(type(value)) for value in templateDataId.values()]]
        if template.uri:
            columnNames.append("URI")
            columnTypes.append(None)

        rows = []
        for refInfo in self.datasetRefs:
            row = [datasetTypeName,
                   refInfo.datasetRef.run,
                   refInfo.datasetRef.id,
                   *refInfo.datasetRef.dataId.full.values()]
            if refInfo.uri:
                row.append(refInfo.uri)
            rows.append(row)

        dataset_table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        return sortAstropyTable(dataset_table, dimensions, ["type", "run"])

104 

105 

def queryDatasets(repo, glob, collections, where, find_first, show_uri):
    """Query a repository for datasets and tabulate them by dataset type.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        dataset type names to search for.
    collections : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        collections to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    find_first : `bool`
        For each result data ID, only yield one DatasetRef of each DatasetType,
        from the first collection in which a dataset of that dataset type
        appears (according to the order of `collections` passed in). If used,
        `collections` must specify at least one expression and must not contain
        wildcards.
    show_uri : `bool`
        If True, include the dataset URI in the output.

    Returns
    -------
    datasetTables : `list` [`astropy.table.Table`]
        A list of astropy tables, one for each dataset type.
    """
    butler = Butler(repo)

    # An empty dataset type expression is replaced by ``...``.
    datasetTypeExpr = globToRegex(glob) or ...

    # Collections are only converted with globToRegex when not doing a
    # find-first search; with find_first they are passed through unchanged.
    if not collections:
        collections = ...
    elif not find_first:
        collections = globToRegex(collections)

    refs = butler.registry.queryDatasets(datasetType=datasetTypeExpr,
                                         collections=collections,
                                         where=where,
                                         findFirst=find_first)

    # One _Table per dataset type name, created on first use.
    tablesByType = defaultdict(_Table)

    for ref in refs:
        if show_uri:
            primary, components = butler.getURIs(ref, collections=ref.run)
            if primary:
                tablesByType[ref.datasetType.name].add(ref, primary)
            # Each component URI goes into the table for its component
            # dataset type name.
            for componentName, componentURI in components.items():
                componentTypeName = ref.datasetType.componentTypeName(componentName)
                tablesByType[componentTypeName].add(ref, componentURI)
        else:
            tablesByType[ref.datasetType.name].add(ref)

    return [table.getAstropyTable(typeName) for typeName, table in tablesByType.items()]