Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

# This file is part of daf_butler. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

 

# Public API of this module: only the query-builder class is exported.
__all__ = ("DataIdQueryBuilder",)

 

import logging 

 

from sqlalchemy.sql import and_, select 

 

from .queryBuilder import QueryBuilder 

 

# Module-level logger, named after this module via ``__name__``.
_LOG = logging.getLogger(__name__)

 

 

class DataIdQueryBuilder(QueryBuilder):
    r"""Specialization of `QueryBuilder` that yields data IDs consistent
    with dimension relationships.

    Parameters
    ----------
    registry : `SqlRegistry`
        Registry instance the query is being run against.
    fromClause : `sqlalchemy.sql.expression.FromClause`, optional
        Initial FROM clause for the query.
    whereClause : SQLAlchemy boolean expression, optional
        Expression to use as the initial WHERE clause.
    """

    def __init__(self, registry, *, fromClause=None, whereClause=None):
        # Pure pass-through to the base class; all arguments are forwarded
        # unchanged.
        super().__init__(registry, fromClause=fromClause, whereClause=whereClause)

    def addDimension(self, dimension, addResultColumns=True):
        """Add a `Dimension` table to the query.

        This is a thin wrapper around `QueryBuilder.joinDimensionElement`
        that updates the SELECT clause and hence requires that the dimension
        to be added has a table.

        Parameters
        ----------
        dimension : `Dimension`
            Dimension to add.
        addResultColumns : `bool`
            If `True`, add the dimension's links to the SELECT clause so
            they are included in the query results.  Any links already
            selected from other dimensions will be ignored.

        Raises
        ------
        ValueError
            Raised if the dimension has no table or view.
        """
        table = self.joinDimensionElement(dimension)
        # `joinDimensionElement` returning `None` is treated as "this
        # dimension has nothing to join against".
        if table is None:
            raise ValueError(f"Dimension '{dimension}' has no table.")
        if addResultColumns:
            for link in dimension.links():
                self.selectDimensionLink(link)

    def requireDataset(self, datasetType, collections):
        """Constrain the query to return only data IDs for which at least one
        instance of the given dataset exists in one of the given collections.

        The dimensions joined into the query and the dimensions used to
        identify the `DatasetType` need not be identical, but they should
        overlap.

        To ensure any dimensions that might relate to a `DatasetType` are
        present, `requireDataset` should generally only be called after
        all calls to `addDimension` have been made.

        Parameters
        ----------
        datasetType : `DatasetType`
            `DatasetType` for which instances must exist in order for the
            query to yield related data IDs.
        collections : `~collections.abc.Iterable` of `str`
            The names of collections to search for the dataset, in any order.
            A single `str` is treated as the name of one collection rather
            than as an iterable of one-character names.
        """
        # A bare string is itself iterable, so ``list("name")`` would silently
        # turn it into one-character collection names and search for those.
        if isinstance(collections, str):
            collections = (collections,)
        # Materialize once so single-pass iterables (e.g. generators) work.
        collectionNames = list(collections)

        datasetTable = self.registry._schema.tables["dataset"]
        datasetCollectionTable = self.registry._schema.tables["dataset_collection"]

        # Keep only the dataset type's links for which `findSelectableForLink`
        # reports a selectable; the same list is used both as the subquery's
        # columns and as the join keys below.
        links = [link for link in datasetType.dimensions.links()
                 if self.findSelectableForLink(link) is not None]

        # Subquery over dataset JOIN dataset_collection, restricted to this
        # dataset type and to the requested collections.
        subquery = select(
            [datasetTable.columns[link] for link in links]
        ).select_from(
            datasetTable.join(
                datasetCollectionTable,
                datasetTable.columns.dataset_id == datasetCollectionTable.columns.dataset_id
            )
        ).where(
            and_(datasetTable.columns.dataset_type_name == datasetType.name,
                 datasetCollectionTable.columns.collection.in_(collectionNames))
        )
        # The subquery is aliased with the dataset type's name before being
        # handed to `join` together with the link names.
        self.join(subquery.alias(datasetType.name), links)

    def convertResultRow(self, managed):
        """Convert a result row for this query to a `DataId`.

        Parameters
        ----------
        managed : `ResultsColumnsManager.ManagedRow`
            Intermediate result row object to convert.

        Returns
        -------
        dataId : `DataId`
            Dictionary-like object with a set of related dimension values.
        """
        return managed.makeDataId()