Coverage for python/lsst/daf/butler/core/_column_type_info.py: 35%

50 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-28 04:40 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ColumnTypeInfo", "LogicalColumn") 

25 

26import dataclasses 

27from collections.abc import Iterable 

28from typing import Union, cast 

29 

30import sqlalchemy 

31from lsst.daf.relation import ColumnTag, sql 

32 

33from . import ddl 

34from ._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag 

35from .dimensions import Dimension, DimensionUniverse 

36from .timespan import TimespanDatabaseRepresentation 

37 

LogicalColumn = Union[
    sqlalchemy.sql.ColumnElement,
    TimespanDatabaseRepresentation,
]
"""Type alias for the objects that may represent a column in a SQL relation.

It is the butler specialization of the
`lsst.daf.relation.sql.LogicalColumn` concept.
"""

44 

45 

46@dataclasses.dataclass(frozen=True, eq=False) 

47class ColumnTypeInfo: 

48 """A struct that aggregates information about column types that can differ 

49 across data repositories due to `Registry` and dimension configuration. 

50 """ 

51 

52 timespan_cls: type[TimespanDatabaseRepresentation] 

53 """An abstraction around the column type or types used for timespans by 

54 this database engine. 

55 """ 

56 

57 universe: DimensionUniverse 

58 """Object that manages the definitions of all dimension and dimension 

59 elements. 

60 """ 

61 

62 dataset_id_spec: ddl.FieldSpec 

63 """Field specification for the dataset primary key column. 

64 """ 

65 

66 run_key_spec: ddl.FieldSpec 

67 """Field specification for the `~CollectionType.RUN` primary key column. 

68 """ 

69 

70 def make_relation_table_spec( 

71 self, 

72 columns: Iterable[ColumnTag], 

73 unique_keys: Iterable[Iterable[ColumnTag]] = (), 

74 ) -> ddl.TableSpec: 

75 """Create a specification for a table with the given relation columns. 

76 

77 This is used primarily to create temporary tables for query results. 

78 

79 Parameters 

80 ---------- 

81 columns : `Iterable` [ `ColumnTag` ] 

82 Iterable of column identifiers. 

83 unique_keys : `Iterable` [ `Iterable` [ `ColumnTag` ] ] 

84 Unique constraints to add the table, as a nested iterable of 

85 (first) constraint and (second) the columns within that constraint. 

86 

87 Returns 

88 ------- 

89 spec : `ddl.TableSpec` 

90 Specification for a table. 

91 """ 

92 result = ddl.TableSpec(fields=()) 

93 columns = list(columns) 

94 if not columns: 

95 result.fields.add( 

96 ddl.FieldSpec( 

97 sql.Engine.EMPTY_COLUMNS_NAME, 

98 dtype=sql.Engine.EMPTY_COLUMNS_TYPE, 

99 nullable=True, 

100 default=True, 

101 ) 

102 ) 

103 for tag in columns: 

104 match tag: 

105 case DimensionKeyColumnTag(dimension=dimension_name): 

106 result.fields.add( 

107 dataclasses.replace( 

108 cast(Dimension, self.universe[dimension_name]).primaryKey, 

109 name=tag.qualified_name, 

110 primaryKey=False, 

111 nullable=False, 

112 ) 

113 ) 

114 case DimensionRecordColumnTag(column="region"): 

115 result.fields.add(ddl.FieldSpec.for_region(tag.qualified_name)) 

116 case DimensionRecordColumnTag(column="timespan") | DatasetColumnTag(column="timespan"): 

117 result.fields.update( 

118 self.timespan_cls.makeFieldSpecs(nullable=True, name=tag.qualified_name) 

119 ) 

120 case DimensionRecordColumnTag(element=element_name, column=column): 

121 element = self.universe[element_name] 

122 result.fields.add( 

123 dataclasses.replace( 

124 element.RecordClass.fields.facts[column], 

125 name=tag.qualified_name, 

126 nullable=True, 

127 primaryKey=False, 

128 ) 

129 ) 

130 case DatasetColumnTag(column="dataset_id"): 

131 result.fields.add( 

132 dataclasses.replace( 

133 self.dataset_id_spec, name=tag.qualified_name, primaryKey=False, nullable=False 

134 ) 

135 ) 

136 case DatasetColumnTag(column="run"): 

137 result.fields.add( 

138 dataclasses.replace( 

139 self.run_key_spec, name=tag.qualified_name, primaryKey=False, nullable=False 

140 ) 

141 ) 

142 case DatasetColumnTag(column="ingest_date"): 

143 result.fields.add( 

144 ddl.FieldSpec(tag.qualified_name, dtype=sqlalchemy.TIMESTAMP, nullable=False) 

145 ) 

146 case _: 

147 raise TypeError(f"Unexpected column tag {tag}.") 

148 for unique_key in unique_keys: 

149 result.unique.add(tuple(tag.qualified_name for tag in unique_key)) 

150 return result