Coverage for python/lsst/daf/butler/_column_type_info.py: 37%

61 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 09:54 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ColumnTypeInfo", "LogicalColumn") 

31 

32import dataclasses 

33import datetime 

34from collections.abc import Iterable 

35from typing import cast 

36 

37import astropy.time 

38import sqlalchemy 

39from lsst.daf.relation import ColumnTag, sql 

40 

41from . import ddl 

42from ._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag 

43from .dimensions import Dimension, DimensionUniverse 

44from .timespan_database_representation import TimespanDatabaseRepresentation 

45 

# Either a SQLAlchemy column expression or a `TimespanDatabaseRepresentation`.
LogicalColumn = sqlalchemy.sql.ColumnElement | TimespanDatabaseRepresentation
"""A type alias for the types used to represent columns in SQL relations.

This is the butler specialization of the `lsst.daf.relation.sql.LogicalColumn`
concept.
"""

52 

53 

54@dataclasses.dataclass(frozen=True, eq=False) 

55class ColumnTypeInfo: 

56 """A struct that aggregates information about column types that can differ 

57 across data repositories due to `Registry` and dimension configuration. 

58 """ 

59 

60 timespan_cls: type[TimespanDatabaseRepresentation] 

61 """An abstraction around the column type or types used for timespans by 

62 this database engine. 

63 """ 

64 

65 universe: DimensionUniverse 

66 """Object that manages the definitions of all dimension and dimension 

67 elements. 

68 """ 

69 

70 dataset_id_spec: ddl.FieldSpec 

71 """Field specification for the dataset primary key column. 

72 """ 

73 

74 run_key_spec: ddl.FieldSpec 

75 """Field specification for the `~CollectionType.RUN` primary key column. 

76 """ 

77 

78 ingest_date_dtype: type[ddl.AstropyTimeNsecTai] | type[sqlalchemy.TIMESTAMP] 

79 """Type of the ``ingest_date`` column, can be either 

80 `~lsst.daf.butler.ddl.AstropyTimeNsecTai` or `sqlalchemy.TIMESTAMP`. 

81 """ 

82 

83 @property 

84 def ingest_date_pytype(self) -> type: 

85 """Python type corresponding to ``ingest_date`` column type. 

86 

87 Returns 

88 ------- 

89 `type` 

90 The Python type. 

91 """ 

92 if self.ingest_date_dtype is ddl.AstropyTimeNsecTai: 

93 return astropy.time.Time 

94 elif self.ingest_date_dtype is sqlalchemy.TIMESTAMP: 

95 return datetime.datetime 

96 else: 

97 raise TypeError(f"Unexpected type of ingest_date_dtype: {self.ingest_date_dtype}") 

98 

99 def make_relation_table_spec( 

100 self, 

101 columns: Iterable[ColumnTag], 

102 unique_keys: Iterable[Iterable[ColumnTag]] = (), 

103 ) -> ddl.TableSpec: 

104 """Create a specification for a table with the given relation columns. 

105 

106 This is used primarily to create temporary tables for query results. 

107 

108 Parameters 

109 ---------- 

110 columns : `~collections.abc.Iterable` [ `ColumnTag` ] 

111 Iterable of column identifiers. 

112 unique_keys : `~collections.abc.Iterable` \ 

113 [ `~collections.abc.Iterable` [ `ColumnTag` ] ] 

114 Unique constraints to add the table, as a nested iterable of 

115 (first) constraint and (second) the columns within that constraint. 

116 

117 Returns 

118 ------- 

119 spec : `ddl.TableSpec` 

120 Specification for a table. 

121 """ 

122 result = ddl.TableSpec(fields=()) 

123 columns = list(columns) 

124 if not columns: 

125 result.fields.add( 

126 ddl.FieldSpec( 

127 sql.Engine.EMPTY_COLUMNS_NAME, 

128 dtype=sql.Engine.EMPTY_COLUMNS_TYPE, 

129 nullable=True, 

130 ) 

131 ) 

132 for tag in columns: 

133 match tag: 

134 case DimensionKeyColumnTag(dimension=dimension_name): 

135 result.fields.add( 

136 dataclasses.replace( 

137 cast(Dimension, self.universe[dimension_name]).primaryKey, 

138 name=tag.qualified_name, 

139 primaryKey=False, 

140 nullable=False, 

141 ) 

142 ) 

143 case DimensionRecordColumnTag(column="region"): 

144 result.fields.add(ddl.FieldSpec.for_region(tag.qualified_name)) 

145 case DimensionRecordColumnTag(column="timespan") | DatasetColumnTag(column="timespan"): 

146 result.fields.update( 

147 self.timespan_cls.makeFieldSpecs(nullable=True, name=tag.qualified_name) 

148 ) 

149 case DimensionRecordColumnTag(element=element_name, column=column): 

150 element = self.universe[element_name] 

151 result.fields.add( 

152 dataclasses.replace( 

153 element.RecordClass.fields.facts[column], 

154 name=tag.qualified_name, 

155 nullable=True, 

156 primaryKey=False, 

157 ) 

158 ) 

159 case DatasetColumnTag(column="dataset_id"): 

160 result.fields.add( 

161 dataclasses.replace( 

162 self.dataset_id_spec, name=tag.qualified_name, primaryKey=False, nullable=False 

163 ) 

164 ) 

165 case DatasetColumnTag(column="run"): 

166 result.fields.add( 

167 dataclasses.replace( 

168 self.run_key_spec, name=tag.qualified_name, primaryKey=False, nullable=False 

169 ) 

170 ) 

171 case DatasetColumnTag(column="ingest_date"): 

172 result.fields.add( 

173 ddl.FieldSpec(tag.qualified_name, dtype=self.ingest_date_dtype, nullable=False) 

174 ) 

175 case _: 

176 raise TypeError(f"Unexpected column tag {tag}.") 

177 for unique_key in unique_keys: 

178 result.unique.add(tuple(tag.qualified_name for tag in unique_key)) 

179 return result