Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names exported by this module (internal helpers for the
# ``byDimensions`` dataset-record storage implementation).
__all__ = (
    "addDatasetForeignKey",
    "makeStaticTableSpecs",
    "makeDynamicTableName",
    "makeDynamicTableSpec",
    "StaticDatasetTablesTuple",
)

31 

32from typing import ( 

33 Optional, 

34 Type, 

35) 

36 

37from collections import namedtuple 

38 

39import sqlalchemy 

40 

41from lsst.daf.butler import ( 

42 DatasetType, 

43 ddl, 

44 DimensionUniverse, 

45) 

46from lsst.daf.butler import addDimensionForeignKey 

47from lsst.daf.butler.registry.interfaces import CollectionManager 

48 

49 

# Maximum number of characters allowed in a dataset type name; used as the
# length of the ``name`` column in the dataset_type table.
DATASET_TYPE_NAME_LENGTH = 128


# Named container for the two static (DatasetType-independent) table specs
# built by `makeStaticTableSpecs`.
StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    ["dataset_type", "dataset"],
)

60 

61 

def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs) -> ddl.FieldSpec:
    """Add a field referencing the dataset table to a table specification,
    optionally with a foreign key constraint.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset table;
        modified in place.
    name : `str`, optional
        Prefix for the new field; its full name is ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", describing what happens to the
        referencing row when the referenced dataset row is deleted.  `None`
        (default) makes such a deletion an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add only a field that can be joined
        to the dataset primary key, without a foreign key constraint.
    **kwargs
        Forwarded to the `ddl.FieldSpec` constructor (``name`` and ``dtype``
        are supplied here and should not be passed).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the new ID field.
    """
    fieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs)
    tableSpec.fields.add(fieldSpec)
    if not constraint:
        return fieldSpec
    # Datasets are always referenced via the "id" primary key column of the
    # static dataset table.
    fkSpec = ddl.ForeignKeySpec(
        "dataset",
        source=(fieldSpec.name,),
        target=("id",),
        onDelete=onDelete,
    )
    tableSpec.foreignKeys.append(fkSpec)
    return fieldSpec

104 

105 

def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [`CollectionManager`]
        Manager class for the collections in this `Registry`; used here to
        add the ``run`` foreign key field and constraint to the dataset
        table.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        # One row per registered dataset type.
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    # Shared module-level constant so other code can validate
                    # name lengths consistently.
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    )
                ),
                ddl.FieldSpec(
                    name="dimensions_encoded",
                    dtype=ddl.Base64Bytes,
                    # Fixed-width encoding; the width comes from the universe
                    # so it can represent any subset of its dimensions.
                    nbytes=universe.getEncodeLength(),
                    nullable=False,
                    doc=(
                        "An opaque (but reversible) encoding of the set of "
                        "dimensions used to identify dataset of this type."
                    ),
                ),
            ],
            # Names must be unique, but other tables reference the surrogate
            # "id" primary key, not the name.
            unique=[("name",)],
        ),
        # Monolithic table with one row per dataset, of any dataset type.
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc="A unique autoincrement field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    # Intentionally nullable: the "SET NULL" foreign key below
                    # and the doc string both rely on NULL being allowed.
                    doc=(
                        "The id of the quantum that produced this dataset, providing access "
                        "to fine-grained provenance information. May be NULL for datasets "
                        "not produced by running a PipelineTask."
                    ),
                ),
                # Foreign key field/constraint to run added below.
                # Eventually quantum field/constraint will move there, too.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
                ddl.ForeignKeySpec("quantum", source=("quantum_id",), target=("id",), onDelete="SET NULL"),
            ]
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs

212 

213 

def makeDynamicTableName(datasetType: DatasetType) -> str:
    """Return the name of the dynamic (DatasetType-dependent) table used by
    the classes in this package for the given dataset type.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    # Only the dimensions participate in the name, so any two dataset types
    # with the same dimensions share the same table.
    encodedDimensions = datasetType.dimensions.encode().hex()
    return "dataset_collection_" + encodedDimensions

230 

231 

def makeDynamicTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) table
    used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [`CollectionManager`]
        Manager class for the collections in this `Registry`; used here to
        add the collection foreign key field and constraint.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID.  We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    fieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    constraint.append(fieldSpec.name)
    # One foreign key field per required dimension; each participates in the
    # unique constraint but not the primary key.
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        constraint.append(fieldSpec.name)
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec