Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "addDatasetForeignKey", 

26 "makeStaticTableSpecs", 

27 "makeDynamicTableName", 

28 "makeDynamicTableSpec", 

29 "StaticDatasetTablesTuple", 

30) 

31 

32from typing import ( 

33 Optional, 

34 Type, 

35) 

36 

37from collections import namedtuple 

38 

39import sqlalchemy 

40 

41from lsst.daf.butler import ( 

42 DatasetType, 

43 ddl, 

44 DimensionUniverse, 

45) 

46from lsst.daf.butler import addDimensionForeignKey 

47from lsst.daf.butler.registry.interfaces import CollectionManager 

48 

49 

50DATASET_TYPE_NAME_LENGTH = 128 

51 

52 

# Namespace for the static (DatasetType-independent) tables managed by this
# package; `makeStaticTableSpecs` fills each field with a `ddl.TableSpec`.
StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    ["dataset_type", "dataset", "dataset_composition"],
)

61 

62 

def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs) -> ddl.FieldSpec:
    """Add a foreign key column referencing the dataset table to ``tableSpec``,
    optionally with an actual foreign key constraint.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table.  Will be modified in place.
    name : `str`, optional
        Prefix for the new field's name; the full name is ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted.  `None` indicates
        that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field that was added.
    """
    fieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs)
    tableSpec.fields.add(fieldSpec)
    if not constraint:
        # Caller wants a joinable column only; skip the FK constraint.
        return fieldSpec
    fkSpec = ddl.ForeignKeySpec("dataset", source=(fieldSpec.name,), target=("id",),
                                onDelete=onDelete)
    tableSpec.foreignKeys.append(fkSpec)
    return fieldSpec

105 

106 

def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [`CollectionManager`]
        Manager class for the collections in this `Registry`; used here to
        add the run foreign key field/constraint to the dataset table.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # Three tables:
    #  - dataset_type: one row per registered DatasetType;
    #  - dataset: one row per dataset, shared by all dataset types;
    #  - dataset_composition: parent/component links for composite datasets.
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    )
                ),
                ddl.FieldSpec(
                    name="dimensions_encoded",
                    dtype=ddl.Base64Bytes,
                    # Fixed-width encoding; length depends only on the
                    # universe, so it is the same for all rows.
                    nbytes=universe.getEncodeLength(),
                    nullable=False,
                    doc=(
                        "An opaque (but reversible) encoding of the set of "
                        "dimensions used to identify dataset of this type."
                    ),
                ),
            ],
            # Dataset type names must be unique (case handled elsewhere).
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc="A unique autoincrement field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    # Nullable by default: not all datasets come from quanta.
                    doc=(
                        "The id of the quantum that produced this dataset, providing access "
                        "to fine-grained provenance information. May be NULL for datasets "
                        "not produced by running a PipelineTask."
                    ),
                ),
                # Foreign key field/constraint to run added below.
                # Eventually quantum field/constraint will move there, too.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
                # Deleting a quantum orphans the dataset's provenance link
                # rather than the dataset itself.
                ddl.ForeignKeySpec("quantum", source=("quantum_id",), target=("id",), onDelete="SET NULL"),
            ]
        ),
        dataset_composition=ddl.TableSpec(
            fields=[
                # Foreign keys to dataset added below (one of which is a
                # primary key).
                ddl.FieldSpec("component_name", dtype=sqlalchemy.String, length=32, primaryKey=True),
                ddl.FieldSpec("simple", dtype=sqlalchemy.Boolean,
                              doc=("True if this component is in the same RUN-type collection and has the "
                                   "same data ID as its parent dataset, False otherwise."))
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    # parent_dataset_id is part of dataset_composition's primary key;
    # component_dataset_id is not (it is a plain FK column).
    addDatasetForeignKey(specs.dataset_composition, name="parent_dataset", onDelete="CASCADE",
                         primaryKey=True)
    addDatasetForeignKey(specs.dataset_composition, name="component_dataset", onDelete="CASCADE")
    return specs

226 

227 

228def makeDynamicTableName(datasetType: DatasetType) -> str: 

229 """Construct the name for a dynamic (DatasetType-dependent) table used by 

230 the classes in this package. 

231 

232 Parameters 

233 ---------- 

234 datasetType : `DatasetType` 

235 Dataset type to construct a name for. Multiple dataset types may 

236 share the same table. 

237 

238 Returns 

239 ------- 

240 name : `str` 

241 Name for the table. 

242 """ 

243 return f"dataset_collection_{datasetType.dimensions.encode().hex()}" 

244 

245 

def makeDynamicTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) table
    used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [`CollectionManager`]
        Manager class used to add the collection foreign key field to the
        table.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    spec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Foreign key to the dataset table; part of the primary key.
    addDatasetForeignKey(spec, primaryKey=True, onDelete="CASCADE")
    # Foreign key to the collection table; part of the primary key and of
    # the unique constraint built below.
    collectionFieldSpec = collections.addCollectionForeignKey(spec, primaryKey=True, onDelete="CASCADE")
    # Unique constraint on (dataset type, collection, required data ID).
    # Only the required part of the data ID is included: it is sufficient,
    # and it saves us from worrying about nulls in the constraint.
    uniqueColumns = ["dataset_type_id", collectionFieldSpec.name]
    for dimension in datasetType.dimensions.required:
        dimensionFieldSpec = addDimensionForeignKey(spec, dimension=dimension,
                                                    nullable=False, primaryKey=False)
        uniqueColumns.append(dimensionFieldSpec.name)
    spec.unique.add(tuple(uniqueColumns))
    return spec