Coverage for python / lsst / daf / butler / direct_query_driver / _sql_builders.py: 24%

184 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-17 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("SqlColumns", "SqlJoinsBuilder", "SqlSelectBuilder", "make_table_spec") 

31 

32import dataclasses 

33import itertools 

34from collections.abc import Iterable, Sequence 

35from typing import TYPE_CHECKING, Any, ClassVar, Self 

36 

37import sqlalchemy 

38 

39from .. import ddl 

40from ..dimensions import DimensionGroup 

41from ..dimensions._group import SortedSequenceSet 

42from ..nonempty_mapping import NonemptyMapping 

43from ..queries import tree as qt 

44from ._postprocessing import Postprocessing 

45 

46if TYPE_CHECKING: 

47 from ..registry.interfaces import Database 

48 from ..timespan_database_representation import TimespanDatabaseRepresentation 

49 

50 

51@dataclasses.dataclass 

52class SqlSelectBuilder: 

53 """A struct used to represent an under-construction SQL SELECT query. 

54 

55 This object's methods frequently "consume" ``self``, by either returning 

56 it after modification or returning related copy that may share state with 

57 the original. Users should be careful never to use consumed instances, and 

58 are recommended to reuse the same variable name to make that hard to do 

59 accidentally. 

60 """ 

61 

62 joins: SqlJoinsBuilder 

63 """Struct representing the SQL FROM and WHERE clauses, as well as the 

64 columns *available* to the query (but not necessarily in the SELECT 

65 clause). 

66 """ 

67 

68 columns: qt.ColumnSet 

69 """Columns to include the SELECT clause. 

70 

71 This does not include columns required only by `Postprocessing` and columns 

72 in `SqlJoinsBuilder.special`, which are also always included in the SELECT 

73 clause. 

74 """ 

75 

76 distinct: bool | tuple[sqlalchemy.ColumnElement[Any], ...] = () 

77 """A representation of a DISTINCT or DISTINCT ON clause. 

78 

79 If `True`, this represents a SELECT DISTINCT. If a non-empty sequence, 

80 this represents a SELECT DISTINCT ON. If `False` or an empty sequence, 

81 there is no DISTINCT clause. 

82 """ 

83 

84 group_by: tuple[sqlalchemy.ColumnElement[Any], ...] = () 

85 """A representation of a GROUP BY clause. 

86 

87 If not-empty, a GROUP BY clause with these columns is added. This 

88 generally requires that every `sqlalchemy.ColumnElement` held in the nested 

89 `joins` builder that is part of `columns` must either be part of `group_by` 

90 or hold an aggregate function. 

91 """ 

92 

93 EMPTY_COLUMNS_NAME: ClassVar[str] = "IGNORED" 

94 """Name of the column added to a SQL SELECT clause in order to construct 

95 queries that have no real columns. 

96 """ 

97 

98 EMPTY_COLUMNS_TYPE: ClassVar[type] = sqlalchemy.Boolean 

99 """Type of the column added to a SQL SELECT clause in order to construct 

100 queries that have no real columns. 

101 """ 

102 

103 def copy(self) -> SqlSelectBuilder: 

104 """Return a copy that can be safely mutated without affecting the 

105 original. 

106 """ 

107 return dataclasses.replace(self, joins=self.joins.copy(), columns=self.columns.copy()) 

108 

109 @classmethod 

110 def handle_empty_columns( 

111 cls, columns: list[sqlalchemy.sql.ColumnElement] 

112 ) -> list[sqlalchemy.ColumnElement]: 

113 """Handle the edge case where a SELECT statement has no columns, by 

114 adding a literal column that should be ignored. 

115 

116 Parameters 

117 ---------- 

118 columns : `list` [ `sqlalchemy.ColumnElement` ] 

119 List of SQLAlchemy column objects. This may have no elements when 

120 this method is called, and will always have at least one element 

121 when it returns. 

122 

123 Returns 

124 ------- 

125 columns : `list` [ `sqlalchemy.ColumnElement` ] 

126 The same list that was passed in, after any modification. 

127 """ 

128 if not columns: 

129 columns.append(sqlalchemy.sql.literal(True).label(cls.EMPTY_COLUMNS_NAME)) 

130 return columns 

131 

132 def select(self, postprocessing: Postprocessing | None) -> sqlalchemy.Select: 

133 """Transform this builder into a SQLAlchemy representation of a SELECT 

134 query. 

135 

136 Parameters 

137 ---------- 

138 postprocessing : `Postprocessing` 

139 Struct representing post-query processing in Python, which may 

140 require additional columns in the query results. 

141 

142 Returns 

143 ------- 

144 select : `sqlalchemy.Select` 

145 SQLAlchemy SELECT statement. 

146 """ 

147 assert not (self.distinct and self.group_by), "At most one of distinct and group_by can be set." 

148 sql_columns: list[sqlalchemy.ColumnElement[Any]] = [] 

149 for logical_table, field in self.columns: 

150 name = self.columns.get_qualified_name(logical_table, field) 

151 if field is None: 

152 assert logical_table is not qt.ANY_DATASET 

153 sql_columns.append(self.joins.dimension_keys[logical_table][0].label(name)) 

154 else: 

155 name = self.joins.db.name_shrinker.shrink(name) 

156 if self.columns.is_timespan(logical_table, field): 

157 sql_columns.extend(self.joins.timespans[logical_table].flatten(name)) 

158 else: 

159 sql_columns.append(self.joins.fields[logical_table][field].label(name)) 

160 if postprocessing is not None: 

161 for element in postprocessing.iter_missing(self.columns): 

162 sql_columns.append( 

163 self.joins.fields[element.name]["region"].label( 

164 self.joins.db.name_shrinker.shrink( 

165 self.columns.get_qualified_name(element.name, "region") 

166 ) 

167 ) 

168 ) 

169 for label, sql_column in self.joins.special.items(): 

170 sql_columns.append(sql_column.label(label)) 

171 self.handle_empty_columns(sql_columns) 

172 result = sqlalchemy.select(*sql_columns) 

173 if self.joins.from_clause is not None: 

174 result = result.select_from(self.joins.from_clause) 

175 if self.distinct is True: 

176 result = result.distinct() 

177 elif self.distinct: 

178 result = result.distinct(*self.distinct) 

179 if self.group_by: 

180 result = result.group_by(*self.group_by) 

181 if self.joins.where_terms: 

182 result = result.where(*self.joins.where_terms) 

183 return result 

184 

185 def join(self, other: SqlJoinsBuilder) -> SqlSelectBuilder: 

186 """Join tables, subqueries, and WHERE clauses from another query into 

187 this one, in place. 

188 

189 Parameters 

190 ---------- 

191 other : `SqlJoinsBuilder` 

192 Object holding the FROM and WHERE clauses to add to this one. 

193 JOIN ON clauses are generated via the dimension keys in common. 

194 

195 Returns 

196 ------- 

197 self : `SqlSelectBuilder` 

198 This `SqlSelectBuilder` instance (never a copy); returned to enable 

199 method-chaining. 

200 """ 

201 self.joins.join(other) 

202 return self 

203 

204 def into_joins_builder( 

205 self, cte: bool = False, force: bool = False, *, postprocessing: Postprocessing | None 

206 ) -> SqlJoinsBuilder: 

207 """Convert this builder into a `SqlJoinsBuilder`, nesting it in a 

208 subquery or common table expression only if needed to apply DISTINCT or 

209 GROUP BY clauses. 

210 

211 This method consumes ``self``. 

212 

213 Parameters 

214 ---------- 

215 cte : `bool`, optional 

216 If `True`, nest via a common table expression instead of a 

217 subquery. 

218 force : `bool`, optional 

219 If `True`, nest via a subquery or common table expression even if 

220 there is no DISTINCT or GROUP BY. 

221 postprocessing : `Postprocessing` 

222 Struct representing post-query processing in Python, which may 

223 require additional columns in the query results. 

224 

225 Returns 

226 ------- 

227 joins_builder : `SqlJoinsBuilder` 

228 SqlJoinsBuilder` with at least all columns in `columns` available. 

229 This may or may not be the `joins` attribute of this object. 

230 """ 

231 if force or self.distinct or self.group_by: 

232 sql_from_clause = ( 

233 self.select(postprocessing).cte() if cte else self.select(postprocessing).subquery() 

234 ) 

235 return SqlJoinsBuilder(db=self.joins.db, from_clause=sql_from_clause).extract_columns( 

236 self.columns, postprocessing, special=self.joins.special.keys() 

237 ) 

238 return self.joins 

239 

240 def nested( 

241 self, cte: bool = False, force: bool = False, *, postprocessing: Postprocessing | None 

242 ) -> SqlSelectBuilder: 

243 """Convert this builder into a `SqlSelectBuilder` that is guaranteed to 

244 have no DISTINCT or GROUP BY, nesting it in a subquery or common table 

245 expression only if needed to apply any current DISTINCT or GROUP BY 

246 clauses. 

247 

248 This method consumes ``self``. 

249 

250 Parameters 

251 ---------- 

252 cte : `bool`, optional 

253 If `True`, nest via a common table expression instead of a 

254 subquery. 

255 force : `bool`, optional 

256 If `True`, nest via a subquery or common table expression even if 

257 there is no DISTINCT or GROUP BY. 

258 postprocessing : `Postprocessing` 

259 Struct representing post-query processing in Python, which may 

260 require additional columns in the query results. 

261 

262 Returns 

263 ------- 

264 builder : `SqlSelectBuilder` 

265 `SqlSelectBuilder` with at least all columns in `columns` 

266 available. This may or may not be the ``builder`` attribute of 

267 this object. 

268 """ 

269 return SqlSelectBuilder( 

270 self.into_joins_builder(cte=cte, force=force, postprocessing=postprocessing), columns=self.columns 

271 ) 

272 

273 def union_subquery( 

274 self, others: Iterable[SqlSelectBuilder], postprocessing: Postprocessing | None = None 

275 ) -> SqlJoinsBuilder: 

276 """Combine this builder with others to make a SELECT UNION subquery. 

277 

278 Parameters 

279 ---------- 

280 others : `~collections.abc.Iterable` [ `SqlSelectBuilder` ] 

281 Other query builders to union with. Their `columns` attributes 

282 must be the same as those of ``self``. 

283 postprocessing : `Postprocessing` 

284 Struct representing post-query processing in Python, which may 

285 require additional columns in the query results. 

286 

287 Returns 

288 ------- 

289 joins_builder : `SqlJoinsBuilder` 

290 `SqlJoinsBuilder` with at least all columns in `columns` available. 

291 This may or may not be the `joins` attribute of this object. 

292 """ 

293 select0 = self.select(postprocessing) 

294 other_selects = [other.select(postprocessing) for other in others] 

295 return SqlJoinsBuilder( 

296 db=self.joins.db, 

297 from_clause=select0.union(*other_selects).subquery(), 

298 ).extract_columns(self.columns, postprocessing) 

299 

300 

301@dataclasses.dataclass(kw_only=True) 

302class SqlColumns: 

303 """A struct that holds SQLAlchemy columns objects for a query, categorized 

304 by type. 

305 

306 This class mostly serves as a base class for `SqlJoinsBuilder`, but unlike 

307 `SqlJoinsBuilder` it is capable of representing columns in a compound 

308 SELECT (i.e. UNION or UNION ALL) clause, not just a FROM clause. 

309 """ 

310 

311 db: Database 

312 """Object that abstracts over the database engine.""" 

313 

314 dimension_keys: NonemptyMapping[str, list[sqlalchemy.ColumnElement]] = dataclasses.field( 

315 default_factory=lambda: NonemptyMapping(list) 

316 ) 

317 """Mapping of dimension keys included in the FROM clause. 

318 

319 Nested lists correspond to different tables that have the same dimension 

320 key (which should all have equal values for all result rows). 

321 """ 

322 

323 fields: NonemptyMapping[str | qt.AnyDatasetType, dict[str, sqlalchemy.ColumnElement[Any]]] = ( 

324 dataclasses.field(default_factory=lambda: NonemptyMapping(dict)) 

325 ) 

326 """Mapping of columns that are neither dimension keys nor timespans. 

327 

328 Inner and outer keys correspond to the "logical table" and "field" pairs 

329 that result from iterating over `~.queries.tree.ColumnSet`, with the former 

330 either a dimension element name or dataset type name. 

331 """ 

332 

333 timespans: dict[str | qt.AnyDatasetType, TimespanDatabaseRepresentation] = dataclasses.field( 

334 default_factory=dict 

335 ) 

336 """Mapping of timespan columns. 

337 

338 Keys are "logical tables" - dimension element names or dataset type names. 

339 """ 

340 

341 special: dict[str, sqlalchemy.ColumnElement[Any]] = dataclasses.field(default_factory=dict) 

342 """Special columns that are available from the FROM clause and 

343 automatically included in the SELECT clause when this join builder is 

344 nested within a `SqlSelectBuilder`. 

345 

346 These columns are not part of the dimension universe and are not associated 

347 with a dataset. They are never returned to users, even if they may be 

348 included in raw SQL results. 

349 """ 

350 

351 def extract_dimensions( 

352 self, dimensions: Iterable[str], *, column_collection: sqlalchemy.ColumnCollection, **kwargs: str 

353 ) -> Self: 

354 """Add dimension key columns from `from_clause` into `dimension_keys`. 

355 

356 Parameters 

357 ---------- 

358 dimensions : `~collections.abc.Iterable` [ `str` ] 

359 Names of dimensions to include, assuming that their names in 

360 ``sql_columns`` are just the dimension names. 

361 column_collection : `sqlalchemy.ColumnCollection` 

362 SQLAlchemy column collection to extract from. 

363 

364 **kwargs : `str` 

365 Additional dimensions to include, with the names in ``sql_columns`` 

366 as keys and the actual dimension names as values. 

367 

368 Returns 

369 ------- 

370 self : `QueryColumns` 

371 This `QueryColumns` instance (never a copy). Provided to enable 

372 method chaining. 

373 """ 

374 for dimension_name in dimensions: 

375 self.dimension_keys[dimension_name].append(column_collection[dimension_name]) 

376 for k, v in kwargs.items(): 

377 self.dimension_keys[v].append(column_collection[k]) 

378 return self 

379 

380 def extract_columns( 

381 self, 

382 columns: qt.ColumnSet, 

383 postprocessing: Postprocessing | None = None, 

384 special: Iterable[str] = (), 

385 *, 

386 column_collection: sqlalchemy.ColumnCollection, 

387 ) -> Self: 

388 """Add columns from `from_clause` into `dimension_keys`. 

389 

390 Parameters 

391 ---------- 

392 columns : `.queries.tree.ColumnSet` 

393 Columns to include, assuming that 

394 `.queries.tree.ColumnSet.get_qualified_name` corresponds to the 

395 name used in ``sql_columns`` (after name shrinking). 

396 postprocessing : `Postprocessing`, optional 

397 Postprocessing object whose needed columns should also be included. 

398 special : `~collections.abc.Iterable` [ `str` ], optional 

399 Additional special columns to extract. 

400 column_collection : `sqlalchemy.ColumnCollection` 

401 SQLAlchemy column collection to extract from. 

402 

403 Returns 

404 ------- 

405 self : `QueryColumns` 

406 This `QueryColumns` instance (never a copy). Provided to enable 

407 method chaining. 

408 """ 

409 for logical_table, field in columns: 

410 name = columns.get_qualified_name(logical_table, field) 

411 if field is None: 

412 assert logical_table is not qt.ANY_DATASET 

413 self.dimension_keys[logical_table].append(column_collection[name]) 

414 else: 

415 name = self.db.name_shrinker.shrink(name) 

416 if columns.is_timespan(logical_table, field): 

417 self.timespans[logical_table] = self.db.getTimespanRepresentation().from_columns( 

418 column_collection, name 

419 ) 

420 else: 

421 self.fields[logical_table][field] = column_collection[name] 

422 if postprocessing is not None: 

423 for element in postprocessing.iter_missing(columns): 

424 self.fields[element.name]["region"] = column_collection[ 

425 self.db.name_shrinker.shrink(columns.get_qualified_name(element.name, "region")) 

426 ] 

427 for name in postprocessing.spatial_expression_filtering: 

428 self.special[name] = column_collection[name] 

429 if postprocessing.check_validity_match_count: 

430 self.special[postprocessing.VALIDITY_MATCH_COUNT] = column_collection[ 

431 postprocessing.VALIDITY_MATCH_COUNT 

432 ] 

433 for name in special: 

434 self.special[name] = column_collection[name] 

435 return self 

436 

437 

438@dataclasses.dataclass(kw_only=True) 

439class SqlJoinsBuilder(SqlColumns): 

440 """A struct used to represent the FROM and WHERE clauses of an 

441 under-construction SQL SELECT query. 

442 

443 This object's methods frequently "consume" ``self``, by either returning 

444 it after modification or returning related copy that may share state with 

445 the original. Users should be careful never to use consumed instances, and 

446 are recommended to reuse the same variable name to make that hard to do 

447 accidentally. 

448 """ 

449 

450 from_clause: sqlalchemy.FromClause | None = None 

451 """SQLAlchemy representation of the FROM clause. 

452 

453 This is initialized to `None` but in almost all cases is immediately 

454 replaced. 

455 """ 

456 

457 where_terms: list[sqlalchemy.ColumnElement[bool]] = dataclasses.field(default_factory=list) 

458 """Sequence of WHERE clause terms to be combined with AND.""" 

459 

460 def copy(self) -> SqlJoinsBuilder: 

461 """Return a copy that can be safely mutated without affecting the 

462 original. 

463 """ 

464 return dataclasses.replace( 

465 self, 

466 where_terms=self.where_terms.copy(), 

467 dimension_keys=self.dimension_keys.copy(), 

468 fields=self.fields.copy(), 

469 timespans=self.timespans.copy(), 

470 special=self.special.copy(), 

471 ) 

472 

473 def extract_dimensions( 

474 self, 

475 dimensions: Iterable[str], 

476 *, 

477 column_collection: sqlalchemy.ColumnCollection | None = None, 

478 **kwargs: str, 

479 ) -> Self: 

480 """Add dimension key columns from `from_clause` into `dimension_keys`. 

481 

482 Parameters 

483 ---------- 

484 dimensions : `~collections.abc.Iterable` [ `str` ] 

485 Names of dimensions to include, assuming that their names in 

486 ``sql_columns`` are just the dimension names. 

487 column_collection : `sqlalchemy.ColumnCollection`, optional 

488 SQLAlchemy column collection to extract from. Defaults to 

489 ``self.from_clause.columns``. 

490 **kwargs : `str` 

491 Additional dimensions to include, with the names in ``sql_columns`` 

492 as keys and the actual dimension names as values. 

493 

494 Returns 

495 ------- 

496 self : `SqlJoinsBuilder` 

497 This `SqlJoinsBuilder` instance (never a copy). Provided to enable 

498 method chaining. 

499 """ 

500 if column_collection is None: 

501 assert self.from_clause is not None, "Cannot extract columns with no FROM clause." 

502 column_collection = self.from_clause.columns 

503 return super().extract_dimensions(dimensions, column_collection=column_collection, **kwargs) 

504 

505 def extract_columns( 

506 self, 

507 columns: qt.ColumnSet, 

508 postprocessing: Postprocessing | None = None, 

509 special: Iterable[str] = (), 

510 *, 

511 column_collection: sqlalchemy.ColumnCollection | None = None, 

512 ) -> Self: 

513 """Add columns from `from_clause` into `dimension_keys`. 

514 

515 Parameters 

516 ---------- 

517 columns : `.queries.tree.ColumnSet` 

518 Columns to include, assuming that 

519 `.queries.tree.ColumnSet.get_qualified_name` corresponds to the 

520 name used in ``sql_columns`` (after name shrinking). 

521 postprocessing : `Postprocessing`, optional 

522 Postprocessing object whose needed columns should also be included. 

523 special : `~collections.abc.Iterable` [ `str` ], optional 

524 Additional special columns to extract. 

525 column_collection : `sqlalchemy.ColumnCollection`, optional 

526 SQLAlchemy column collection to extract from. Defaults to 

527 ``self.from_clause.columns``. 

528 

529 Returns 

530 ------- 

531 self : `SqlJoinsBuilder` 

532 This `SqlJoinsBuilder` instance (never a copy). Provided to enable 

533 method chaining. 

534 """ 

535 if column_collection is None: 

536 assert self.from_clause is not None, "Cannot extract columns with no FROM clause." 

537 column_collection = self.from_clause.columns 

538 return super().extract_columns(columns, postprocessing, special, column_collection=column_collection) 

539 

540 def join(self, other: SqlJoinsBuilder) -> SqlJoinsBuilder: 

541 """Combine this `SqlJoinsBuilder` with another via an INNER JOIN on 

542 dimension keys. 

543 

544 This method consumes ``self``. 

545 

546 Parameters 

547 ---------- 

548 other : `SqlJoinsBuilder` 

549 Other join builder to combine with this one. 

550 

551 Returns 

552 ------- 

553 joined : `SqlJoinsBuilder` 

554 A `SqlJoinsBuilder` with all columns present in either operand, 

555 with its `from_clause` representing a SQL INNER JOIN where the 

556 dimension key columns common to both operands are constrained to be 

557 equal. If either operand does not have `from_clause`, the other's 

558 is used. The `where_terms` of the two operands are concatenated, 

559 representing a logical AND (with no attempt at deduplication). 

560 """ 

561 join_on: list[sqlalchemy.ColumnElement] = [] 

562 for dimension_name in other.dimension_keys.keys(): 

563 if dimension_name in self.dimension_keys: 

564 for column1, column2 in itertools.product( 

565 self.dimension_keys[dimension_name], other.dimension_keys[dimension_name] 

566 ): 

567 join_on.append(column1 == column2) 

568 self.dimension_keys[dimension_name].extend(other.dimension_keys[dimension_name]) 

569 if self.from_clause is None: 

570 self.from_clause = other.from_clause 

571 elif other.from_clause is not None: 

572 join_on_sql: sqlalchemy.ColumnElement[bool] 

573 match len(join_on): 

574 case 0: 

575 join_on_sql = sqlalchemy.true() 

576 case 1: 

577 (join_on_sql,) = join_on 

578 case _: 

579 join_on_sql = sqlalchemy.and_(*join_on) 

580 self.from_clause = self.from_clause.join(other.from_clause, onclause=join_on_sql) 

581 for logical_table, fields in other.fields.items(): 

582 self.fields[logical_table].update(fields) 

583 self.timespans.update(other.timespans) 

584 self.special.update(other.special) 

585 self.where_terms += other.where_terms 

586 return self 

587 

588 def where(self, *args: sqlalchemy.ColumnElement[bool]) -> SqlJoinsBuilder: 

589 """Add a WHERE clause term. 

590 

591 Parameters 

592 ---------- 

593 *args : `sqlalchemy.ColumnElement` 

594 SQL boolean column expressions to be combined with AND. 

595 

596 Returns 

597 ------- 

598 self : `SqlJoinsBuilder` 

599 This `SqlJoinsBuilder` instance (never a copy). Provided to enable 

600 method chaining. 

601 """ 

602 self.where_terms.extend(args) 

603 return self 

604 

605 def to_select_builder( 

606 self, 

607 columns: qt.ColumnSet, 

608 distinct: bool | Sequence[sqlalchemy.ColumnElement[Any]] = (), 

609 group_by: Sequence[sqlalchemy.ColumnElement[Any]] = (), 

610 ) -> SqlSelectBuilder: 

611 """Convert this join builder into a `SqlSelectBuilder` by providing 

612 SELECT clause columns and optional DISTINCT or GROUP BY clauses. 

613 

614 This method consumes ``self``. 

615 

616 Parameters 

617 ---------- 

618 columns : `~.queries.tree.ColumnSet` 

619 Regular columns to include in the SELECT clause. 

620 distinct : `bool` or `~collections.abc.Sequence` [ \ 

621 `sqlalchemy.ColumnElement` ], optional 

622 Specification of the DISTINCT clause (see 

623 `SqlSelectBuilder.distinct`). 

624 group_by : `~collections.abc.Sequence` [ \ 

625 `sqlalchemy.ColumnElement` ], optional 

626 Specification of the GROUP BY clause (see 

627 `SqlSelectBuilder.group_by`). 

628 

629 Returns 

630 ------- 

631 builder : `SqlSelectBuilder` 

632 New query builder. 

633 """ 

634 return SqlSelectBuilder( 

635 self, 

636 columns, 

637 distinct=distinct if type(distinct) is bool else tuple(distinct), 

638 group_by=tuple(group_by), 

639 ) 

640 

641 

642def make_table_spec( 

643 columns: qt.ColumnSet, db: Database, postprocessing: Postprocessing | None, *, make_indices: bool = False 

644) -> ddl.TableSpec: 

645 """Make a specification that can be used to create a table to store 

646 this query's outputs. 

647 

648 Parameters 

649 ---------- 

650 columns : `lsst.daf.butler.queries.tree.ColumnSet` 

651 Columns to include in the table. 

652 db : `Database` 

653 Database engine and connection abstraction. 

654 postprocessing : `Postprocessing` 

655 Struct representing post-query processing in Python, which may 

656 require additional columns in the query results. 

657 make_indices : `bool`, optional 

658 If `True` add indices for groups of columns. 

659 

660 Returns 

661 ------- 

662 spec : `.ddl.TableSpec` 

663 Table specification for this query's result columns (including 

664 those from `postprocessing` and `SqlJoinsBuilder.special`). 

665 """ 

666 indices = _make_table_indices(columns.dimensions) if make_indices else [] 

667 results = ddl.TableSpec( 

668 [ 

669 columns.get_column_spec(logical_table, field).to_sql_spec(name_shrinker=db.name_shrinker) 

670 for logical_table, field in columns 

671 ], 

672 indexes=indices, 

673 ) 

674 if postprocessing: 

675 for element in postprocessing.iter_missing(columns): 

676 results.fields.add( 

677 ddl.FieldSpec.for_region( 

678 db.name_shrinker.shrink(columns.get_qualified_name(element.name, "region")) 

679 ) 

680 ) 

681 for name in postprocessing.spatial_expression_filtering: 

682 results.fields.add(ddl.FieldSpec(name, dtype=sqlalchemy.types.LargeBinary, nullable=True)) 

683 if not results.fields: 

684 results.fields.add( 

685 ddl.FieldSpec(name=SqlSelectBuilder.EMPTY_COLUMNS_NAME, dtype=SqlSelectBuilder.EMPTY_COLUMNS_TYPE) 

686 ) 

687 return results 

688 

689 

690def _make_table_indices(dimensions: DimensionGroup) -> list[ddl.IndexSpec]: 

691 index_columns: list[SortedSequenceSet] = [] 

692 for dimension in dimensions.required: 

693 minimal_group = dimensions.universe[dimension].minimal_group.required 

694 

695 for idx in range(len(index_columns)): 

696 if index_columns[idx] <= minimal_group: 

697 index_columns[idx] = minimal_group 

698 break 

699 else: 

700 index_columns.append(minimal_group) 

701 

702 return [ddl.IndexSpec(*columns) for columns in index_columns]