# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining Apdb class and related methods.
"""

__all__ = ["ApdbConfig", "Apdb", "Visit"]

from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime
import logging
import os

import numpy as np
import pandas
import sqlalchemy
from sqlalchemy import (func, sql)
from sqlalchemy.pool import NullPool

import lsst.geom as geom
import lsst.afw.table as afwTable
import lsst.pex.config as pexConfig
from lsst.pex.config import Field, ChoiceField, ListField

from . import timer, apdbSchema


_LOG = logging.getLogger(__name__.partition(".")[2])  # strip leading "lsst."


class Timer(object):
    """Timer class defining a context manager that tracks execution timing.

    Typical use:

        with Timer("timer_name"):
            do_something

    On exit from the block it will print the elapsed time.

    See also :py:mod:`timer` module.
    """
    def __init__(self, name, do_logging=True, log_before_cursor_execute=False):
        self._log_before_cursor_execute = log_before_cursor_execute
        self._do_logging = do_logging
        self._timer1 = timer.Timer(name)
        self._timer2 = timer.Timer(name + " (before/after cursor)")

    def __enter__(self):
        """Enter context, start timer."""
# event.listen(engine.Engine, "before_cursor_execute", self._start_timer)
# event.listen(engine.Engine, "after_cursor_execute", self._stop_timer)
        self._timer1.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit context, stop and dump timer."""
        if exc_type is None:
            self._timer1.stop()
            if self._do_logging:
                self._timer1.dump()
# event.remove(engine.Engine, "before_cursor_execute", self._start_timer)
# event.remove(engine.Engine, "after_cursor_execute", self._stop_timer)
        return False

    def _start_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Start counting."""
        if self._log_before_cursor_execute:
            _LOG.info("before_cursor_execute")
        self._timer2.start()

    def _stop_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Stop counting."""
        self._timer2.stop()
        if self._do_logging:
            self._timer2.dump()


def _split(seq, nItems):
    """Split a sequence into smaller sequences of at most ``nItems`` elements."""
    seq = list(seq)
    while seq:
        yield seq[:nItems]
        del seq[:nItems]
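
# A small illustrative sketch of how ``_split`` behaves (example values only,
# not executed by the module):
#
#     list(_split(range(5), 2))   # -> [[0, 1], [2, 3], [4]]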


# Information about single visit
Visit = namedtuple('Visit', 'visitId visitTime lastObjectId lastSourceId')


@contextmanager
def _ansi_session(engine):
    """Return a connection, making sure that ANSI mode is set for MySQL."""
    with engine.begin() as conn:
        if engine.name == 'mysql':
            conn.execute(sql.text("SET SESSION SQL_MODE = 'ANSI'"))
        yield conn
    return
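
# Typical use of ``_ansi_session``, as a sketch (assumes an existing
# SQLAlchemy engine in ``engine``):
#
#     with _ansi_session(engine) as conn:
#         conn.execute(sql.text("SELECT 1"))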


def _data_file_name(basename):
    """Return path name of a data file."""
    return os.path.join("${DAX_APDB_DIR}", "data", basename)


class ApdbConfig(pexConfig.Config):

    db_url = Field(dtype=str, doc="SQLAlchemy database connection URI")
    isolation_level = ChoiceField(dtype=str,
                                  doc="Transaction isolation level",
                                  allowed={"READ_COMMITTED": "Read committed",
                                           "READ_UNCOMMITTED": "Read uncommitted",
                                           "REPEATABLE_READ": "Repeatable read",
                                           "SERIALIZABLE": "Serializable"},
                                  default="READ_COMMITTED",
                                  optional=True)
    connection_pool = Field(dtype=bool,
                            doc=("If False then disable SQLAlchemy connection pool. "
                                 "Do not use connection pool when forking."),
                            default=True)
    connection_timeout = Field(dtype=float,
                               doc="Maximum time to wait for database lock to be released before "
                                   "exiting. Defaults to SQLAlchemy default if not set.",
                               default=None,
                               optional=True)
    sql_echo = Field(dtype=bool,
                     doc="If True then pass SQLAlchemy echo option.",
                     default=False)
    dia_object_index = ChoiceField(dtype=str,
                                   doc="Indexing mode for DiaObject table",
                                   allowed={'baseline': "Index defined in baseline schema",
                                            'pix_id_iov': "(pixelId, objectId, iovStart) PK",
                                            'last_object_table': "Separate DiaObjectLast table"},
                                   default='baseline')
    dia_object_nightly = Field(dtype=bool,
                               doc="Use separate nightly table for DiaObject",
                               default=False)
    read_sources_months = Field(dtype=int,
                                doc="Number of months of history to read from DiaSource",
                                default=12)
    read_forced_sources_months = Field(dtype=int,
                                       doc="Number of months of history to read from DiaForcedSource",
                                       default=12)
    dia_object_columns = ListField(dtype=str,
                                   doc="List of columns to read from DiaObject, by default read all columns",
                                   default=[])
    object_last_replace = Field(dtype=bool,
                                doc="If True (default) then use \"upsert\" for DiaObjectLast table",
                                default=True)
    schema_file = Field(dtype=str,
                        doc="Location of (YAML) configuration file with standard schema",
                        default=_data_file_name("apdb-schema.yaml"))
    extra_schema_file = Field(dtype=str,
                              doc="Location of (YAML) configuration file with extra schema",
                              default=_data_file_name("apdb-schema-extra.yaml"))
    column_map = Field(dtype=str,
                       doc="Location of (YAML) configuration file with column mapping",
                       default=_data_file_name("apdb-afw-map.yaml"))
    prefix = Field(dtype=str,
                   doc="Prefix to add to table names and index names",
                   default="")
    explain = Field(dtype=bool,
                    doc="If True then run EXPLAIN SQL command on each executed query",
                    default=False)
    timer = Field(dtype=bool,
                  doc="If True then print/log timing information",
                  default=False)
    diaobject_index_hint = Field(dtype=str,
                                 doc="Name of the index to use with Oracle index hint",
                                 default=None,
                                 optional=True)
    dynamic_sampling_hint = Field(dtype=int,
                                  doc="If non-zero then use dynamic_sampling hint",
                                  default=0)
    cardinality_hint = Field(dtype=int,
                             doc="If non-zero then use cardinality hint",
                             default=0)

    def validate(self):
        super().validate()
        if self.isolation_level == "READ_COMMITTED" and self.db_url.startswith("sqlite"):
            raise ValueError("Attempting to run Apdb with SQLITE and isolation level 'READ_COMMITTED'. "
                             "Use 'READ_UNCOMMITTED' instead.")


class Apdb(object):
    """Interface to L1 database, hides all database access details.

    The implementation is configured via the standard ``pex_config`` mechanism
    using the `ApdbConfig` configuration class. For examples of different
    configurations check the ``config/`` folder.

    Parameters
    ----------
    config : `ApdbConfig`
        Configuration object.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard
        APDB schema.
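
    Examples
    --------
    A minimal construction sketch; the database URL and file name are
    illustrative (SQLite requires the ``READ_UNCOMMITTED`` isolation level,
    see `ApdbConfig.validate`)::

        config = ApdbConfig()
        config.db_url = "sqlite:///apdb.db"
        config.isolation_level = "READ_UNCOMMITTED"
        apdb = Apdb(config)
        apdb.makeSchema()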

    """

    def __init__(self, config, afw_schemas=None):

        self.config = config

        # logging.getLogger('sqlalchemy').setLevel(logging.INFO)
        _LOG.debug("APDB Configuration:")
        _LOG.debug("    dia_object_index: %s", self.config.dia_object_index)
        _LOG.debug("    dia_object_nightly: %s", self.config.dia_object_nightly)
        _LOG.debug("    read_sources_months: %s", self.config.read_sources_months)
        _LOG.debug("    read_forced_sources_months: %s", self.config.read_forced_sources_months)
        _LOG.debug("    dia_object_columns: %s", self.config.dia_object_columns)
        _LOG.debug("    object_last_replace: %s", self.config.object_last_replace)
        _LOG.debug("    schema_file: %s", self.config.schema_file)
        _LOG.debug("    extra_schema_file: %s", self.config.extra_schema_file)
        _LOG.debug("    column_map: %s", self.config.column_map)
        _LOG.debug("    schema prefix: %s", self.config.prefix)

        # engine is reused between multiple processes, make sure that we don't
        # share connections by disabling pool (by using NullPool class)
        kw = dict(echo=self.config.sql_echo)
        conn_args = dict()
        if not self.config.connection_pool:
            kw.update(poolclass=NullPool)
        if self.config.isolation_level is not None:
            kw.update(isolation_level=self.config.isolation_level)
        if self.config.connection_timeout is not None:
            if self.config.db_url.startswith("sqlite"):
                conn_args.update(timeout=self.config.connection_timeout)
            elif self.config.db_url.startswith(("postgresql", "mysql")):
                conn_args.update(connect_timeout=self.config.connection_timeout)
        kw.update(connect_args=conn_args)
        self._engine = sqlalchemy.create_engine(self.config.db_url, **kw)

        self._schema = apdbSchema.ApdbSchema(engine=self._engine,
                                             dia_object_index=self.config.dia_object_index,
                                             dia_object_nightly=self.config.dia_object_nightly,
                                             schema_file=self.config.schema_file,
                                             extra_schema_file=self.config.extra_schema_file,
                                             column_map=self.config.column_map,
                                             afw_schemas=afw_schemas,
                                             prefix=self.config.prefix)

    def lastVisit(self):
        """Returns last visit information or `None` if visits table is empty.

        Visits table is used by ``ap_proto`` to track visit information; it is
        not a part of the regular APDB schema.

        Returns
        -------
        visit : `Visit` or `None`
            Last stored visit info or `None` if there was nothing stored yet.
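
        Examples
        --------
        A usage sketch (assumes a configured `Apdb` instance ``apdb``)::

            visit = apdb.lastVisit()
            if visit is not None:
                print(visit.visitId, visit.visitTime)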

        """

        with self._engine.begin() as conn:

            stmnt = sql.select([sql.func.max(self._schema.visits.c.visitId),
                                sql.func.max(self._schema.visits.c.visitTime)])
            res = conn.execute(stmnt)
            row = res.fetchone()
            if row[0] is None:
                return None

            visitId = row[0]
            visitTime = row[1]
            _LOG.info("lastVisit: visitId: %s visitTime: %s (%s)", visitId,
                      visitTime, type(visitTime))

            # get max IDs from corresponding tables
            stmnt = sql.select([sql.func.max(self._schema.objects.c.diaObjectId)])
            lastObjectId = conn.scalar(stmnt)
            stmnt = sql.select([sql.func.max(self._schema.sources.c.diaSourceId)])
            lastSourceId = conn.scalar(stmnt)

            return Visit(visitId=visitId, visitTime=visitTime,
                         lastObjectId=lastObjectId, lastSourceId=lastSourceId)

    def saveVisit(self, visitId, visitTime):
        """Store visit information.

        This method is only used by ``ap_proto`` script from ``l1dbproto``
        and is not intended for production pipelines.

        Parameters
        ----------
        visitId : `int`
            Visit identifier.
        visitTime : `datetime.datetime`
            Visit timestamp.
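
        Examples
        --------
        A sketch with illustrative values::

            from datetime import datetime
            apdb.saveVisit(42, datetime(2020, 1, 1, 12, 0, 0))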

        """

        ins = self._schema.visits.insert().values(visitId=visitId,
                                                  visitTime=visitTime)
        self._engine.execute(ins)
    def tableRowCount(self):
        """Returns dictionary with the table names and row counts.

        Used by ``ap_proto`` to keep track of the size of the database tables.
        Depending on database technology this could be an expensive operation.

        Returns
        -------
        row_counts : `dict`
            Dict where key is a table name and value is a row count.
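
        Examples
        --------
        A sketch of the expected result shape; table names come from the APDB
        schema, the counts are illustrative::

            counts = apdb.tableRowCount()
            # e.g. {"DiaObject": 10, "DiaSource": 120, "DiaForcedSource": 0}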

        """
        res = {}
        tables = [self._schema.objects, self._schema.sources, self._schema.forcedSources]
        if self.config.dia_object_index == 'last_object_table':
            tables.append(self._schema.objects_last)
        for table in tables:
            stmt = sql.select([func.count()]).select_from(table)
            count = self._engine.scalar(stmt)
            res[table.name] = count

        return res

    def getDiaObjects(self, pixel_ranges, return_pandas=False):
        """Returns catalog of DiaObject instances from given region.

        Objects are searched based on pixelization index; the region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns a :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on column map
        passed to constructor) but types or units are not changed.

        Returns only the last version of each DiaObject.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `pandas.DataFrame`
            Catalog containing DiaObject records.
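
        Examples
        --------
        A sketch with illustrative pixel ranges; the upper bound of each
        range is treated as exclusive (note the ``upper -= 1`` in the
        implementation below)::

            pixel_ranges = [(10000, 10010), (10020, 10021)]
            objects = apdb.getDiaObjects(pixel_ranges, return_pandas=True)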

        """

        # decide what columns we need
        if self.config.dia_object_index == 'last_object_table':
            table = self._schema.objects_last
        else:
            table = self._schema.objects
        if not self.config.dia_object_columns:
            query = table.select()
        else:
            columns = [table.c[col] for col in self.config.dia_object_columns]
            query = sql.select(columns)

        if self.config.diaobject_index_hint:
            val = self.config.diaobject_index_hint
            query = query.with_hint(table, 'index_rs_asc(%(name)s "{}")'.format(val))
        if self.config.dynamic_sampling_hint > 0:
            val = self.config.dynamic_sampling_hint
            query = query.with_hint(table, 'dynamic_sampling(%(name)s {})'.format(val))
        if self.config.cardinality_hint > 0:
            val = self.config.cardinality_hint
            query = query.with_hint(table, 'FIRST_ROWS_1 cardinality(%(name)s {})'.format(val))

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # select latest version of objects
        if self.config.dia_object_index != 'last_object_table':
            query = query.where(table.c.validityEnd == None)  # noqa: E711

        _LOG.debug("query: %s", query)

        if self.config.explain:
            # run the same query with explain
            self._explain(query, self._engine)

        # execute select
        with Timer('DiaObject select', self.config.timer):
            with self._engine.begin() as conn:
                if return_pandas:
                    objects = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    objects = self._convertResult(res, "DiaObject")
        _LOG.debug("found %s DiaObjects", len(objects))
        return objects

    def getDiaSourcesInRegion(self, pixel_ranges, dt, return_pandas=False):
        """Returns catalog of DiaSource instances from given region.

        Sources are searched based on pixelization index; the region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns a :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on column map
        passed to constructor) but types or units are not changed.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.
        dt : `datetime.datetime`
            Time of the current visit.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        table = self._schema.sources
        query = table.select()

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # execute select
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                if return_pandas:
                    sources = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    sources = self._convertResult(res, "DiaSource")
        _LOG.debug("found %s DiaSources", len(sources))
        return sources
    def getDiaSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaSource instances given set of DiaObject IDs.

        This method returns a :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on column map
        passed to constructor) but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0 or
            when ``object_ids`` is empty.
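
        Examples
        --------
        A sketch with illustrative object IDs::

            from datetime import datetime
            sources = apdb.getDiaSources([1, 2, 3], datetime.now(),
                                         return_pandas=True)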

        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.sources
        sources = None
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):
                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaSource", sources)

        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaForcedSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaForcedSource instances matching given
        DiaObjects.

        This method returns a :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the L1 database table.
        Re-mapping of the column names may be done for some columns (based on
        column map passed to constructor) but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0
            or when ``object_ids`` is empty.
        """

        if self.config.read_forced_sources_months == 0:
            _LOG.info("Skip DiaForceSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaForceSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.forcedSources
        sources = None

        with Timer('DiaForcedSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):

                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaForcedSource", sources)

        _LOG.debug("found %s DiaForcedSources", len(sources))
        return sources

    def storeDiaObjects(self, objs, dt):
        """Store catalog of DiaObjects from current visit.

        This method takes a :doc:`/modules/lsst.afw.table/index` catalog
        whose schema must be compatible with the schema of the APDB table:

        - column names must correspond to database table columns
        - some column names are re-mapped based on column map passed to
          constructor
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from afw schema
        - this method knows how to fill interval-related columns
          (validityStart, validityEnd), they do not need to appear in
          afw schema

        Parameters
        ----------
        objs : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog with DiaObject records
        dt : `datetime.datetime`
            Time of the visit
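
        Examples
        --------
        A sketch using a `pandas.DataFrame`; the column set shown here is
        abbreviated and illustrative, a real catalog must carry all columns
        required by the APDB schema::

            objs = pandas.DataFrame({"diaObjectId": [1, 2],
                                     "pixelId": [10000, 10001]})
            apdb.storeDiaObjects(objs, datetime.now())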

        """

        if isinstance(objs, pandas.DataFrame):
            ids = sorted(objs['diaObjectId'])
        else:
            ids = sorted([obj['id'] for obj in objs])
        _LOG.debug("first object ID: %d", ids[0])

        # NOTE: workaround for sqlite, need this here to avoid
        # "database is locked" error.
        table = self._schema.objects

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            ids = ",".join(str(id) for id in ids)

            if self.config.dia_object_index == 'last_object_table':

                # insert and replace all records in LAST table, mysql and postgres have
                # non-standard features (handled in _storeObjectsAfw)
                table = self._schema.objects_last
                do_replace = self.config.object_last_replace
                # If the input data is of type Pandas, we drop the previous
                # objects regardless of the do_replace setting due to how
                # Pandas inserts objects.
                if not do_replace or isinstance(objs, pandas.DataFrame):
                    query = 'DELETE FROM "' + table.name + '" '
                    query += 'WHERE "diaObjectId" IN (' + ids + ') '

                    if self.config.explain:
                        # run the same query with explain
                        self._explain(query, conn)

                    with Timer(table.name + ' delete', self.config.timer):
                        res = conn.execute(sql.text(query))
                    _LOG.debug("deleted %s objects", res.rowcount)

                extra_columns = dict(lastNonForcedSource=dt)
                if isinstance(objs, pandas.DataFrame):
                    with Timer("DiaObjectLast insert", self.config.timer):
                        for col, data in extra_columns.items():
                            objs[col] = data
                        objs.to_sql("DiaObjectLast", conn, if_exists='append',
                                    index=False)
                else:
                    self._storeObjectsAfw(objs, conn, table, "DiaObjectLast",
                                          replace=do_replace,
                                          extra_columns=extra_columns)

            else:

                # truncate existing validity intervals
                table = self._schema.objects
                query = 'UPDATE "' + table.name + '" '
                query += "SET \"validityEnd\" = '" + str(dt) + "' "
                query += 'WHERE "diaObjectId" IN (' + ids + ') '
                query += 'AND "validityEnd" IS NULL'

                # _LOG.debug("query: %s", query)

                if self.config.explain:
                    # run the same query with explain
                    self._explain(query, conn)

                with Timer(table.name + ' truncate', self.config.timer):
                    res = conn.execute(sql.text(query))
                _LOG.debug("truncated %s intervals", res.rowcount)

            # insert new versions
            if self.config.dia_object_nightly:
                table = self._schema.objects_nightly
            else:
                table = self._schema.objects
            extra_columns = dict(lastNonForcedSource=dt, validityStart=dt,
                                 validityEnd=None)
            if isinstance(objs, pandas.DataFrame):
                with Timer("DiaObject insert", self.config.timer):
                    for col, data in extra_columns.items():
                        objs[col] = data
                    objs.to_sql("DiaObject", conn, if_exists='append',
                                index=False)
            else:
                self._storeObjectsAfw(objs, conn, table, "DiaObject",
                                      extra_columns=extra_columns)

    def storeDiaSources(self, sources):
        """Store catalog of DIASources from current visit.

        This method takes a :doc:`/modules/lsst.afw.table/index` catalog
        whose schema must be compatible with the schema of the L1 database
        table:

        - column names must correspond to database table columns
        - some column names may be re-mapped based on column map passed to
          constructor
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaSource insert", self.config.timer):
                    sources.to_sql("DiaSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.sources
                self._storeObjectsAfw(sources, conn, table, "DiaSource")

    def storeDiaForcedSources(self, sources):
        """Store a set of DIAForcedSources from current visit.

        This method takes a :doc:`/modules/lsst.afw.table/index` catalog
        whose schema must be compatible with the schema of the L1 database
        table:

        - column names must correspond to database table columns
        - some column names may be re-mapped based on column map passed to
          constructor
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaForcedSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaForcedSource insert", self.config.timer):
                    sources.to_sql("DiaForcedSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.forcedSources
                self._storeObjectsAfw(sources, conn, table, "DiaForcedSource")

    def countUnassociatedObjects(self):
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.
        """
        # Retrieve the DiaObject table.
        table = self._schema.objects

        # Construct the sql statement.
        stmt = sql.select([func.count()]).select_from(table).where(table.c.nDiaSources == 1)
        stmt = stmt.where(table.c.validityEnd == None)  # noqa: E711

        # Return the count.
        count = self._engine.scalar(stmt)

        return count

    def isVisitProcessed(self, visitInfo):
        """Test whether data from an image has been loaded into the database.

        Used as part of ap_verify metrics.

        Parameters
        ----------
        visitInfo : `lsst.afw.image.VisitInfo`
            The metadata for the image of interest.

        Returns
        -------
        isProcessed : `bool`
            `True` if the data are present, `False` otherwise.
        """
        id = visitInfo.getExposureId()
        table = self._schema.sources
        idField = table.c.ccdVisitId

        # Hopefully faster than SELECT DISTINCT
        query = sql.select([idField]).select_from(table) \
            .where(idField == id).limit(1)

        return self._engine.scalar(query) is not None

    def dailyJob(self):
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily cleanup is determined by
        configuration/schema.
        """

        # move data from DiaObjectNightly into DiaObject
        if self.config.dia_object_nightly:
            with _ansi_session(self._engine) as conn:
                query = 'INSERT INTO "' + self._schema.objects.name + '" '
                query += 'SELECT * FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly copy', self.config.timer):
                    conn.execute(sql.text(query))

                query = 'DELETE FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly delete', self.config.timer):
                    conn.execute(sql.text(query))

        if self._engine.name == 'postgresql':

            # do VACUUM on all tables
            _LOG.info("Running VACUUM on all tables")
            connection = self._engine.raw_connection()
            ISOLATION_LEVEL_AUTOCOMMIT = 0
            connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            cursor = connection.cursor()
            cursor.execute("VACUUM ANALYSE")

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            Name of the MySQL engine to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace.
        oracle_iot : `bool`, optional
            Make index-organized DiaObjectLast table.
        """
        self._schema.makeSchema(drop=drop, mysql_engine=mysql_engine,
                                oracle_tablespace=oracle_tablespace,
                                oracle_iot=oracle_iot)

    def _explain(self, query, conn):
        """Run the query with EXPLAIN."""

        _LOG.info("explain for query: %s...", query[:64])

        if conn.engine.name == 'mysql':
            query = "EXPLAIN EXTENDED " + query
        else:
            query = "EXPLAIN " + query

        res = conn.execute(sql.text(query))
        if res.returns_rows:
            _LOG.info("explain: %s", res.keys())
            for row in res:
                _LOG.info("explain: %s", row)
        else:
            _LOG.info("EXPLAIN returned nothing")

    def _storeObjectsAfw(self, objects, conn, table, schema_table_name,
                         replace=False, extra_columns=None):
        """Generic store method.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `boolean`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteValue(v):
            """Quote and escape values."""
            if v is None:
                v = "NULL"
            elif isinstance(v, datetime):
                v = "'" + str(v) + "'"
            elif isinstance(v, str):
                # we don't expect nasty stuff in strings
                v = "'" + v + "'"
            elif isinstance(v, geom.Angle):
                v = v.asDegrees()
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            else:
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            return v

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted.
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName
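
        # Illustrative behavior of the helpers above (example values only):
        #     quoteId("diaObjectId")  -> '"diaObjectId"'
        #     quoteId("pixelid")      -> 'pixelid'
        #     quoteValue(None)        -> 'NULL'
        #     quoteValue("x")         -> "'x'"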

        if conn.engine.name == "oracle":
            return self._storeObjectsAfwOracle(objects, conn, table,
                                               schema_table_name, replace,
                                               extra_columns)

        schema = objects.getSchema()
        # use extra columns if specified
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]

        column_map = self._schema.getAfwColumns(schema_table_name)
        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields if field in column_map]

        if replace and conn.engine.name in ('mysql', 'sqlite'):
            query = 'REPLACE INTO '
        else:
            query = 'INSERT INTO '
        qfields = [quoteId(field) for field in fields + extra_fields]
        query += quoteId(table.name) + ' (' + ','.join(qfields) + ') ' + 'VALUES '

        values = []
        for rec in objects:
            row = []
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and \
                        np.isfinite(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                row.append(quoteValue(value))
            for field in extra_fields:
                row.append(quoteValue(extra_columns[field]))
            values.append('(' + ','.join(row) + ')')

        if self.config.explain:
            # run the same query with explain, only give it one row of data
            self._explain(query + values[0], conn)

        query += ','.join(values)

        if replace and conn.engine.name == 'postgresql':
            # This depends on the fact that "replace" can only be true
            # for the DiaObjectLast table.
            pks = ('pixelId', 'diaObjectId')
            query += " ON CONFLICT (\"{}\", \"{}\") DO UPDATE SET ".format(*pks)
            fields = [column_map[field].name for field in afw_fields if field in column_map]
            fields = ['"{0}" = EXCLUDED."{0}"'.format(field)
                      for field in fields if field not in pks]
            query += ', '.join(fields)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query))
        _LOG.debug("inserted %s intervals", res.rowcount)

    def _storeObjectsAfwOracle(self, objects, conn, table, schema_table_name,
                               replace=False, extra_columns=None):
        """Store method for Oracle.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `boolean`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted (Oracle backend needs them unquoted).
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        schema = objects.getSchema()

        # use extra columns as overrides always.
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]
        # _LOG.info("afw_fields: %s", afw_fields)

        column_map = self._schema.getAfwColumns(schema_table_name)
        # _LOG.info("column_map: %s", column_map)

        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields
                  if field in column_map]
        # _LOG.info("fields: %s", fields)

        qfields = [quoteId(field) for field in fields + extra_fields]

        if not replace:
            vals = [":col{}".format(i) for i in range(len(fields))]
            vals += [":extcol{}".format(i) for i in range(len(extra_fields))]
            query = 'INSERT INTO ' + quoteId(table.name)
            query += ' (' + ','.join(qfields) + ') VALUES'
            query += ' (' + ','.join(vals) + ')'
        else:
            qvals = [":col{} {}".format(i, quoteId(field)) for i, field in enumerate(fields)]
            qvals += [":extcol{} {}".format(i, quoteId(field)) for i, field in enumerate(extra_fields)]
            pks = ('pixelId', 'diaObjectId')
            onexpr = ["SRC.{col} = DST.{col}".format(col=quoteId(col)) for col in pks]
            setexpr = ["DST.{col} = SRC.{col}".format(col=quoteId(col))
                       for col in fields + extra_fields if col not in pks]
            vals = ["SRC.{col}".format(col=quoteId(col)) for col in fields + extra_fields]
            query = "MERGE INTO {} DST ".format(quoteId(table.name))
            query += "USING (SELECT {} FROM DUAL) SRC ".format(", ".join(qvals))
            query += "ON ({}) ".format(" AND ".join(onexpr))
            query += "WHEN MATCHED THEN UPDATE SET {} ".format(" ,".join(setexpr))
            query += "WHEN NOT MATCHED THEN INSERT "
            query += "({}) VALUES ({})".format(','.join(qfields), ','.join(vals))
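
            # Shape of the generated MERGE statement (column names are
            # illustrative):
            #   MERGE INTO "DiaObjectLast" DST
            #   USING (SELECT :col0 "diaObjectId", ... FROM DUAL) SRC
            #   ON (SRC."pixelId" = DST."pixelId" AND SRC."diaObjectId" = DST."diaObjectId")
            #   WHEN MATCHED THEN UPDATE SET DST."x" = SRC."x", ...
            #   WHEN NOT MATCHED THEN INSERT (...) VALUES (...)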

        # _LOG.info("query: %s", query)

        values = []
        for rec in objects:
            row = {}
            col = 0
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and not np.isnan(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                elif isinstance(value, geom.Angle):
                    value = str(value.asDegrees())
                elif not np.isfinite(value):
                    value = None
                row["col{}".format(col)] = value
                col += 1
            for i, field in enumerate(extra_fields):
                row["extcol{}".format(i)] = extra_columns[field]
            values.append(row)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query), values)
        _LOG.debug("inserted %s intervals", res.rowcount)

    def _convertResult(self, res, table_name, catalog=None):
        """Convert result set into output catalog.

        Parameters
        ----------
        res : `sqlalchemy.ResultProxy`
            SQLAlchemy result set returned by query.
        table_name : `str`
            Name of the table.
        catalog : `lsst.afw.table.BaseCatalog`
            If not `None` then extend existing catalog.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            If ``catalog`` is `None` then a new instance is returned,
            otherwise ``catalog`` is updated and returned.
        """
        # make catalog schema
        columns = res.keys()
        schema, col_map = self._schema.getAfwSchema(table_name, columns)
        if catalog is None:
            _LOG.debug("_convertResult: schema: %s", schema)
            _LOG.debug("_convertResult: col_map: %s", col_map)
            catalog = afwTable.SourceCatalog(schema)

        # fill catalog
        for row in res:
            record = catalog.addNew()
            for col, value in row.items():
                # some columns may exist in database but not included in afw schema
                col = col_map.get(col)
                if col is not None:
                    if isinstance(value, datetime):
                        # convert datetime to number of seconds
                        value = int((value - datetime.utcfromtimestamp(0)).total_seconds())
                    elif col.getTypeString() == 'Angle' and value is not None:
                        value = value * geom.degrees
                    if value is not None:
                        record.set(col, value)

        return catalog