Coverage for python/lsst/dax/apdb/apdb.py: 10%
# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining Apdb class and related methods.
"""

__all__ = ["ApdbConfig", "Apdb", "Visit"]

from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime
import logging
import numpy as np
import os
import pandas

import lsst.geom as geom
import lsst.afw.table as afwTable
import lsst.pex.config as pexConfig
from lsst.pex.config import Field, ChoiceField, ListField
import sqlalchemy
from sqlalchemy import (func, sql)
from sqlalchemy.pool import NullPool
from . import timer, apdbSchema

_LOG = logging.getLogger(__name__.partition(".")[2])  # strip leading "lsst."


class Timer(object):
    """Timer class defining context manager which tracks execution timing.

    Typical use:

        with Timer("timer_name"):
            do_something

    On exit from the block it will print elapsed time.

    See also :py:mod:`timer` module.
    """
    def __init__(self, name, do_logging=True, log_before_cursor_execute=False):
        self._log_before_cursor_execute = log_before_cursor_execute
        self._do_logging = do_logging
        self._timer1 = timer.Timer(name)
        self._timer2 = timer.Timer(name + " (before/after cursor)")

    def __enter__(self):
        """
        Enter context, start timer
        """
#        event.listen(engine.Engine, "before_cursor_execute", self._start_timer)
#        event.listen(engine.Engine, "after_cursor_execute", self._stop_timer)
        self._timer1.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Exit context, stop and dump timer
        """
        if exc_type is None:
            self._timer1.stop()
            if self._do_logging:
                self._timer1.dump()
#        event.remove(engine.Engine, "before_cursor_execute", self._start_timer)
#        event.remove(engine.Engine, "after_cursor_execute", self._stop_timer)
        return False

    def _start_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Start counting"""
        if self._log_before_cursor_execute:
            _LOG.info("before_cursor_execute")
        self._timer2.start()

    def _stop_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Stop counting"""
        self._timer2.stop()
        if self._do_logging:
            self._timer2.dump()


def _split(seq, nItems):
    """Split a sequence into smaller sequences"""
    seq = list(seq)
    while seq:
        yield seq[:nItems]
        del seq[:nItems]
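
# A minimal doctest-style illustration of how ``_split`` chunks a sequence
# (the values are made up for the example; the query methods below use it to
# keep SQL ``IN`` lists to at most 1000 IDs):
#
#     >>> list(_split(range(5), 2))
#     [[0, 1], [2, 3], [4]]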


def _coerce_uint64(df: pandas.DataFrame) -> pandas.DataFrame:
    """Change type of the uint64 columns to int64, return copy of data frame.
    """
    names = [c[0] for c in df.dtypes.items() if c[1] == np.uint64]
    return df.astype({name: np.int64 for name in names})
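
# A hedged sketch of the coercion (the column name here is hypothetical):
#
#     >>> df = pandas.DataFrame({"diaObjectId": np.array([1, 2], dtype=np.uint64)})
#     >>> _coerce_uint64(df)["diaObjectId"].dtype
#     dtype('int64')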


# Information about single visit
Visit = namedtuple('Visit', 'visitId visitTime lastObjectId lastSourceId')


@contextmanager
def _ansi_session(engine):
    """Returns a connection, making sure that ANSI mode is set for MySQL.
    """
    with engine.begin() as conn:
        if engine.name == 'mysql':
            conn.execute(sql.text("SET SESSION SQL_MODE = 'ANSI'"))
        yield conn
    return


def _data_file_name(basename):
    """Return path name of a data file.
    """
    return os.path.join("${DAX_APDB_DIR}", "data", basename)


class ApdbConfig(pexConfig.Config):

    db_url = Field(dtype=str, doc="SQLAlchemy database connection URI")
    isolation_level = ChoiceField(dtype=str,
                                  doc="Transaction isolation level",
                                  allowed={"READ_COMMITTED": "Read committed",
                                           "READ_UNCOMMITTED": "Read uncommitted",
                                           "REPEATABLE_READ": "Repeatable read",
                                           "SERIALIZABLE": "Serializable"},
                                  default="READ_COMMITTED",
                                  optional=True)
    connection_pool = Field(dtype=bool,
                            doc=("If False then disable SQLAlchemy connection pool. "
                                 "Do not use connection pool when forking."),
                            default=True)
    connection_timeout = Field(dtype=float,
                               doc="Maximum time to wait for database lock to be released before "
                                   "exiting. Defaults to SQLAlchemy defaults if not set.",
                               default=None,
                               optional=True)
    sql_echo = Field(dtype=bool,
                     doc="If True then pass SQLAlchemy echo option.",
                     default=False)
    dia_object_index = ChoiceField(dtype=str,
                                   doc="Indexing mode for DiaObject table",
                                   allowed={'baseline': "Index defined in baseline schema",
                                            'pix_id_iov': "(pixelId, objectId, iovStart) PK",
                                            'last_object_table': "Separate DiaObjectLast table"},
                                   default='baseline')
    dia_object_nightly = Field(dtype=bool,
                               doc="Use separate nightly table for DiaObject",
                               default=False)
    read_sources_months = Field(dtype=int,
                                doc="Number of months of history to read from DiaSource",
                                default=12)
    read_forced_sources_months = Field(dtype=int,
                                       doc="Number of months of history to read from DiaForcedSource",
                                       default=12)
    dia_object_columns = ListField(dtype=str,
                                   doc="List of columns to read from DiaObject, by default read all columns",
                                   default=[])
    object_last_replace = Field(dtype=bool,
                                doc="If True (default) then use \"upsert\" for DiaObjectsLast table",
                                default=True)
    schema_file = Field(dtype=str,
                        doc="Location of (YAML) configuration file with standard schema",
                        default=_data_file_name("apdb-schema.yaml"))
    extra_schema_file = Field(dtype=str,
                              doc="Location of (YAML) configuration file with extra schema",
                              default=_data_file_name("apdb-schema-extra.yaml"))
    column_map = Field(dtype=str,
                       doc="Location of (YAML) configuration file with column mapping",
                       default=_data_file_name("apdb-afw-map.yaml"))
    prefix = Field(dtype=str,
                   doc="Prefix to add to table names and index names",
                   default="")
    explain = Field(dtype=bool,
                    doc="If True then run EXPLAIN SQL command on each executed query",
                    default=False)
    timer = Field(dtype=bool,
                  doc="If True then print/log timing information",
                  default=False)
    diaobject_index_hint = Field(dtype=str,
                                 doc="Name of the index to use with Oracle index hint",
                                 default=None,
                                 optional=True)
    dynamic_sampling_hint = Field(dtype=int,
                                  doc="If non-zero then use dynamic_sampling hint",
                                  default=0)
    cardinality_hint = Field(dtype=int,
                             doc="If non-zero then use cardinality hint",
                             default=0)

    def validate(self):
        super().validate()
        if self.isolation_level == "READ_COMMITTED" and self.db_url.startswith("sqlite"):
            raise ValueError("Attempting to run Apdb with SQLite and isolation level "
                             "'READ_COMMITTED'. Use 'READ_UNCOMMITTED' instead.")
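
# A minimal configuration sketch (hedged: the SQLite URL and file name are
# hypothetical; see the config/ folder for real configuration examples):
#
#     config = ApdbConfig()
#     config.db_url = "sqlite:///apdb.db"
#     config.isolation_level = "READ_UNCOMMITTED"   # required with SQLite
#     apdb = Apdb(config)
#     apdb.makeSchema()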


class Apdb(object):
    """Interface to L1 database, hides all database access details.

    The implementation is configured via the standard ``pex_config``
    mechanism using the `ApdbConfig` configuration class. For examples of
    different configurations check the config/ folder.

    Parameters
    ----------
    config : `ApdbConfig`
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard
        APDB schema.
    """

    def __init__(self, config, afw_schemas=None):

        self.config = config

        # logging.getLogger('sqlalchemy').setLevel(logging.INFO)
        _LOG.debug("APDB Configuration:")
        _LOG.debug("    dia_object_index: %s", self.config.dia_object_index)
        _LOG.debug("    dia_object_nightly: %s", self.config.dia_object_nightly)
        _LOG.debug("    read_sources_months: %s", self.config.read_sources_months)
        _LOG.debug("    read_forced_sources_months: %s", self.config.read_forced_sources_months)
        _LOG.debug("    dia_object_columns: %s", self.config.dia_object_columns)
        _LOG.debug("    object_last_replace: %s", self.config.object_last_replace)
        _LOG.debug("    schema_file: %s", self.config.schema_file)
        _LOG.debug("    extra_schema_file: %s", self.config.extra_schema_file)
        _LOG.debug("    column_map: %s", self.config.column_map)
        _LOG.debug("    schema prefix: %s", self.config.prefix)

        # engine is reused between multiple processes, make sure that we don't
        # share connections by disabling pool (by using NullPool class)
        kw = dict(echo=self.config.sql_echo)
        conn_args = dict()
        if not self.config.connection_pool:
            kw.update(poolclass=NullPool)
        if self.config.isolation_level is not None:
            kw.update(isolation_level=self.config.isolation_level)
        if self.config.connection_timeout is not None:
            if self.config.db_url.startswith("sqlite"):
                conn_args.update(timeout=self.config.connection_timeout)
            elif self.config.db_url.startswith(("postgresql", "mysql")):
                conn_args.update(connect_timeout=self.config.connection_timeout)
        kw.update(connect_args=conn_args)
        self._engine = sqlalchemy.create_engine(self.config.db_url, **kw)

        self._schema = apdbSchema.ApdbSchema(engine=self._engine,
                                             dia_object_index=self.config.dia_object_index,
                                             dia_object_nightly=self.config.dia_object_nightly,
                                             schema_file=self.config.schema_file,
                                             extra_schema_file=self.config.extra_schema_file,
                                             column_map=self.config.column_map,
                                             afw_schemas=afw_schemas,
                                             prefix=self.config.prefix)

    def lastVisit(self):
        """Returns last visit information or `None` if visits table is empty.

        The visits table is used by ap_proto to track visit information; it
        is not a part of the regular APDB schema.

        Returns
        -------
        visit : `Visit` or `None`
            Last stored visit info or `None` if there was nothing stored yet.
        """

        with self._engine.begin() as conn:

            stmnt = sql.select([sql.func.max(self._schema.visits.c.visitId),
                                sql.func.max(self._schema.visits.c.visitTime)])
            res = conn.execute(stmnt)
            row = res.fetchone()
            if row[0] is None:
                return None

            visitId = row[0]
            visitTime = row[1]
            _LOG.info("lastVisit: visitId: %s visitTime: %s (%s)", visitId,
                      visitTime, type(visitTime))

            # get max IDs from corresponding tables
            stmnt = sql.select([sql.func.max(self._schema.objects.c.diaObjectId)])
            lastObjectId = conn.scalar(stmnt)
            stmnt = sql.select([sql.func.max(self._schema.sources.c.diaSourceId)])
            lastSourceId = conn.scalar(stmnt)

            return Visit(visitId=visitId, visitTime=visitTime,
                         lastObjectId=lastObjectId, lastSourceId=lastSourceId)

    def saveVisit(self, visitId, visitTime):
        """Store visit information.

        This method is only used by the ``ap_proto`` script from
        ``l1dbproto`` and is not intended for production pipelines.

        Parameters
        ----------
        visitId : `int`
            Visit identifier
        visitTime : `datetime.datetime`
            Visit timestamp.
        """

        ins = self._schema.visits.insert().values(visitId=visitId,
                                                  visitTime=visitTime)
        self._engine.execute(ins)
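
    # A hedged sketch of the visit bookkeeping round trip (the visit ID and
    # timestamp are made up for the example):
    #
    #     apdb.saveVisit(42, datetime.utcnow())
    #     visit = apdb.lastVisit()
    #     # visit.visitId == 42; lastObjectId/lastSourceId are the current
    #     # maxima of the DiaObject/DiaSource ID columns (possibly None)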

    def tableRowCount(self):
        """Returns dictionary with the table names and row counts.

        Used by ``ap_proto`` to keep track of the size of the database
        tables. Depending on database technology this can be an expensive
        operation.

        Returns
        -------
        row_counts : `dict`
            Dict where key is a table name and value is a row count.
        """
        res = {}
        tables = [self._schema.objects, self._schema.sources, self._schema.forcedSources]
        if self.config.dia_object_index == 'last_object_table':
            tables.append(self._schema.objects_last)
        for table in tables:
            stmt = sql.select([func.count()]).select_from(table)
            count = self._engine.scalar(stmt)
            res[table.name] = count

        return res

    def getDiaObjects(self, pixel_ranges, return_pandas=False):
        """Returns catalog of DiaObject instances from given region.

        Objects are searched based on pixelization index; the region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns an :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on the column map
        passed to the constructor) but types or units are not changed.

        Returns only the last version of each DiaObject.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, each range is a tuple (minPixelID, maxPixelID).
            This defines the set of pixel indices to be included in the result.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `pandas.DataFrame`
            Catalog containing DiaObject records.
        """

        # decide what columns we need
        if self.config.dia_object_index == 'last_object_table':
            table = self._schema.objects_last
        else:
            table = self._schema.objects
        if not self.config.dia_object_columns:
            query = table.select()
        else:
            columns = [table.c[col] for col in self.config.dia_object_columns]
            query = sql.select(columns)

        if self.config.diaobject_index_hint:
            val = self.config.diaobject_index_hint
            query = query.with_hint(table, 'index_rs_asc(%(name)s "{}")'.format(val))
        if self.config.dynamic_sampling_hint > 0:
            val = self.config.dynamic_sampling_hint
            query = query.with_hint(table, 'dynamic_sampling(%(name)s {})'.format(val))
        if self.config.cardinality_hint > 0:
            val = self.config.cardinality_hint
            query = query.with_hint(table, 'FIRST_ROWS_1 cardinality(%(name)s {})'.format(val))

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # select latest version of objects
        if self.config.dia_object_index != 'last_object_table':
            query = query.where(table.c.validityEnd == None)  # noqa: E711

        _LOG.debug("query: %s", query)

        if self.config.explain:
            # run the same query with explain
            self._explain(query, self._engine)

        # execute select
        with Timer('DiaObject select', self.config.timer):
            with self._engine.begin() as conn:
                if return_pandas:
                    objects = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    objects = self._convertResult(res, "DiaObject")
        _LOG.debug("found %s DiaObjects", len(objects))
        return objects
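
    # A hedged call sketch (the pixel range values are made up). Each tuple
    # is (minPixelID, maxPixelID) with the upper bound exclusive, which is
    # why the loop above does ``upper -= 1``:
    #
    #     objects = apdb.getDiaObjects([(1000, 1008), (2000, 2001)],
    #                                  return_pandas=True)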

    def getDiaSourcesInRegion(self, pixel_ranges, dt, return_pandas=False):
        """Returns catalog of DiaSource instances from given region.

        Sources are searched based on pixelization index; the region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns an :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on the column map
        passed to the constructor) but types or units are not changed.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, each range is a tuple (minPixelID, maxPixelID).
            This defines the set of pixel indices to be included in the result.
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if the
            ``read_sources_months`` configuration parameter is set to 0.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        table = self._schema.sources
        query = table.select()

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # execute select
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                if return_pandas:
                    sources = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    sources = self._convertResult(res, "DiaSource")
        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaSource instances given set of DiaObject IDs.

        This method returns an :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the APDB table. Re-mapping
        of the column names is done for some columns (based on the column map
        passed to the constructor) but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if the
            ``read_sources_months`` configuration parameter is set to 0 or
            when ``object_ids`` is empty.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.sources
        sources = None
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):
                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaSource", sources)

        _LOG.debug("found %s DiaSources", len(sources))
        return sources
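
    # A hedged sketch (IDs and time are made up). Object IDs are sorted and
    # chunked into groups of 1000 via ``_split`` to keep the SQL ``IN`` list
    # bounded:
    #
    #     sources = apdb.getDiaSources([1, 2, 3], datetime.utcnow(),
    #                                  return_pandas=True)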

    def getDiaForcedSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaForcedSource instances matching given
        DiaObjects.

        This method returns an :doc:`/modules/lsst.afw.table/index` catalog
        with a schema determined by the schema of the L1 database table.
        Re-mapping of the column names may be done for some columns (based on
        the column map passed to the constructor) but types or units are not
        changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            the ``read_forced_sources_months`` configuration parameter is set
            to 0 or when ``object_ids`` is empty.
        """

        if self.config.read_forced_sources_months == 0:
            _LOG.info("Skip DiaForcedSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaForcedSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.forcedSources
        sources = None

        with Timer('DiaForcedSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):

                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaForcedSource", sources)

        _LOG.debug("found %s DiaForcedSources", len(sources))
        return sources

    def storeDiaObjects(self, objs, dt):
        """Store catalog of DiaObjects from current visit.

        This method takes an :doc:`/modules/lsst.afw.table/index` catalog,
        its schema must be compatible with the schema of the APDB table:

          - column names must correspond to database table columns
          - some column names are re-mapped based on the column map passed
            to the constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in the database schema can be
            omitted from the afw schema
          - this method knows how to fill interval-related columns
            (validityStart, validityEnd); they do not need to appear in
            the afw schema

        Parameters
        ----------
        objs : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog with DiaObject records
        dt : `datetime.datetime`
            Time of the visit
        """

        if isinstance(objs, pandas.DataFrame):
            ids = sorted(objs['diaObjectId'])
        else:
            ids = sorted([obj['id'] for obj in objs])
        _LOG.debug("first object ID: %d", ids[0])

        # NOTE: workaround for sqlite, need this here to avoid
        # "database is locked" error.
        table = self._schema.objects

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            ids = ",".join(str(id) for id in ids)

            if self.config.dia_object_index == 'last_object_table':

                # insert and replace all records in LAST table, mysql and postgres have
                # non-standard features (handled in _storeObjectsAfw)
                table = self._schema.objects_last
                do_replace = self.config.object_last_replace
                # If the input data is of type Pandas, we drop the previous
                # objects regardless of the do_replace setting due to how
                # Pandas inserts objects.
                if not do_replace or isinstance(objs, pandas.DataFrame):
                    query = 'DELETE FROM "' + table.name + '" '
                    query += 'WHERE "diaObjectId" IN (' + ids + ') '

                    if self.config.explain:
                        # run the same query with explain
                        self._explain(query, conn)

                    with Timer(table.name + ' delete', self.config.timer):
                        res = conn.execute(sql.text(query))
                    _LOG.debug("deleted %s objects", res.rowcount)

                extra_columns = dict(lastNonForcedSource=dt)
                if isinstance(objs, pandas.DataFrame):
                    with Timer("DiaObjectLast insert", self.config.timer):
                        objs = _coerce_uint64(objs)
                        for col, data in extra_columns.items():
                            objs[col] = data
                        objs.to_sql("DiaObjectLast", conn, if_exists='append',
                                    index=False)
                else:
                    self._storeObjectsAfw(objs, conn, table, "DiaObjectLast",
                                          replace=do_replace,
                                          extra_columns=extra_columns)

            else:

                # truncate existing validity intervals
                table = self._schema.objects
                query = 'UPDATE "' + table.name + '" '
                query += "SET \"validityEnd\" = '" + str(dt) + "' "
                query += 'WHERE "diaObjectId" IN (' + ids + ') '
                query += 'AND "validityEnd" IS NULL'

                # _LOG.debug("query: %s", query)

                if self.config.explain:
                    # run the same query with explain
                    self._explain(query, conn)

                with Timer(table.name + ' truncate', self.config.timer):
                    res = conn.execute(sql.text(query))
                _LOG.debug("truncated %s intervals", res.rowcount)

            # insert new versions
            if self.config.dia_object_nightly:
                table = self._schema.objects_nightly
            else:
                table = self._schema.objects
            extra_columns = dict(lastNonForcedSource=dt, validityStart=dt,
                                 validityEnd=None)
            if isinstance(objs, pandas.DataFrame):
                with Timer("DiaObject insert", self.config.timer):
                    objs = _coerce_uint64(objs)
                    for col, data in extra_columns.items():
                        objs[col] = data
                    objs.to_sql("DiaObject", conn, if_exists='append',
                                index=False)
            else:
                self._storeObjectsAfw(objs, conn, table, "DiaObject",
                                      extra_columns=extra_columns)
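
    # A hedged sketch of storing objects as a DataFrame (columns shown are
    # illustrative only; a real frame must carry all non-defaulted columns
    # of the DiaObject schema):
    #
    #     objs = pandas.DataFrame({"diaObjectId": [1, 2],
    #                              "pixelId": [1000, 1001]})
    #     apdb.storeDiaObjects(objs, datetime.utcnow())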

    def storeDiaSources(self, sources):
        """Store catalog of DIASources from current visit.

        This method takes an :doc:`/modules/lsst.afw.table/index` catalog,
        its schema must be compatible with the schema of the L1 database
        table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on the column map passed
            to the constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in the database schema can be
            omitted from the afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaSource insert", self.config.timer):
                    sources = _coerce_uint64(sources)
                    sources.to_sql("DiaSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.sources
                self._storeObjectsAfw(sources, conn, table, "DiaSource")

    def storeDiaForcedSources(self, sources):
        """Store a set of DIAForcedSources from current visit.

        This method takes an :doc:`/modules/lsst.afw.table/index` catalog,
        its schema must be compatible with the schema of the L1 database
        table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on the column map passed
            to the constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in the database schema can be
            omitted from the afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaForcedSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaForcedSource insert", self.config.timer):
                    sources = _coerce_uint64(sources)
                    sources.to_sql("DiaForcedSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.forcedSources
                self._storeObjectsAfw(sources, conn, table, "DiaForcedSource")

    def countUnassociatedObjects(self):
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.
        """
        # Retrieve the DiaObject table.
        table = self._schema.objects

        # Construct the sql statement.
        stmt = sql.select([func.count()]).select_from(table).where(table.c.nDiaSources == 1)
        stmt = stmt.where(table.c.validityEnd == None)  # noqa: E711

        # Return the count.
        count = self._engine.scalar(stmt)

        return count

    def isVisitProcessed(self, visitInfo):
        """Test whether data from an image has been loaded into the database.

        Used as part of ap_verify metrics.

        Parameters
        ----------
        visitInfo : `lsst.afw.image.VisitInfo`
            The metadata for the image of interest.

        Returns
        -------
        isProcessed : `bool`
            `True` if the data are present, `False` otherwise.
        """
        id = visitInfo.getExposureId()
        table = self._schema.sources
        idField = table.c.ccdVisitId

        # Hopefully faster than SELECT DISTINCT
        query = sql.select([idField]).select_from(table) \
            .where(idField == id).limit(1)

        return self._engine.scalar(query) is not None

    def dailyJob(self):
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily cleanup is determined by
        configuration/schema.
        """

        # move data from DiaObjectNightly into DiaObject
        if self.config.dia_object_nightly:
            with _ansi_session(self._engine) as conn:
                query = 'INSERT INTO "' + self._schema.objects.name + '" '
                query += 'SELECT * FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly copy', self.config.timer):
                    conn.execute(sql.text(query))

                query = 'DELETE FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly delete', self.config.timer):
                    conn.execute(sql.text(query))

        if self._engine.name == 'postgresql':

            # do VACUUM on all tables
            _LOG.info("Running VACUUM on all tables")
            connection = self._engine.raw_connection()
            ISOLATION_LEVEL_AUTOCOMMIT = 0
            connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            cursor = connection.cursor()
            cursor.execute("VACUUM ANALYSE")

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            Name of the MySQL engine to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace.
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """
        self._schema.makeSchema(drop=drop, mysql_engine=mysql_engine,
                                oracle_tablespace=oracle_tablespace,
                                oracle_iot=oracle_iot)
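
    # Sketch: (re-)create all tables, dropping any existing ones first
    # (destructive; the keyword defaults above apply otherwise):
    #
    #     apdb.makeSchema(drop=True)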

    def _explain(self, query, conn):
        """Run the query with EXPLAIN.
        """

        _LOG.info("explain for query: %s...", query[:64])

        if conn.engine.name == 'mysql':
            query = "EXPLAIN EXTENDED " + query
        else:
            query = "EXPLAIN " + query

        res = conn.execute(sql.text(query))
        if res.returns_rows:
            _LOG.info("explain: %s", res.keys())
            for row in res:
                _LOG.info("explain: %s", row)
        else:
            _LOG.info("EXPLAIN returned nothing")

    def _storeObjectsAfw(self, objects, conn, table, schema_table_name,
                         replace=False, extra_columns=None):
        """Generic store method.

        Takes a catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `bool`
            If `True` then use replace instead of INSERT (should be more
            efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to
            add to every row, only if column is missing in catalog records.
        """

        def quoteValue(v):
            """Quote and escape values"""
            if v is None:
                v = "NULL"
            elif isinstance(v, datetime):
                v = "'" + str(v) + "'"
            elif isinstance(v, str):
                # we don't expect nasty stuff in strings
                v = "'" + v + "'"
            elif isinstance(v, geom.Angle):
                v = v.asDegrees()
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            else:
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            return v

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted.
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        if conn.engine.name == "oracle":
            return self._storeObjectsAfwOracle(objects, conn, table,
                                               schema_table_name, replace,
                                               extra_columns)

        schema = objects.getSchema()
        # use extra columns if specified
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]

        column_map = self._schema.getAfwColumns(schema_table_name)
        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields if field in column_map]

        if replace and conn.engine.name in ('mysql', 'sqlite'):
            query = 'REPLACE INTO '
        else:
            query = 'INSERT INTO '
        qfields = [quoteId(field) for field in fields + extra_fields]
        query += quoteId(table.name) + ' (' + ','.join(qfields) + ') ' + 'VALUES '

        values = []
        for rec in objects:
            row = []
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and \
                        np.isfinite(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                row.append(quoteValue(value))
            for field in extra_fields:
                row.append(quoteValue(extra_columns[field]))
            values.append('(' + ','.join(row) + ')')

        if self.config.explain:
            # run the same query with explain, only give it one row of data
            self._explain(query + values[0], conn)

        query += ','.join(values)

        if replace and conn.engine.name == 'postgresql':
            # This relies on the fact that "replace" can only be true for the
            # DiaObjectLast table.
            pks = ('pixelId', 'diaObjectId')
            query += " ON CONFLICT (\"{}\", \"{}\") DO UPDATE SET ".format(*pks)
            fields = [column_map[field].name for field in afw_fields if field in column_map]
            fields = ['"{0}" = EXCLUDED."{0}"'.format(field)
                      for field in fields if field not in pks]
            query += ', '.join(fields)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query))
        _LOG.debug("inserted %s records", res.rowcount)

    def _storeObjectsAfwOracle(self, objects, conn, table, schema_table_name,
                               replace=False, extra_columns=None):
        """Store method for Oracle.

        Takes a catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `bool`
            If `True` then use replace instead of INSERT (should be more
            efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to
            add to every row, only if column is missing in catalog records.
        """

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted (Oracle backend needs them
            unquoted).
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        schema = objects.getSchema()

        # extra columns are always used as overrides
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]
        # _LOG.info("afw_fields: %s", afw_fields)

        column_map = self._schema.getAfwColumns(schema_table_name)
        # _LOG.info("column_map: %s", column_map)

        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields
                  if field in column_map]
        # _LOG.info("fields: %s", fields)

        qfields = [quoteId(field) for field in fields + extra_fields]

        if not replace:
            vals = [":col{}".format(i) for i in range(len(fields))]
            vals += [":extcol{}".format(i) for i in range(len(extra_fields))]
            query = 'INSERT INTO ' + quoteId(table.name)
            query += ' (' + ','.join(qfields) + ') VALUES'
            query += ' (' + ','.join(vals) + ')'
        else:
            qvals = [":col{} {}".format(i, quoteId(field)) for i, field in enumerate(fields)]
            qvals += [":extcol{} {}".format(i, quoteId(field)) for i, field in enumerate(extra_fields)]
            pks = ('pixelId', 'diaObjectId')
            onexpr = ["SRC.{col} = DST.{col}".format(col=quoteId(col)) for col in pks]
            setexpr = ["DST.{col} = SRC.{col}".format(col=quoteId(col))
                       for col in fields + extra_fields if col not in pks]
            vals = ["SRC.{col}".format(col=quoteId(col)) for col in fields + extra_fields]
            query = "MERGE INTO {} DST ".format(quoteId(table.name))
            query += "USING (SELECT {} FROM DUAL) SRC ".format(", ".join(qvals))
            query += "ON ({}) ".format(" AND ".join(onexpr))
            query += "WHEN MATCHED THEN UPDATE SET {} ".format(" ,".join(setexpr))
            query += "WHEN NOT MATCHED THEN INSERT "
            query += "({}) VALUES ({})".format(','.join(qfields), ','.join(vals))
        # _LOG.info("query: %s", query)

        values = []
        for rec in objects:
            row = {}
            col = 0
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and not np.isnan(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                elif isinstance(value, geom.Angle):
                    value = str(value.asDegrees())
                elif not np.isfinite(value):
                    value = None
                row["col{}".format(col)] = value
                col += 1
            for i, field in enumerate(extra_fields):
                row["extcol{}".format(i)] = extra_columns[field]
            values.append(row)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query), values)
        _LOG.debug("inserted %s records", res.rowcount)

    def _convertResult(self, res, table_name, catalog=None):
        """Convert result set into output catalog.

        Parameters
        ----------
        res : `sqlalchemy.ResultProxy`
            SQLAlchemy result set returned by query.
        table_name : `str`
            Name of the table.
        catalog : `lsst.afw.table.BaseCatalog`
            If not None then extend existing catalog

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            If ``catalog`` is None then a new instance is returned, otherwise
            ``catalog`` is updated and returned.
        """
        # make catalog schema
        columns = res.keys()
        schema, col_map = self._schema.getAfwSchema(table_name, columns)
        if catalog is None:
            _LOG.debug("_convertResult: schema: %s", schema)
            _LOG.debug("_convertResult: col_map: %s", col_map)
            catalog = afwTable.SourceCatalog(schema)

        # fill catalog
        for row in res:
            record = catalog.addNew()
            for col, value in row.items():
                # some columns may exist in database but not included in afw schema
                col = col_map.get(col)
                if col is not None:
                    if isinstance(value, datetime):
                        # convert datetime to number of seconds
                        value = int((value - datetime.utcfromtimestamp(0)).total_seconds())
                    elif col.getTypeString() == 'Angle' and value is not None:
                        value = value * geom.degrees
                    if value is not None:
                        record.set(col, value)

        return catalog