# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining Apdb class and related methods.
"""

__all__ = ["ApdbConfig", "Apdb", "Visit"]

from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime
import logging
import numpy as np
import os
import pandas

import lsst.geom as geom
import lsst.afw.table as afwTable
import lsst.pex.config as pexConfig
from lsst.pex.config import Field, ChoiceField, ListField
from lsst.utils import getPackageDir
import sqlalchemy
from sqlalchemy import (func, sql)
from sqlalchemy.pool import NullPool

from . import timer, apdbSchema


_LOG = logging.getLogger(__name__.partition(".")[2])  # strip leading "lsst."


class Timer(object):
    """Timer class defining context manager which tracks execution timing.

    Typical use:

        with Timer("timer_name"):
            do_something

    On exit from the block it will print the elapsed time.

    See also :py:mod:`timer` module.
    """
    def __init__(self, name, do_logging=True, log_before_cursor_execute=False):
        self._log_before_cursor_execute = log_before_cursor_execute
        self._do_logging = do_logging
        self._timer1 = timer.Timer(name)
        self._timer2 = timer.Timer(name + " (before/after cursor)")

    def __enter__(self):
        """
        Enter context, start timer
        """
        # event.listen(engine.Engine, "before_cursor_execute", self._start_timer)
        # event.listen(engine.Engine, "after_cursor_execute", self._stop_timer)
        self._timer1.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Exit context, stop and dump timer
        """
        if exc_type is None:
            self._timer1.stop()
            if self._do_logging:
                self._timer1.dump()
        # event.remove(engine.Engine, "before_cursor_execute", self._start_timer)
        # event.remove(engine.Engine, "after_cursor_execute", self._stop_timer)
        return False

    def _start_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Start counting"""
        if self._log_before_cursor_execute:
            _LOG.info("before_cursor_execute")
        self._timer2.start()

    def _stop_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Stop counting"""
        self._timer2.stop()
        if self._do_logging:
            self._timer2.dump()


def _split(seq, nItems):
    """Split a sequence into smaller sequences of at most ``nItems`` elements."""
    seq = list(seq)
    while seq:
        yield seq[:nItems]
        del seq[:nItems]
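
# A minimal sketch of how ``_split`` behaves, with hypothetical values (kept
# as a doctest-style comment so the module has no import-time side effects):
#
#     >>> list(_split(range(5), 2))
#     [[0, 1], [2, 3], [4]]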


# Information about single visit
Visit = namedtuple('Visit', 'visitId visitTime lastObjectId lastSourceId')
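
# For illustration, ``Visit`` is a plain namedtuple; with hypothetical values:
#
#     >>> v = Visit(visitId=1, visitTime=datetime(2020, 1, 1),
#     ...           lastObjectId=42, lastSourceId=100)
#     >>> v.lastSourceId
#     100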


@contextmanager
def _ansi_session(engine):
    """Returns a connection, makes sure that ANSI mode is set for MySQL
    """
    with engine.begin() as conn:
        if engine.name == 'mysql':
            conn.execute(sql.text("SET SESSION SQL_MODE = 'ANSI'"))
        yield conn
    return
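
# A usage sketch for ``_ansi_session``, assuming ``engine`` is an existing
# SQLAlchemy engine; on MySQL the session is switched to ANSI quoting before
# the connection is handed to the caller:
#
#     with _ansi_session(engine) as conn:
#         conn.execute(sql.text('SELECT 1'))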


def _data_file_name(basename):
    """Return path name of a data file.
    """
    return os.path.join(getPackageDir("dax_apdb"), "data", basename)


class ApdbConfig(pexConfig.Config):

    db_url = Field(dtype=str, doc="SQLAlchemy database connection URI")
    isolation_level = ChoiceField(dtype=str,
                                  doc="Transaction isolation level",
                                  allowed={"READ_COMMITTED": "Read committed",
                                           "READ_UNCOMMITTED": "Read uncommitted",
                                           "REPEATABLE_READ": "Repeatable read",
                                           "SERIALIZABLE": "Serializable"},
                                  default="READ_COMMITTED",
                                  optional=True)
    connection_pool = Field(dtype=bool,
                            doc=("If False then disable SQLAlchemy connection pool. "
                                 "Do not use connection pool when forking."),
                            default=True)
    connection_timeout = Field(dtype=float,
                               doc="Maximum time to wait for database lock to be released before "
                                   "exiting. Defaults to SQLAlchemy defaults if not set.",
                               default=None,
                               optional=True)
    sql_echo = Field(dtype=bool,
                     doc="If True then pass SQLAlchemy echo option.",
                     default=False)
    dia_object_index = ChoiceField(dtype=str,
                                   doc="Indexing mode for DiaObject table",
                                   allowed={'baseline': "Index defined in baseline schema",
                                            'pix_id_iov': "(pixelId, objectId, iovStart) PK",
                                            'last_object_table': "Separate DiaObjectLast table"},
                                   default='baseline')
    dia_object_nightly = Field(dtype=bool,
                               doc="Use separate nightly table for DiaObject",
                               default=False)
    read_sources_months = Field(dtype=int,
                                doc="Number of months of history to read from DiaSource",
                                default=12)
    read_forced_sources_months = Field(dtype=int,
                                       doc="Number of months of history to read from DiaForcedSource",
                                       default=12)
    dia_object_columns = ListField(dtype=str,
                                   doc="List of columns to read from DiaObject, by default read all columns",
                                   default=[])
    object_last_replace = Field(dtype=bool,
                                doc="If True (default) then use \"upsert\" for DiaObjectLast table",
                                default=True)
    schema_file = Field(dtype=str,
                        doc="Location of (YAML) configuration file with standard schema",
                        default=_data_file_name("apdb-schema.yaml"))
    extra_schema_file = Field(dtype=str,
                              doc="Location of (YAML) configuration file with extra schema",
                              default=_data_file_name("apdb-schema-extra.yaml"))
    column_map = Field(dtype=str,
                       doc="Location of (YAML) configuration file with column mapping",
                       default=_data_file_name("apdb-afw-map.yaml"))
    prefix = Field(dtype=str,
                   doc="Prefix to add to table names and index names",
                   default="")
    explain = Field(dtype=bool,
                    doc="If True then run EXPLAIN SQL command on each executed query",
                    default=False)
    timer = Field(dtype=bool,
                  doc="If True then print/log timing information",
                  default=False)
    diaobject_index_hint = Field(dtype=str,
                                 doc="Name of the index to use with Oracle index hint",
                                 default=None,
                                 optional=True)
    dynamic_sampling_hint = Field(dtype=int,
                                  doc="If non-zero then use dynamic_sampling hint",
                                  default=0)
    cardinality_hint = Field(dtype=int,
                             doc="If non-zero then use cardinality hint",
                             default=0)

    def validate(self):
        super().validate()
        if self.isolation_level == "READ_COMMITTED" and self.db_url.startswith("sqlite"):
            raise ValueError("Attempting to run Apdb with SQLITE and isolation level 'READ_COMMITTED'. "
                             "Use 'READ_UNCOMMITTED' instead.")
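
# A minimal configuration sketch; the SQLite URL is a hypothetical example,
# and validate() above requires READ_UNCOMMITTED with SQLite:
#
#     config = ApdbConfig()
#     config.db_url = "sqlite:///apdb.db"
#     config.isolation_level = "READ_UNCOMMITTED"
#     config.dia_object_index = "last_object_table"
#     config.validate()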


class Apdb(object):
    """Interface to L1 database, hides all database access details.

    The implementation is configured via standard ``pex_config`` mechanism
    using `ApdbConfig` configuration class. For examples of different
    configurations check the config/ folder.

    Parameters
    ----------
    config : `ApdbConfig`
        Configuration instance.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard
        APDB schema.
    """

    def __init__(self, config, afw_schemas=None):

        self.config = config

        # logging.getLogger('sqlalchemy').setLevel(logging.INFO)
        _LOG.debug("APDB Configuration:")
        _LOG.debug("    dia_object_index: %s", self.config.dia_object_index)
        _LOG.debug("    dia_object_nightly: %s", self.config.dia_object_nightly)
        _LOG.debug("    read_sources_months: %s", self.config.read_sources_months)
        _LOG.debug("    read_forced_sources_months: %s", self.config.read_forced_sources_months)
        _LOG.debug("    dia_object_columns: %s", self.config.dia_object_columns)
        _LOG.debug("    object_last_replace: %s", self.config.object_last_replace)
        _LOG.debug("    schema_file: %s", self.config.schema_file)
        _LOG.debug("    extra_schema_file: %s", self.config.extra_schema_file)
        _LOG.debug("    column_map: %s", self.config.column_map)
        _LOG.debug("    schema prefix: %s", self.config.prefix)

        # engine is reused between multiple processes, make sure that we don't
        # share connections by disabling pool (by using NullPool class)
        kw = dict(echo=self.config.sql_echo)
        conn_args = dict()
        if not self.config.connection_pool:
            kw.update(poolclass=NullPool)
        if self.config.isolation_level is not None:
            kw.update(isolation_level=self.config.isolation_level)
        if self.config.connection_timeout is not None:
            if self.config.db_url.startswith("sqlite"):
                conn_args.update(timeout=self.config.connection_timeout)
            elif self.config.db_url.startswith(("postgresql", "mysql")):
                conn_args.update(connect_timeout=self.config.connection_timeout)
        kw.update(connect_args=conn_args)
        self._engine = sqlalchemy.create_engine(self.config.db_url, **kw)

        self._schema = apdbSchema.ApdbSchema(engine=self._engine,
                                             dia_object_index=self.config.dia_object_index,
                                             dia_object_nightly=self.config.dia_object_nightly,
                                             schema_file=self.config.schema_file,
                                             extra_schema_file=self.config.extra_schema_file,
                                             column_map=self.config.column_map,
                                             afw_schemas=afw_schemas,
                                             prefix=self.config.prefix)
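
    # A minimal end-to-end sketch (hypothetical URL); ``makeSchema`` must be
    # called once before the instance is used against an empty database:
    #
    #     config = ApdbConfig()
    #     config.db_url = "sqlite:///apdb.db"
    #     config.isolation_level = "READ_UNCOMMITTED"
    #     apdb = Apdb(config)
    #     apdb.makeSchema()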

    def lastVisit(self):
        """Returns last visit information or `None` if visits table is empty.

        Visits table is used by ap_proto to track visit information, it is
        not a part of the regular APDB schema.

        Returns
        -------
        visit : `Visit` or `None`
            Last stored visit info or `None` if there was nothing stored yet.
        """

        with self._engine.begin() as conn:

            stmnt = sql.select([sql.func.max(self._schema.visits.c.visitId),
                                sql.func.max(self._schema.visits.c.visitTime)])
            res = conn.execute(stmnt)
            row = res.fetchone()
            if row[0] is None:
                return None

            visitId = row[0]
            visitTime = row[1]
            _LOG.info("lastVisit: visitId: %s visitTime: %s (%s)", visitId,
                      visitTime, type(visitTime))

            # get max IDs from corresponding tables
            stmnt = sql.select([sql.func.max(self._schema.objects.c.diaObjectId)])
            lastObjectId = conn.scalar(stmnt)
            stmnt = sql.select([sql.func.max(self._schema.sources.c.diaSourceId)])
            lastSourceId = conn.scalar(stmnt)

            return Visit(visitId=visitId, visitTime=visitTime,
                         lastObjectId=lastObjectId, lastSourceId=lastSourceId)

    def saveVisit(self, visitId, visitTime):
        """Store visit information.

        This method is only used by ``ap_proto`` script from ``l1dbproto``
        and is not intended for production pipelines.

        Parameters
        ----------
        visitId : `int`
            Visit identifier
        visitTime : `datetime.datetime`
            Visit timestamp.
        """

        ins = self._schema.visits.insert().values(visitId=visitId,
                                                  visitTime=visitTime)
        self._engine.execute(ins)

    def tableRowCount(self):
        """Returns dictionary with the table names and row counts.

        Used by ``ap_proto`` to keep track of the size of the database tables.
        Depending on database technology this could be an expensive operation.

        Returns
        -------
        row_counts : `dict`
            Dict where key is a table name and value is a row count.
        """
        res = {}
        tables = [self._schema.objects, self._schema.sources, self._schema.forcedSources]
        if self.config.dia_object_index == 'last_object_table':
            tables.append(self._schema.objects_last)
        for table in tables:
            stmt = sql.select([func.count()]).select_from(table)
            count = self._engine.scalar(stmt)
            res[table.name] = count

        return res
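
    # A usage sketch; keys are the actual table names, values are row counts
    # (the output shown is hypothetical):
    #
    #     counts = apdb.tableRowCount()
    #     # {'DiaObject': 1000, 'DiaSource': 5000, 'DiaForcedSource': 8000}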

    def getDiaObjects(self, pixel_ranges, return_pandas=False):
        """Returns catalog of DiaObject instances from given region.

        Objects are searched based on pixelization index and region is
        determined by the set of indices. There is no assumption on a
        particular type of index, client is responsible for consistency
        when calculating pixelization indices.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
        the schema of APDB table. Re-mapping of the column names is done for
        some columns (based on column map passed to constructor) but types
        or units are not changed.

        Returns only the last version of each DiaObject.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `pandas.DataFrame`
            Catalog containing DiaObject records.
        """

        # decide what columns we need
        if self.config.dia_object_index == 'last_object_table':
            table = self._schema.objects_last
        else:
            table = self._schema.objects
        if not self.config.dia_object_columns:
            query = table.select()
        else:
            columns = [table.c[col] for col in self.config.dia_object_columns]
            query = sql.select(columns)

        if self.config.diaobject_index_hint:
            val = self.config.diaobject_index_hint
            query = query.with_hint(table, 'index_rs_asc(%(name)s "{}")'.format(val))
        if self.config.dynamic_sampling_hint > 0:
            val = self.config.dynamic_sampling_hint
            query = query.with_hint(table, 'dynamic_sampling(%(name)s {})'.format(val))
        if self.config.cardinality_hint > 0:
            val = self.config.cardinality_hint
            query = query.with_hint(table, 'FIRST_ROWS_1 cardinality(%(name)s {})'.format(val))

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # select latest version of objects
        if self.config.dia_object_index != 'last_object_table':
            query = query.where(table.c.validityEnd == None)  # noqa: E711

        _LOG.debug("query: %s", query)

        if self.config.explain:
            # run the same query with explain
            self._explain(query, self._engine)

        # execute select
        with Timer('DiaObject select', self.config.timer):
            with self._engine.begin() as conn:
                if return_pandas:
                    objects = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    objects = self._convertResult(res, "DiaObject")
        _LOG.debug("found %s DiaObjects", len(objects))
        return objects
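
    # A usage sketch, assuming pixelization index ranges computed by the
    # caller (the values are hypothetical):
    #
    #     ranges = [(10000, 10010), (10020, 10021)]
    #     objects = apdb.getDiaObjects(ranges, return_pandas=True)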

    def getDiaSourcesInRegion(self, pixel_ranges, dt, return_pandas=False):
        """Returns catalog of DiaSource instances from given region.

        Sources are searched based on pixelization index and region is
        determined by the set of indices. There is no assumption on a
        particular type of index, client is responsible for consistency
        when calculating pixelization indices.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
        the schema of APDB table. Re-mapping of the column names is done for
        some columns (based on column map passed to constructor) but types or
        units are not changed.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        table = self._schema.sources
        query = table.select()

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # execute select
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                if return_pandas:
                    sources = pandas.read_sql_query(query, conn)
                else:
                    res = conn.execute(query)
                    sources = self._convertResult(res, "DiaSource")
        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaSource instances given set of DiaObject IDs.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
        the schema of APDB table. Re-mapping of the column names is done for
        some columns (based on column map passed to constructor) but types or
        units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0 or
            when ``object_ids`` is empty.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.sources
        sources = None
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):
                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaSource", sources)

        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaForcedSources(self, object_ids, dt, return_pandas=False):
        """Returns catalog of DiaForcedSource instances matching given
        DiaObjects.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
        the schema of L1 database table. Re-mapping of the column names may
        be done for some columns (based on column map passed to constructor)
        but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit
        return_pandas : `bool`
            Return a `pandas.DataFrame` instead of
            `lsst.afw.table.SourceCatalog`.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0
            or when ``object_ids`` is empty.
        """

        if self.config.read_forced_sources_months == 0:
            _LOG.info("Skip DiaForcedSources fetching")
            return None

        if not object_ids:
            _LOG.info("Skip DiaForcedSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.forcedSources
        sources = None

        with Timer('DiaForcedSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):

                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    if return_pandas:
                        df = pandas.read_sql_query(sql.text(query), conn)
                        if sources is None:
                            sources = df
                        else:
                            sources = sources.append(df)
                    else:
                        res = conn.execute(sql.text(query))
                        sources = self._convertResult(res, "DiaForcedSource", sources)

        _LOG.debug("found %s DiaForcedSources", len(sources))
        return sources

    def storeDiaObjects(self, objs, dt):
        """Store catalog of DiaObjects from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - some column names are re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema
          - this method knows how to fill interval-related columns
            (validityStart, validityEnd) so they do not need to appear in
            afw schema

        Parameters
        ----------
        objs : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog with DiaObject records
        dt : `datetime.datetime`
            Time of the visit
        """

        if isinstance(objs, pandas.DataFrame):
            ids = sorted(objs['diaObjectId'])
        else:
            ids = sorted([obj['id'] for obj in objs])
        _LOG.debug("first object ID: %d", ids[0])

        # NOTE: workaround for sqlite, need this here to avoid
        # "database is locked" error.
        table = self._schema.objects

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            ids = ",".join(str(id) for id in ids)

            if self.config.dia_object_index == 'last_object_table':

                # insert and replace all records in LAST table, mysql and postgres have
                # non-standard features (handled in _storeObjectsAfw)
                table = self._schema.objects_last
                do_replace = self.config.object_last_replace
                # If the input data is of type Pandas, we drop the previous
                # objects regardless of the do_replace setting due to how
                # Pandas inserts objects.
                if not do_replace or isinstance(objs, pandas.DataFrame):
                    query = 'DELETE FROM "' + table.name + '" '
                    query += 'WHERE "diaObjectId" IN (' + ids + ') '

                    if self.config.explain:
                        # run the same query with explain
                        self._explain(query, conn)

                    with Timer(table.name + ' delete', self.config.timer):
                        res = conn.execute(sql.text(query))
                    _LOG.debug("deleted %s objects", res.rowcount)

                extra_columns = dict(lastNonForcedSource=dt)
                if isinstance(objs, pandas.DataFrame):
                    with Timer("DiaObjectLast insert", self.config.timer):
                        for col, data in extra_columns.items():
                            objs[col] = data
                        objs.to_sql("DiaObjectLast", conn, if_exists='append',
                                    index=False)
                else:
                    self._storeObjectsAfw(objs, conn, table, "DiaObjectLast",
                                          replace=do_replace,
                                          extra_columns=extra_columns)

            else:

                # truncate existing validity intervals
                table = self._schema.objects
                query = 'UPDATE "' + table.name + '" '
                query += "SET \"validityEnd\" = '" + str(dt) + "' "
                query += 'WHERE "diaObjectId" IN (' + ids + ') '
                query += 'AND "validityEnd" IS NULL'

                # _LOG.debug("query: %s", query)

                if self.config.explain:
                    # run the same query with explain
                    self._explain(query, conn)

                with Timer(table.name + ' truncate', self.config.timer):
                    res = conn.execute(sql.text(query))
                _LOG.debug("truncated %s intervals", res.rowcount)

            # insert new versions
            if self.config.dia_object_nightly:
                table = self._schema.objects_nightly
            else:
                table = self._schema.objects
            extra_columns = dict(lastNonForcedSource=dt, validityStart=dt,
                                 validityEnd=None)
            if isinstance(objs, pandas.DataFrame):
                with Timer("DiaObject insert", self.config.timer):
                    for col, data in extra_columns.items():
                        objs[col] = data
                    objs.to_sql("DiaObject", conn, if_exists='append',
                                index=False)
            else:
                self._storeObjectsAfw(objs, conn, table, "DiaObject",
                                      extra_columns=extra_columns)
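
    # A sketch of the per-visit store sequence, assuming ``objects`` and
    # ``sources`` are pandas DataFrames with APDB-compatible columns and
    # ``visit_time`` is a ``datetime.datetime``:
    #
    #     apdb.storeDiaObjects(objects, visit_time)
    #     apdb.storeDiaSources(sources)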

    def storeDiaSources(self, sources):
        """Store catalog of DIASources from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
        compatible with the schema of L1 database table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaSource insert", self.config.timer):
                    sources.to_sql("DiaSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.sources
                self._storeObjectsAfw(sources, conn, table, "DiaSource")

    def storeDiaForcedSources(self, sources):
        """Store a set of DIAForcedSources from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
        compatible with the schema of L1 database table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
            Catalog containing DiaForcedSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            if isinstance(sources, pandas.DataFrame):
                with Timer("DiaForcedSource insert", self.config.timer):
                    sources.to_sql("DiaForcedSource", conn, if_exists='append',
                                   index=False)
            else:
                table = self._schema.forcedSources
                self._storeObjectsAfw(sources, conn, table, "DiaForcedSource")

    def countUnassociatedObjects(self):
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.
        """
        # Retrieve the DiaObject table.
        table = self._schema.objects

        # Construct the sql statement.
        stmt = sql.select([func.count()]).select_from(table).where(table.c.nDiaSources == 1)
        stmt = stmt.where(table.c.validityEnd == None)  # noqa: E711

        # Return the count.
        count = self._engine.scalar(stmt)

        return count

    def isVisitProcessed(self, visitInfo):
        """Test whether data from an image has been loaded into the database.

        Used as part of ap_verify metrics.

        Parameters
        ----------
        visitInfo : `lsst.afw.image.VisitInfo`
            The metadata for the image of interest.

        Returns
        -------
        isProcessed : `bool`
            `True` if the data are present, `False` otherwise.
        """
        id = visitInfo.getExposureId()
        table = self._schema.sources
        idField = table.c.ccdVisitId

        # Hopefully faster than SELECT DISTINCT
        query = sql.select([idField]).select_from(table) \
            .where(idField == id).limit(1)

        return self._engine.scalar(query) is not None

    def dailyJob(self):
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily cleanup is determined by
        configuration/schema.
        """

        # move data from DiaObjectNightly into DiaObject
        if self.config.dia_object_nightly:
            with _ansi_session(self._engine) as conn:
                query = 'INSERT INTO "' + self._schema.objects.name + '" '
                query += 'SELECT * FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly copy', self.config.timer):
                    conn.execute(sql.text(query))

                query = 'DELETE FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly delete', self.config.timer):
                    conn.execute(sql.text(query))

        if self._engine.name == 'postgresql':

            # do VACUUM on all tables
            _LOG.info("Running VACUUM on all tables")
            connection = self._engine.raw_connection()
            ISOLATION_LEVEL_AUTOCOMMIT = 0
            connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            cursor = connection.cursor()
            cursor.execute("VACUUM ANALYSE")

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            Name of the MySQL engine to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace.
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """
        self._schema.makeSchema(drop=drop, mysql_engine=mysql_engine,
                                oracle_tablespace=oracle_tablespace,
                                oracle_iot=oracle_iot)
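
    # First-time setup sketch; ``drop=True`` would re-create the tables from
    # scratch, discarding any existing data:
    #
    #     apdb.makeSchema(drop=False)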

    def _explain(self, query, conn):
        """Run the query with explain
        """

        _LOG.info("explain for query: %s...", query[:64])

        if conn.engine.name == 'mysql':
            query = "EXPLAIN EXTENDED " + query
        else:
            query = "EXPLAIN " + query

        res = conn.execute(sql.text(query))
        if res.returns_rows:
            _LOG.info("explain: %s", res.keys())
            for row in res:
                _LOG.info("explain: %s", row)
        else:
            _LOG.info("EXPLAIN returned nothing")

    def _storeObjectsAfw(self, objects, conn, table, schema_table_name,
                         replace=False, extra_columns=None):
        """Generic store method.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `bool`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteValue(v):
            """Quote and escape values"""
            if v is None:
                v = "NULL"
            elif isinstance(v, datetime):
                v = "'" + str(v) + "'"
            elif isinstance(v, str):
                # we don't expect nasty stuff in strings
                v = "'" + v + "'"
            elif isinstance(v, geom.Angle):
                v = v.asDegrees()
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            else:
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            return v

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted.
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        if conn.engine.name == "oracle":
            return self._storeObjectsAfwOracle(objects, conn, table,
                                               schema_table_name, replace,
                                               extra_columns)

        schema = objects.getSchema()
        # use extra columns if specified
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]

        column_map = self._schema.getAfwColumns(schema_table_name)
        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields if field in column_map]

        if replace and conn.engine.name in ('mysql', 'sqlite'):
            query = 'REPLACE INTO '
        else:
            query = 'INSERT INTO '
        qfields = [quoteId(field) for field in fields + extra_fields]
        query += quoteId(table.name) + ' (' + ','.join(qfields) + ') ' + 'VALUES '

        values = []
        for rec in objects:
            row = []
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and \
                        np.isfinite(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                row.append(quoteValue(value))
            for field in extra_fields:
                row.append(quoteValue(extra_columns[field]))
            values.append('(' + ','.join(row) + ')')

        if self.config.explain:
            # run the same query with explain, only give it one row of data
            self._explain(query + values[0], conn)

        query += ','.join(values)

        if replace and conn.engine.name == 'postgresql':
            # This assumes that "replace" can only be true for the DiaObjectLast table
            pks = ('pixelId', 'diaObjectId')
            query += " ON CONFLICT (\"{}\", \"{}\") DO UPDATE SET ".format(*pks)
            fields = [column_map[field].name for field in afw_fields if field in column_map]
            fields = ['"{0}" = EXCLUDED."{0}"'.format(field)
                      for field in fields if field not in pks]
            query += ', '.join(fields)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query))
        _LOG.debug("inserted %s rows", res.rowcount)

    def _storeObjectsAfwOracle(self, objects, conn, table, schema_table_name,
                               replace=False, extra_columns=None):
        """Store method for Oracle.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `bool`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted (Oracle backend needs them unquoted).
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        schema = objects.getSchema()

        # extra columns are always used as overrides
        extra_fields = list((extra_columns or {}).keys())

        afw_fields = [field.getName() for key, field in schema
                      if field.getName() not in extra_fields]
        # _LOG.info("afw_fields: %s", afw_fields)

        column_map = self._schema.getAfwColumns(schema_table_name)
        # _LOG.info("column_map: %s", column_map)

        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields
                  if field in column_map]
        # _LOG.info("fields: %s", fields)

        qfields = [quoteId(field) for field in fields + extra_fields]

        if not replace:
            vals = [":col{}".format(i) for i in range(len(fields))]
            vals += [":extcol{}".format(i) for i in range(len(extra_fields))]
            query = 'INSERT INTO ' + quoteId(table.name)
            query += ' (' + ','.join(qfields) + ') VALUES'
            query += ' (' + ','.join(vals) + ')'
        else:
            qvals = [":col{} {}".format(i, quoteId(field)) for i, field in enumerate(fields)]
            qvals += [":extcol{} {}".format(i, quoteId(field)) for i, field in enumerate(extra_fields)]
            pks = ('pixelId', 'diaObjectId')
            onexpr = ["SRC.{col} = DST.{col}".format(col=quoteId(col)) for col in pks]
            setexpr = ["DST.{col} = SRC.{col}".format(col=quoteId(col))
                       for col in fields + extra_fields if col not in pks]
            vals = ["SRC.{col}".format(col=quoteId(col)) for col in fields + extra_fields]
            query = "MERGE INTO {} DST ".format(quoteId(table.name))
            query += "USING (SELECT {} FROM DUAL) SRC ".format(", ".join(qvals))
            query += "ON ({}) ".format(" AND ".join(onexpr))
            query += "WHEN MATCHED THEN UPDATE SET {} ".format(" ,".join(setexpr))
            query += "WHEN NOT MATCHED THEN INSERT "
            query += "({}) VALUES ({})".format(','.join(qfields), ','.join(vals))
        # _LOG.info("query: %s", query)

        values = []
        for rec in objects:
            row = {}
            col = 0
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and not np.isnan(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                elif isinstance(value, geom.Angle):
                    value = str(value.asDegrees())
                elif not np.isfinite(value):
                    value = None
                row["col{}".format(col)] = value
                col += 1
            for i, field in enumerate(extra_fields):
                row["extcol{}".format(i)] = extra_columns[field]
            values.append(row)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query), values)
        _LOG.debug("inserted %s rows", res.rowcount)

    def _convertResult(self, res, table_name, catalog=None):
        """Convert result set into output catalog.

        Parameters
        ----------
        res : `sqlalchemy.ResultProxy`
            SQLAlchemy result set returned by query.
        table_name : `str`
            Name of the table.
        catalog : `lsst.afw.table.BaseCatalog`
            If not None then extend existing catalog

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            If ``catalog`` is None then new instance is returned, otherwise
            ``catalog`` is updated and returned.
        """
        # make catalog schema
        columns = res.keys()
        schema, col_map = self._schema.getAfwSchema(table_name, columns)
        if catalog is None:
            _LOG.debug("_convertResult: schema: %s", schema)
            _LOG.debug("_convertResult: col_map: %s", col_map)
            catalog = afwTable.SourceCatalog(schema)

        # fill catalog
        for row in res:
            record = catalog.addNew()
            for col, value in row.items():
                # some columns may exist in database but not included in afw schema
                col = col_map.get(col)
                if col is not None:
                    if isinstance(value, datetime):
                        # convert datetime to number of seconds
                        value = int((value - datetime.utcfromtimestamp(0)).total_seconds())
                    elif col.getTypeString() == 'Angle' and value is not None:
                        value = value * geom.degrees
                    if value is not None:
                        record.set(col, value)

        return catalog
1146 return catalog