Coverage for python/lsst/dax/apdb/apdbCassandra.py: 17%
525 statements
coverage.py v7.4.1, created at 2024-02-03 10:51 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbCassandraConfig", "ApdbCassandra"]
26import logging
27import uuid
28from collections.abc import Iterable, Iterator, Mapping, Set
29from typing import TYPE_CHECKING, Any, cast
31import numpy as np
32import pandas
34# If cassandra-driver is not there the module can still be imported
35# but ApdbCassandra cannot be instantiated.
36try:
37 import cassandra
38 import cassandra.query
39 from cassandra.auth import AuthProvider, PlainTextAuthProvider
40 from cassandra.cluster import EXEC_PROFILE_DEFAULT, Cluster, ExecutionProfile
41 from cassandra.policies import AddressTranslator, RoundRobinPolicy, WhiteListRoundRobinPolicy
43 CASSANDRA_IMPORTED = True
44except ImportError:
45 CASSANDRA_IMPORTED = False
47import felis.types
48import lsst.daf.base as dafBase
49from felis.simple import Table
50from lsst import sphgeom
51from lsst.pex.config import ChoiceField, Field, ListField
52from lsst.utils.db_auth import DbAuth, DbAuthNotFoundError
53from lsst.utils.iteration import chunk_iterable
55from .apdb import Apdb, ApdbConfig, ApdbInsertId, ApdbTableData
56from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables
57from .apdbMetadataCassandra import ApdbMetadataCassandra
58from .apdbSchema import ApdbTables
59from .cassandra_utils import (
60 ApdbCassandraTableData,
61 PreparedStatementCache,
62 literal,
63 pandas_dataframe_factory,
64 quote_id,
65 raw_data_factory,
66 select_concurrent,
67)
68from .pixelization import Pixelization
69from .timer import Timer
70from .versionTuple import IncompatibleVersionError, VersionTuple
72if TYPE_CHECKING:
73 from .apdbMetadata import ApdbMetadata
75_LOG = logging.getLogger(__name__)
77VERSION = VersionTuple(0, 1, 0)
78"""Version for the code defined in this module. This needs to be updated
79(following compatibility rules) when schema produced by this code changes.
80"""
82# Copied from daf_butler.
83DB_AUTH_ENVVAR = "LSST_DB_AUTH"
84"""Default name of the environmental variable that will be used to locate DB
85credentials configuration file. """
87DB_AUTH_PATH = "~/.lsst/db-auth.yaml"
88"""Default path at which it is expected that DB credentials are found."""
91class CassandraMissingError(Exception):
92 def __init__(self) -> None:
93 super().__init__("cassandra-driver module cannot be imported")
96class ApdbCassandraConfig(ApdbConfig):
97 """Configuration class for Cassandra-based APDB implementation."""
99 contact_points = ListField[str](
100 doc="The list of contact points to try connecting for cluster discovery.", default=["127.0.0.1"]
101 )
102 private_ips = ListField[str](doc="List of internal IP addresses for contact_points.", default=[])
103 port = Field[int](doc="Port number to connect to.", default=9042)
104 keyspace = Field[str](doc="Default keyspace for operations.", default="apdb")
105 username = Field[str](
106 doc=f"Cassandra user name, if empty then {DB_AUTH_PATH} has to provide it with password.",
107 default="",
108 )
109 read_consistency = Field[str](
110 doc="Name for consistency level of read operations, default: QUORUM, can be ONE.", default="QUORUM"
111 )
112 write_consistency = Field[str](
113 doc="Name for consistency level of write operations, default: QUORUM, can be ONE.", default="QUORUM"
114 )
115 read_timeout = Field[float](doc="Timeout in seconds for read operations.", default=120.0)
116 write_timeout = Field[float](doc="Timeout in seconds for write operations.", default=10.0)
117 remove_timeout = Field[float](doc="Timeout in seconds for remove operations.", default=600.0)
118 read_concurrency = Field[int](doc="Concurrency level for read operations.", default=500)
119 protocol_version = Field[int](
120 doc="Cassandra protocol version to use, default is V4",
121 default=cassandra.ProtocolVersion.V4 if CASSANDRA_IMPORTED else 0,
122 )
123 dia_object_columns = ListField[str](
124 doc="List of columns to read from DiaObject[Last], by default read all columns", default=[]
125 )
126 prefix = Field[str](doc="Prefix to add to table names", default="")
127 part_pixelization = ChoiceField[str](
128 allowed=dict(htm="HTM pixelization", q3c="Q3C pixelization", mq3c="MQ3C pixelization"),
129 doc="Pixelization used for partitioning index.",
130 default="mq3c",
131 )
132 part_pix_level = Field[int](doc="Pixelization level used for partitioning index.", default=10)
133 part_pix_max_ranges = Field[int](doc="Max number of ranges in pixelization envelope", default=64)
134 ra_dec_columns = ListField[str](default=["ra", "dec"], doc="Names of ra/dec columns in DiaObject table")
135 timer = Field[bool](doc="If True then print/log timing information", default=False)
136 time_partition_tables = Field[bool](
137 doc="Use per-partition tables for sources instead of partitioning by time", default=True
138 )
139 time_partition_days = Field[int](
140 doc=(
141 "Time partitioning granularity in days, this value must not be changed after database is "
142 "initialized"
143 ),
144 default=30,
145 )
146 time_partition_start = Field[str](
147 doc=(
148 "Starting time for per-partition tables, in yyyy-mm-ddThh:mm:ss format, in TAI. "
149 "This is used only when time_partition_tables is True."
150 ),
151 default="2018-12-01T00:00:00",
152 )
153 time_partition_end = Field[str](
154 doc=(
155 "Ending time for per-partition tables, in yyyy-mm-ddThh:mm:ss format, in TAI. "
156 "This is used only when time_partition_tables is True."
157 ),
158 default="2030-01-01T00:00:00",
159 )
160 query_per_time_part = Field[bool](
161 default=False,
162 doc=(
163 "If True then build separate query for each time partition, otherwise build one single query. "
164 "This is only used when time_partition_tables is False in schema config."
165 ),
166 )
167 query_per_spatial_part = Field[bool](
168 default=False,
169 doc="If True then build one query per spatial partition, otherwise build single query.",
170 )
171 use_insert_id_skips_diaobjects = Field[bool](
172 default=False,
173 doc=(
174 "If True then do not store DiaObjects when use_insert_id is True "
175 "(DiaObjectsInsertId has the same data)."
176 ),
177 )
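# Editor's note: the following is an illustrative sketch only, not part of
# this module. It shows how the pex_config fields defined above are typically
# set; the host names and keyspace are made-up values.
#
#   config = ApdbCassandraConfig()
#   config.contact_points = ["cassandra-1.example.org", "cassandra-2.example.org"]
#   config.keyspace = "apdb"
#   config.part_pixelization = "mq3c"
#   config.time_partition_tables = True
#   apdb = ApdbCassandra(config)  # connects to the cluster; needs cassandra-driver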
180if CASSANDRA_IMPORTED:
182 class _AddressTranslator(AddressTranslator):
183 """Translate internal IP address to external.
185 Only used for docker-based setup, not a viable long-term solution.
186 """
188 def __init__(self, public_ips: list[str], private_ips: list[str]):
189 self._map = dict((k, v) for k, v in zip(private_ips, public_ips))
191 def translate(self, private_ip: str) -> str:
192 return self._map.get(private_ip, private_ip)
195def _quote_column(name: str) -> str:
196 """Quote column name"""
197 if name.islower():
198 return name
199 else:
200 return f'"{name}"'
203class ApdbCassandra(Apdb):
204 """Implementation of APDB database on to of Apache Cassandra.
206 The implementation is configured via standard ``pex_config`` mechanism
207 using `ApdbCassandraConfig` configuration class. For an example of
208 different configurations, check the config/ folder.
210 Parameters
211 ----------
212 config : `ApdbCassandraConfig`
213 Configuration object.
214 """
216 metadataSchemaVersionKey = "version:schema"
217 """Name of the metadata key to store schema version number."""
219 metadataCodeVersionKey = "version:ApdbCassandra"
220 """Name of the metadata key to store code version number."""
222 partition_zero_epoch = dafBase.DateTime(1970, 1, 1, 0, 0, 0, dafBase.DateTime.TAI)
223 """Start time for partition 0, this should never be changed."""
225 def __init__(self, config: ApdbCassandraConfig):
226 if not CASSANDRA_IMPORTED:
227 raise CassandraMissingError()
229 config.validate()
230 self.config = config
232 _LOG.debug("ApdbCassandra Configuration:")
233 for key, value in self.config.items():
234 _LOG.debug(" %s: %s", key, value)
236 self._pixelization = Pixelization(
237 config.part_pixelization, config.part_pix_level, config.part_pix_max_ranges
238 )
240 addressTranslator: AddressTranslator | None = None
241 if config.private_ips:
242 addressTranslator = _AddressTranslator(list(config.contact_points), list(config.private_ips))
244 self._keyspace = config.keyspace
246 self._cluster = Cluster(
247 execution_profiles=self._makeProfiles(config),
248 contact_points=self.config.contact_points,
249 port=self.config.port,
250 address_translator=addressTranslator,
251 protocol_version=self.config.protocol_version,
252 auth_provider=self._make_auth_provider(config),
253 )
254 self._session = self._cluster.connect()
255 # Disable result paging
256 self._session.default_fetch_size = None
258 self._schema = ApdbCassandraSchema(
259 session=self._session,
260 keyspace=self._keyspace,
261 schema_file=self.config.schema_file,
262 schema_name=self.config.schema_name,
263 prefix=self.config.prefix,
264 time_partition_tables=self.config.time_partition_tables,
265 use_insert_id=self.config.use_insert_id,
266 )
267 self._partition_zero_epoch_mjd = self.partition_zero_epoch.get(system=dafBase.DateTime.MJD)
269 self._metadata: ApdbMetadataCassandra | None = None
270 if not self._schema.empty():
271 self._metadata = ApdbMetadataCassandra(self._session, self._schema, self.config)
272 self._versionCheck(self._metadata)
274 # Cache for prepared statements
275 self._preparer = PreparedStatementCache(self._session)
277 def __del__(self) -> None:
278 if hasattr(self, "_cluster"):
279 self._cluster.shutdown()
281 def _make_auth_provider(self, config: ApdbCassandraConfig) -> AuthProvider | None:
282 """Make Cassandra authentication provider instance."""
283 try:
284 dbauth = DbAuth(DB_AUTH_PATH, DB_AUTH_ENVVAR)
285 except DbAuthNotFoundError:
286 # Credentials file doesn't exist, use anonymous login.
287 return None
289 empty_username = True
290 # Try every contact point in turn.
291 for hostname in config.contact_points:
292 try:
293 username, password = dbauth.getAuth(
294 "cassandra", config.username, hostname, config.port, config.keyspace
295 )
296 if not username:
297 # Password without user name, try next hostname, but give
298 # warning later if no better match is found.
299 empty_username = True
300 else:
301 return PlainTextAuthProvider(username=username, password=password)
302 except DbAuthNotFoundError:
303 pass
305 if empty_username:
306 _LOG.warning(
307 f"Credentials file ({DB_AUTH_PATH} or ${DB_AUTH_ENVVAR}) provided password but not "
308 f"user name, anonymous Cassandra logon will be attempted."
309 )
311 return None
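# Editor's note: illustrative only, not part of this module. Assuming the
# db-auth.yaml format used by lsst.utils.db_auth (a YAML list of entries with
# url/username/password keys), a matching Cassandra entry might look roughly
# like the following; host, user and password are made up:
#
#   - url: cassandra://apdb_user@cassandra-1.example.org:9042/apdb
#     password: not-a-real-password
#
# The exact matching rules are those implemented by DbAuth.getAuth().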
313 def _versionCheck(self, metadata: ApdbMetadataCassandra) -> None:
314 """Check schema version compatibility."""
316 def _get_version(key: str, default: VersionTuple) -> VersionTuple:
317 """Retrieve version number from given metadata key."""
318 if metadata.table_exists():
319 version_str = metadata.get(key)
320 if version_str is None:
321 # Should not happen with existing metadata table.
322 raise RuntimeError(f"Version key {key!r} does not exist in metadata table.")
323 return VersionTuple.fromString(version_str)
324 return default
326 # For old databases where metadata table does not exist we assume that
327 # version of both code and schema is 0.1.0.
328 initial_version = VersionTuple(0, 1, 0)
329 db_schema_version = _get_version(self.metadataSchemaVersionKey, initial_version)
330 db_code_version = _get_version(self.metadataCodeVersionKey, initial_version)
332 # For now there is no way to make read-only APDB instances, assume that
333 # any access can do updates.
334 if not self._schema.schemaVersion().checkCompatibility(db_schema_version, True):
335 raise IncompatibleVersionError(
336 f"Configured schema version {self._schema.schemaVersion()} "
337 f"is not compatible with database version {db_schema_version}"
338 )
339 if not self.apdbImplementationVersion().checkCompatibility(db_code_version, True):
340 raise IncompatibleVersionError(
341 f"Current code version {self.apdbImplementationVersion()} "
342 f"is not compatible with database version {db_code_version}"
343 )
345 @classmethod
346 def apdbImplementationVersion(cls) -> VersionTuple:
347 # Docstring inherited from base class.
348 return VERSION
350 def apdbSchemaVersion(self) -> VersionTuple:
351 # Docstring inherited from base class.
352 return self._schema.schemaVersion()
354 def tableDef(self, table: ApdbTables) -> Table | None:
355 # docstring is inherited from a base class
356 return self._schema.tableSchemas.get(table)
358 def makeSchema(self, drop: bool = False) -> None:
359 # docstring is inherited from a base class
361 if self.config.time_partition_tables:
362 time_partition_start = dafBase.DateTime(self.config.time_partition_start, dafBase.DateTime.TAI)
363 time_partition_end = dafBase.DateTime(self.config.time_partition_end, dafBase.DateTime.TAI)
364 part_range = (
365 self._time_partition(time_partition_start),
366 self._time_partition(time_partition_end) + 1,
367 )
368 self._schema.makeSchema(drop=drop, part_range=part_range)
369 else:
370 self._schema.makeSchema(drop=drop)
372 # Reset metadata after schema initialization.
373 self._metadata = ApdbMetadataCassandra(self._session, self._schema, self.config)
375 # Fill version numbers, but only if they are not defined.
376 if self._metadata.table_exists():
377 if self._metadata.get(self.metadataSchemaVersionKey) is None:
378 self._metadata.set(self.metadataSchemaVersionKey, str(self._schema.schemaVersion()))
379 if self._metadata.get(self.metadataCodeVersionKey) is None:
380 self._metadata.set(self.metadataCodeVersionKey, str(self.apdbImplementationVersion()))
382 def getDiaObjects(self, region: sphgeom.Region) -> pandas.DataFrame:
383 # docstring is inherited from a base class
385 sp_where = self._spatial_where(region)
386 _LOG.debug("getDiaObjects: #partitions: %s", len(sp_where))
388 # We need to exclude extra partitioning columns from result.
389 column_names = self._schema.apdbColumnNames(ApdbTables.DiaObjectLast)
390 what = ",".join(_quote_column(column) for column in column_names)
392 table_name = self._schema.tableName(ApdbTables.DiaObjectLast)
393 query = f'SELECT {what} from "{self._keyspace}"."{table_name}"'
394 statements: list[tuple] = []
395 for where, params in sp_where:
396 full_query = f"{query} WHERE {where}"
397 if params:
398 statement = self._preparer.prepare(full_query)
399 else:
400 # If there are no params then it is likely that query has a
401 # bunch of literals rendered already, no point trying to
402 # prepare it because it's not reusable.
403 statement = cassandra.query.SimpleStatement(full_query)
404 statements.append((statement, params))
405 _LOG.debug("getDiaObjects: #queries: %s", len(statements))
407 with Timer("DiaObject select", self.config.timer):
408 objects = cast(
409 pandas.DataFrame,
410 select_concurrent(
411 self._session, statements, "read_pandas_multi", self.config.read_concurrency
412 ),
413 )
415 _LOG.debug("found %s DiaObjects", objects.shape[0])
416 return objects
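# Editor's note: illustrative only, not part of this module. With the default
# configuration the statements built above render to queries of roughly this
# shape (column list abbreviated, pixel indices made up):
#
#   SELECT "diaObjectId","ra","dec",... FROM "apdb"."DiaObjectLast"
#       WHERE "apdb_part" IN (12345,12346,12347)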
418 def getDiaSources(
419 self, region: sphgeom.Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
420 ) -> pandas.DataFrame | None:
421 # docstring is inherited from a base class
422 months = self.config.read_sources_months
423 if months == 0:
424 return None
425 mjd_end = visit_time.get(system=dafBase.DateTime.MJD)
426 mjd_start = mjd_end - months * 30
428 return self._getSources(region, object_ids, mjd_start, mjd_end, ApdbTables.DiaSource)
430 def getDiaForcedSources(
431 self, region: sphgeom.Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
432 ) -> pandas.DataFrame | None:
433 # docstring is inherited from a base class
434 months = self.config.read_forced_sources_months
435 if months == 0:
436 return None
437 mjd_end = visit_time.get(system=dafBase.DateTime.MJD)
438 mjd_start = mjd_end - months * 30
440 return self._getSources(region, object_ids, mjd_start, mjd_end, ApdbTables.DiaForcedSource)
442 def containsVisitDetector(self, visit: int, detector: int) -> bool:
443 # docstring is inherited from a base class
444 raise NotImplementedError()
446 def getInsertIds(self) -> list[ApdbInsertId] | None:
447 # docstring is inherited from a base class
448 if not self._schema.has_insert_id:
449 return None
451 # everything goes into a single partition
452 partition = 0
454 table_name = self._schema.tableName(ExtraTables.DiaInsertId)
455 query = f'SELECT insert_time, insert_id FROM "{self._keyspace}"."{table_name}" WHERE partition = ?'
457 result = self._session.execute(
458 self._preparer.prepare(query),
459 (partition,),
460 timeout=self.config.read_timeout,
461 execution_profile="read_tuples",
462 )
463 # order by insert_time
464 rows = sorted(result)
465 return [
466 ApdbInsertId(id=row[1], insert_time=dafBase.DateTime(int(row[0].timestamp() * 1e9)))
467 for row in rows
468 ]
470 def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
471 # docstring is inherited from a base class
472 if not self._schema.has_insert_id:
473 raise ValueError("APDB is not configured for history storage")
475 all_insert_ids = [id.id for id in ids]
476 # There is 64k limit on number of markers in Cassandra CQL
477 for insert_ids in chunk_iterable(all_insert_ids, 20_000):
478 params = ",".join("?" * len(insert_ids))
480 # everything goes into a single partition
481 partition = 0
483 table_name = self._schema.tableName(ExtraTables.DiaInsertId)
484 query = (
485 f'DELETE FROM "{self._keyspace}"."{table_name}" '
486 f"WHERE partition = ? AND insert_id IN ({params})"
487 )
489 self._session.execute(
490 self._preparer.prepare(query),
491 [partition] + list(insert_ids),
492 timeout=self.config.remove_timeout,
493 )
495 # Also remove those insert_ids from Dia*InsertId tables.
496 for table in (
497 ExtraTables.DiaObjectInsertId,
498 ExtraTables.DiaSourceInsertId,
499 ExtraTables.DiaForcedSourceInsertId,
500 ):
501 table_name = self._schema.tableName(table)
502 query = f'DELETE FROM "{self._keyspace}"."{table_name}" WHERE insert_id IN ({params})'
503 self._session.execute(
504 self._preparer.prepare(query),
505 insert_ids,
506 timeout=self.config.remove_timeout,
507 )
509 def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
510 # docstring is inherited from a base class
511 return self._get_history(ExtraTables.DiaObjectInsertId, ids)
513 def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
514 # docstring is inherited from a base class
515 return self._get_history(ExtraTables.DiaSourceInsertId, ids)
517 def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
518 # docstring is inherited from a base class
519 return self._get_history(ExtraTables.DiaForcedSourceInsertId, ids)
521 def getSSObjects(self) -> pandas.DataFrame:
522 # docstring is inherited from a base class
523 tableName = self._schema.tableName(ApdbTables.SSObject)
524 query = f'SELECT * from "{self._keyspace}"."{tableName}"'
526 objects = None
527 with Timer("SSObject select", self.config.timer):
528 result = self._session.execute(query, execution_profile="read_pandas")
529 objects = result._current_rows
531 _LOG.debug("found %s DiaObjects", objects.shape[0])
532 return objects
534 def store(
535 self,
536 visit_time: dafBase.DateTime,
537 objects: pandas.DataFrame,
538 sources: pandas.DataFrame | None = None,
539 forced_sources: pandas.DataFrame | None = None,
540 ) -> None:
541 # docstring is inherited from a base class
543 insert_id: ApdbInsertId | None = None
544 if self._schema.has_insert_id:
545 insert_id = ApdbInsertId.new_insert_id(visit_time)
546 self._storeInsertId(insert_id, visit_time)
548 # fill region partition column for DiaObjects
549 objects = self._add_obj_part(objects)
550 self._storeDiaObjects(objects, visit_time, insert_id)
552 if sources is not None:
553 # copy apdb_part column from DiaObjects to DiaSources
554 sources = self._add_src_part(sources, objects)
555 self._storeDiaSources(ApdbTables.DiaSource, sources, visit_time, insert_id)
556 self._storeDiaSourcesPartitions(sources, visit_time, insert_id)
558 if forced_sources is not None:
559 forced_sources = self._add_fsrc_part(forced_sources, objects)
560 self._storeDiaSources(ApdbTables.DiaForcedSource, forced_sources, visit_time, insert_id)
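# Editor's note: illustrative sketch only, not part of this module. A typical
# ingest call, assuming `objects`, `sources` and `forced` are pandas DataFrames
# that already follow the APDB schema (ra/dec and diaObjectId columns included):
#
#   visit_time = dafBase.DateTime("2024-02-03T10:51:00", dafBase.DateTime.TAI)
#   apdb.store(visit_time, objects, sources=sources, forced_sources=forced)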
562 def storeSSObjects(self, objects: pandas.DataFrame) -> None:
563 # docstring is inherited from a base class
564 self._storeObjectsPandas(objects, ApdbTables.SSObject)
566 def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
567 # docstring is inherited from a base class
569 # To update a record we need to know its exact primary key (including
570 # partition key) so we start by querying for diaSourceId to find the
571 # primary keys.
573 table_name = self._schema.tableName(ExtraTables.DiaSourceToPartition)
574 # split it into 1k IDs per query
575 selects: list[tuple] = []
576 for ids in chunk_iterable(idMap.keys(), 1_000):
577 ids_str = ",".join(str(item) for item in ids)
578 selects.append(
579 (
580 (
581 'SELECT "diaSourceId", "apdb_part", "apdb_time_part", "insert_id" '
582 f'FROM "{self._keyspace}"."{table_name}" WHERE "diaSourceId" IN ({ids_str})'
583 ),
584 {},
585 )
586 )
588 # No need for DataFrame here, read data as tuples.
589 result = cast(
590 list[tuple[int, int, int, uuid.UUID | None]],
591 select_concurrent(self._session, selects, "read_tuples", self.config.read_concurrency),
592 )
594 # Make mapping from source ID to its partition.
595 id2partitions: dict[int, tuple[int, int]] = {}
596 id2insert_id: dict[int, uuid.UUID] = {}
597 for row in result:
598 id2partitions[row[0]] = row[1:3]
599 if row[3] is not None:
600 id2insert_id[row[0]] = row[3]
602 # make sure we know partitions for each ID
603 if set(id2partitions) != set(idMap):
604 missing = ",".join(str(item) for item in set(idMap) - set(id2partitions))
605 raise ValueError(f"Following DiaSource IDs do not exist in the database: {missing}")
607 # Reassign in standard tables
608 queries = cassandra.query.BatchStatement()
609 table_name = self._schema.tableName(ApdbTables.DiaSource)
610 for diaSourceId, ssObjectId in idMap.items():
611 apdb_part, apdb_time_part = id2partitions[diaSourceId]
612 values: tuple
613 if self.config.time_partition_tables:
614 query = (
615 f'UPDATE "{self._keyspace}"."{table_name}_{apdb_time_part}"'
616 ' SET "ssObjectId" = ?, "diaObjectId" = NULL'
617 ' WHERE "apdb_part" = ? AND "diaSourceId" = ?'
618 )
619 values = (ssObjectId, apdb_part, diaSourceId)
620 else:
621 query = (
622 f'UPDATE "{self._keyspace}"."{table_name}"'
623 ' SET "ssObjectId" = ?, "diaObjectId" = NULL'
624 ' WHERE "apdb_part" = ? AND "apdb_time_part" = ? AND "diaSourceId" = ?'
625 )
626 values = (ssObjectId, apdb_part, apdb_time_part, diaSourceId)
627 queries.add(self._preparer.prepare(query), values)
629 # Reassign in history tables, only if history is enabled
630 if id2insert_id:
631 # Filter out insert ids that have been deleted already. There is a
632 # potential race with concurrent removal of insert IDs, but it
633 # should be handled by WHERE in UPDATE.
634 known_ids = set()
635 if insert_ids := self.getInsertIds():
636 known_ids = set(insert_id.id for insert_id in insert_ids)
637 id2insert_id = {key: value for key, value in id2insert_id.items() if value in known_ids}
638 if id2insert_id:
639 table_name = self._schema.tableName(ExtraTables.DiaSourceInsertId)
640 for diaSourceId, ssObjectId in idMap.items():
641 if insert_id := id2insert_id.get(diaSourceId):
642 query = (
643 f'UPDATE "{self._keyspace}"."{table_name}" '
644 ' SET "ssObjectId" = ?, "diaObjectId" = NULL '
645 'WHERE "insert_id" = ? AND "diaSourceId" = ?'
646 )
647 values = (ssObjectId, insert_id, diaSourceId)
648 queries.add(self._preparer.prepare(query), values)
650 _LOG.debug("%s: will update %d records", table_name, len(idMap))
651 with Timer(table_name + " update", self.config.timer):
652 self._session.execute(queries, execution_profile="write")
654 def dailyJob(self) -> None:
655 # docstring is inherited from a base class
656 pass
658 def countUnassociatedObjects(self) -> int:
659 # docstring is inherited from a base class
661 # It's too inefficient to implement it for Cassandra in current schema.
662 raise NotImplementedError()
664 @property
665 def metadata(self) -> ApdbMetadata:
666 # docstring is inherited from a base class
667 if self._metadata is None:
668 raise RuntimeError("Database schema was not initialized.")
669 return self._metadata
671 def _makeProfiles(self, config: ApdbCassandraConfig) -> Mapping[Any, ExecutionProfile]:
672 """Make all execution profiles used in the code."""
673 if config.private_ips:
674 loadBalancePolicy = WhiteListRoundRobinPolicy(hosts=config.contact_points)
675 else:
676 loadBalancePolicy = RoundRobinPolicy()
678 read_tuples_profile = ExecutionProfile(
679 consistency_level=getattr(cassandra.ConsistencyLevel, config.read_consistency),
680 request_timeout=config.read_timeout,
681 row_factory=cassandra.query.tuple_factory,
682 load_balancing_policy=loadBalancePolicy,
683 )
684 read_pandas_profile = ExecutionProfile(
685 consistency_level=getattr(cassandra.ConsistencyLevel, config.read_consistency),
686 request_timeout=config.read_timeout,
687 row_factory=pandas_dataframe_factory,
688 load_balancing_policy=loadBalancePolicy,
689 )
690 read_raw_profile = ExecutionProfile(
691 consistency_level=getattr(cassandra.ConsistencyLevel, config.read_consistency),
692 request_timeout=config.read_timeout,
693 row_factory=raw_data_factory,
694 load_balancing_policy=loadBalancePolicy,
695 )
696 # Profile to use with select_concurrent to return pandas data frame
697 read_pandas_multi_profile = ExecutionProfile(
698 consistency_level=getattr(cassandra.ConsistencyLevel, config.read_consistency),
699 request_timeout=config.read_timeout,
700 row_factory=pandas_dataframe_factory,
701 load_balancing_policy=loadBalancePolicy,
702 )
703 # Profile to use with select_concurrent to return raw data (columns and
704 # rows)
705 read_raw_multi_profile = ExecutionProfile(
706 consistency_level=getattr(cassandra.ConsistencyLevel, config.read_consistency),
707 request_timeout=config.read_timeout,
708 row_factory=raw_data_factory,
709 load_balancing_policy=loadBalancePolicy,
710 )
711 write_profile = ExecutionProfile(
712 consistency_level=getattr(cassandra.ConsistencyLevel, config.write_consistency),
713 request_timeout=config.write_timeout,
714 load_balancing_policy=loadBalancePolicy,
715 )
716 # To replace default DCAwareRoundRobinPolicy
717 default_profile = ExecutionProfile(
718 load_balancing_policy=loadBalancePolicy,
719 )
720 return {
721 "read_tuples": read_tuples_profile,
722 "read_pandas": read_pandas_profile,
723 "read_raw": read_raw_profile,
724 "read_pandas_multi": read_pandas_multi_profile,
725 "read_raw_multi": read_raw_multi_profile,
726 "write": write_profile,
727 EXEC_PROFILE_DEFAULT: default_profile,
728 }
730 def _getSources(
731 self,
732 region: sphgeom.Region,
733 object_ids: Iterable[int] | None,
734 mjd_start: float,
735 mjd_end: float,
736 table_name: ApdbTables,
737 ) -> pandas.DataFrame:
738 """Return catalog of DiaSource instances given set of DiaObject IDs.
740 Parameters
741 ----------
742 region : `lsst.sphgeom.Region`
743 Spherical region.
744 object_ids : iterable [`int`], or `None`
745 Collection of DiaObject IDs.
746 mjd_start : `float`
747 Lower bound of time interval.
748 mjd_end : `float`
749 Upper bound of time interval.
750 table_name : `ApdbTables`
751 Name of the table.
753 Returns
754 -------
755 catalog : `pandas.DataFrame`
756 Catalog containing DiaSource records. Empty catalog is returned if
757 ``object_ids`` is empty.
758 """
759 object_id_set: Set[int] = set()
760 if object_ids is not None:
761 object_id_set = set(object_ids)
762 if len(object_id_set) == 0:
763 return self._make_empty_catalog(table_name)
765 sp_where = self._spatial_where(region)
766 tables, temporal_where = self._temporal_where(table_name, mjd_start, mjd_end)
768 # We need to exclude extra partitioning columns from result.
769 column_names = self._schema.apdbColumnNames(table_name)
770 what = ",".join(_quote_column(column) for column in column_names)
772 # Build all queries
773 statements: list[tuple] = []
774 for table in tables:
775 prefix = f'SELECT {what} from "{self._keyspace}"."{table}"'
776 statements += list(self._combine_where(prefix, sp_where, temporal_where))
777 _LOG.debug("_getSources %s: #queries: %s", table_name, len(statements))
779 with Timer(table_name.name + " select", self.config.timer):
780 catalog = cast(
781 pandas.DataFrame,
782 select_concurrent(
783 self._session, statements, "read_pandas_multi", self.config.read_concurrency
784 ),
785 )
787 # filter by given object IDs
788 if len(object_id_set) > 0:
789 catalog = cast(pandas.DataFrame, catalog[catalog["diaObjectId"].isin(object_id_set)])
791 # precise filtering on midpointMjdTai
792 catalog = cast(pandas.DataFrame, catalog[catalog["midpointMjdTai"] > mjd_start])
794 _LOG.debug("found %d %ss", catalog.shape[0], table_name.name)
795 return catalog
797 def _get_history(self, table: ExtraTables, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
798 """Return records from a particular table given set of insert IDs."""
799 if not self._schema.has_insert_id:
800 raise ValueError("APDB is not configured for history retrieval")
802 insert_ids = [id.id for id in ids]
803 params = ",".join("?" * len(insert_ids))
805 table_name = self._schema.tableName(table)
806 # The history table schema has only the regular APDB columns plus an
807 # insert_id column, which is exactly what this method needs to return,
808 # so selecting a star is fine here.
809 query = f'SELECT * FROM "{self._keyspace}"."{table_name}" WHERE insert_id IN ({params})'
810 statement = self._preparer.prepare(query)
812 with Timer("DiaObject history", self.config.timer):
813 result = self._session.execute(statement, insert_ids, execution_profile="read_raw")
814 table_data = cast(ApdbCassandraTableData, result._current_rows)
815 return table_data
817 def _storeInsertId(self, insert_id: ApdbInsertId, visit_time: dafBase.DateTime) -> None:
818 # Cassandra timestamp uses milliseconds since epoch
819 timestamp = insert_id.insert_time.nsecs() // 1_000_000
821 # everything goes into a single partition
822 partition = 0
824 table_name = self._schema.tableName(ExtraTables.DiaInsertId)
825 query = (
826 f'INSERT INTO "{self._keyspace}"."{table_name}" (partition, insert_id, insert_time) '
827 "VALUES (?, ?, ?)"
828 )
830 self._session.execute(
831 self._preparer.prepare(query),
832 (partition, insert_id.id, timestamp),
833 timeout=self.config.write_timeout,
834 execution_profile="write",
835 )
837 def _storeDiaObjects(
838 self, objs: pandas.DataFrame, visit_time: dafBase.DateTime, insert_id: ApdbInsertId | None
839 ) -> None:
840 """Store catalog of DiaObjects from current visit.
842 Parameters
843 ----------
844 objs : `pandas.DataFrame`
845 Catalog with DiaObject records.
846 visit_time : `lsst.daf.base.DateTime`
847 Time of the current visit.
insert_id : `ApdbInsertId` or `None`
Insert identifier to associate with the stored records, or `None` when
insert IDs are not used.
848 """
849 visit_time_dt = visit_time.toPython()
850 extra_columns = dict(lastNonForcedSource=visit_time_dt)
851 self._storeObjectsPandas(objs, ApdbTables.DiaObjectLast, extra_columns=extra_columns)
853 extra_columns["validityStart"] = visit_time_dt
854 time_part: int | None = self._time_partition(visit_time)
855 if not self.config.time_partition_tables:
856 extra_columns["apdb_time_part"] = time_part
857 time_part = None
859 # Only store DiaObjects if not storing insert_ids or explicitly
860 # configured to always store them
861 if insert_id is None or not self.config.use_insert_id_skips_diaobjects:
862 self._storeObjectsPandas(
863 objs, ApdbTables.DiaObject, extra_columns=extra_columns, time_part=time_part
864 )
866 if insert_id is not None:
867 extra_columns = dict(insert_id=insert_id.id, validityStart=visit_time_dt)
868 self._storeObjectsPandas(objs, ExtraTables.DiaObjectInsertId, extra_columns=extra_columns)
870 def _storeDiaSources(
871 self,
872 table_name: ApdbTables,
873 sources: pandas.DataFrame,
874 visit_time: dafBase.DateTime,
875 insert_id: ApdbInsertId | None,
876 ) -> None:
877 """Store catalog of DIASources or DIAForcedSources from current visit.
879 Parameters
880 ----------
table_name : `ApdbTables`
APDB table where the records are stored (DiaSource or DiaForcedSource).
881 sources : `pandas.DataFrame`
882 Catalog containing DiaSource records.
883 visit_time : `lsst.daf.base.DateTime`
884 Time of the current visit.
insert_id : `ApdbInsertId` or `None`
Insert identifier to associate with the stored records, or `None` when
insert IDs are not used.
885 """
886 time_part: int | None = self._time_partition(visit_time)
887 extra_columns: dict[str, Any] = {}
888 if not self.config.time_partition_tables:
889 extra_columns["apdb_time_part"] = time_part
890 time_part = None
892 self._storeObjectsPandas(sources, table_name, extra_columns=extra_columns, time_part=time_part)
894 if insert_id is not None:
895 extra_columns = dict(insert_id=insert_id.id)
896 if table_name is ApdbTables.DiaSource:
897 extra_table = ExtraTables.DiaSourceInsertId
898 else:
899 extra_table = ExtraTables.DiaForcedSourceInsertId
900 self._storeObjectsPandas(sources, extra_table, extra_columns=extra_columns)
902 def _storeDiaSourcesPartitions(
903 self, sources: pandas.DataFrame, visit_time: dafBase.DateTime, insert_id: ApdbInsertId | None
904 ) -> None:
905 """Store mapping of diaSourceId to its partitioning values.
907 Parameters
908 ----------
909 sources : `pandas.DataFrame`
910 Catalog containing DiaSource records
911 visit_time : `lsst.daf.base.DateTime`
912 Time of the current visit.
insert_id : `ApdbInsertId` or `None`
Insert identifier stored with each mapping row, or `None` when insert IDs
are not used.
913 """
914 id_map = cast(pandas.DataFrame, sources[["diaSourceId", "apdb_part"]])
915 extra_columns = {
916 "apdb_time_part": self._time_partition(visit_time),
917 "insert_id": insert_id.id if insert_id is not None else None,
918 }
920 self._storeObjectsPandas(
921 id_map, ExtraTables.DiaSourceToPartition, extra_columns=extra_columns, time_part=None
922 )
924 def _storeObjectsPandas(
925 self,
926 records: pandas.DataFrame,
927 table_name: ApdbTables | ExtraTables,
928 extra_columns: Mapping | None = None,
929 time_part: int | None = None,
930 ) -> None:
931 """Store generic objects.
933 Takes Pandas catalog and stores a bunch of records in a table.
935 Parameters
936 ----------
937 records : `pandas.DataFrame`
938 Catalog containing object records
939 table_name : `ApdbTables` or `ExtraTables`
940 Name of the table as defined in APDB schema.
941 extra_columns : `dict`, optional
942 Mapping (column_name, column_value) which gives fixed values for
943 columns in each row, overrides values in ``records`` if matching
944 columns exist there.
945 time_part : `int`, optional
946 If not `None` then insert into a per-partition table.
948 Notes
949 -----
950 If Pandas catalog contains additional columns not defined in table
951 schema they are ignored. Catalog does not have to contain all columns
952 defined in a table, but partition and clustering keys must be present
953 in a catalog or ``extra_columns``.
954 """
955 # use extra columns if specified
956 if extra_columns is None:
957 extra_columns = {}
958 extra_fields = list(extra_columns.keys())
960 # Fields that will come from dataframe.
961 df_fields = [column for column in records.columns if column not in extra_fields]
963 column_map = self._schema.getColumnMap(table_name)
964 # list of columns (as in felis schema)
965 fields = [column_map[field].name for field in df_fields if field in column_map]
966 fields += extra_fields
968 # check that all partitioning and clustering columns are defined
969 required_columns = self._schema.partitionColumns(table_name) + self._schema.clusteringColumns(
970 table_name
971 )
972 missing_columns = [column for column in required_columns if column not in fields]
973 if missing_columns:
974 raise ValueError(f"Primary key columns are missing from catalog: {missing_columns}")
976 qfields = [quote_id(field) for field in fields]
977 qfields_str = ",".join(qfields)
979 with Timer(table_name.name + " query build", self.config.timer):
980 table = self._schema.tableName(table_name)
981 if time_part is not None:
982 table = f"{table}_{time_part}"
984 holders = ",".join(["?"] * len(qfields))
985 query = f'INSERT INTO "{self._keyspace}"."{table}" ({qfields_str}) VALUES ({holders})'
986 statement = self._preparer.prepare(query)
987 queries = cassandra.query.BatchStatement()
988 for rec in records.itertuples(index=False):
989 values = []
990 for field in df_fields:
991 if field not in column_map:
992 continue
993 value = getattr(rec, field)
994 if column_map[field].datatype is felis.types.Timestamp:
995 if isinstance(value, pandas.Timestamp):
996 value = literal(value.to_pydatetime())
997 else:
998 # Assume it's seconds since epoch, Cassandra
999 # datetime is in milliseconds
1000 value = int(value * 1000)
1001 values.append(literal(value))
1002 for field in extra_fields:
1003 value = extra_columns[field]
1004 values.append(literal(value))
1005 queries.add(statement, values)
1007 _LOG.debug("%s: will store %d records", self._schema.tableName(table_name), records.shape[0])
1008 with Timer(table_name.name + " insert", self.config.timer):
1009 self._session.execute(queries, timeout=self.config.write_timeout, execution_profile="write")
1011 def _add_obj_part(self, df: pandas.DataFrame) -> pandas.DataFrame:
1012 """Calculate spatial partition for each record and add it to a
1013 DataFrame.
1015 Notes
1016 -----
1017 This overrides any existing column in a DataFrame with the same name
1018 (apdb_part). Original DataFrame is not changed, copy of a DataFrame is
1019 returned.
1020 """
1021 # calculate spatial partition (pixel) index for every DiaObject
1022 apdb_part = np.zeros(df.shape[0], dtype=np.int64)
1023 ra_col, dec_col = self.config.ra_dec_columns
1024 for i, (ra, dec) in enumerate(zip(df[ra_col], df[dec_col])):
1025 uv3d = sphgeom.UnitVector3d(sphgeom.LonLat.fromDegrees(ra, dec))
1026 idx = self._pixelization.pixel(uv3d)
1027 apdb_part[i] = idx
1028 df = df.copy()
1029 df["apdb_part"] = apdb_part
1030 return df
1032 def _add_src_part(self, sources: pandas.DataFrame, objs: pandas.DataFrame) -> pandas.DataFrame:
1033 """Add apdb_part column to DiaSource catalog.
1035 Notes
1036 -----
1037 This method copies apdb_part value from a matching DiaObject record.
1038 DiaObject catalog needs to have an apdb_part column filled by
1039 ``_add_obj_part`` method and DiaSource records need to be
1040 associated to DiaObjects via ``diaObjectId`` column.
1042 This overrides any existing column in a DataFrame with the same name
1043 (apdb_part). Original DataFrame is not changed, copy of a DataFrame is
1044 returned.
1045 """
1046 pixel_id_map: dict[int, int] = {
1047 diaObjectId: apdb_part for diaObjectId, apdb_part in zip(objs["diaObjectId"], objs["apdb_part"])
1048 }
1049 apdb_part = np.zeros(sources.shape[0], dtype=np.int64)
1050 ra_col, dec_col = self.config.ra_dec_columns
1051 for i, (diaObjId, ra, dec) in enumerate(
1052 zip(sources["diaObjectId"], sources[ra_col], sources[dec_col])
1053 ):
1054 if diaObjId == 0:
1055 # DiaSources associated with SolarSystemObjects do not have an
1056 # associated DiaObject, so their partition is computed
1057 # from the source's own ra/dec.
1058 uv3d = sphgeom.UnitVector3d(sphgeom.LonLat.fromDegrees(ra, dec))
1059 idx = self._pixelization.pixel(uv3d)
1060 apdb_part[i] = idx
1061 else:
1062 apdb_part[i] = pixel_id_map[diaObjId]
1063 sources = sources.copy()
1064 sources["apdb_part"] = apdb_part
1065 return sources
1067 def _add_fsrc_part(self, sources: pandas.DataFrame, objs: pandas.DataFrame) -> pandas.DataFrame:
1068 """Add apdb_part column to DiaForcedSource catalog.
1070 Notes
1071 -----
1072 This method copies apdb_part value from a matching DiaObject record.
1073 DiaObject catalog needs to have an apdb_part column filled by
1074 ``_add_obj_part`` method and DiaForcedSource records need to be
1075 associated to DiaObjects via ``diaObjectId`` column.
1077 This overrides any existing column in a DataFrame with the same name
1078 (apdb_part). Original DataFrame is not changed, copy of a DataFrame is
1079 returned.
1080 """
1081 pixel_id_map: dict[int, int] = {
1082 diaObjectId: apdb_part for diaObjectId, apdb_part in zip(objs["diaObjectId"], objs["apdb_part"])
1083 }
1084 apdb_part = np.zeros(sources.shape[0], dtype=np.int64)
1085 for i, diaObjId in enumerate(sources["diaObjectId"]):
1086 apdb_part[i] = pixel_id_map[diaObjId]
1087 sources = sources.copy()
1088 sources["apdb_part"] = apdb_part
1089 return sources
1091 def _time_partition(self, time: float | dafBase.DateTime) -> int:
1092 """Calculate time partiton number for a given time.
1094 Parameters
1095 ----------
1096 time : `float` or `lsst.daf.base.DateTime`
1097 Time for which to calculate the partition number, given either as a
1098 float MJD value or as an `lsst.daf.base.DateTime`.
1100 Returns
1101 -------
1102 partition : `int`
1103 Partition number for a given time.
1104 """
1105 if isinstance(time, dafBase.DateTime):
1106 mjd = time.get(system=dafBase.DateTime.MJD)
1107 else:
1108 mjd = time
1109 days_since_epoch = mjd - self._partition_zero_epoch_mjd
1110 partition = int(days_since_epoch) // self.config.time_partition_days
1111 return partition
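# Editor's note: worked example (illustrative only, not part of this module).
# The partition-zero epoch 1970-01-01 TAI corresponds to MJD 40587, so with the
# default time_partition_days = 30 an observation at MJD 60000 falls into
# partition int(60000 - 40587) // 30 = 19413 // 30 = 647.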
1113 def _make_empty_catalog(self, table_name: ApdbTables) -> pandas.DataFrame:
1114 """Make an empty catalog for a table with a given name.
1116 Parameters
1117 ----------
1118 table_name : `ApdbTables`
1119 Name of the table.
1121 Returns
1122 -------
1123 catalog : `pandas.DataFrame`
1124 An empty catalog.
1125 """
1126 table = self._schema.tableSchemas[table_name]
1128 data = {
1129 columnDef.name: pandas.Series(dtype=self._schema.column_dtype(columnDef.datatype))
1130 for columnDef in table.columns
1131 }
1132 return pandas.DataFrame(data)
1134 def _combine_where(
1135 self,
1136 prefix: str,
1137 where1: list[tuple[str, tuple]],
1138 where2: list[tuple[str, tuple]],
1139 suffix: str | None = None,
1140 ) -> Iterator[tuple[cassandra.query.Statement, tuple]]:
1141 """Make cartesian product of two parts of WHERE clause into a series
1142 of statements to execute.
1144 Parameters
1145 ----------
1146 prefix : `str`
1147 Initial statement prefix that comes before WHERE clause, e.g.
1148 "SELECT * from Table"
1149 """
1150 # If lists are empty use special sentinels.
1151 if not where1:
1152 where1 = [("", ())]
1153 if not where2:
1154 where2 = [("", ())]
1156 for expr1, params1 in where1:
1157 for expr2, params2 in where2:
1158 full_query = prefix
1159 wheres = []
1160 if expr1:
1161 wheres.append(expr1)
1162 if expr2:
1163 wheres.append(expr2)
1164 if wheres:
1165 full_query += " WHERE " + " AND ".join(wheres)
1166 if suffix:
1167 full_query += " " + suffix
1168 params = params1 + params2
1169 if params:
1170 statement = self._preparer.prepare(full_query)
1171 else:
1172 # If there are no params then it is likely that query
1173 # has a bunch of literals rendered already, no point
1174 # trying to prepare it.
1175 statement = cassandra.query.SimpleStatement(full_query)
1176 yield (statement, params)
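# Editor's note: illustrative only, not part of this module. Given, e.g.,
# where1 = [('"apdb_part" IN (1,2)', ())] and
# where2 = [('"apdb_time_part" = ?', (647,))], the generator above yields a
# single statement of the form
#   SELECT ... WHERE "apdb_part" IN (1,2) AND "apdb_time_part" = ?
# with parameters (647,); one statement is produced per (where1, where2) pair.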
1178 def _spatial_where(
1179 self, region: sphgeom.Region | None, use_ranges: bool = False
1180 ) -> list[tuple[str, tuple]]:
1181 """Generate expressions for spatial part of WHERE clause.
1183 Parameters
1184 ----------
1185 region : `sphgeom.Region`
1186 Spatial region for query results.
1187 use_ranges : `bool`
1188 If True then use pixel ranges ("apdb_part >= p1 AND apdb_part <=
1189 p2") instead of exact list of pixels. Should be set to True for
1190 large regions covering very many pixels.
1192 Returns
1193 -------
1194 expressions : `list` [ `tuple` ]
1195 Empty list is returned if ``region`` is `None`, otherwise a list
1196 of one or more (expression, parameters) tuples
1197 """
1198 if region is None:
1199 return []
1200 if use_ranges:
1201 pixel_ranges = self._pixelization.envelope(region)
1202 expressions: list[tuple[str, tuple]] = []
1203 for lower, upper in pixel_ranges:
1204 upper -= 1
1205 if lower == upper:
1206 expressions.append(('"apdb_part" = ?', (lower,)))
1207 else:
1208 expressions.append(('"apdb_part" >= ? AND "apdb_part" <= ?', (lower, upper)))
1209 return expressions
1210 else:
1211 pixels = self._pixelization.pixels(region)
1212 if self.config.query_per_spatial_part:
1213 return [('"apdb_part" = ?', (pixel,)) for pixel in pixels]
1214 else:
1215 pixels_str = ",".join([str(pix) for pix in pixels])
1216 return [(f'"apdb_part" IN ({pixels_str})', ())]
1218 def _temporal_where(
1219 self,
1220 table: ApdbTables,
1221 start_time: float | dafBase.DateTime,
1222 end_time: float | dafBase.DateTime,
1223 query_per_time_part: bool | None = None,
1224 ) -> tuple[list[str], list[tuple[str, tuple]]]:
1225 """Generate table names and expressions for temporal part of WHERE
1226 clauses.
1228 Parameters
1229 ----------
1230 table : `ApdbTables`
1231 Table to select from.
1232 start_time : `dafBase.DateTime` or `float`
1233 Starting `DateTime` or MJD value of the time range.
1234 end_time : `dafBase.DateTime` or `float`
1235 Ending `DateTime` or MJD value of the time range.
1236 query_per_time_part : `bool`, optional
1237 If None then use ``query_per_time_part`` from configuration.
1239 Returns
1240 -------
1241 tables : `list` [ `str` ]
1242 List of the table names to query.
1243 expressions : `list` [ `tuple` ]
1244 A list of zero or more (expression, parameters) tuples.
1245 """
1246 tables: list[str]
1247 temporal_where: list[tuple[str, tuple]] = []
1248 table_name = self._schema.tableName(table)
1249 time_part_start = self._time_partition(start_time)
1250 time_part_end = self._time_partition(end_time)
1251 time_parts = list(range(time_part_start, time_part_end + 1))
1252 if self.config.time_partition_tables:
1253 tables = [f"{table_name}_{part}" for part in time_parts]
1254 else:
1255 tables = [table_name]
1256 if query_per_time_part is None:
1257 query_per_time_part = self.config.query_per_time_part
1258 if query_per_time_part:
1259 temporal_where = [('"apdb_time_part" = ?', (time_part,)) for time_part in time_parts]
1260 else:
1261 time_part_list = ",".join([str(part) for part in time_parts])
1262 temporal_where = [(f'"apdb_time_part" IN ({time_part_list})', ())]
1264 return tables, temporal_where
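# Editor's note: illustrative only, not part of this module. For a DiaSource
# query spanning partitions 646-647 this returns, with time_partition_tables
# enabled, tables ["DiaSource_646", "DiaSource_647"] and no temporal WHERE
# expressions; with a single table it returns ["DiaSource"] together with
# either '"apdb_time_part" IN (646,647)' or one '"apdb_time_part" = ?'
# expression per partition, depending on query_per_time_part.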