Coverage for python/lsst/dax/apdb/cassandra/apdbCassandraReplica.py: 33%

65 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 02:41 -0700

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbCassandraReplica"] 

25 

26import logging 

27from collections.abc import Iterable 

28from typing import TYPE_CHECKING, Any, cast 

29 

30import astropy.time 

31from lsst.utils.iteration import chunk_iterable 

32 

33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk 

34from ..timer import Timer 

35from ..versionTuple import VersionTuple 

36from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables 

37from .cassandra_utils import ApdbCassandraTableData, PreparedStatementCache 

38 

39if TYPE_CHECKING: 

40 from .apdbCassandra import ApdbCassandra 

41 

# Module-level logger named after this module.
_LOG = logging.getLogger(__name__)

# Version of the replication-table code implemented in this module; it is what
# ApdbCassandraReplica.apdbReplicaImplementationVersion() returns.
VERSION = VersionTuple(1, 0, 0)
"""Version for the code controlling replication tables. This needs to be
updated following compatibility rules when schema produced by this code
changes.
"""

49 

50 

class ApdbCassandraReplica(ApdbReplica):
    """Implementation of `ApdbReplica` for Cassandra backend.

    Parameters
    ----------
    apdb : `ApdbCassandra`
        Instance of ApdbCassandra for database.
    schema : `ApdbCassandraSchema`
        Instance of ApdbCassandraSchema for database.
    session
        Instance of cassandra session type.
    """

    def __init__(self, apdb: ApdbCassandra, schema: ApdbCassandraSchema, session: Any):
        # Note that ApdbCassandra instance must stay alive while this object
        # exists, so we keep reference to it.
        self._apdb = apdb
        self._schema = schema
        self._session = session
        self._config = apdb.config

        # Cache for prepared statements
        self._preparer = PreparedStatementCache(self._session)

    @classmethod
    def apdbReplicaImplementationVersion(cls) -> VersionTuple:
        # Docstring inherited from base class.
        return VERSION

    def getReplicaChunks(self) -> list[ReplicaChunk] | None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            return None

        # everything goes into a single partition
        partition = 0

        table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        # We want to avoid timezone mess so return timestamps as milliseconds.
        query = (
            "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
            f'FROM "{self._config.keyspace}"."{table_name}" WHERE partition = ?'
        )

        result = self._session.execute(
            self._preparer.prepare(query),
            (partition,),
            timeout=self._config.read_timeout,
            execution_profile="read_tuples",
        )
        # Order by last_update_time: the timestamp is the first selected
        # column, so sorting whole rows orders on it first (presumably rows
        # are plain tuples under the "read_tuples" profile -- confirm where
        # the profile is defined).
        rows = sorted(result)
        return [
            ReplicaChunk(
                id=row[1],
                # Query returns milliseconds, astropy wants seconds.
                last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
                unique_id=row[2],
            )
            for row in rows
        ]

    def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # everything goes into a single partition
        partition = 0

        # Table names do not depend on the batch being deleted, resolve them
        # once instead of on every iteration.
        keyspace = self._config.keyspace
        replica_chunks_table = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        data_tables = [
            self._schema.tableName(table)
            for table in (
                ExtraTables.DiaObjectChunks,
                ExtraTables.DiaSourceChunks,
                ExtraTables.DiaForcedSourceChunks,
            )
        ]

        # There is 64k limit on number of markers in Cassandra CQL
        for chunk_ids in chunk_iterable(chunks, 20_000):
            # One bind marker per chunk ID in this batch.
            params = ",".join("?" * len(chunk_ids))

            query = (
                f'DELETE FROM "{keyspace}"."{replica_chunks_table}" '
                f"WHERE partition = ? AND apdb_replica_chunk IN ({params})"
            )
            self._session.execute(
                self._preparer.prepare(query),
                [partition, *chunk_ids],
                timeout=self._config.remove_timeout,
            )

            # Also remove those chunk_ids from Dia*Chunks tables.
            for table_name in data_tables:
                query = (
                    f'DELETE FROM "{keyspace}"."{table_name}"'
                    f" WHERE apdb_replica_chunk IN ({params})"
                )
                self._session.execute(
                    self._preparer.prepare(query),
                    chunk_ids,
                    timeout=self._config.remove_timeout,
                )

    def getDiaObjectsChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaObjectChunks, chunks)

    def getDiaSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaSourceChunks, chunks)

    def getDiaForcedSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaForcedSourceChunks, chunks)

    def _get_chunks(self, table: ExtraTables, chunks: Iterable[int]) -> ApdbTableData:
        """Return records from a particular table given set of replica chunk
        IDs.

        Parameters
        ----------
        table : `ExtraTables`
            Chunk table to read from.
        chunks : `~collections.abc.Iterable` [`int`]
            Values of ``apdb_replica_chunk`` column to select.

        Returns
        -------
        data : `ApdbTableData`
            All columns of the matching rows, as produced by the query
            executed with "read_raw" execution profile.

        Raises
        ------
        ValueError
            Raised if schema has no replica chunk tables.
        """
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # We do not expect too many chunks in this query.
        # NOTE(review): empty ``chunks`` produces "IN ()" in the query below;
        # behavior is then up to the server -- confirm this is acceptable.
        chunks = list(chunks)
        params = ",".join("?" * len(chunks))

        table_name = self._schema.tableName(table)
        # I know that chunk table schema has only regular APDB columns plus
        # apdb_replica_chunk column, and this is exactly what we need to return
        # from this method, so selecting a star is fine here.
        query = (
            f'SELECT * FROM "{self._config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({params})'
        )
        statement = self._preparer.prepare(query)

        with Timer(f"{table_name} select chunk", self._config.timer):
            # NOTE(review): no explicit timeout here, unlike other queries in
            # this class; presumably the "read_raw" profile supplies one.
            result = self._session.execute(statement, chunks, execution_profile="read_raw")
            # Presumably the "read_raw" profile makes the driver store an
            # ApdbCassandraTableData object as the result rows -- verify
            # against the profile definition.
            table_data = cast(ApdbCassandraTableData, result._current_rows)
        return table_data

186 return table_data