Coverage for python/lsst/dax/apdb/cassandra/apdbCassandraReplica.py: 30%

75 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-01 10:44 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbCassandraReplica"] 

25 

26import logging 

27from collections.abc import Iterable, Mapping 

28from typing import TYPE_CHECKING, Any, cast 

29 

30import astropy.time 

31from lsst.utils.iteration import chunk_iterable 

32 

33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk 

34from ..monitor import MonAgent 

35from ..timer import Timer 

36from ..versionTuple import VersionTuple 

37from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables 

38from .cassandra_utils import ApdbCassandraTableData, PreparedStatementCache 

39 

40if TYPE_CHECKING: 

41 from .apdbCassandra import ApdbCassandra 

42 

# Module-level logger; it is also handed to `Timer` instances when timing is
# enabled in the APDB configuration.
_LOG = logging.getLogger(__name__)

# Monitoring agent for this module; it is always handed to `Timer` instances.
_MON = MonAgent(__name__)

VERSION = VersionTuple(1, 0, 0)
"""Version for the code controlling replication tables. This needs to be
updated following compatibility rules when schema produced by this code
changes.
"""

52 

53 

class ApdbCassandraReplica(ApdbReplica):
    """Implementation of `ApdbReplica` for Cassandra backend.

    Parameters
    ----------
    apdb : `ApdbCassandra`
        Instance of ApdbCassandra for database.
    schema : `ApdbCassandraSchema`
        Instance of ApdbCassandraSchema for database.
    session
        Instance of cassandra session type.
    """

    def __init__(self, apdb: ApdbCassandra, schema: ApdbCassandraSchema, session: Any):
        # Note that ApdbCassandra instance must stay alive while this object
        # exists, so we keep reference to it.
        self._apdb = apdb
        self._schema = schema
        self._session = session
        self._config = apdb.config

        # Cache for prepared statements
        self._preparer = PreparedStatementCache(self._session)

        # Monitoring agent always receives timing info, logger only receives
        # it when timing is enabled in configuration.
        self._timer_args: list[MonAgent | logging.Logger] = [_MON]
        if self._config.timer:
            self._timer_args.append(_LOG)

    def _timer(self, name: str, *, tags: Mapping[str, str | int] | None = None) -> Timer:
        """Create `Timer` instance given its name.

        Parameters
        ----------
        name : `str`
            Name of the timer.
        tags : `~collections.abc.Mapping` [`str`, `str` or `int`], optional
            Tags passed through to the `Timer` constructor.

        Returns
        -------
        timer : `Timer`
            New timer configured with the agents selected in the constructor.
        """
        return Timer(name, *self._timer_args, tags=tags)

    @classmethod
    def apdbReplicaImplementationVersion(cls) -> VersionTuple:
        # Docstring inherited from base class.
        return VERSION

    def getReplicaChunks(self) -> list[ReplicaChunk] | None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            return None

        # everything goes into a single partition
        partition = 0

        table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        # We want to avoid timezone mess so return timestamps as milliseconds.
        query = (
            "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
            f'FROM "{self._config.keyspace}"."{table_name}" WHERE partition = ?'
        )

        with self._timer("chunks_select_time"):
            result = self._session.execute(
                self._preparer.prepare(query),
                (partition,),
                timeout=self._config.read_timeout,
                execution_profile="read_tuples",
            )
            # Order by last_update_time: it is the first selected column, so
            # plain sorting of the rows uses it as the primary sort key.
            rows = sorted(result)

        # Timestamps come back in milliseconds, astropy expects seconds.
        return [
            ReplicaChunk(
                id=row[1],
                last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
                unique_id=row[2],
            )
            for row in rows
        ]

    def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # Everything below is the same for every batch, so resolve it once
        # instead of on each loop iteration.
        keyspace = self._config.keyspace
        # everything goes into a single partition
        partition = 0
        chunks_table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        data_table_names = [
            self._schema.tableName(table)
            for table in (
                ExtraTables.DiaObjectChunks,
                ExtraTables.DiaSourceChunks,
                ExtraTables.DiaForcedSourceChunks,
            )
        ]

        # There is 64k limit on number of markers in Cassandra CQL
        for chunk_ids in chunk_iterable(chunks, 20_000):
            params = ",".join("?" * len(chunk_ids))

            query = (
                f'DELETE FROM "{keyspace}"."{chunks_table_name}" '
                f"WHERE partition = ? AND apdb_replica_chunk IN ({params})"
            )

            with self._timer("chunks_delete_time"):
                self._session.execute(
                    self._preparer.prepare(query),
                    [partition] + list(chunk_ids),
                    timeout=self._config.remove_timeout,
                )

            # Also remove those chunk_ids from Dia*Chunks tables.
            for table_name in data_table_names:
                query = (
                    f'DELETE FROM "{keyspace}"."{table_name}"'
                    f" WHERE apdb_replica_chunk IN ({params})"
                )
                # NOTE(review): "detele" in the timer name is a typo, it is
                # kept unchanged because the name is emitted at runtime and
                # monitoring consumers may depend on it.
                with self._timer("table_chunk_detele_time", tags={"table": table_name}):
                    self._session.execute(
                        self._preparer.prepare(query),
                        chunk_ids,
                        timeout=self._config.remove_timeout,
                    )

    def getDiaObjectsChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaObjectChunks, chunks)

    def getDiaSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaSourceChunks, chunks)

    def getDiaForcedSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaForcedSourceChunks, chunks)

    def _get_chunks(self, table: ExtraTables, chunks: Iterable[int]) -> ApdbTableData:
        """Return records from a particular table given set of insert IDs.

        Parameters
        ----------
        table : `ExtraTables`
            Chunk table to read from.
        chunks : `~collections.abc.Iterable` [`int`]
            Replica chunk identifiers to select.

        Returns
        -------
        data : `ApdbTableData`
            Records matching the given chunk identifiers.

        Raises
        ------
        ValueError
            Raised if APDB is not configured for replication.
        """
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # We do not expect too many chunks in this query.
        chunks = list(chunks)
        params = ",".join("?" * len(chunks))

        table_name = self._schema.tableName(table)
        # I know that chunk table schema has only regular APDB columns plus
        # apdb_replica_chunk column, and this is exactly what we need to return
        # from this method, so selecting a star is fine here.
        query = (
            f'SELECT * FROM "{self._config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({params})'
        )
        statement = self._preparer.prepare(query)

        with self._timer("table_chunk_select_time", tags={"table": table_name}):
            result = self._session.execute(statement, chunks, execution_profile="read_raw")
            # Presumably the "read_raw" profile makes the driver store an
            # ApdbCassandraTableData instance in place of rows; the cast is
            # only for the type checker -- TODO confirm against the profile
            # definition.
            table_data = cast(ApdbCassandraTableData, result._current_rows)
        return table_data

200 return table_data