Coverage for python/lsst/daf/butler/registry/dimensions/caching.py: 89%

61 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 07:59 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["CachingDimensionRecordStorage"] 

30 

31from collections.abc import Mapping 

32from typing import Any 

33 

34import sqlalchemy 

35from lsst.daf.relation import Join, Relation 

36from lsst.utils import doImportType 

37 

38from ...core import ( 

39 DatabaseDimensionElement, 

40 DataCoordinate, 

41 DimensionRecord, 

42 GovernorDimension, 

43 NamedKeyMapping, 

44) 

45from .. import queries 

46from ..interfaces import ( 

47 Database, 

48 DatabaseDimensionRecordStorage, 

49 GovernorDimensionRecordStorage, 

50 StaticTablesContext, 

51) 

52 

53 

class CachingDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that adds caching to some other nested
    storage implementation.

    Parameters
    ----------
    nested : `DatabaseDimensionRecordStorage`
        The other storage to cache fetches from and to delegate all other
        operations to.
    """

    def __init__(self, nested: DatabaseDimensionRecordStorage):
        self._nested = nested
        # Lazily-built map from data ID to record; ``None`` means "not yet
        # fetched" (an empty dict is a valid, fully-populated cache).
        self._cache: dict[DataCoordinate, DimensionRecord] | None = None

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
        view_target: DatabaseDimensionRecordStorage | None = None,
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        config = config["nested"]
        nested_cls = doImportType(config["cls"])
        if not hasattr(nested_cls, "initialize"):
            raise TypeError(f"Nested class {config['cls']} does not have an initialize() method.")
        nested = nested_cls.initialize(
            db, element, context=context, config=config, governors=governors, view_target=view_target
        )
        if view_target is None:
            return cls(nested)
        # Caching records that are really a view into another element's
        # records is problematic, because the caching code has no way of
        # intercepting changes to its target's records.  Instead of inventing
        # a callback system to address that directly or dealing with an
        # untrustworthy combination, we just ban this combination.  But
        # there's a problem: this is how we've configured the default
        # dimension universe from the beginning, with the 'band' dimension
        # being a cached view into physical_filter, and we don't want to
        # break all those configurations.
        if isinstance(view_target, CachingDimensionRecordStorage):
            # Happily, there's a way out: if the view target's record storage
            # is _also_ cached, then this outer caching is pretty thoroughly
            # unnecessary as well as problematic, and it's reasonable to
            # silently drop it, by returning the nested storage object
            # instead of a new caching wrapper.  And this too is the case
            # with the default dimension configuration.
            return nested
        raise RuntimeError(
            f"Invalid dimension storage configuration: cannot cache dimension element {element} "
            f"that is itself a view of {view_target.element}."
        )

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._nested.element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        self._cache = None
        self._nested.clearCaches()

    def make_relation(self, context: queries.SqlQueryContext) -> Relation:
        # Docstring inherited.
        return self._nested.make_relation(context)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        self._nested.insert(*records, replace=replace, skip_existing=skip_existing)
        if self._cache is None:
            return
        # Mirror every write into the cache.  We really shouldn't ever get
        # into a situation where the record here differs from the one in the
        # DB, but the last thing we want is to make it harder to debug by
        # making the cache different from the DB.
        for record in records:
            if skip_existing:
                self._cache.setdefault(record.dataId, record)
            else:
                self._cache[record.dataId] = record

    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        # Docstring inherited from DimensionRecordStorage.sync.
        inserted_or_updated = self._nested.sync(record, update=update)
        # A truthy result means the database changed, so the cache (if built)
        # must reflect the new record as well.
        if self._cache is not None and inserted_or_updated:
            self._cache[record.dataId] = record
        return inserted_or_updated

    def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
        # Docstring inherited from DimensionRecordStorage.
        return self.get_record_cache(context).get(data_id)

    def get_record_cache(self, context: queries.SqlQueryContext) -> Mapping[DataCoordinate, DimensionRecord]:
        # Docstring inherited.
        if self._cache is None:
            # First access: pull every record for this element from the
            # nested storage in one query and index them by data ID.
            relation = self._nested.join(context.make_initial_relation(), Join(), context)
            reader = queries.DimensionRecordReader(self.element)
            rows = context.fetch_iterable(relation)
            self._cache = {record.dataId: record for record in map(reader.read, rows)}
        return self._cache

    def digestTables(self) -> list[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        return self._nested.digestTables()