Coverage for python/lsst/daf/butler/registry/dimensions/caching.py: 90%

61 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-28 02:29 -0800

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["CachingDimensionRecordStorage"]

from collections.abc import Mapping
from typing import Any

import sqlalchemy
from lsst.daf.relation import Join, Relation
from lsst.utils import doImportType

from ...core import (
    DatabaseDimensionElement,
    DataCoordinate,
    DimensionRecord,
    GovernorDimension,
    NamedKeyMapping,
)
from .. import queries
from ..interfaces import (
    Database,
    DatabaseDimensionRecordStorage,
    GovernorDimensionRecordStorage,
    StaticTablesContext,
)

46 

47 

class CachingDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """Dimension record storage that wraps another storage object and caches
    the records it fetches from it.

    Parameters
    ----------
    nested : `DatabaseDimensionRecordStorage`
        Storage object that actually talks to the database; everything other
        than cached fetches is delegated to it.
    """

    def __init__(self, nested: DatabaseDimensionRecordStorage):
        self._nested = nested
        # Lazily-populated record cache; ``None`` means "not fetched yet",
        # which is distinct from an empty dict ("fetched, no records").
        self._cache: dict[DataCoordinate, DimensionRecord] | None = None

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
        view_target: DatabaseDimensionRecordStorage | None = None,
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        nested_config = config["nested"]
        nested_cls = doImportType(nested_config["cls"])
        if not hasattr(nested_cls, "initialize"):
            raise TypeError(f"Nested class {nested_config['cls']} does not have an initialize() method.")
        nested = nested_cls.initialize(
            db, element, context=context, config=nested_config, governors=governors, view_target=view_target
        )
        if view_target is None:
            return cls(nested)
        # Caching records that are really a view into another element's
        # records is problematic, because the caching layer has no way to
        # observe changes made to the view target's records.  Rather than
        # invent a callback system or tolerate a stale cache, we disallow
        # the combination — but the default dimension universe has always
        # configured 'band' as a cached view into physical_filter, and we
        # cannot break all of those existing configurations.
        if isinstance(view_target, CachingDimensionRecordStorage):
            # Escape hatch: if the view target's own storage is also
            # cached, this outer cache is redundant as well as
            # problematic, so it is reasonable to silently drop it by
            # returning the nested storage instead of a caching wrapper.
            # This is exactly the situation in the default configuration.
            return nested
        raise RuntimeError(
            f"Invalid dimension storage configuration: cannot cache dimension element {element} "
            f"that is itself a view of {view_target.element}."
        )

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._nested.element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        self._cache = None
        self._nested.clearCaches()

    def make_relation(self, context: queries.SqlQueryContext) -> Relation:
        # Docstring inherited.
        return self._nested.make_relation(context)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        self._nested.insert(*records, replace=replace, skip_existing=skip_existing)
        if self._cache is None:
            return
        for rec in records:
            # The record passed in should never differ from what the
            # database now holds, but if it somehow does, keeping the
            # cache consistent with the database makes debugging easier.
            if skip_existing:
                self._cache.setdefault(rec.dataId, rec)
            else:
                self._cache[rec.dataId] = rec

    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        # Docstring inherited from DimensionRecordStorage.sync.
        result = self._nested.sync(record, update=update)
        if result and self._cache is not None:
            self._cache[record.dataId] = record
        return result

    def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
        # Docstring inherited from DimensionRecordStorage.
        return self.get_record_cache(context).get(data_id)

    def get_record_cache(self, context: queries.SqlQueryContext) -> Mapping[DataCoordinate, DimensionRecord]:
        # Docstring inherited.
        if self._cache is None:
            # Fetch every record for this element in a single query and
            # index the results by data ID.
            joined = self._nested.join(context.make_initial_relation(), Join(), context)
            reader = queries.DimensionRecordReader(self.element)
            self._cache = {
                record.dataId: record
                for record in (reader.read(row) for row in context.fetch_iterable(joined))
            }
        return self._cache

    def digestTables(self) -> list[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        return self._nested.digestTables()