Coverage for python/lsst/daf/butler/registry/dimensions/caching.py: 89%

62 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:52 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

27from __future__ import annotations 

28 

29__all__ = ["CachingDimensionRecordStorage"] 

30 

31from collections.abc import Mapping 

32from typing import Any 

33 

34import sqlalchemy 

35from lsst.daf.relation import Join, Relation 

36from lsst.utils import doImportType 

37 

38from ..._named import NamedKeyMapping 

39from ...dimensions import DatabaseDimensionElement, DataCoordinate, DimensionRecord, GovernorDimension 

40from .. import queries 

41from ..interfaces import ( 

42 Database, 

43 DatabaseDimensionRecordStorage, 

44 GovernorDimensionRecordStorage, 

45 StaticTablesContext, 

46) 

47 

48 

class CachingDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that adds caching to some other nested
    storage implementation.

    Parameters
    ----------
    nested : `DatabaseDimensionRecordStorage`
        The other storage to cache fetches from and to delegate all other
        operations to.
    """

    def __init__(self, nested: DatabaseDimensionRecordStorage):
        self._nested = nested
        # Lazily-built mapping from data ID to record.  ``None`` means the
        # cache has never been loaded (or was cleared) and must be rebuilt
        # in full by `get_record_cache`.
        self._cache: dict[DataCoordinate, DimensionRecord] | None = None

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
        view_target: DatabaseDimensionRecordStorage | None = None,
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        nested_config = config["nested"]
        nested_cls = doImportType(nested_config["cls"])
        if not hasattr(nested_cls, "initialize"):
            raise TypeError(f"Nested class {nested_config['cls']} does not have an initialize() method.")
        nested = nested_cls.initialize(
            db, element, context=context, config=nested_config, governors=governors, view_target=view_target
        )
        if view_target is None:
            return cls(nested)
        # Caching records that are really a view into another element's
        # records is problematic, because the caching code has no way of
        # intercepting changes to its target's records.  Instead of
        # inventing a callback system to address that directly or dealing
        # with an untrustworthy combination, we just ban this combination.
        # But there's a problem: this is how we've configured the default
        # dimension universe from the beginning, with the 'band' dimension
        # being a cached view into physical_filter, and we don't want to
        # break all those configurations.
        if isinstance(view_target, CachingDimensionRecordStorage):
            # Happily, there's a way out: if the view target's record
            # storage is _also_ cached, then this outer caching is pretty
            # thoroughly unnecessary as well as problematic, and it's
            # reasonable to silently drop it, by returning the nested
            # storage object instead of a new caching wrapper.  And this
            # too is the case with the default dimension configuration.
            return nested
        raise RuntimeError(
            f"Invalid dimension storage configuration: cannot cache dimension element {element} "
            f"that is itself a view of {view_target.element}."
        )

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._nested.element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        self._cache = None
        self._nested.clearCaches()

    def make_relation(self, context: queries.SqlQueryContext) -> Relation:
        # Docstring inherited.
        return self._nested.make_relation(context)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        self._nested.insert(*records, replace=replace, skip_existing=skip_existing)
        if self._cache is None:
            # Nothing cached yet, so nothing to keep in sync.
            return
        for new_record in records:
            # We really shouldn't ever get into a situation where the
            # record here differs from the one in the DB, but the last
            # thing we want is to make it harder to debug by making the
            # cache different from the DB.
            if skip_existing:
                self._cache.setdefault(new_record.dataId, new_record)
            else:
                self._cache[new_record.dataId] = new_record

    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        # Docstring inherited from DimensionRecordStorage.sync.
        result = self._nested.sync(record, update=update)
        # Only touch the cache when the database actually changed and a
        # cache has already been materialized.
        if result and self._cache is not None:
            self._cache[record.dataId] = record
        return result

    def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
        # Docstring inherited from DimensionRecordStorage.
        # A single fetch triggers a full cache load; subsequent fetches are
        # pure dict lookups.
        return self.get_record_cache(context).get(data_id)

    def get_record_cache(self, context: queries.SqlQueryContext) -> Mapping[DataCoordinate, DimensionRecord]:
        # Docstring inherited.
        if self._cache is None:
            # Pull every row for this element from the nested storage and
            # materialize the full data ID -> record mapping.
            relation = self._nested.join(context.make_initial_relation(), Join(), context)
            reader = queries.DimensionRecordReader(self.element)
            loaded = (reader.read(row) for row in context.fetch_iterable(relation))
            self._cache = {rec.dataId: rec for rec in loaded}
        return self._cache

    def digestTables(self) -> list[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        return self._nested.digestTables()