Coverage for python/lsst/daf/butler/persistence_context.py: 53%

54 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:43 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Names exported by ``from <module> import *``; only the cache-context helper
# class is listed as public.
__all__ = ("PersistenceContextVars",)

31 

32 

33import uuid 

34from collections.abc import Callable, Hashable 

35from contextvars import Context, ContextVar, Token, copy_context 

36from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast 

37 

38if TYPE_CHECKING: 

39 from ._dataset_ref import DatasetRef 

40 from ._dataset_type import DatasetType, SerializedDatasetType 

41 from .datastore.record_data import DatastoreRecordData 

42 from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate 

43 from .dimensions._records import DimensionRecord, SerializedDimensionRecord 

44 

45_T = TypeVar("_T") 

46_V = TypeVar("_V") 

47 

48_P = ParamSpec("_P") 

49_Q = ParamSpec("_Q") 

50 

51 

52class PersistenceContextVars: 

53 r"""Helper class for deserializing butler data structures. 

54 

55 When serializing various butler data structures nested dataset types get 

56 serialized independently. This means what were multiple references to the 

57 same object in memory are all duplicated in the serialization process. 

58 

59 Upon deserialization multiple independent data structures are created to 

60 represent the same logical bit of data. 

61 

62 This class can be used to remove this duplication by caching objects as 

63 they are created and returning a reference to that object. This is done in 

64 concert with ``direct`` and ``from_simple`` methods on the various butler 

65 dataset structures. 

66 

67 This class utilizes class level variables as a form of global state. Each 

68 of the various data structures can look to see if these global caches has 

69 been initialized as a cache (a dictionary) or is in the default None state. 

70 

71 Users of this class are intended to create an instance, and then call the 

72 `run` method, supplying a callable function, and passing any required 

73 arguments. The `run` method then creates a specific execution context, 

74 initializing the caches, and then runs the supplied function. Upon 

75 completion of the function call, the caches are cleared and returned to the 

76 default state. 

77 

78 This process is thread safe. 

79 

80 Notes 

81 ----- 

82 Caches of `SerializedDatasetRef`\ s are intentionally left out. It was 

83 discovered that these caused excessive python memory allocations which 

84 though cleaned up upon completion, left the process using more memory than 

85 it otherwise needed as python does not return allocated memory to the OS 

86 until process completion. It was determined the runtime cost of recreating 

87 the `SerializedDatasetRef`\ s was worth the memory savings. 

88 """ 

89 

90 serializedDatasetTypeMapping: ContextVar[ 

91 dict[tuple[str, str], SerializedDatasetType] | None 

92 ] = ContextVar("serializedDatasetTypeMapping", default=None) 

93 r"""A cache of `SerializedDatasetType`\ s. 

94 """ 

95 

96 serializedDataCoordinateMapping: ContextVar[ 

97 dict[tuple[frozenset, bool], SerializedDataCoordinate] | None 

98 ] = ContextVar("serializedDataCoordinateMapping", default=None) 

99 r"""A cache of `SerializedDataCoordinate`\ s. 

100 """ 

101 

102 serializedDimensionRecordMapping: ContextVar[ 

103 dict[tuple[str, frozenset] | tuple[int, DataCoordinate], SerializedDimensionRecord] | None 

104 ] = ContextVar("serializedDimensionRecordMapping", default=None) 

105 r"""A cache of `SerializedDimensionRecord`\ s. 

106 """ 

107 

108 loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar( 

109 "loadedTypes", default=None 

110 ) 

111 r"""A cache of `DatasetType`\ s. 

112 """ 

113 

114 dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar( 

115 "dataCoordinates", default=None 

116 ) 

117 r"""A cache of `DataCoordinate`\ s. 

118 """ 

119 

120 datasetRefs: ContextVar[dict[tuple[int, str], DatasetRef] | None] = ContextVar( 

121 "datasetRefs", default=None 

122 ) 

123 r"""A cache of `DatasetRef`\ s. 

124 """ 

125 

126 dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar( 

127 "dimensionRecords", default=None 

128 ) 

129 r"""A cache of `DimensionRecord`\ s. 

130 """ 

131 

132 dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar( 

133 "dataStoreRecords", default=None 

134 ) 

135 r"""A cache of `DatastoreRecordData` objects. 

136 """ 

137 

138 @classmethod 

139 def _getContextVars(cls) -> dict[str, ContextVar]: 

140 """Build a dictionary of names to caches declared at class scope.""" 

141 classAttributes: dict[str, ContextVar] = {} 

142 for k in vars(cls): 

143 v = getattr(cls, k) 

144 # filter out callables and private attributes 

145 if not callable(v) and not k.startswith("__"): 

146 classAttributes[k] = v 

147 return classAttributes 

148 

149 def __init__(self) -> None: 

150 self._ctx: Context | None = None 

151 self._tokens: dict[str, Token] | None = None 

152 

153 def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V: 

154 # create a storage space for the tokens returned from setting the 

155 # context variables 

156 self._tokens = {} 

157 

158 # Set each cache to an empty dictionary and record the token returned 

159 # by this operation. 

160 for name, attribute in self._getContextVars().items(): 

161 self._tokens[name] = attribute.set({}) 

162 

163 # Call the supplied function and record the result 

164 result = function(*args, **kwargs) 

165 

166 # Reset all the context variables back to the state they were in before 

167 # this function was run. 

168 persistenceVars = self._getContextVars() 

169 assert self._tokens is not None 

170 for name, token in self._tokens.items(): 

171 attribute = persistenceVars[name] 

172 attribute.reset(token) 

173 self._tokens = None 

174 return result 

175 

176 def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T: 

177 """Execute the supplied function inside context specific caches. 

178 

179 Parameters 

180 ---------- 

181 function : `Callable` 

182 A callable which is to be executed inside a specific context. 

183 *args : tuple 

184 Positional arguments which are to be passed to the `Callable` 

185 **kwargs: dict, optional 

186 Extra key word arguments which are to be passed to the `Callable` 

187 

188 Returns 

189 ------- 

190 result : `Any` 

191 The result returned by executing the supplied `Callable` 

192 """ 

193 self._ctx = copy_context() 

194 # Type checkers seem to have trouble with a second layer nesting of 

195 # parameter specs in callables, so ignore the call here and explicitly 

196 # cast the result as we know this is exactly what the return type will 

197 # be. 

198 result = self._ctx.run(self._functionRunner, function, *args, **kwargs) # type: ignore 

199 return cast(_T, result)