# Coverage report residue (coverage.py v7.2.7, created 2023-07-14 19:20 +0000):
# python/lsst/daf/butler/core/persistenceContext.py — 53% of 54 statements covered.

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

from __future__ import annotations

__all__ = ("PersistenceContextVars",)


import uuid
from collections.abc import Callable
from contextvars import Context, ContextVar, Token, copy_context
from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast

# These imports are only needed for type annotations; guarding them avoids
# import cycles at runtime.
if TYPE_CHECKING:
    from .datasets.ref import DatasetRef
    from .datasets.type import DatasetType, SerializedDatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate
    from .dimensions._records import DimensionRecord, SerializedDimensionRecord

# Return-type variables for the generic run/_functionRunner helpers.
_T = TypeVar("_T")
_V = TypeVar("_V")

# Parameter specs so *args/**kwargs of the wrapped callables stay typed.
_P = ParamSpec("_P")
_Q = ParamSpec("_Q")

46class PersistenceContextVars: 

47 r"""Helper class for deserializing butler data structures. 

48 

49 When serializing various butler data structures nested dataset types get 

50 serialized independently. This means what were multiple references to the 

51 same object in memory are all duplicated in the serialization process. 

52 

53 Upon deserialization multiple independent data structures are created to 

54 represent the same logical bit of data. 

55 

56 This class can be used to remove this duplication by caching objects as 

57 they are created and returning a reference to that object. This is done in 

58 concert with ``direct`` and ``from_simple`` methods on the various butler 

59 dataset structures. 

60 

61 This class utilizes class level variables as a form of global state. Each 

62 of the various data structures can look to see if these global caches has 

63 been initialized as a cache (a dictionary) or is in the default None state. 

64 

65 Users of this class are intended to create an instance, and then call the 

66 `run` method, supplying a callable function, and passing any required 

67 arguments. The `run` method then creates a specific execution context, 

68 initializing the caches, and then runs the supplied function. Upon 

69 completion of the function call, the caches are cleared and returned to the 

70 default state. 

71 

72 This process is thread safe. 

73 

74 Notes 

75 ----- 

76 Caches of `SerializedDatasetRef`\ s are intentionally left out. It was 

77 discovered that these caused excessive python memory allocations which 

78 though cleaned up upon completion, left the process using more memory than 

79 it otherwise needed as python does not return allocated memory to the OS 

80 until process completion. It was determined the runtime cost of recreating 

81 the `SerializedDatasetRef`\ s was worth the memory savings. 

82 """ 

83 

84 serializedDatasetTypeMapping: ContextVar[ 

85 dict[tuple[str, str], SerializedDatasetType] | None 

86 ] = ContextVar("serializedDatasetTypeMapping", default=None) 

87 r"""A cache of `SerializedDatasetType`\ s. 

88 """ 

89 

90 serializedDataCoordinateMapping: ContextVar[ 

91 dict[tuple[frozenset, bool], SerializedDataCoordinate] | None 

92 ] = ContextVar("serializedDataCoordinateMapping", default=None) 

93 r"""A cache of `SerializedDataCoordinate`\ s. 

94 """ 

95 

96 serializedDimensionRecordMapping: ContextVar[ 

97 dict[tuple[str, frozenset], SerializedDimensionRecord] | None 

98 ] = ContextVar("serializedDimensionRecordMapping", default=None) 

99 r"""A cache of `SerializedDimensionRecord`\ s. 

100 """ 

101 

102 loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar( 

103 "loadedTypes", default=None 

104 ) 

105 r"""A cache of `DatasetType`\ s. 

106 """ 

107 

108 dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar( 

109 "dataCoordinates", default=None 

110 ) 

111 r"""A cache of `DataCoordinate`\ s. 

112 """ 

113 

114 datasetRefs: ContextVar[dict[tuple[int, str], DatasetRef] | None] = ContextVar( 

115 "datasetRefs", default=None 

116 ) 

117 r"""A cache of `DatasetRef`\ s. 

118 """ 

119 

120 dimensionRecords: ContextVar[dict[tuple[str, frozenset], DimensionRecord] | None] = ContextVar( 

121 "dimensionRecords", default=None 

122 ) 

123 r"""A cache of `DimensionRecord`\ s. 

124 """ 

125 

126 dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar( 

127 "dataStoreRecords", default=None 

128 ) 

129 r"""A cache of `DatastoreRecordData` objects. 

130 """ 

131 

132 @classmethod 

133 def _getContextVars(cls) -> dict[str, ContextVar]: 

134 """Build a dictionary of names to caches declared at class scope.""" 

135 classAttributes: dict[str, ContextVar] = {} 

136 for k in vars(cls): 

137 v = getattr(cls, k) 

138 # filter out callables and private attributes 

139 if not callable(v) and not k.startswith("__"): 

140 classAttributes[k] = v 

141 return classAttributes 

142 

143 def __init__(self) -> None: 

144 self._ctx: Context | None = None 

145 self._tokens: dict[str, Token] | None = None 

146 

147 def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V: 

148 # create a storage space for the tokens returned from setting the 

149 # context variables 

150 self._tokens = {} 

151 

152 # Set each cache to an empty dictionary and record the token returned 

153 # by this operation. 

154 for name, attribute in self._getContextVars().items(): 

155 self._tokens[name] = attribute.set({}) 

156 

157 # Call the supplied function and record the result 

158 result = function(*args, **kwargs) 

159 

160 # Reset all the context variables back to the state they were in before 

161 # this function was run. 

162 persistenceVars = self._getContextVars() 

163 assert self._tokens is not None 

164 for name, token in self._tokens.items(): 

165 attribute = persistenceVars[name] 

166 attribute.reset(token) 

167 self._tokens = None 

168 return result 

169 

170 def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T: 

171 """Execute the supplied function inside context specific caches. 

172 

173 Parameters 

174 ---------- 

175 function : `Callable` 

176 A callable which is to be executed inside a specific context. 

177 *args : tuple 

178 Positional arguments which are to be passed to the `Callable` 

179 **kwargs: dict, optional 

180 Extra key word arguments which are to be passed to the `Callable` 

181 

182 Returns 

183 ------- 

184 result : `Any` 

185 The result returned by executing the supplied `Callable` 

186 """ 

187 self._ctx = copy_context() 

188 # Type checkers seem to have trouble with a second layer nesting of 

189 # parameter specs in callables, so ignore the call here and explicitly 

190 # cast the result as we know this is exactly what the return type will 

191 # be. 

192 result = self._ctx.run(self._functionRunner, function, *args, **kwargs) # type: ignore 

193 return cast(_T, result)