Coverage for python/lsst/daf/butler/persistence_context.py: 53%

54 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-10 10:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("PersistenceContextVars",) 

31 

32 

33import uuid 

34from collections.abc import Callable, Hashable 

35from contextvars import Context, ContextVar, Token, copy_context 

36from typing import TYPE_CHECKING, ParamSpec, TypeVar 

37 

38if TYPE_CHECKING: 

39 from ._dataset_ref import DatasetRef 

40 from ._dataset_type import DatasetType, SerializedDatasetType 

41 from .datastore.record_data import DatastoreRecordData 

42 from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate 

43 from .dimensions._records import DimensionRecord, SerializedDimensionRecord 

44 

45_T = TypeVar("_T") 

46_V = TypeVar("_V") 

47 

48_P = ParamSpec("_P") 

49_Q = ParamSpec("_Q") 

50 

51 

52class PersistenceContextVars: 

53 r"""Helper class for deserializing butler data structures. 

54 

55 When serializing various butler data structures nested dataset types get 

56 serialized independently. This means what were multiple references to the 

57 same object in memory are all duplicated in the serialization process. 

58 

59 Upon deserialization multiple independent data structures are created to 

60 represent the same logical bit of data. 

61 

62 This class can be used to remove this duplication by caching objects as 

63 they are created and returning a reference to that object. This is done in 

64 concert with ``direct`` and ``from_simple`` methods on the various butler 

65 dataset structures. 

66 

67 This class utilizes class level variables as a form of global state. Each 

68 of the various data structures can look to see if these global caches has 

69 been initialized as a cache (a dictionary) or is in the default None state. 

70 

71 Users of this class are intended to create an instance, and then call the 

72 `run` method, supplying a callable function, and passing any required 

73 arguments. The `run` method then creates a specific execution context, 

74 initializing the caches, and then runs the supplied function. Upon 

75 completion of the function call, the caches are cleared and returned to the 

76 default state. 

77 

78 This process is thread safe. 

79 

80 Notes 

81 ----- 

82 Caches of `SerializedDatasetRef`\ s are intentionally left out. It was 

83 discovered that these caused excessive python memory allocations which 

84 though cleaned up upon completion, left the process using more memory than 

85 it otherwise needed as python does not return allocated memory to the OS 

86 until process completion. It was determined the runtime cost of recreating 

87 the `SerializedDatasetRef`\ s was worth the memory savings. 

88 """ 

89 

90 serializedDatasetTypeMapping: ContextVar[dict[tuple[str, str], SerializedDatasetType] | None] = ( 

91 ContextVar("serializedDatasetTypeMapping", default=None) 

92 ) 

93 r"""A cache of `SerializedDatasetType`\ s. 

94 """ 

95 

96 serializedDataCoordinateMapping: ContextVar[ 

97 dict[tuple[frozenset, bool], SerializedDataCoordinate] | None 

98 ] = ContextVar("serializedDataCoordinateMapping", default=None) 

99 r"""A cache of `SerializedDataCoordinate`\ s. 

100 """ 

101 

102 serializedDimensionRecordMapping: ContextVar[ 

103 dict[tuple[str, frozenset] | tuple[int, DataCoordinate], SerializedDimensionRecord] | None 

104 ] = ContextVar("serializedDimensionRecordMapping", default=None) 

105 r"""A cache of `SerializedDimensionRecord`\ s. 

106 """ 

107 

108 loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar( 

109 "loadedTypes", default=None 

110 ) 

111 r"""A cache of `DatasetType`\ s. 

112 """ 

113 

114 dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar( 

115 "dataCoordinates", default=None 

116 ) 

117 r"""A cache of `DataCoordinate`\ s. 

118 """ 

119 

120 datasetRefs: ContextVar[dict[int, DatasetRef] | None] = ContextVar("datasetRefs", default=None) 

121 r"""A cache of `DatasetRef`\ s. 

122 

123 Keys are UUID converted to int, but only refs of parent dataset types are 

124 cached AND THE STORAGE CLASS IS UNSPECIFIED; consumers of this cache must 

125 call overrideStorageClass on the result. 

126 """ 

127 

128 dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar( 

129 "dimensionRecords", default=None 

130 ) 

131 r"""A cache of `DimensionRecord`\ s. 

132 """ 

133 

134 dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar( 

135 "dataStoreRecords", default=None 

136 ) 

137 r"""A cache of `DatastoreRecordData` objects. 

138 """ 

139 

140 @classmethod 

141 def _getContextVars(cls) -> dict[str, ContextVar]: 

142 """Build a dictionary of names to caches declared at class scope.""" 

143 classAttributes: dict[str, ContextVar] = {} 

144 for k in vars(cls): 

145 v = getattr(cls, k) 

146 # filter out callables and private attributes 

147 if not callable(v) and not k.startswith("__"): 

148 classAttributes[k] = v 

149 return classAttributes 

150 

151 def __init__(self) -> None: 

152 self._ctx: Context | None = None 

153 self._tokens: dict[str, Token] | None = None 

154 

155 def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V: 

156 # create a storage space for the tokens returned from setting the 

157 # context variables 

158 self._tokens = {} 

159 

160 # Set each cache to an empty dictionary and record the token returned 

161 # by this operation. 

162 for name, attribute in self._getContextVars().items(): 

163 self._tokens[name] = attribute.set({}) 

164 

165 # Call the supplied function and record the result 

166 result = function(*args, **kwargs) 

167 

168 # Reset all the context variables back to the state they were in before 

169 # this function was run. 

170 persistenceVars = self._getContextVars() 

171 assert self._tokens is not None 

172 for name, token in self._tokens.items(): 

173 attribute = persistenceVars[name] 

174 attribute.reset(token) 

175 self._tokens = None 

176 return result 

177 

178 def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T: 

179 """Execute the supplied function inside context specific caches. 

180 

181 Parameters 

182 ---------- 

183 function : `Callable` 

184 A callable which is to be executed inside a specific context. 

185 *args : tuple 

186 Positional arguments which are to be passed to the `Callable`. 

187 **kwargs : dict, optional 

188 Extra key word arguments which are to be passed to the `Callable`. 

189 

190 Returns 

191 ------- 

192 result : `Any` 

193 The result returned by executing the supplied `Callable`. 

194 """ 

195 self._ctx = copy_context() 

196 # Type checkers seem to have trouble with a second layer nesting of 

197 # parameter specs in callables, so ignore the call here and explicitly 

198 # cast the result as we know this is exactly what the return type will 

199 # be. 

200 result = self._ctx.run(self._functionRunner, function, *args, **kwargs) # type: ignore 

201 return result