Coverage for python/lsst/daf/butler/registry/managers.py: 46%

79 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-24 02:27 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "RegistryManagerInstances", 

26 "RegistryManagerTypes", 

27) 

28 

29import dataclasses 

30import logging 

31from typing import Any, Dict, Generic, Type, TypeVar 

32 

33from lsst.utils import doImportType 

34 

35from ..core import Config, DimensionConfig, DimensionUniverse 

36from ._config import RegistryConfig 

37from .interfaces import ( 

38 ButlerAttributeManager, 

39 CollectionManager, 

40 Database, 

41 DatasetRecordStorageManager, 

42 DatastoreRegistryBridgeManager, 

43 DimensionRecordStorageManager, 

44 OpaqueTableStorageManager, 

45 StaticTablesContext, 

46) 

47from .versions import ButlerVersionsManager 

48 

# Type parameters of `_GenericRegistryManagers`, one per manager slot and
# listed in the same order as that struct's fields.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


_LOG = logging.getLogger(__name__)

# Key in the attributes table under which the dimensions configuration is
# stored (as a JSON string).
_DIMENSIONS_ATTR = "config:dimensions.json"

61 

62 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Generic struct bundling the managers that back a `Registry`.

    Depending on how the type parameters are bound, each field holds either
    a manager instance or a manager type.  Client code should use one of the
    concrete subclasses, `RegistryManagerInstances` or
    `RegistryManagerTypes`, instead of this class.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

97 

98 

class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """Struct holding the *types* of the manager objects that back a
    `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Build a struct of manager types by importing the classes named in
        configuration.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration with a "managers" section holding one
            fully-qualified class name per manager field.

        Returns
        -------
        types : `RegistryManagerTypes`
            New struct whose fields are the imported type objects.
        """
        # One config entry is looked up per dataclass field, keyed by the
        # field name.
        managerTypes = {}
        for field in dataclasses.fields(cls):
            managerTypes[field.name] = doImportType(config["managers", field.name])
        return cls(**managerTypes)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create the persistent `Registry` state for a new, empty data
        repository and return the manager instances that operate on it.

        Parameters
        ----------
        database : `Database`
            Connection to the SQL database that will back the repository.
            It must point to an empty namespace, or at least one without
            tables or other entities whose names could clash with those
            used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration defining the `DimensionUniverse`; it is written
            into the repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Instances of the types held by ``self``, pointing to the new
            repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` cannot be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # Record the managers and their versions in the attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # The universe configuration is saved as JSON (faster than YAML).
        serialized = dimensionConfig.dump(format="json")
        if serialized is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, serialized)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances for a data repository that already
        exists.

        Parameters
        ----------
        database : `Database`
            Connection to the SQL database backing the repository.  It must
            point to a namespace that already holds all tables and other
            persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Instances of the types held by ``self``, pointing to the
            existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is not present
            in the database.
        """
        # First pass: only the attributes manager is created, because it is
        # needed to read the embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # Check that the configured versions are compatible with the
            # schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # The configuration comes back from the database as a string.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: with the universe known, create every manager.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # Repeat the compatibility checks, now for all managers.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Populate the state that is kept in memory from the database.
            instances.refresh()
        return instances

208 

209 

class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Struct holding the manager *instances* that back a `Registry`."""

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Instantiate every manager from its type, using an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Connection to the SQL database that backs the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct with the type objects of the managers to construct.
        universe : `DimensionUniverse`
            Description of all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing the new manager instances.
        """
        # Construction order matters: later managers receive earlier ones.
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(database, context, dimensions=dimensions)
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        opaque = types.opaque.initialize(database, context)
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            # The dataset manager *type* is passed here, not the instance
            # created above.
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions
        of all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # dataclasses.asdict is deliberately avoided: it deep-copies values
        # while searching for nested dataclasses, which does not work for
        # some manager objects (and they should never be deep-copied).
        managers = {}
        for field in dataclasses.fields(self):
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        self.dimensions.clearCaches()
        for manager in (self.dimensions, self.collections, self.datasets):
            manager.refresh()

299 self.dimensions.refresh() 

300 self.collections.refresh() 

301 self.datasets.refresh()