Coverage for python/lsst/daf/butler/registry/managers.py: 44%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

83 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names of this module; both are classes defined further down.
__all__ = (
    "RegistryManagerInstances",
    "RegistryManagerTypes",
)

28 

29import dataclasses 

30import logging 

31from typing import Any, Dict, Generic, Type, TypeVar 

32 

33from lsst.utils import doImportType 

34 

35from ..core import Config, DimensionConfig, DimensionUniverse 

36from ._config import RegistryConfig 

37from .interfaces import ( 

38 ButlerAttributeManager, 

39 CollectionManager, 

40 Database, 

41 DatasetRecordStorageManager, 

42 DatastoreRegistryBridgeManager, 

43 DimensionRecordStorageManager, 

44 OpaqueTableStorageManager, 

45 StaticTablesContext, 

46) 

47from .versions import ButlerVersionsManager, DigestMismatchError 

48 

# Type variables used as the generic parameters of `_GenericRegistryManagers`,
# one per manager field and declared in the same order as those fields.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Key under which the JSON-serialized dimensions configuration is stored in
# (and later read back from) the attributes table.
_DIMENSIONS_ATTR = "config:dimensions.json"

61 

62 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Frozen struct with one field per kind of manager backing a `Registry`.

    Every field is typed by its own type variable, so the same layout can
    carry either manager types or manager instances.  Do not use this class
    directly; use the non-generic subclasses `RegistryManagerInstances` and
    `RegistryManagerTypes` instead.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

97 

98 

class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # One config entry is looked up per dataclass field, keyed by the
        # field name under the "managers" section.
        return cls(**{f.name: doImportType(config["managers", f.name]) for f in dataclasses.fields(cls)})

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` cannot be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # NOTE(review): attribute writes are issued only after the
        # static-tables context has closed; presumably the tables are not
        # guaranteed to exist before that point -- confirm against
        # `Database.declareStaticTables`.
        # Store managers and their versions in attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # Dump universe config as JSON into attributes (faster than YAML).
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # Verify that configured versions are compatible with schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Get the dimensions configuration, serialized as a string, from
            # the database.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
        universe = DimensionUniverse(dimensionConfig)
        # Second pass: now that the universe is known, build the full set of
        # managers and repeat the checks with all of them registered.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
            # Verify that configured versions are compatible with schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            try:
                versions.checkManagersDigests()
            except DigestMismatchError as exc:
                # Potentially digest mismatch is a serious error but during
                # development it could be benign, treat this as warning for
                # now.
                _LOG.warning("Registry schema digest mismatch: %s", exc)
        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

215 

216 

class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Struct bundling the live manager objects that back a `Registry`."""

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build every manager instance from its type and a database
        connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Managers are created in dependency order: later ones receive the
        # earlier ones as arguments.
        attributesManager = types.attributes.initialize(database, context)
        dimensionsManager = types.dimensions.initialize(database, context, universe=universe)
        collectionsManager = types.collections.initialize(
            database,
            context,
            dimensions=dimensionsManager,
        )
        datasetsManager = types.datasets.initialize(
            database,
            context,
            collections=collectionsManager,
            dimensions=dimensionsManager,
        )
        opaqueManager = types.opaque.initialize(database, context)
        # Note that the datastore bridge is handed the dataset manager *type*
        # (``types.datasets``), not the instance constructed above.
        datastoresManager = types.datastores.initialize(
            database,
            context,
            opaque=opaqueManager,
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributesManager,
            dimensions=dimensionsManager,
            collections=collectionsManager,
            datasets=datasetsManager,
            opaque=opaqueManager,
            datastores=datastoresManager,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # dataclasses.asdict is deliberately not used: it attempts to
        # deep-copy values while searching for nested dataclasses, which does
        # not work on some manager objects (and they should never be
        # deep-copied anyway).  Collect the fields by hand instead.
        managers: Dict[str, Any] = {}
        for field in dataclasses.fields(self):
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        # Dimension caches are cleared first, then each caching manager is
        # refreshed in a fixed order.
        self.dimensions.clearCaches()
        for manager in (self.dimensions, self.collections, self.datasets):
            manager.refresh()