Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Names exported by a star-import of this module.
__all__ = ("RegistryManagerInstances", "RegistryManagerTypes")

28 

29import dataclasses 

30import logging 

31from typing import Any, Dict, Generic, Type, TypeVar 

32 

33from lsst.utils import doImport 

34 

35from ..core import ( 

36 Config, 

37 DimensionConfig, 

38 DimensionUniverse, 

39) 

40from ._config import RegistryConfig 

41from .interfaces import ( 

42 ButlerAttributeManager, 

43 Database, 

44 DimensionRecordStorageManager, 

45 CollectionManager, 

46 DatasetRecordStorageManager, 

47 DatastoreRegistryBridgeManager, 

48 OpaqueTableStorageManager, 

49 StaticTablesContext, 

50) 

51from .versions import ButlerVersionsManager, DigestMismatchError 

52 

# Type parameters of `_GenericRegistryManagers`, one per manager field.  The
# concrete subclasses bind them either to manager classes or to the types of
# those classes.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")

# Logger named after this module.
_LOG = logging.getLogger(__name__)

# Key in the attributes table under which the dimensions configuration is
# stored (serialized as JSON).
_DIMENSIONS_ATTR = "config:dimensions.json"

65 

66 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Frozen struct with one slot per manager that backs a `Registry`.

    Every slot has its own type parameter, so the same layout can hold
    either manager instances or manager types.  It is meant to be used only
    through its non-generic subclasses, `RegistryManagerInstances` and
    `RegistryManagerTypes`.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

100 

101 

class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # The dataclass field names double as the keys looked up in the
        # "managers" section of the configuration.
        managerTypes = {
            field.name: doImport(config["managers", field.name]) for field in dataclasses.fields(cls)
        }
        return cls(**managerTypes)

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` could not be serialized to JSON.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # NOTE(review): the extent of the ``with`` block above was
        # reconstructed from a listing whose indentation was lost; the writes
        # below are assumed to follow it -- confirm against the original file.
        # Store managers and their versions in attributes table.
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # Dump universe config as JSON into attributes (faster than YAML).
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        # NOTE(review): the extent of both ``with`` blocks in this method was
        # reconstructed from a listing whose indentation was lost -- confirm
        # against the original file.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # Verify that configured versions are compatible with schema.
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Get the configuration, serialized as a string, from database.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: now that the universe is known, declare every manager.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # Verify that configured versions are compatible with schema, this
        # time for the full set of managers.
        versions.checkManagersConfig()
        versions.checkManagersVersions(database.isWriteable())
        try:
            versions.checkManagersDigests()
        except DigestMismatchError as exc:
            # Potentially digest mismatch is a serious error but during
            # development it could be benign, treat this as warning for
            # now.  The exception is passed as a logging argument so that
            # the message is only formatted if the record is emitted.
            _LOG.warning("Registry schema digest mismatch: %s", exc)
        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

219 

220 

class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Struct bundling the manager objects that back a `Registry`."""

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build every manager instance from its type, using an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Managers are built in dependency order: each one is handed the
        # previously built managers it needs.
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(database, context, dimensions=dimensions)
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        opaque = types.opaque.initialize(database, context)
        # The datastore bridge is given the datasets manager *type*
        # (``types.datasets``), not the instance constructed above.
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # Collect the managers by field name by hand.  dataclasses.asdict is
        # not usable here, because it tries to do some deepcopy stuff in order
        # to find dataclasses recursively, and that doesn't work on some
        # manager objects that definitely aren't supposed to be deep-copied
        # anyway.
        managers: Dict[str, Any] = {}
        for field in dataclasses.fields(self):
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database.

        The dimensions manager has its caches cleared and is refreshed first,
        followed by the collections and then the datasets managers.
        """
        dimensions = self.dimensions
        dimensions.clearCaches()
        dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()

309 self.collections.refresh() 

310 self.datasets.refresh()