Coverage for python/lsst/daf/butler/registry/managers.py: 45%

88 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-28 07:52 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names exported by ``from <module> import *``; both classes are
# defined further down in this module.
__all__ = (
    "RegistryManagerInstances",
    "RegistryManagerTypes",
)

28 

29import dataclasses 

30import logging 

31import warnings 

32from typing import Any, Dict, Generic, Type, TypeVar 

33 

34import sqlalchemy 

35from lsst.utils import doImportType 

36 

37from ..core import Config, DimensionConfig, DimensionUniverse 

38from ._config import RegistryConfig 

39from .interfaces import ( 

40 ButlerAttributeManager, 

41 CollectionManager, 

42 Database, 

43 DatasetRecordStorageManager, 

44 DatastoreRegistryBridgeManager, 

45 DimensionRecordStorageManager, 

46 OpaqueTableStorageManager, 

47 StaticTablesContext, 

48) 

49from .versions import ButlerVersionsManager 

50 

# Type variables that parameterize `_GenericRegistryManagers`, one per manager
# slot, so that the same struct layout can be specialized to hold either
# manager types or manager instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")


_LOG = logging.getLogger(__name__)

# Key under which the dimensions configuration (serialized as JSON) is stored
# in the attributes table.
_DIMENSIONS_ATTR = "config:dimensions.json"

63 

64 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Base struct used to pass around the manager instances or types that back
    a `Registry`.

    This class should only be used via its non-generic subclasses,
    `RegistryManagerInstances` and `RegistryManagerTypes`.

    Notes
    -----
    The dataclass is frozen, so fields cannot be reassigned after
    construction, and ``eq=False`` means no field-wise ``__eq__`` is generated
    (comparison stays identity-based).
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for opaque (to the Registry) tables."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

99 

100 

class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # Each dataclass field name doubles as the key of the corresponding
        # class name inside the "managers" configuration section.
        return cls(**{f.name: doImportType(config["managers", f.name]) for f in dataclasses.fields(cls)})

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` could not be serialized to JSON.

        Warns
        -----
        FutureWarning
            Issued if the configured datasets manager uses integer
            (`sqlalchemy.BigInteger`) dataset IDs.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                    # Attribute the warning to the caller of makeRepo rather
                    # than to this module.
                    stacklevel=2,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # NOTE(review): the writes below are placed after the context exits,
        # presumably because the static tables only exist once it closes --
        # confirm against the upstream source.
        # store managers and their versions in attributes table
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        # NOTE(review): extent of this ``with`` block is reconstructed --
        # confirm against the upstream source.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, {"attributes": attributes})
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # get serialized as a string from database
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Second pass: with the universe known, build the full set of managers.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # verify that configured versions are compatible with schema
        versions.checkManagersConfig()
        versions.checkManagersVersions(database.isWriteable())
        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

216 

217 

class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """A struct used to pass around the manager instances that back a
    `Registry`.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Construct manager instances from their types and an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct containing type objects for the manager instances to
            construct.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # Construction order matters: later managers are handed the earlier
        # ones (collections needs dimensions; datasets needs both; datastores
        # needs opaque).
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(
            database,
            context,
            dimensions=dimensions,
        )
        datasets = types.datasets.initialize(
            database,
            context,
            collections=collections,
            dimensions=dimensions,
        )
        opaque = types.opaque.initialize(database, context)
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            # The datasets manager *type* (not the instance built above) is
            # what gets passed here.
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        return ButlerVersionsManager(
            self.attributes,
            # Can't use dataclasses.asdict here, because it tries to do some
            # deepcopy stuff (?!) in order to find dataclasses recursively, and
            # that doesn't work on some manager objects that definitely aren't
            # supposed to be deep-copied anyway.
            {f.name: getattr(self, f.name) for f in dataclasses.fields(self)},
        )

    def refresh(self) -> None:
        """Refresh in-memory state by querying the database.

        Dimension caches are cleared first, then the dimensions, collections,
        and datasets managers are refreshed in that order.  The attributes,
        opaque, and datastores managers are not touched by this method.
        """
        self.dimensions.clearCaches()
        self.dimensions.refresh()
        self.collections.refresh()
        self.datasets.refresh()

308 self.collections.refresh() 

309 self.datasets.refresh()