Coverage for python/lsst/daf/butler/registry/managers.py: 46%

93 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-27 02:00 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "RegistryManagerInstances", 

26 "RegistryManagerTypes", 

27) 

28 

29import dataclasses 

30import logging 

31import warnings 

32from typing import Any, Dict, Generic, Type, TypeVar 

33 

34import sqlalchemy 

35from lsst.utils import doImportType 

36 

37from ..core import ColumnTypeInfo, Config, DimensionConfig, DimensionUniverse, ddl 

38from ._config import RegistryConfig 

39from .interfaces import ( 

40 ButlerAttributeManager, 

41 CollectionManager, 

42 Database, 

43 DatasetRecordStorageManager, 

44 DatastoreRegistryBridgeManager, 

45 DimensionRecordStorageManager, 

46 OpaqueTableStorageManager, 

47 StaticTablesContext, 

48) 

49from .versions import ButlerVersionsManager 

50 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Key under which the dimensions configuration is kept in the attributes
# table.
_DIMENSIONS_ATTR = "config:dimensions.json"

# Type parameters of `_GenericRegistryManagers`, one per manager slot.  The
# concrete subclasses bind them either to manager classes or to manager
# instances.
_Attributes = TypeVar("_Attributes")
_Dimensions = TypeVar("_Dimensions")
_Collections = TypeVar("_Collections")
_Datasets = TypeVar("_Datasets")
_Opaque = TypeVar("_Opaque")
_Datastores = TypeVar("_Datastores")

63 

64 

@dataclasses.dataclass(frozen=True, eq=False)
class _GenericRegistryManagers(
    Generic[_Attributes, _Dimensions, _Collections, _Datasets, _Opaque, _Datastores]
):
    """Generic base struct bundling the managers (as instances or as types)
    that back a `Registry`.

    Do not use this class directly; use one of its non-generic subclasses,
    `RegistryManagerInstances` or `RegistryManagerTypes`, instead.

    Notes
    -----
    The dataclass is frozen, so fields cannot be reassigned after
    construction, and ``eq=False`` means no field-wise ``__eq__`` is
    generated.
    """

    attributes: _Attributes
    """Manager for flat key-value pairs, including versions."""

    dimensions: _Dimensions
    """Manager for dimensions."""

    collections: _Collections
    """Manager for collections."""

    datasets: _Datasets
    """Manager for datasets, dataset types, and collection summaries."""

    opaque: _Opaque
    """Manager for tables that are opaque to the `Registry`."""

    datastores: _Datastores
    """Manager for the interface between `Registry` and `Datastore`."""

99 

100 

class RegistryManagerTypes(
    _GenericRegistryManagers[
        Type[ButlerAttributeManager],
        Type[DimensionRecordStorageManager],
        Type[CollectionManager],
        Type[DatasetRecordStorageManager],
        Type[OpaqueTableStorageManager],
        Type[DatastoreRegistryBridgeManager],
    ]
):
    """A struct used to pass around the types of the manager objects that back
    a `Registry`.
    """

    @classmethod
    def fromConfig(cls, config: RegistryConfig) -> RegistryManagerTypes:
        """Construct by extracting class names from configuration and importing
        them.

        Parameters
        ----------
        config : `RegistryConfig`
            Configuration object with a "managers" section that contains all
            fully-qualified class names for all manager types.

        Returns
        -------
        types : `RegistryManagerTypes`
            A new struct containing type objects.
        """
        # The field names of this struct double as the keys looked up in the
        # "managers" section of the configuration.
        return cls(**{f.name: doImportType(config["managers", f.name]) for f in dataclasses.fields(cls)})

    def makeRepo(self, database: Database, dimensionConfig: DimensionConfig) -> RegistryManagerInstances:
        """Create all persistent `Registry` state for a new, empty data
        repository, and return a new struct containing manager instances.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that will
            back the data repository.  Must point to an empty namespace, or at
            least one with no tables or other entities whose names might clash
            with those used by butler.
        dimensionConfig : `DimensionConfig`
            Configuration that defines a `DimensionUniverse`, to be written
            into the data repository and used to define aspects of the schema.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the new repository and backed by ``database``.

        Raises
        ------
        RuntimeError
            Raised if ``dimensionConfig`` could not be serialized to JSON.

        Warns
        -----
        FutureWarning
            Issued if the configured datasets manager reports
            `sqlalchemy.BigInteger` as its dataset ID column type.
        """
        universe = DimensionUniverse(dimensionConfig)
        with database.declareStaticTables(create=True) as context:
            if self.datasets.getIdColumnType() == sqlalchemy.BigInteger:
                warnings.warn(
                    "New data repositories should be created with UUID dataset IDs instead of autoincrement "
                    "integer dataset IDs; support for integers will be removed after v25.",
                    FutureWarning,
                    # Attribute the warning to the caller rather than to this
                    # method.
                    stacklevel=2,
                )
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # store managers and their versions in attributes table
        versions.storeManagersConfig()
        versions.storeManagersVersions()
        # dump universe config as json into attributes (faster than YAML)
        dimensionsJson = dimensionConfig.dump(format="json")
        if dimensionsJson is None:
            raise RuntimeError("Unexpectedly failed to serialize DimensionConfig to JSON")
        instances.attributes.set(_DIMENSIONS_ATTR, dimensionsJson)
        return instances

    def loadRepo(self, database: Database) -> RegistryManagerInstances:
        """Construct manager instances that point to an existing data
        repository.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.  Must point to a namespace that already holds
            all tables and other persistent entities used by butler.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing instances of the types contained by ``self``,
            pointing to the existing repository and backed by ``database``.

        Raises
        ------
        LookupError
            Raised if the dimensions configuration attribute is missing from
            the database.
        """
        # Create attributes manager only first, so we can use it to load the
        # embedded dimensions configuration.
        with database.declareStaticTables(create=False) as context:
            attributes = self.attributes.initialize(database, context)
            versions = ButlerVersionsManager(attributes, dict(attributes=attributes))
            # verify that configured versions are compatible with schema
            versions.checkManagersConfig()
            versions.checkManagersVersions(database.isWriteable())
            # Fetch the dimensions configuration, stored as a JSON string in
            # the attributes table.
            dimensionsString = attributes.get(_DIMENSIONS_ATTR)
            if dimensionsString is None:
                raise LookupError(f"Registry attribute {_DIMENSIONS_ATTR} is missing from database")
            dimensionConfig = DimensionConfig(Config.fromString(dimensionsString, format="json"))
            universe = DimensionUniverse(dimensionConfig)
        # Now that the universe is known, declare the tables again, this time
        # with the full set of managers.
        with database.declareStaticTables(create=False) as context:
            instances = RegistryManagerInstances.initialize(database, context, types=self, universe=universe)
            versions = instances.getVersions()
        # verify that configured versions are compatible with schema
        versions.checkManagersConfig()
        versions.checkManagersVersions(database.isWriteable())
        # Load content from database that we try to keep in-memory.
        instances.refresh()
        return instances

216 

217 

@dataclasses.dataclass(frozen=True, eq=False)
class RegistryManagerInstances(
    _GenericRegistryManagers[
        ButlerAttributeManager,
        DimensionRecordStorageManager,
        CollectionManager,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        DatastoreRegistryBridgeManager,
    ]
):
    """Bundle of the manager instances that back a `Registry`."""

    column_types: ColumnTypeInfo
    """Column type details that may vary between data repositories and
    registry instances; also carries the dimension universe.
    """

    @classmethod
    def initialize(
        cls,
        database: Database,
        context: StaticTablesContext,
        *,
        types: RegistryManagerTypes,
        universe: DimensionUniverse,
    ) -> RegistryManagerInstances:
        """Build every manager instance from its type, using an existing
        database connection.

        Parameters
        ----------
        database : `Database`
            Object that represents a connection to the SQL database that backs
            the data repository.
        context : `StaticTablesContext`
            Object used to create tables in ``database``.
        types : `RegistryManagerTypes`
            Struct holding the manager classes to instantiate.
        universe : `DimensionUniverse`
            Object that describes all dimensions in this data repository.

        Returns
        -------
        instances : `RegistryManagerInstances`
            Struct containing manager instances.
        """
        # The empty table spec is only a stand-in argument for the
        # foreign-key helpers; what is kept is the spec each one returns.
        placeholder = ddl.TableSpec(fields=())
        timespan_repr = database.getTimespanRepresentation()
        dataset_id_spec = types.datasets.addDatasetForeignKey(placeholder, primaryKey=False, nullable=False)
        run_key_spec = types.collections.addRunForeignKey(placeholder, primaryKey=False, nullable=False)
        column_types = ColumnTypeInfo(
            timespan_repr,
            universe,
            dataset_id_spec=dataset_id_spec,
            run_key_spec=run_key_spec,
        )

        # Managers are created in dependency order: later ones receive the
        # earlier ones as arguments.
        attributes = types.attributes.initialize(database, context)
        dimensions = types.dimensions.initialize(database, context, universe=universe)
        collections = types.collections.initialize(database, context, dimensions=dimensions)
        datasets = types.datasets.initialize(database, context, collections=collections, dimensions=dimensions)
        opaque = types.opaque.initialize(database, context)
        # Note that the datastores manager is handed the datasets manager
        # *type*, not the instance created above.
        datastores = types.datastores.initialize(
            database,
            context,
            opaque=opaque,
            datasets=types.datasets,
            universe=universe,
        )
        return cls(
            attributes=attributes,
            dimensions=dimensions,
            collections=collections,
            datasets=datasets,
            opaque=opaque,
            datastores=datastores,
            column_types=column_types,
        )

    def getVersions(self) -> ButlerVersionsManager:
        """Return an object that can report, check, and save the versions of
        all manager objects.

        Returns
        -------
        versions : `ButlerVersionsManager`
            Object that manages versions.
        """
        # dataclasses.asdict is deliberately avoided: it deep-copies while
        # searching for nested dataclasses, which does not work on some
        # manager objects (and they should never be deep-copied anyway).
        managers: Dict[str, Any] = {}
        for field in dataclasses.fields(self):
            # ``column_types`` is not a manager, so it is left out.
            if field.name == "column_types":
                continue
            managers[field.name] = getattr(self, field.name)
        return ButlerVersionsManager(self.attributes, managers)

    def refresh(self) -> None:
        """Refresh all in-memory state by querying the database."""
        self.dimensions.clearCaches()
        for manager in (self.dimensions, self.collections, self.datasets):
            manager.refresh()