Coverage for python/lsst/daf/butler/server.py: 3%

141 statements  

coverage.py v7.3.1, created at 2023-10-02 07:59 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()

import logging
from collections.abc import Mapping
from enum import Enum, auto
from typing import Any

from fastapi import Depends, FastAPI, HTTPException, Query
from fastapi.middleware.gzip import GZipMiddleware
from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetId,
    DatasetRef,
    SerializedDataCoordinate,
    SerializedDatasetRef,
    SerializedDatasetType,
    SerializedDimensionRecord,
)
from lsst.daf.butler.core.serverModels import (
    ExpressionQueryParameter,
    QueryDataIdsModel,
    QueryDatasetsModel,
    QueryDimensionRecordsModel,
)
from lsst.daf.butler.registry import CollectionType

BUTLER_ROOT = "ci_hsc_gen3/DATA"

log = logging.getLogger("excalibur")


class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface."""

    def _generate_next_value_(name, start, count, last_values) -> str:  # type: ignore # noqa: N805
        # Use the name directly as the value
        return name

    RUN = auto()
    CALIBRATION = auto()
    CHAINED = auto()
    TAGGED = auto()
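# Illustrative aside, not part of the original module: with the
# ``_generate_next_value_`` hook above, ``auto()`` assigns each member a value
# equal to its own name, which is what lets these enums round-trip cleanly as
# FastAPI path and query parameters. For example:
#
#     assert CollectionTypeNames.RUN.value == "RUN"
#     assert CollectionTypeNames("TAGGED") is CollectionTypeNames.TAGGED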

app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)


GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None


def _make_global_butler() -> None:
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)


def butler_readonly_dependency() -> Butler:
    """Return global read-only butler."""
    _make_global_butler()
    return Butler(butler=GLOBAL_READONLY_BUTLER)


def butler_readwrite_dependency() -> Butler:
    """Return read-write butler."""
    _make_global_butler()
    return Butler(butler=GLOBAL_READWRITE_BUTLER)
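# A minimal sketch, not part of the original module, of how these dependency
# callables can be swapped out in tests via FastAPI's standard override
# mechanism (``test_butler`` is a hypothetical Butler backed by a scratch repo):
#
#     from fastapi.testclient import TestClient
#
#     client = TestClient(app)
#     app.dependency_overrides[butler_readwrite_dependency] = lambda: test_butler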

def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert the serialized dataId back to full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to use for registry and universe.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The DataId usable by registry.
    """
    if data_id is None:
        return None
    return DataCoordinate.from_simple(data_id, registry=butler.registry)


@app.get("/butler/")
def read_root() -> str:
    """Return message when accessing the root URL."""
    return "Welcome to Excalibur... aka your Butler Server"


@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    config_str = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    config = Config.fromString(config_str, format="yaml")
    return config.toDict()
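# For illustration only (an assumption about what ``Config.fromString(...).toDict()``
# produces for the YAML above, not output captured from a running server), the
# client receives roughly:
#
#     {
#         "datastore": {"root": "ci_hsc_gen3/DATA"},
#         "registry": {
#             "cls": "lsst.daf.butler.registries.remote.RemoteRegistry",
#             "db": "<butlerRoot>",
#         },
#     }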

@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]:
    """Allow a remote client to get the dimensions definition."""
    return butler.dimensions.dimensionConfig.toDict()

@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI for a non-disassembled dataset."""
    ref = butler.registry.getDataset(id)
    if not ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    uri = butler.getURI(ref)

    # In reality this would have to be converted to a signed URL.
    return str(uri)
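# A minimal sketch of the signed-URL step mentioned above, assuming an
# S3-backed datastore and boto3; the ``netloc``/``relativeToPathRoot``
# attributes are assumptions used for illustration, not code from this module:
#
#     import boto3
#
#     s3 = boto3.client("s3")
#     signed = s3.generate_presigned_url(
#         "get_object",
#         Params={"Bucket": uri.netloc, "Key": uri.relativeToPathRoot},
#         ExpiresIn=3600,
#     )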

@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry cache."""
    # Unclear whether this should exist. Which butler is really being
    # refreshed? How do we know the server we are refreshing is used later?
    # For testing at the moment it is important if a test adds a dataset type
    # directly in the server since the test client will not see it.
    butler.registry.refresh()


@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Return the dataset type."""
    datasetType = butler.registry.getDatasetType(datasetTypeName)
    return datasetType.to_simple()


@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return all dataset types."""
    datasetTypes = butler.registry.queryDatasetTypes(..., components=components)
    return [d.to_simple() for d in datasetTypes]

@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return dataset types matching the given regular expressions or globs."""
    expression_params = ExpressionQueryParameter(regex=regex, glob=glob)

    datasetTypes = butler.registry.queryDatasetTypes(expression_params.expression(), components=components)
    return [d.to_simple() for d in datasetTypes]

@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the collection chain members."""
    chain = butler.registry.getCollectionChain(parent)
    return list(chain)


@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return collections matching query."""
    expression_params = ExpressionQueryParameter(regex=regex, glob=glob)
    collectionTypes = CollectionType.from_names(collectionType)
    dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None

    collections = butler.registry.queryCollections(
        expression=expression_params.expression(),
        datasetType=dataset_type,
        collectionTypes=collectionTypes,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(collections)


@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return type for named collection."""
    collectionType = butler.registry.getCollectionType(name)
    return collectionType.name

@app.put("/butler/v1/registry/collection/{name:path}/{collectionTypeName}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a collection."""
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Need to refresh the global read-only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name
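# Usage sketch (an assumption for illustration, not a recorded client call):
# because ``name`` uses the ``:path`` converter, collection names containing
# slashes are accepted, with the final path segment taken as the collection
# type. Using the TestClient from the earlier sketch:
#
#     client.put("/butler/v1/registry/collection/u/someone/my_run/RUN")
#
# registers the RUN collection ``u/someone/my_run``.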

@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return a single dataset reference."""
    ref = butler.registry.getDataset(id)
    if ref is not None:
        return ref.to_simple()

    # This could raise a 404 since the id was not found. The standard registry
    # getDataset method returns None without error, so follow that example here.
    return ref

@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the datastore locations of a dataset."""
    # Takes an ID so need to convert to a real DatasetRef
    fake_ref = SerializedDatasetRef(id=id)

    try:
        # Converting this to a real DatasetRef takes time and is not
        # needed internally since only the ID is used.
        ref = DatasetRef.from_simple(fake_ref, registry=butler.registry)
    except Exception:
        # SQL getDatasetLocations looks at ID in datastore and does not
        # check it is in registry. Follow that example and return without
        # error.
        return []

    return list(butler.registry.getDatasetLocations(ref))

# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference matching query."""
    collection_query = collections if collections else None

    ref = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query
    )
    return ref.to_simple() if ref else None

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Return datasets matching query."""
    # This method might return a lot of results

    if query.collections:
        collections = query.collections.expression()
    else:
        collections = None

    datasets = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [ref.to_simple() for ref in datasets]

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Return data IDs matching query."""
    if query.datasets:
        datasets = query.datasets.expression()
    else:
        datasets = None
    if query.collections:
        collections = query.collections.expression()
    else:
        collections = None

    dataIds = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [coord.to_simple() for coord in dataIds]

# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return dimension records matching query."""
    if query.datasets:
        datasets = query.datasets.expression()
    else:
        datasets = None
    if query.collections:
        collections = query.collections.expression()
    else:
        collections = None

    records = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [r.to_simple() for r in records]
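# A minimal sketch of running this app locally, assuming the usual FastAPI
# workflow rather than anything documented by this package:
#
#     uvicorn lsst.daf.butler.server:app --reload
#
# with ``ci_hsc_gen3/DATA`` available in the working directory so that
# ``BUTLER_ROOT`` resolves.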