Coverage for python/lsst/daf/butler/server.py: 3%

141 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = () 

25 

26import logging 

27from collections.abc import Mapping 

28from enum import Enum, auto 

29from typing import Any 

30 

31from fastapi import Depends, FastAPI, HTTPException, Query 

32from fastapi.middleware.gzip import GZipMiddleware 

33from lsst.daf.butler import ( 

34 Butler, 

35 Config, 

36 DataCoordinate, 

37 DatasetId, 

38 DatasetRef, 

39 SerializedDataCoordinate, 

40 SerializedDatasetRef, 

41 SerializedDatasetType, 

42 SerializedDimensionRecord, 

43) 

44from lsst.daf.butler.core.serverModels import ( 

45 ExpressionQueryParameter, 

46 QueryDataIdsModel, 

47 QueryDatasetsModel, 

48 QueryDimensionRecordsModel, 

49) 

50from lsst.daf.butler.registry import CollectionType 

51 

# Root of the data repository served by this application.  Hard-coded for
# now; a real deployment would read this from configuration.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module logger ("excalibur" is this server's internal codename).
log = logging.getLogger("excalibur")

55 

56 

class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface.

    Mixing in `str` lets FastAPI accept and serialize members directly as
    string query/path parameters.
    """

    # Enum protocol hook: called for each auto() member; the positional
    # first argument is the member name (hence the N805 suppression).
    def _generate_next_value_(name, start, count, last_values) -> str:  # type: ignore # noqa: N805
        # Use the name directly as the value
        return name

    RUN = auto()
    CALIBRATION = auto()
    CHAINED = auto()
    TAGGED = auto()

68 

69 

app = FastAPI()
# Transparently gzip responses larger than 1000 bytes (query results can
# be sizeable).
app.add_middleware(GZipMiddleware, minimum_size=1000)


# Process-wide Butler instances shared across requests.  Created lazily on
# first use; `None` means "not yet constructed".
GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None

76 

77 

def _make_global_butler() -> None:
    """Create the shared read-only and read-write Butlers if not yet made.

    Idempotent: each global is constructed at most once and cached at
    module scope so later requests reuse the same instances.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)

84 

85 

def butler_readonly_dependency() -> Butler:
    """Return global read-only butler.

    FastAPI dependency.  A new Butler is layered on top of the shared
    read-only instance for each request — presumably to keep per-request
    state isolated from the global one (TODO confirm).
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READONLY_BUTLER)

90 

91 

def butler_readwrite_dependency() -> Butler:
    """Return read-write butler.

    FastAPI dependency.  Like `butler_readonly_dependency` but layered on
    the shared writeable instance.
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READWRITE_BUTLER)

96 

97 

98def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: 

99 """Convert the serialized dataId back to full DataCoordinate. 

100 

101 Parameters 

102 ---------- 

103 butler : `lsst.daf.butler.Butler` 

104 The butler to use for registry and universe. 

105 data_id : `SerializedDataCoordinate` or `None` 

106 The serialized form. 

107 

108 Returns 

109 ------- 

110 dataId : `DataCoordinate` or `None` 

111 The DataId usable by registry. 

112 """ 

113 if data_id is None: 

114 return None 

115 return DataCoordinate.from_simple(data_id, registry=butler.registry) 

116 

117 

@app.get("/butler/")
def read_root() -> str:
    """Return message when accessing the root URL."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting

122 

123 

@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Client talks to this server through a RemoteRegistry; the datastore
    # root points at the repository served here.
    yaml_config = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(yaml_config, format="yaml").toDict()

136 

137 

@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]:
    """Allow remote client to get dimensions definition.

    Returns the dimension universe configuration as a plain dict.
    """
    return butler.dimensions.dimensionConfig.toDict()

142 

143 

@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset.

    Raises a 404 if the dataset ID is unknown to the registry.
    """
    dataset_ref = butler.registry.getDataset(id)
    if not dataset_ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality would have to convert this to a signed URL
    return str(butler.getURI(dataset_ref))

155 

156 

@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry cache."""
    # Unclear whether this should exist. Which butler is really being
    # refreshed? How do we know the server we are refreshing is used later?
    # For testing at the moment it is important if a test adds a dataset type
    # directly in the server since the test client will not see it.
    butler.registry.refresh()

165 

166 

@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Return the dataset type.

    Looks up the named dataset type in the registry and returns its
    serialized form.
    """
    return butler.registry.getDatasetType(datasetTypeName).to_simple()

181 

182 

@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return all dataset types.

    The Ellipsis expression matches every dataset type in the registry.
    """
    all_types = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in all_types]

197 

198 

@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return all dataset types matching a regular expression.

    Regex and glob query parameters are combined into a single registry
    expression before the query is issued.
    """
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matching_types = butler.registry.queryDatasetTypes(expression, components=components)
    return [dataset_type.to_simple() for dataset_type in matching_types]

218 

219 

@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the collection chain members.

    The ``:path`` converter allows collection names containing slashes.
    """
    return list(butler.registry.getCollectionChain(parent))

225 

226 

@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return collections matching query.

    Regex/glob parameters are folded into a single expression; the optional
    dataset type name is resolved to a real DatasetType before querying.
    """
    name_expression = ExpressionQueryParameter(regex=regex, glob=glob)
    resolved_dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None
    requested_types = CollectionType.from_names(collectionType)

    matches = butler.registry.queryCollections(
        expression=name_expression.expression(),
        datasetType=resolved_dataset_type,
        collectionTypes=requested_types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(matches)

250 

251 

@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return type for named collection.

    The result is the CollectionType member name as a string.
    """
    return butler.registry.getCollectionType(name).name

257 

258 

@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a collection.

    Returns the registered collection name on success.
    """
    # NOTE(review): the route declares a `{type_}` path parameter but no
    # function parameter named `type_` exists, so FastAPI will expose
    # `collectionTypeName` as a query parameter instead of binding the path
    # segment — confirm this is the intended API shape.
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name

276 

277 

@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return a single dataset reference.

    An unknown ID yields None rather than a 404, mirroring the behavior of
    the standard registry getDataset method.
    """
    ref = butler.registry.getDataset(id)
    return None if ref is None else ref.to_simple()

296 

297 

@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return locations of datasets.

    Only an ID is supplied, so a minimal serialized ref is built around it
    and then expanded via the registry.
    """
    try:
        # Converting this to a real DatasetRef takes time and is not
        # needed internally since only the ID is used.
        ref = DatasetRef.from_simple(SerializedDatasetRef(id=id), registry=butler.registry)
    except Exception:
        # SQL getDatasetLocations looks at ID in datastore and does not
        # check it is in registry. Follow that example and return without
        # error.
        return []

    return list(butler.registry.getDatasetLocations(ref))

315 

316 

# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference matching query.

    An empty collections list is normalized to None so the registry uses
    its defaults.
    """
    ref = butler.registry.findDataset(
        datasetType,
        dataId=unpack_dataId(butler, dataId),
        collections=collections if collections else None,
    )
    return ref.to_simple() if ref else None

339 

340 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Return datasets matching query."""
    # This method might return a lot of results
    collections = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [dataset_ref.to_simple() for dataset_ref in matches]

374 

375 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Return data IDs matching query."""
    # Absent expressions are normalized to None so registry defaults apply.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [coordinate.to_simple() for coordinate in matches]

410 

411 

# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return dimension records matching query."""
    # Absent expressions are normalized to None so registry defaults apply.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in matches]