Coverage for python/lsst/daf/butler/server.py: 4%

141 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:56 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = () 

25 

26import logging 

27from collections.abc import Mapping 

28from enum import Enum, auto 

29from typing import Any 

30 

31from fastapi import Depends, FastAPI, HTTPException, Query 

32from fastapi.middleware.gzip import GZipMiddleware 

33from lsst.daf.butler import ( 

34 Butler, 

35 Config, 

36 DataCoordinate, 

37 DatasetId, 

38 DatasetRef, 

39 DimensionConfig, 

40 SerializedDataCoordinate, 

41 SerializedDatasetRef, 

42 SerializedDatasetType, 

43 SerializedDimensionRecord, 

44) 

45from lsst.daf.butler.core.serverModels import ( 

46 ExpressionQueryParameter, 

47 QueryDataIdsModel, 

48 QueryDatasetsModel, 

49 QueryDimensionRecordsModel, 

50) 

51from lsst.daf.butler.registry import CollectionType 

52 

# Root of the Butler repository that this server exposes.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Server logger; the name matches the greeting returned by read_root().
log = logging.getLogger("excalibur")

57 

class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface."""

    # Each member's value is spelled out explicitly as its own name,
    # exactly what auto() with a name-returning _generate_next_value_
    # hook would produce.
    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"

69 

70 

app = FastAPI()
# Transparently gzip-compress responses of 1000 bytes or more.
app.add_middleware(GZipMiddleware, minimum_size=1000)


# Process-wide Butler instances shared by all requests; created lazily
# by _make_global_butler() on first use.
GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None

77 

78 

def _make_global_butler() -> None:
    """Create the process-wide Butler instances if not already present.

    Idempotent: each global is constructed at most once, on the first
    call that finds it unset.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)

85 

86 

def butler_readonly_dependency() -> Butler:
    """Return a per-request Butler layered on the global read-only one."""
    _make_global_butler()
    readonly = GLOBAL_READONLY_BUTLER
    return Butler(butler=readonly)

91 

92 

def butler_readwrite_dependency() -> Butler:
    """Return a per-request Butler layered on the global read-write one."""
    _make_global_butler()
    writeable = GLOBAL_READWRITE_BUTLER
    return Butler(butler=writeable)

97 

98 

def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert a serialized dataId back to a full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler providing the registry (and hence universe) to use.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form, or `None` if no dataId was supplied.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The dataId usable by registry, or `None` when none was given.
    """
    if data_id is not None:
        return DataCoordinate.from_simple(data_id, registry=butler.registry)
    return None

117 

118 

@app.get("/butler/")
def read_root() -> str:
    """Return a welcome message for the root URL."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting

123 

124 

@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Point clients at a RemoteRegistry backed by this server, with the
    # datastore rooted at the repository the server was started against.
    yaml_config = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(yaml_config, format="yaml")

137 

138 

@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Return the dimensions definition to a remote client."""
    universe = butler.dimensions
    return universe.dimensionConfig

143 

144 

@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset."""
    ref = butler.registry.getDataset(id)
    if not ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality this would have to be converted to a signed URL before
    # being handed back to the client.
    return str(butler.getURI(ref))

156 

157 

@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry cache."""
    # It is debatable whether this endpoint should exist at all: it is
    # unclear which butler is really being refreshed, or whether the
    # refreshed server instance is the one used for later requests.  It
    # does matter for testing, though: when a test registers a dataset
    # type directly in the server, the test client will not see it
    # without this refresh.
    butler.registry.refresh()

166 

167 

@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Return the named dataset type in serialized form."""
    dataset_type = butler.registry.getDatasetType(datasetTypeName)
    return dataset_type.to_simple()

182 

183 

@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every known dataset type, serialized."""
    matches = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in matches]

198 

199 

@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return dataset types matching regex and/or glob expressions."""
    expr = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expr, components=components)
    return [dataset_type.to_simple() for dataset_type in matches]

219 

220 

@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the members of the named chained collection."""
    return list(butler.registry.getCollectionChain(parent))

226 

227 

@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return the names of collections matching the query."""
    expr = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    types = CollectionType.from_names(collectionType)
    # Resolve the dataset type name (if given) to a real DatasetType.
    dataset_type = None
    if datasetType:
        dataset_type = butler.registry.getDatasetType(datasetType)

    matches = butler.registry.queryCollections(
        expression=expr,
        datasetType=dataset_type,
        collectionTypes=types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(matches)

251 

252 

@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the type name of the named collection."""
    return butler.registry.getCollectionType(name).name

258 

259 

@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a collection of the given type and return its name.

    NOTE(review): the route declares a ``{type_}`` path parameter but the
    function has no parameter named ``type_``; ``collectionTypeName`` does
    not match the placeholder and so presumably binds from the query
    string instead, leaving the path segment unused — confirm against the
    remote client and align the names.
    """
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name

277 

278 

@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return a single dataset reference."""
    ref = butler.registry.getDataset(id)
    # An unknown id could be reported as a 404, but the standard registry
    # getDataset method returns None without error, so mirror that here.
    if ref is None:
        return None
    return ref.to_simple()

297 

298 

@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return locations of datasets."""
    # The endpoint receives a bare ID, but the registry API wants a
    # DatasetRef, so wrap the ID in a minimal serialized ref.
    serialized = SerializedDatasetRef(id=id)

    try:
        # Reconstructing a full DatasetRef costs time that is not needed
        # internally, since only the ID is used downstream.
        ref = DatasetRef.from_simple(serialized, registry=butler.registry)
    except Exception:
        # The SQL getDatasetLocations implementation consults the
        # datastore by ID without checking registry membership; follow
        # that example and report no locations rather than raising.
        return []

    return list(butler.registry.getDatasetLocations(ref))

316 

317 

# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference matching query."""
    # An empty collection list is treated the same as "unspecified".
    collection_query = collections or None

    ref = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query
    )
    if not ref:
        return None
    return ref.to_simple()

340 

341 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Return datasets matching query."""
    # NOTE: this method can produce a very large result set.
    collections = query.collections.expression() if query.collections else None

    refs = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [r.to_simple() for r in refs]

375 

376 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Return data IDs matching query."""
    # Unset query constraints become None for the registry call.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [coord.to_simple() for coord in matches]

411 

412 

# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return dimension records matching query."""
    # Unset query constraints become None for the registry call.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    records = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in records]