Coverage for python/lsst/daf/butler/server.py: 4%

141 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-26 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = () 

25 

26import logging 

27from collections.abc import Mapping 

28from enum import Enum, auto 

29from typing import Any 

30 

31from fastapi import Depends, FastAPI, HTTPException, Query 

32from fastapi.middleware.gzip import GZipMiddleware 

33from lsst.daf.butler import ( 

34 Butler, 

35 Config, 

36 DataCoordinate, 

37 DatasetId, 

38 DatasetRef, 

39 DimensionConfig, 

40 SerializedDataCoordinate, 

41 SerializedDatasetRef, 

42 SerializedDatasetType, 

43 SerializedDimensionRecord, 

44) 

45from lsst.daf.butler.core.serverModels import ( 

46 ExpressionQueryParameter, 

47 QueryDataIdsModel, 

48 QueryDatasetsModel, 

49 QueryDimensionRecordsModel, 

50) 

51from lsst.daf.butler.registry import CollectionType 

52 

# Root of the data repository served by this process.  Hard-coded for now;
# presumably a real deployment would make this configurable — TODO confirm.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module logger; the server is nicknamed "excalibur" (see read_root).
log = logging.getLogger("excalibur")

56 

57 

class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface.

    Each member's value is identical to its name, so these round-trip
    cleanly through URLs and query strings.
    """

    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"

69 

70 

app = FastAPI()
# Compress responses larger than 1000 bytes (query results can be big).
app.add_middleware(GZipMiddleware, minimum_size=1000)


# Shared Butler instances, created lazily by _make_global_butler().
GLOBAL_READWRITE_BUTLER = None
GLOBAL_READONLY_BUTLER = None

77 

78 

def _make_global_butler() -> None:
    """Create the shared read-only and read-write Butlers on first use.

    Both module-level singletons are initialized lazily; calling this more
    than once is a no-op.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)

85 

86 

def butler_readonly_dependency() -> Butler:
    """FastAPI dependency: a Butler derived from the shared read-only one."""
    _make_global_butler()
    # Each request gets its own Butler built from the shared instance.
    return Butler(butler=GLOBAL_READONLY_BUTLER)

90 

91 

def butler_readwrite_dependency() -> Butler:
    """FastAPI dependency: a Butler derived from the shared read-write one."""
    _make_global_butler()
    # Each request gets its own Butler built from the shared instance.
    return Butler(butler=GLOBAL_READWRITE_BUTLER)

95 

96 

def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert a serialized dataId back to a full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler whose registry (and dimension universe) is used for the
        conversion.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form, or `None` when no dataId was supplied.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The dataId usable by registry, or `None` if ``data_id`` was `None`.
    """
    return None if data_id is None else DataCoordinate.from_simple(data_id, registry=butler.registry)

115 

116 

@app.get("/butler/")
def read_root() -> str:
    """Greet callers hitting the service root."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting

120 

121 

@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use.

    Points clients at the local datastore root and at the remote-registry
    implementation that talks back to this server.
    """
    config_str = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(config_str, format="yaml")

134 

135 

@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Serve the dimension universe definition to remote clients."""
    universe = butler.registry.dimensions
    return universe.dimensionConfig

140 

141 

@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the single URI of a non-disassembled dataset.

    Raises a 404 when no dataset with this ID exists in the registry.
    """
    ref = butler.registry.getDataset(id)
    if not ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality would have to convert this to a signed URL.
    return str(butler.datastore.getURI(ref))

153 

154 

@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry caches of the read-only butler.

    Notes
    -----
    It is unclear whether this endpoint should exist at all: which butler is
    really being refreshed, and how do we know the server being refreshed is
    the one used later?  For testing it currently matters, because a test
    that adds a dataset type directly in the server would otherwise not be
    visible to the test client.
    """
    butler.registry.refresh()

162 

163 

@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Serialize the definition of the named dataset type."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()

177 

178 

@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Serialize every dataset type known to the registry."""
    # ``...`` is the registry's "match everything" expression.
    all_types = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in all_types]

192 

193 

@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Serialize dataset types whose names match regex and/or glob patterns."""
    name_constraint = ExpressionQueryParameter(regex=regex, glob=glob)
    matches = butler.registry.queryDatasetTypes(name_constraint.expression(), components=components)
    return [match.to_simple() for match in matches]

212 

213 

@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the ordered child collections of a chained collection."""
    return list(butler.registry.getCollectionChain(parent))

218 

219 

@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """List collection names matching the supplied constraints."""
    name_constraint = ExpressionQueryParameter(regex=regex, glob=glob)
    type_constraint = CollectionType.from_names(collectionType)
    # Resolve the dataset type name to its full definition when supplied.
    dataset_type = None
    if datasetType:
        dataset_type = butler.registry.getDatasetType(datasetType)

    matches = butler.registry.queryCollections(
        expression=name_constraint.expression(),
        datasetType=dataset_type,
        collectionTypes=type_constraint,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return [name for name in matches]

243 

244 

@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Report the type name of the given collection."""
    return butler.registry.getCollectionType(name).name

249 

250 

@app.put("/butler/v1/registry/collection/{name:path}/{collectionTypeName}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a new collection of the given type.

    Parameters are taken from the URL: ``name`` is the collection name and
    ``collectionTypeName`` the type; ``doc`` is an optional documentation
    string supplied as a query parameter.  Returns the registered name.

    Notes
    -----
    The route previously declared its trailing path segment as ``{type_}``,
    which matched no parameter in this signature; that segment was therefore
    ignored and ``collectionTypeName`` was (unintentionally) a required
    query parameter.  The segment is now named to match the parameter so
    the collection type is read from the URL as intended.
    """
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Refresh the shared read-only butler, otherwise other clients may not
    # see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name

267 

268 

@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Serialize the dataset with this ID, or return `None` when unknown.

    An unknown ID deliberately does not raise a 404: the standard registry
    ``getDataset`` method returns without error, and this endpoint follows
    that example.
    """
    ref = butler.registry.getDataset(id)
    if ref is None:
        return None
    return ref.to_simple()

286 

287 

@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """List the datastore names associated with the dataset with this ID."""
    # The registry API wants a DatasetRef but only the ID is used
    # internally, so build a minimal serialized ref instead of doing a full
    # (and slower) registry lookup first.
    minimal_ref = SerializedDatasetRef(id=id)
    try:
        ref = DatasetRef.from_simple(minimal_ref, registry=butler.registry)
    except Exception:
        # SQL getDatasetLocations looks at the ID in the datastore and does
        # not check registry membership; mirror that by returning no
        # locations rather than an error.
        return []

    return list(butler.registry.getDatasetLocations(ref))

304 

305 

# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Find the first matching dataset and serialize it, or return `None`."""
    # An empty list means "no constraint", same as the registry default.
    collection_query = collections or None

    ref = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query
    )
    if not ref:
        return None
    return ref.to_simple()

327 

328 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Run a registry dataset query described by the posted model.

    Note: this endpoint might return a lot of results.
    """
    collections = query.collections.expression() if query.collections else None

    results = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [result.to_simple() for result in results]

361 

362 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Run a registry data-ID query described by the posted model."""
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [data_id.to_simple() for data_id in matches]

396 

397 

398# Uses POST to handle the DataId 

399@app.post( 

400 "/butler/v1/registry/dimensionRecords/{element}", 

401 summary="Retrieve dimension records matching query", 

402 response_model=list[SerializedDimensionRecord], 

403 response_model_exclude_unset=True, 

404 response_model_exclude_defaults=True, 

405 response_model_exclude_none=True, 

406) 

407def query_dimension_records( 

408 element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency) 

409) -> list[SerializedDimensionRecord]: 

410 

411 if query.datasets: 

412 datasets = query.datasets.expression() 

413 else: 

414 datasets = None 

415 if query.collections: 

416 collections = query.collections.expression() 

417 else: 

418 collections = None 

419 

420 records = butler.registry.queryDimensionRecords( 

421 element, 

422 dataId=unpack_dataId(butler, query.dataId), 

423 collections=collections, 

424 where=query.where, 

425 datasets=datasets, 

426 components=query.components, 

427 bind=query.bind, 

428 check=query.check, 

429 **query.kwargs(), 

430 ) 

431 return [r.to_simple() for r in records]