Coverage for python/lsst/daf/butler/server.py: 3%
141 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ()
32import logging
33from collections.abc import Mapping
34from enum import Enum, auto
35from typing import Any
37from fastapi import Depends, FastAPI, HTTPException, Query
38from fastapi.middleware.gzip import GZipMiddleware
39from lsst.daf.butler import (
40 Butler,
41 Config,
42 DataCoordinate,
43 DatasetId,
44 DatasetRef,
45 SerializedDataCoordinate,
46 SerializedDatasetRef,
47 SerializedDatasetType,
48 SerializedDimensionRecord,
49)
50from lsst.daf.butler.core.serverModels import (
51 ExpressionQueryParameter,
52 QueryDataIdsModel,
53 QueryDatasetsModel,
54 QueryDimensionRecordsModel,
55)
56from lsst.daf.butler.registry import CollectionType
# Root of the data repository served by this application.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module logger; "excalibur" matches the greeting returned by the root
# endpoint ("Welcome to Excalibur...").
log = logging.getLogger("excalibur")
class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface."""

    # Each member's value is identical to its name, so the enum round-trips
    # cleanly through string-based query parameters.
    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"
# The FastAPI application serving the Butler REST endpoints.
app = FastAPI()
# Compress responses larger than 1000 bytes with gzip.
app.add_middleware(GZipMiddleware, minimum_size=1000)
# Process-wide Butler instances, created lazily on first use so that the
# module can be imported without touching the data repository.
GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None
def _make_global_butler() -> None:
    """Lazily construct the process-wide read-only and read-write butlers.

    Idempotent: each global is created at most once and reused afterwards.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
def butler_readonly_dependency() -> Butler:
    """Return global read-only butler.

    FastAPI dependency; wraps the shared instance in a fresh `Butler` so
    each request gets its own front end.
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READONLY_BUTLER)
def butler_readwrite_dependency() -> Butler:
    """Return read-write butler.

    FastAPI dependency; wraps the shared instance in a fresh `Butler` so
    each request gets its own front end.
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READWRITE_BUTLER)
def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert the serialized dataId back to full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler whose registry (and dimension universe) drives the
        deserialization.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form, or `None`.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The DataId usable by registry; `None` if ``data_id`` was `None`.
    """
    if data_id is not None:
        return DataCoordinate.from_simple(data_id, registry=butler.registry)
    return None
@app.get("/butler/")
def read_root() -> str:
    """Greet callers accessing the service root URL."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting
@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Clients talk to this server through a RemoteRegistry pointed back at
    # the repository root.
    yaml_config = f"""
datastore:
  root: {BUTLER_ROOT}
registry:
  cls: lsst.daf.butler.registries.remote.RemoteRegistry
  db: <butlerRoot>
"""
    return Config.fromString(yaml_config, format="yaml").toDict()
@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]:
    """Allow remote client to get dimensions definition."""
    universe_config = butler.dimensions.dimensionConfig
    return universe_config.toDict()
@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset."""
    ref = butler.registry.getDataset(id)
    if ref is None:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality would have to convert this to a signed URL
    return str(butler.getURI(ref))
@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry cache."""
    # It is unclear whether this endpoint should exist: which butler is
    # really being refreshed, and is the refreshed server the one a later
    # request will hit? It matters for testing, though — if a test adds a
    # dataset type directly in the server, the test client would not see
    # it without this.
    butler.registry.refresh()
@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Return the named dataset type in serialized form."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()
@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every dataset type known to the registry."""
    found = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in found]
@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return all dataset types matching a regular expression."""
    # Combine regex and glob query parameters into a registry expression.
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expression, components=components)
    return [match.to_simple() for match in matches]
@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the members of the named collection chain."""
    return list(butler.registry.getCollectionChain(parent))
@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return collections matching query."""
    # Combine regex and glob query parameters into a registry expression.
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    collection_types = CollectionType.from_names(collectionType)
    resolved_dataset_type = None
    if datasetType:
        resolved_dataset_type = butler.registry.getDatasetType(datasetType)

    return list(
        butler.registry.queryCollections(
            expression=expression,
            datasetType=resolved_dataset_type,
            collectionTypes=collection_types,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )
    )
@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the type name of the named collection."""
    return butler.registry.getCollectionType(name).name
@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a collection and return its name."""
    # NOTE(review): the route declares a ``{type_}`` path segment, but no
    # parameter named ``type_`` exists here, so ``collectionTypeName`` is
    # not bound from the path — confirm this mismatch is intentional.
    collection_type = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collection_type, doc)

    # Refresh the shared read-only butler so other clients can observe the
    # newly registered collection.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name
@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return a single dataset reference."""
    ref = butler.registry.getDataset(id)
    if ref is None:
        # An unknown id could be a 404, but the standard registry
        # getDataset method returns None without error, so mirror that.
        return None
    return ref.to_simple()
@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return locations of datasets."""
    # Only an ID arrives on the wire, so build a serialized ref around it
    # and convert that to a real DatasetRef; only the ID is used
    # internally, though the conversion itself takes time.
    placeholder = SerializedDatasetRef(id=id)

    try:
        ref = DatasetRef.from_simple(placeholder, registry=butler.registry)
    except Exception:
        # The SQL getDatasetLocations looks at the ID in the datastore and
        # does not check it exists in the registry; follow that example
        # and report no locations rather than raising.
        return []

    return list(butler.registry.getDatasetLocations(ref))
# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference matching query."""
    # Treat an empty collection list the same as "not supplied".
    collection_query = collections or None

    found = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query
    )
    if found is None:
        return None
    return found.to_simple()
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Return datasets matching query."""
    # This method might return a lot of results; they are serialized
    # eagerly regardless.
    collections = query.collections.expression() if query.collections else None

    refs = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [r.to_simple() for r in refs]
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Return data IDs matching query."""
    # Expand optional expression parameters, treating "absent" as None.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    results = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [data_id.to_simple() for data_id in results]
# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return dimension records matching query."""
    # Expand optional expression parameters, treating "absent" as None.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    records = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in records]