Coverage for python/lsst/daf/butler/server.py: 4%
141 statements
« prev ^ index » next — coverage.py v6.5.0, created at 2023-02-01 02:05 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ()
26import logging
27from collections.abc import Mapping
28from enum import Enum, auto
29from typing import Any
31from fastapi import Depends, FastAPI, HTTPException, Query
32from fastapi.middleware.gzip import GZipMiddleware
33from lsst.daf.butler import (
34 Butler,
35 Config,
36 DataCoordinate,
37 DatasetId,
38 DatasetRef,
39 DimensionConfig,
40 SerializedDataCoordinate,
41 SerializedDatasetRef,
42 SerializedDatasetType,
43 SerializedDimensionRecord,
44)
45from lsst.daf.butler.core.serverModels import (
46 ExpressionQueryParameter,
47 QueryDataIdsModel,
48 QueryDatasetsModel,
49 QueryDimensionRecordsModel,
50)
51from lsst.daf.butler.registry import CollectionType
# Root of the data repository this server exposes.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module logger for the server.
log = logging.getLogger("excalibur")
class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface.

    Each member's value is its own name, so the names round-trip cleanly
    through query parameters and JSON.
    """

    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"
app = FastAPI()
# Compress large responses (queries can return many records).
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Process-wide Butler instances, created lazily by _make_global_butler().
GLOBAL_READONLY_BUTLER = None
GLOBAL_READWRITE_BUTLER = None
def _make_global_butler() -> None:
    """Instantiate the shared read/write and read-only Butlers on first use."""
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
def butler_readonly_dependency() -> Butler:
    """FastAPI dependency: a Butler built from the global read-only Butler."""
    _make_global_butler()
    return Butler(butler=GLOBAL_READONLY_BUTLER)
def butler_readwrite_dependency() -> Butler:
    """FastAPI dependency: a Butler built from the global read/write Butler."""
    _make_global_butler()
    return Butler(butler=GLOBAL_READWRITE_BUTLER)
def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert the serialized dataId back to full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to use for registry and universe.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The DataId usable by registry, or `None` if none was supplied.
    """
    if data_id is None:
        return None
    return DataCoordinate.from_simple(data_id, registry=butler.registry)
@app.get("/butler/")
def read_root() -> str:
    """Return a fixed greeting identifying this service."""
    return "Welcome to Excalibur... aka your Butler Server"
@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Clients talk to this server through a RemoteRegistry pointed at
    # the repository root.
    yaml_config = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(yaml_config, format="yaml")
@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Allow remote client to get dimensions definition."""
    return butler.registry.dimensions.dimensionConfig
@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset.

    Raises a 404 if no dataset with the given ID exists.
    """
    ref = butler.registry.getDataset(id)
    if not ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality would have to convert this to a signed URL
    return str(butler.datastore.getURI(ref))
@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry of the dependency-provided butler.

    Unclear whether this should exist. Which butler is really being
    refreshed? How do we know the server we are refreshing is used later?
    For testing at the moment it is important if a test adds a dataset type
    directly in the server since the test client will not see it.
    """
    butler.registry.refresh()
@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Look up a dataset type by name and return its serialized form."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()
@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every dataset type known to the registry, serialized."""
    # `...` matches all dataset types.
    matches = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in matches]
@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return serialized dataset types matching regex/glob expressions."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expression, components=components)
    return [dataset_type.to_simple() for dataset_type in matches]
@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the child collection names of the given chained collection."""
    return list(butler.registry.getCollectionChain(parent))
@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return collection names matching the given expressions and filters."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    types = CollectionType.from_names(collectionType)
    # Resolve the dataset type name, if one was supplied.
    dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None

    matches = butler.registry.queryCollections(
        expression=expression,
        datasetType=dataset_type,
        collectionTypes=types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(matches)
@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the collection type name for the named collection."""
    return butler.registry.getCollectionType(name).name
@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a new collection and return its name.

    NOTE(review): the route declares a ``{type_}`` path parameter that the
    signature never receives; the collection type is instead taken from the
    ``collectionTypeName`` query parameter — confirm this mismatch is
    intentional.
    """
    collection_type = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collection_type, doc)

    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name
@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return the serialized dataset ref for this ID, or None if unknown.

    This could raise a 404 since id is not found. The standard registry
    getDataset method returns without error so follow that example here.
    """
    ref = butler.registry.getDataset(id)
    if ref is None:
        return None
    return ref.to_simple()
@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the datastore names holding the dataset with this ID."""
    # Takes an ID so need to convert to a real DatasetRef
    fake_ref = SerializedDatasetRef(id=id)

    try:
        # Converting this to a real DatasetRef takes time and is not
        # needed internally since only the ID is used.
        ref = DatasetRef.from_simple(fake_ref, registry=butler.registry)
    except Exception:
        # SQL getDatasetLocations looks at ID in datastore and does not
        # check it is in registry. Follow that example and return without
        # error.
        return []

    return list(butler.registry.getDatasetLocations(ref))
# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Find a dataset by type, dataId, and collections; None if absent."""
    # An empty collection list is treated the same as no list at all.
    ref = butler.registry.findDataset(
        datasetType,
        dataId=unpack_dataId(butler, dataId),
        collections=collections or None,
    )
    if ref is None:
        return None
    return ref.to_simple()
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Run a registry dataset query and return the serialized matches."""
    # This method might return a lot of results
    collections = query.collections.expression() if query.collections else None

    refs = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [r.to_simple() for r in refs]
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Run a registry data ID query and return the serialized coordinates."""
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    coords = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [coord.to_simple() for coord in coords]
# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Query dimension records for one element and serialize the results."""
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    records = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in records]