Coverage for python / lsst / daf / butler / datastores / file_datastore / transfer.py: 24%
25 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 08:30 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 08:30 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from collections.abc import Iterable
32from lsst.resources import ResourcePath
33from lsst.utils.logging import getLogger
35from ..._dataset_ref import DatasetRef
36from ...datastore import FileTransferMap, FileTransferSource
# Module-level logger named after this module. It comes from
# ``lsst.utils.logging`` rather than the standard library because
# ``retrieve_file_transfer_records`` calls ``log.verbose`` in addition to
# the usual debug/info/warning methods.
38log = getLogger(__name__)
def retrieve_file_transfer_records(
    source_datastore: FileTransferSource,
    refs: Iterable[DatasetRef],
    artifact_existence: dict[ResourcePath, bool],
) -> FileTransferMap:
    """Look up the datastore records corresponding to the given datasets.

    Parameters
    ----------
    source_datastore : `FileTransferSource`
        Object used to look up records.
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        List of datasets to retrieve records for.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Cache mapping datastore artifact to existence. Updated by
        this method with details of all artifacts tested.

    Returns
    -------
    files : `FileTransferMap`
        A dictionary from `DatasetId` to a list of `FileTransferRecord`,
        containing information about the files that were found for these
        artifacts. If files were not found for a given `DatasetRef`, there
        will be no entry for it in this dictionary.

    Notes
    -----
    This will first attempt to look up records using the database, and then
    fall back to searching the filesystem if the transfer source is configured
    to do so.

    The fallback search is only invoked for datasets that had no record in
    the first lookup. Datasets that remain unmatched after the fallback are
    reported with a warning and omitted from the result; no exception is
    raised for them here.
    """
    log.verbose("Looking up source datastore records in %s", source_datastore.name)
    # Index by dataset ID so that missing IDs can be mapped back to their refs.
    refs_by_id = {ref.id: ref for ref in refs}
    source_records = source_datastore.get_file_info_for_transfer(refs_by_id.keys())

    log.debug("Number of datastore records found in source: %d", len(source_records))

    # If we couldn't find all of the datasets in the database, continue
    # searching. Some datastores may have artifacts on disk that do not have
    # corresponding records in the database.
    missing_ids = refs_by_id.keys() - source_records.keys()
    if not missing_ids:
        return source_records

    log.info(
        "Number of expected datasets missing from source datastore records: %d out of %d",
        len(missing_ids),
        len(refs_by_id),
    )
    missing_refs = {refs_by_id[dataset_id] for dataset_id in missing_ids}
    found_records = source_datastore.locate_missing_files_for_transfer(missing_refs, artifact_existence)
    source_records |= found_records

    # Determine the unmatched refs directly rather than subtracting lengths,
    # so the summary count always agrees with the per-dataset warnings even
    # if the fallback lookup returns keys outside the requested set.
    unlocated_refs = [ref for ref in missing_refs if ref.id not in found_records]
    if unlocated_refs:
        for ref in unlocated_refs:
            log.warning("Asked to transfer dataset %s but no file artifacts exist for it.", ref)
        still_missing = len(unlocated_refs)
        log.warning(
            "Encountered %d dataset%s where no file artifacts exist from the "
            "source datastore and will be skipped.",
            still_missing,
            "s" if still_missing != 1 else "",
        )

    return source_records