Coverage for python / lsst / daf / butler / datastores / file_datastore / transfer.py: 24%

25 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from collections.abc import Iterable 

31 

32from lsst.resources import ResourcePath 

33from lsst.utils.logging import getLogger 

34 

35from ..._dataset_ref import DatasetRef 

36from ...datastore import FileTransferMap, FileTransferSource 

37 

38log = getLogger(__name__) 

39 

40 

def retrieve_file_transfer_records(
    source_datastore: FileTransferSource,
    refs: Iterable[DatasetRef],
    artifact_existence: dict[ResourcePath, bool],
) -> FileTransferMap:
    """Collect the file transfer records for the requested datasets.

    Parameters
    ----------
    source_datastore : `FileTransferSource`
        Transfer source that is queried for the records.
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        Datasets whose records are wanted. The iterable is consumed once.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Cache mapping datastore artifact to existence. It is forwarded to
        the fallback search and is updated with details of all artifacts
        tested.

    Returns
    -------
    files : `FileTransferMap`
        A dictionary from `DatasetId` to a list of `FileTransferRecord`
        describing the files found for these datasets. A dataset for which
        no files were found has no entry in this dictionary.

    Notes
    -----
    Records are requested from the source's datastore records first. Only
    the datasets that are absent from that answer are handed to
    ``locate_missing_files_for_transfer``, and whatever it finds is merged
    into the first answer with ``|=``. Datasets that remain unresolved are
    reported through warnings and are otherwise skipped.
    """
    log.verbose("Looking up source datastore records in %s", source_datastore.name)

    # Index the refs by ID so that missing IDs can be mapped back to refs.
    refs_by_id = {ref.id: ref for ref in refs}
    source_records = source_datastore.get_file_info_for_transfer(refs_by_id.keys())
    log.debug("Number of datastore records found in source: %d", len(source_records))

    # Some datastores may have artifacts on disk without corresponding
    # records in the database, so anything not found above gets a second
    # look-up below.
    missing_ids = refs_by_id.keys() - source_records.keys()
    if not missing_ids:
        return source_records

    log.info(
        "Number of expected datasets missing from source datastore records: %d out of %d",
        len(missing_ids),
        len(refs_by_id),
    )
    missing_refs = {refs_by_id[dataset_id] for dataset_id in missing_ids}
    found_records = source_datastore.locate_missing_files_for_transfer(missing_refs, artifact_existence)
    source_records |= found_records

    unresolved_count = len(missing_refs) - len(found_records)
    if not unresolved_count:
        return source_records

    # Name each dataset that could not be located, then give a summary.
    unresolved_refs = (ref for ref in missing_refs if ref.id not in found_records)
    for ref in unresolved_refs:
        log.warning("Asked to transfer dataset %s but no file artifacts exist for it.", ref)
    plural_suffix = "" if unresolved_count == 1 else "s"
    log.warning(
        "Encountered %d dataset%s where no file artifacts exist from the "
        "source datastore and will be skipped.",
        unresolved_count,
        plural_suffix,
    )
    return source_records