Coverage for python / astro_metadata_translator / bin / writeindex.py: 23%

38 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:50 +0000

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ["write_index_files"] 

15 

16import json 

17import logging 

18import os 

19from collections.abc import MutableMapping, Sequence 

20from typing import IO 

21 

22from lsst.resources import ResourcePath 

23 

24from ..file_helpers import find_files 

25from ..indexing import index_files 

26 

27log = logging.getLogger(__name__) 

28 

29 

30def write_index_files( 

31 files: Sequence[str], 

32 regex: str, 

33 hdrnum: int, 

34 print_trace: bool, 

35 content_mode: str = "translated", 

36 outpath: str | None = None, 

37 outstream: IO | None = None, 

38) -> tuple[list[str], list[str]]: 

39 """Process each file and create JSON index file. 

40 

41 The index file will have common information in the toplevel. 

42 There is then a ``__DIFF__`` key that is a dictionary with file 

43 names as keys and per-file differences as the values in a dict. 

44 

45 Parameters 

46 ---------- 

47 files : iterable of `str` 

48 The files or directories from which the headers are to be read. 

49 regex : `str` 

50 Regular expression string used to filter files when a directory is 

51 scanned. 

52 hdrnum : `int` 

53 The HDU number to read. The primary header is always read and merged 

54 with the specified header. 

55 print_trace : `bool` 

56 If there is an error reading the file and this parameter is `True`, 

57 a full traceback of the exception will be reported. If `False` prints 

58 a one line summary of the error condition. 

59 content_mode : `str` 

60 Form of data to write in index file. Options are: 

61 ``translated`` (default) to write ObservationInfo to the index; 

62 ``metadata`` to write native metadata headers to the index. 

63 The index file is called ``_index.json``. 

64 outpath : `str`, optional 

65 If specified a single index file will be written to this location 

66 combining all the information from all files. If `None`, the default, 

67 and index file will be written to each directory in which files 

68 are found. 

69 outstream : `io.StringIO`, optional 

70 Output stream to use for standard messages. Defaults to `None` which 

71 uses the default output stream. Defaults to `sys.stdout`. 

72 

73 Returns 

74 ------- 

75 okay : `list` of `str` 

76 All the files that were processed successfully. 

77 failed : `list` of `str` 

78 All the files that could not be processed. 

79 """ 

80 if content_mode not in ("translated", "metadata"): 

81 raise ValueError(f"Unrecognized content mode {content_mode}") 

82 

83 if outpath is not None: 

84 _, ext = os.path.splitext(outpath) 

85 if ext != ".json": 

86 raise ValueError(f"Override output file must end in .json but given {outpath}") 

87 

88 found_files = find_files(files, regex) 

89 

90 failed = [] 

91 okay = [] 

92 files_per_directory: MutableMapping[ResourcePath, list[ResourcePath]] = {} 

93 

94 # Group each file by directory if no explicit output path 

95 if outpath is None: 

96 for path in found_files: 

97 head, tail = path.split() 

98 files_per_directory.setdefault(head, []).append(ResourcePath(tail, forceAbsolute=False)) 

99 else: 

100 # We want the requested files to be paths relative to the current 

101 # directory. For now this assumes that all the input files are 

102 # local -- we are not trying to discover a shared root directory. 

103 cwd = ResourcePath(".", forceAbsolute=True, forceDirectory=True) 

104 files_per_directory[cwd] = list(found_files) 

105 

106 # Extract translated metadata for each file in each directory 

107 for directory, files_in_dir in files_per_directory.items(): 

108 output, this_okay, this_failed = index_files( 

109 files_in_dir, 

110 directory, 

111 hdrnum, 

112 print_trace, 

113 content_mode, 

114 outstream, 

115 ) 

116 

117 failed.extend(this_failed) 

118 okay.extend(this_okay) 

119 

120 # Write the index file 

121 if outpath is None: 

122 index_file = directory.join("_index.json") 

123 else: 

124 index_file = ResourcePath(outpath, forceAbsolute=False) 

125 index_file.write(json.dumps(output).encode()) 

126 log.info("Wrote index file to %s", index_file) 

127 

128 return okay, failed