# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ("RawIngestTask", "RawIngestConfig", "makeTransferChoiceField")

import os.path
from dataclasses import dataclass, InitVar
from typing import Callable, List, Iterator, Iterable, Tuple, Type, Optional, Any
from collections import defaultdict
from multiprocessing import Pool

from astro_metadata_translator import ObservationInfo, merge_headers
from lsst.afw.fits import readMetadata
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionRecord,
    DimensionUniverse,
    FileDataset,
    Formatter,
)
from lsst.pex.config import Config, ChoiceField, Field
from lsst.pipe.base import Task

from ._instrument import Instrument, makeExposureRecordFromObsInfo
from ._fitsRawFormatterBase import FitsRawFormatterBase


def _do_nothing(*args, **kwargs) -> None:
    """A function that accepts anything and does nothing, for use as a default
    in callback arguments.
    """
    pass


@dataclass
class RawFileDatasetInfo:
    """Structure that holds information about a single dataset within a
    raw file.
    """

    dataId: DataCoordinate
    """Data ID for this file (`lsst.daf.butler.DataCoordinate`).
    """

    obsInfo: ObservationInfo
    """Standardized observation metadata extracted directly from the file
    headers (`astro_metadata_translator.ObservationInfo`).
    """


@dataclass
class RawFileData:
    """Structure that holds information about a single raw file, used during
    ingest.
    """

    datasets: List[RawFileDatasetInfo]
    """The information describing each dataset within this raw file
    (`list` of `RawFileDatasetInfo`).
    """

    filename: str
    """Name of the file this information was extracted from (`str`).

    This is the path prior to ingest, not the path after ingest.
    """

    FormatterClass: Type[FitsRawFormatterBase]
    """Formatter class that should be used to ingest this file (`type`; a
    subclass of `FitsRawFormatterBase`).
    """

    instrumentClass: Optional[Type[Instrument]]
    """The `Instrument` class associated with this file. Can be `None`
    if ``datasets`` is an empty list.
    """


@dataclass
class RawExposureData:
    """Structure that holds information about a complete raw exposure, used
    during ingest.
    """

    dataId: DataCoordinate
    """Data ID for this exposure (`lsst.daf.butler.DataCoordinate`).
    """

    files: List[RawFileData]
    """List of structures containing file-level information.
    """

    universe: InitVar[DimensionUniverse]
    """Set of all known dimensions.
    """

    record: Optional[DimensionRecord] = None
    """The exposure `DimensionRecord` that must be inserted into the
    `~lsst.daf.butler.Registry` prior to file-level ingest (`DimensionRecord`).
    """

    def __post_init__(self, universe: DimensionUniverse):
        # We don't care which file or dataset we read metadata from, because
        # we're assuming they'll all be the same; just use the first ones.
        self.record = makeExposureRecordFromObsInfo(self.files[0].datasets[0].obsInfo, universe)

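# A rough sketch of how these structures nest once grouping has happened
# (the variable names below are illustrative only, not part of this module's
# API):
#
#     exposure_data: RawExposureData = ...            # one per exposure
#     exposure_data.record                            # exposure DimensionRecord
#     for file_data in exposure_data.files:           # one RawFileData per file
#         file_data.FormatterClass                    # formatter used at ingest
#         for dataset in file_data.datasets:          # usually one per file
#             dataset.dataId, dataset.obsInfo         # per-dataset metadata
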

def makeTransferChoiceField(doc="How to transfer files (None for no transfer).", default="auto"):
    """Create a Config field with options for how to transfer files between
    data repositories.

    The allowed options for the field are exactly those supported by
    `lsst.daf.butler.Datastore.ingest`.

    Parameters
    ----------
    doc : `str`
        Documentation for the configuration field.
    default : `str`, optional
        Default transfer mode for the field.

    Returns
    -------
    field : `lsst.pex.config.ChoiceField`
        Configuration field.
    """
    return ChoiceField(
        doc=doc,
        dtype=str,
        allowed={"move": "move",
                 "copy": "copy",
                 "auto": "choice will depend on datastore",
                 "direct": "use URI to ingested file directly in datastore",
                 "link": "hard link falling back to symbolic link",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link",
                 "relsymlink": "relative symbolic link",
                 },
        optional=True,
        default=default,
    )


class RawIngestConfig(Config):
    transfer = makeTransferChoiceField()
    failFast = Field(
        dtype=bool,
        default=False,
        doc="If True, stop ingest as soon as any problem is encountered with any file. "
            "Otherwise problem files will be skipped and logged and a report issued at completion.",
    )

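# A minimal sketch of overriding these options before constructing the task
# (the particular values are arbitrary examples, not recommendations):
#
#     config = RawIngestConfig()
#     config.transfer = "symlink"   # any key accepted by makeTransferChoiceField
#     config.failFast = True        # abort on the first problematic file
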

class RawIngestTask(Task):
    """Driver Task for ingesting raw data into Gen3 Butler repositories.

    Parameters
    ----------
    config : `RawIngestConfig`
        Configuration for the task.
    butler : `~lsst.daf.butler.Butler`
        Writeable butler instance, with ``butler.run`` set to the appropriate
        `~lsst.daf.butler.CollectionType.RUN` collection for these raw
        datasets.
    on_success : `Callable`, optional
        A callback invoked when all of the raws associated with an exposure
        are ingested. Will be passed a list of `FileDataset` objects, each
        containing one or more resolved `DatasetRef` objects. If this callback
        raises it will interrupt the entire ingest process, even if
        `RawIngestConfig.failFast` is `False`.
    on_metadata_failure : `Callable`, optional
        A callback invoked when a failure occurs trying to translate the
        metadata for a file. Will be passed the filename and the exception, in
        that order, as positional arguments. Guaranteed to be called in an
        ``except`` block, allowing the callback to re-raise or replace (with
        ``raise ... from``) to override the task's usual error handling (before
        `RawIngestConfig.failFast` logic occurs).
    on_ingest_failure : `Callable`, optional
        A callback invoked when dimension record or dataset insertion into the
        database fails for an exposure. Will be passed a `RawExposureData`
        instance and the exception, in that order, as positional arguments.
        Guaranteed to be called in an ``except`` block, allowing the callback
        to re-raise or replace (with ``raise ... from``) to override the task's
        usual error handling (before `RawIngestConfig.failFast` logic occurs).
    **kwargs
        Additional keyword arguments are forwarded to the `lsst.pipe.base.Task`
        constructor.

    Notes
    -----
    Each instance of `RawIngestTask` writes to the same Butler. Each
    invocation of `RawIngestTask.run` ingests a list of files.
    """

    ConfigClass = RawIngestConfig

    _DefaultName = "ingest"

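    # A rough sketch of wiring up the optional callbacks documented above; the
    # callback body is illustrative only, and any callable with the documented
    # signature will do:
    #
    #     def note_bad_file(filename, exc):
    #         print(f"could not translate metadata for {filename}: {exc}")
    #
    #     task = RawIngestTask(config=RawIngestConfig(), butler=butler,
    #                          on_metadata_failure=note_bad_file)
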

    def getDatasetType(self):
        """Return the DatasetType of the datasets ingested by this Task.
        """
        return DatasetType("raw", ("instrument", "detector", "exposure"), "Exposure",
                           universe=self.butler.registry.dimensions)


    def __init__(self, config: Optional[RawIngestConfig] = None, *, butler: Butler,
                 on_success: Callable[[List[FileDataset]], Any] = _do_nothing,
                 on_metadata_failure: Callable[[str, Exception], Any] = _do_nothing,
                 on_ingest_failure: Callable[[RawExposureData, Exception], Any] = _do_nothing,
                 **kwargs: Any):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        self.butler = butler
        self.universe = self.butler.registry.dimensions
        self.datasetType = self.getDatasetType()
        self._on_success = on_success
        self._on_metadata_failure = on_metadata_failure
        self._on_ingest_failure = on_ingest_failure

        # Import all the instrument classes to ensure that all the
        # relevant metadata translators are loaded.
        Instrument.importAll(self.butler.registry)

    def _reduce_kwargs(self):
        # Add extra parameters to pickle.
        return dict(**super()._reduce_kwargs(), butler=self.butler, on_success=self._on_success,
                    on_metadata_failure=self._on_metadata_failure, on_ingest_failure=self._on_ingest_failure)


    def extractMetadata(self, filename: str) -> RawFileData:
        """Extract and process metadata from a single raw file.

        Parameters
        ----------
        filename : `str`
            Path to the file.

        Returns
        -------
        data : `RawFileData`
            A structure containing the metadata extracted from the file,
            as well as the original filename. All fields will be populated,
            but the data IDs of the `RawFileDatasetInfo` entries in
            `RawFileData.datasets` will be minimal (unexpanded)
            `DataCoordinate` instances.

        Notes
        -----
        Assumes that there is a single dataset associated with the given
        file. Instruments using a single file to store multiple datasets
        must implement their own version of this method.
        """

        # We do not want to stop ingest if we are given a bad file.
        # Instead return a RawFileData with no datasets and allow
        # the caller to report the failure.


        try:
            # Manually merge the primary and "first data" headers here because
            # we do not know in general if an input file has set INHERIT=T.
            phdu = readMetadata(filename, 0)
            header = merge_headers([phdu, readMetadata(filename)], mode="overwrite")
            datasets = [self._calculate_dataset_info(header, filename)]
        except Exception as e:
            self.log.debug("Problem extracting metadata from %s: %s", filename, e)
            # Indicate to the caller that we failed to read.
            datasets = []
            FormatterClass = Formatter
            instrument = None
            self._on_metadata_failure(filename, e)
            if self.config.failFast:
                raise RuntimeError(f"Problem extracting metadata from file {filename}") from e
        else:
            self.log.debug("Extracted metadata from file %s", filename)
            # The data model currently assumes that whilst multiple datasets
            # can be associated with a single file, they must all share the
            # same formatter.
            try:
                instrument = Instrument.fromName(datasets[0].dataId["instrument"], self.butler.registry)
            except LookupError as e:
                self._on_metadata_failure(filename, e)
                self.log.warning("Instrument %s for file %s not known to registry",
                                 datasets[0].dataId["instrument"], filename)
                if self.config.failFast:
                    raise RuntimeError(f"Instrument {datasets[0].dataId['instrument']} for"
                                       f" file {filename} not known to registry") from e
                datasets = []
                FormatterClass = Formatter
                instrument = None
            else:
                FormatterClass = instrument.getRawFormatter(datasets[0].dataId)

        return RawFileData(datasets=datasets, filename=filename,
                           FormatterClass=FormatterClass,
                           instrumentClass=instrument)

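    # A minimal sketch of calling this method directly on one file and using
    # the empty-``datasets`` convention to detect failures (``task`` and the
    # path are hypothetical):
    #
    #     file_data = task.extractMetadata("/data/raw/some_exposure.fits")
    #     if not file_data.datasets:
    #         print(f"metadata translation failed for {file_data.filename}")
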

    def _calculate_dataset_info(self, header, filename):
        """Calculate a RawFileDatasetInfo from the supplied information.

        Parameters
        ----------
        header : `Mapping`
            Header from the dataset.
        filename : `str`
            Filename to use for error messages.

        Returns
        -------
        dataset : `RawFileDatasetInfo`
            The dataId and observation information associated with this
            dataset.
        """

        # To ensure we aren't slowed down for no reason, explicitly
        # list here the properties we need for the schema.
        # Use a dict whose values are booleans, where True indicates
        # that the property is required to be calculated.
        ingest_subset = {
            "altaz_begin": False,
            "boresight_rotation_coord": False,
            "boresight_rotation_angle": False,
            "dark_time": False,
            "datetime_begin": True,
            "datetime_end": True,
            "detector_num": True,
            "exposure_group": False,
            "exposure_id": True,
            "exposure_time": True,
            "instrument": True,
            "tracking_radec": False,
            "object": False,
            "observation_counter": False,
            "observation_id": True,
            "observation_reason": False,
            "observation_type": True,
            "observing_day": False,
            "physical_filter": True,
            "science_program": False,
            "visit_id": False,
        }

        obsInfo = ObservationInfo(header, pedantic=False, filename=filename,
                                  required={k for k in ingest_subset if ingest_subset[k]},
                                  subset=set(ingest_subset))

        dataId = DataCoordinate.standardize(instrument=obsInfo.instrument,
                                            exposure=obsInfo.exposure_id,
                                            detector=obsInfo.detector_num,
                                            universe=self.universe)
        return RawFileDatasetInfo(obsInfo=obsInfo, dataId=dataId)

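    # A hedged sketch of how a subclass might layer extra validation on top of
    # the translated metadata (the subclass and the specific check are
    # hypothetical):
    #
    #     class MyCamRawIngestTask(RawIngestTask):
    #         def _calculate_dataset_info(self, header, filename):
    #             info = super()._calculate_dataset_info(header, filename)
    #             if info.obsInfo.observation_type is None:
    #                 raise ValueError(f"{filename} has no observation_type")
    #             return info
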

    def groupByExposure(self, files: Iterable[RawFileData]) -> List[RawExposureData]:
        """Group an iterable of `RawFileData` by exposure.

        Parameters
        ----------
        files : iterable of `RawFileData`
            File-level information to group.

        Returns
        -------
        exposures : `list` of `RawExposureData`
            A list of structures that group the file-level information by
            exposure. All fields will be populated. The
            `RawExposureData.dataId` attributes will be minimal (unexpanded)
            `DataCoordinate` instances.
        """
        exposureDimensions = self.universe["exposure"].graph
        byExposure = defaultdict(list)
        for f in files:
            # Assume that the first dataset is representative for the file.
            byExposure[f.datasets[0].dataId.subset(exposureDimensions)].append(f)

        return [RawExposureData(dataId=dataId, files=exposureFiles, universe=self.universe)
                for dataId, exposureFiles in byExposure.items()]

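    # A rough sketch of chaining the extraction and grouping steps by hand,
    # outside of `prep` (``task`` and ``paths`` are hypothetical):
    #
    #     file_data = [task.extractMetadata(p) for p in paths]
    #     exposures = task.groupByExposure(d for d in file_data if d.datasets)
    #     for exposure in exposures:
    #         print(exposure.dataId, len(exposure.files))
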

    def expandDataIds(self, data: RawExposureData) -> RawExposureData:
        """Expand the data IDs associated with a raw exposure to include
        additional metadata records.

        Parameters
        ----------
        data : `RawExposureData`
            A structure containing information about the exposure to be
            ingested. Must have `RawExposureData.record` populated. Should
            be considered consumed upon return.

        Returns
        -------
        exposure : `RawExposureData`
            An updated version of the input structure, with
            `RawExposureData.dataId` and the data IDs of the nested
            `RawFileDatasetInfo` objects updated to data IDs for which
            `DataCoordinate.hasRecords` returns `True`.
        """
        # We start by expanding the exposure-level data ID; we won't use that
        # directly in file ingest, but this lets us do some database lookups
        # once per exposure instead of once per file later.
        data.dataId = self.butler.registry.expandDataId(
            data.dataId,
            # We pass in the records we'll be inserting shortly so they aren't
            # looked up from the database. We do expect instrument and filter
            # records to be retrieved from the database here (though the
            # Registry may cache them so there isn't a lookup every time).
            records={
                self.butler.registry.dimensions["exposure"]: data.record,
            }
        )
        # Now we expand the per-file (exposure+detector) data IDs. This time
        # we pass in the records we just retrieved from the exposure data ID
        # expansion.
        for file in data.files:
            for dataset in file.datasets:
                dataset.dataId = self.butler.registry.expandDataId(
                    dataset.dataId,
                    records=dict(data.dataId.records)
                )
        return data


    def prep(self, files, *, pool: Optional[Pool] = None, processes: int = 1
             ) -> Tuple[Iterator[RawExposureData], List[str]]:
        """Perform all ingest preprocessing steps that do not involve actually
        modifying the database.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested. Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use. Ignored if ``pool`` is not `None`.

        Returns
        -------
        exposures : `Iterator` [ `RawExposureData` ]
            Data structures containing dimension records, filenames, and data
            IDs to be ingested (one structure for each exposure).
        bad_files : `list` of `str`
            List of all the files that could not have metadata extracted.
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        mapFunc = map if pool is None else pool.imap_unordered

        # Extract metadata and build per-detector regions.
        # This could run in a subprocess, so collect all output
        # before looking at failures.
        fileData: Iterator[RawFileData] = mapFunc(self.extractMetadata, files)

        # Filter out all the failed reads and store them for later
        # reporting.
        good_files = []
        bad_files = []
        for fileDatum in fileData:
            if not fileDatum.datasets:
                bad_files.append(fileDatum.filename)
            else:
                good_files.append(fileDatum)
        fileData = good_files

        self.log.info("Successfully extracted metadata from %d file%s with %d failure%s",
                      len(fileData), "" if len(fileData) == 1 else "s",
                      len(bad_files), "" if len(bad_files) == 1 else "s")

        # Use that metadata to group files (and extracted metadata) by
        # exposure. Never parallelized because it's intrinsically a gather
        # step.
        exposureData: List[RawExposureData] = self.groupByExposure(fileData)

        # The next operation operates on RawExposureData instances (one at
        # a time) in-place and then returns the modified instance. We call it
        # as a pass-through instead of relying on the arguments we pass in to
        # have been modified because in the parallel case those arguments are
        # going to be pickled and unpickled, and I'm not certain
        # multiprocessing is careful enough with that for output arguments to
        # work.

        # Expand the data IDs to include all dimension metadata; we need this
        # because we may need to generate path templates that rely on that
        # metadata.
        # This is the first step that involves actual database calls (but just
        # SELECTs), so if there's going to be a problem with connections vs.
        # multiple processes, or lock contention (in SQLite) slowing things
        # down, it'll happen here.
        return mapFunc(self.expandDataIds, exposureData), bad_files

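    # A rough sketch of calling `prep` with an explicit process pool; `run`
    # normally does this for you, and ``butler`` and ``paths`` are
    # hypothetical:
    #
    #     from multiprocessing import Pool
    #
    #     task = RawIngestTask(config=RawIngestConfig(), butler=butler)
    #     with Pool(4) as pool:
    #         exposures, bad_files = task.prep(paths, pool=pool)
    #         exposures = list(exposures)  # drain the iterator while the pool is alive
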

    def ingestExposureDatasets(self, exposure: RawExposureData, *, run: Optional[str] = None
                               ) -> List[FileDataset]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested. Must have `RawExposureData.record` populated and all
            data ID attributes expanded.
        run : `str`, optional
            Name of a RUN-type collection to write to, overriding
            ``self.butler.run``.

        Returns
        -------
        datasets : `list` of `lsst.daf.butler.FileDataset`
            Per-file structures identifying the files ingested and their
            dataset representation in the data repository.
        """
        datasets = [FileDataset(path=os.path.abspath(file.filename),
                                refs=[DatasetRef(self.datasetType, d.dataId) for d in file.datasets],
                                formatter=file.FormatterClass)
                    for file in exposure.files]
        self.butler.ingest(*datasets, transfer=self.config.transfer, run=run)
        return datasets

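    # A minimal sketch of ingesting a single, already-expanded exposure into an
    # explicit RUN collection, mirroring what `run` does per exposure
    # (``butler``, ``task``, ``exposure``, and the collection name are
    # hypothetical):
    #
    #     butler.registry.registerCollection("MyCam/raw/all", type=CollectionType.RUN)
    #     butler.registry.syncDimensionData("exposure", exposure.record)
    #     with butler.transaction():
    #         task.ingestExposureDatasets(exposure, run="MyCam/raw/all")
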

    def run(self, files, *, pool: Optional[Pool] = None, processes: int = 1, run: Optional[str] = None):
        """Ingest files into a Butler data repository.

        This creates any new exposure or visit Dimension entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed instrument, detector, and physical_filter Dimension entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested. Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use. Ignored if ``pool`` is not `None`.
        run : `str`, optional
            Name of a RUN-type collection to write to, overriding
            the default derived from the instrument name.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.

        Notes
        -----
        This method inserts all datasets for an exposure within a transaction,
        guaranteeing that partial exposures are never ingested. The exposure
        dimension record is inserted with `Registry.syncDimensionData` first
        (in its own transaction), which inserts only if a record with the same
        primary key does not already exist. This allows different files within
        the same exposure to be ingested in different runs.
        """

        exposureData, bad_files = self.prep(files, pool=pool, processes=processes)
        # Up to this point, we haven't modified the data repository at all.
        # Now we finally do that, with one transaction per exposure. This is
        # not parallelized at present because the performance of this step is
        # limited by the database server. That may or may not change in the
        # future once we increase our usage of bulk inserts and reduce our
        # usage of savepoints; we've tried to get everything but the database
        # operations done in advance to reduce the time spent inside
        # transactions.
        self.butler.registry.registerDatasetType(self.datasetType)
        refs = []
        runs = set()
        n_exposures = 0
        n_exposures_failed = 0
        n_ingests_failed = 0
        for exposure in exposureData:

            self.log.debug("Attempting to ingest %d file%s from exposure %s:%s",
                           len(exposure.files), "" if len(exposure.files) == 1 else "s",
                           exposure.record.instrument, exposure.record.obs_id)

            try:
                self.butler.registry.syncDimensionData("exposure", exposure.record)
            except Exception as e:
                self._on_ingest_failure(exposure, e)
                n_exposures_failed += 1
                self.log.warning("Exposure %s:%s could not be registered: %s",
                                 exposure.record.instrument, exposure.record.obs_id, e)
                if self.config.failFast:
                    raise e
                continue

            # Use the instrument-derived default run if none was specified
            # explicitly.
            if run is None:
                instrumentClass = exposure.files[0].instrumentClass
                this_run = instrumentClass.makeDefaultRawIngestRunName()
            else:
                this_run = run
            if this_run not in runs:
                self.butler.registry.registerCollection(this_run, type=CollectionType.RUN)
                runs.add(this_run)
            try:
                with self.butler.transaction():
                    datasets_for_exposure = self.ingestExposureDatasets(exposure, run=this_run)
            except Exception as e:
                self._on_ingest_failure(exposure, e)
                n_ingests_failed += 1
                self.log.warning("Failed to ingest the following for reason: %s", e)
                for f in exposure.files:
                    self.log.warning("- %s", f.filename)
                if self.config.failFast:
                    raise e
                continue
            else:
                self._on_success(datasets_for_exposure)
                for dataset in datasets_for_exposure:
                    refs.extend(dataset.refs)

            # Success for this exposure.
            n_exposures += 1
            self.log.info("Exposure %s:%s ingested successfully",
                          exposure.record.instrument, exposure.record.obs_id)

        had_failure = False

        if bad_files:
            had_failure = True
            self.log.warning("Could not extract observation metadata from the following:")
            for f in bad_files:
                self.log.warning("- %s", f)

        self.log.info("Successfully processed data from %d exposure%s with %d failure%s from exposure"
                      " registration and %d failure%s from file ingest.",
                      n_exposures, "" if n_exposures == 1 else "s",
                      n_exposures_failed, "" if n_exposures_failed == 1 else "s",
                      n_ingests_failed, "" if n_ingests_failed == 1 else "s")
        if n_exposures_failed > 0 or n_ingests_failed > 0:
            had_failure = True
        self.log.info("Ingested %d distinct Butler dataset%s",
                      len(refs), "" if len(refs) == 1 else "s")

        if had_failure:
            raise RuntimeError("Some failures encountered during ingestion")

        return refs
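

# A hedged, end-to-end sketch of driving this task from scripted code; the
# repository path, transfer mode, and file list are placeholders, and the
# ``butler ingest-raws`` command line is typically the preferred entry point:
#
#     from lsst.daf.butler import Butler
#     from lsst.obs.base import RawIngestConfig, RawIngestTask
#
#     butler = Butler("/path/to/repo", writeable=True)
#     config = RawIngestConfig()
#     config.transfer = "symlink"
#     task = RawIngestTask(config=config, butler=butler)
#     refs = task.run(["raw_0001.fits", "raw_0002.fits"], processes=4)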