Coverage for python/lsst/daf/butler/_quantum_backed.py: 25%


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("QuantumBackedButler", "QuantumProvenanceData")

import dataclasses
import functools
import itertools
import logging
import uuid
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Type, Union

from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

from ._butlerConfig import ButlerConfig
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._limited_butler import LimitedButler
from .core import (
    Config,
    DatasetId,
    DatasetRef,
    Datastore,
    DatastoreRecordData,
    DimensionUniverse,
    Quantum,
    StorageClassFactory,
    StoredDatastoreItemInfo,
    ddl,
)
from .registry.bridge.monolithic import MonolithicDatastoreRegistryBridgeManager
from .registry.databases.sqlite import SqliteDatabase
from .registry.interfaces import DatastoreRegistryBridgeManager, OpaqueTableStorageManager
from .registry.opaque import ByNameOpaqueTableStorageManager

if TYPE_CHECKING:
    from ._butler import Butler
    from .registry import Registry

_LOG = logging.getLogger(__name__)


class _DatasetRecordStorageManagerDatastoreContructionMimic:
    """A partial implementation of `DatasetRecordStorageManager` that exists
    only to allow a `DatastoreRegistryBridgeManager` (and hence a `Datastore`)
    to be constructed without a full `Registry`.

    Notes
    -----
    The interface implemented by this class should probably be its own ABC,
    and that ABC should probably be used in the definition of
    `DatastoreRegistryBridgeManager`, but while prototyping I'm trying to keep
    changes minimal.
    """

    @classmethod
    def getIdColumnType(cls) -> type:
        # Docstring inherited.
        return ddl.GUID

    @classmethod
    def addDatasetForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        name: str = "dataset",
        constraint: bool = True,
        onDelete: Optional[str] = None,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited.
        idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=ddl.GUID, **kwargs)
        tableSpec.fields.add(idFieldSpec)
        return idFieldSpec


class QuantumBackedButler(LimitedButler):
    """An implementation of `LimitedButler` intended to back execution of a
    single `Quantum`.

    Parameters
    ----------
    quantum : `Quantum`
        Object describing the predicted input and output datasets relevant to
        this butler. This must have resolved `DatasetRef` instances for all
        inputs and outputs.
    dimensions : `DimensionUniverse`
        Object managing all dimension definitions.
    datastore : `Datastore`
        Datastore to use for all dataset I/O and existence checks.
    storageClasses : `StorageClassFactory`
        Object managing all storage class definitions.

    Notes
    -----
    Most callers should use the `initialize` `classmethod` to construct new
    instances instead of calling the constructor directly.

    `QuantumBackedButler` uses a SQLite database internally, in order to reuse
    existing `DatastoreRegistryBridge` and `OpaqueTableStorage`
    implementations that rely on SQLAlchemy. If implementations are added in
    the future that don't rely on SQLAlchemy, it should be possible to swap
    them in by overriding the type arguments to `initialize` (though at
    present, `QuantumBackedButler` would still create at least an in-memory
    SQLite database that would then go unused).

    We imagine `QuantumBackedButler` being used during (at least) batch
    execution to capture `Datastore` records and save them to per-quantum
    files, which are also a convenient place to store provenance for eventual
    upload to a SQL-backed `Registry` (once `Registry` has tables to store
    provenance, that is). These per-quantum files can be written in two ways:

    - The SQLite file used internally by `QuantumBackedButler` can be used
      directly by customizing the ``filename`` argument to ``initialize``, and
      then transferring that file to the object store after execution
      completes (or fails; a ``try/finally`` pattern probably makes sense
      here).

    - A JSON or YAML file can be written by calling `extract_provenance_data`
      and serializing the returned `QuantumProvenanceData` to a file (e.g. via
      its `to_simple` method).

    Note that at present, the SQLite file only contains datastore records, not
    provenance, but that should be easy to address (if desired) after we
    actually design a `Registry` schema for provenance. I also suspect that
    we'll want to explicitly close the SQLite file somehow before trying to
    transfer it. But I'm guessing we'd prefer to write the per-quantum files
    as JSON anyway.
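
    Examples
    --------
    A minimal sketch of the intended workflow; the repository path, quantum,
    and dimension universe here are stand-ins supplied by the caller::

        butler = QuantumBackedButler.initialize(
            config="/path/to/repo", quantum=quantum, dimensions=universe
        )
        # Check all predicted inputs so provenance records their availability.
        for ref in itertools.chain.from_iterable(quantum.inputs.values()):
            butler.datasetExistsDirect(ref)
        # ... execute the task, which calls getDirect/putDirect as needed ...
        provenance = butler.extract_provenance_data()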

    """

    def __init__(
        self,
        quantum: Quantum,
        dimensions: DimensionUniverse,
        datastore: Datastore,
        storageClasses: StorageClassFactory,
    ):
        self._quantum = quantum
        self._dimensions = dimensions
        self._predicted_inputs: Set[DatasetId] = {
            ref.getCheckedId() for ref in itertools.chain.from_iterable(quantum.inputs.values())
        }
        self._predicted_outputs: Set[DatasetId] = {
            ref.getCheckedId() for ref in itertools.chain.from_iterable(quantum.outputs.values())
        }
        self._available_inputs: Set[DatasetId] = set()
        self._unavailable_inputs: Set[DatasetId] = set()
        self._actual_inputs: Set[DatasetId] = set()
        self._actual_output_refs: Set[DatasetRef] = set()
        self.datastore = datastore
        self.storageClasses = storageClasses

    @classmethod
    def initialize(
        cls,
        config: Union[Config, str],
        quantum: Quantum,
        dimensions: DimensionUniverse,
        filename: str = ":memory:",
        OpaqueManagerClass: Type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
        BridgeManagerClass: Type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
        search_paths: Optional[List[str]] = None,
    ) -> QuantumBackedButler:
        """Construct a new `QuantumBackedButler` from repository configuration
        and helper types.

        Parameters
        ----------
        config : `Config` or `str`
            A butler repository root, configuration filename, or configuration
            instance.
        quantum : `Quantum`
            Object describing the predicted input and output datasets relevant
            to this butler. This must have resolved `DatasetRef` instances for
            all inputs and outputs.
        dimensions : `DimensionUniverse`
            Object managing all dimension definitions.
        filename : `str`, optional
            Name for the SQLite database that will back this butler; defaults
            to an in-memory database.
        OpaqueManagerClass : `type`, optional
            A subclass of `OpaqueTableStorageManager` to use for datastore
            opaque records. Default is a SQL-backed implementation.
        BridgeManagerClass : `type`, optional
            A subclass of `DatastoreRegistryBridgeManager` to use for datastore
            location records. Default is a SQL-backed implementation.
        search_paths : `list` of `str`, optional
            Additional search paths for butler configuration.
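
        Returns
        -------
        butler : `QuantumBackedButler`
            A new `QuantumBackedButler` instance.

        Examples
        --------
        A sketch of backing the butler with a per-quantum SQLite file instead
        of the in-memory default (the paths here are hypothetical)::

            butler = QuantumBackedButler.initialize(
                config="/path/to/repo",
                quantum=quantum,
                dimensions=universe,
                filename="quantum.sqlite3",
            )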

        """
        butler_config = ButlerConfig(config, searchPaths=search_paths)
        if "root" in butler_config:
            butler_root = butler_config["root"]
        else:
            butler_root = butler_config.configDir
        db = SqliteDatabase.fromUri(f"sqlite:///{filename}", origin=0)
        with db.declareStaticTables(create=True) as context:
            opaque_manager = OpaqueManagerClass.initialize(db, context)
            bridge_manager = BridgeManagerClass.initialize(
                db,
                context,
                opaque=opaque_manager,
                # MyPy can tell it's a fake, but we know it shouldn't care.
                datasets=_DatasetRecordStorageManagerDatastoreContructionMimic,  # type: ignore
                universe=dimensions,
            )
        # TODO: We need to inform `Datastore` here that it needs to support
        # predictive reads; right now that's a configuration option, but after
        # execution butler is retired it could just be a kwarg we pass here.
        # For now just force this option as we cannot work without it.
        butler_config["datastore", "trust_get_request"] = True
        datastore = Datastore.fromConfig(butler_config, bridge_manager, butler_root)
        datastore.import_records(quantum.datastore_records)
        storageClasses = StorageClassFactory()
        storageClasses.addFromConfig(butler_config)
        return cls(quantum, dimensions, datastore, storageClasses=storageClasses)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return True

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any:
        # Docstring inherited.
        try:
            obj = super().getDirect(ref, parameters=parameters)
        except (LookupError, FileNotFoundError, IOError):
            self._unavailable_inputs.add(ref.getCheckedId())
            raise
        if ref.id in self._predicted_inputs:
            # Do this after delegating to super in case that raises.
            self._actual_inputs.add(ref.id)
            self._available_inputs.add(ref.id)
        return obj

    def getDirectDeferred(
        self, ref: DatasetRef, *, parameters: Union[dict, None] = None
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        if ref.id in self._predicted_inputs:
            # Unfortunately, we can't do this after the handle succeeds in
            # loading, so it's conceivable here that we're marking an input
            # as "actual" even when it's not even available.
            self._actual_inputs.add(ref.id)
        return super().getDirectDeferred(ref, parameters=parameters)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        # Docstring inherited.
        exists = super().datasetExistsDirect(ref)
        if ref.id in self._predicted_inputs:
            if exists:
                self._available_inputs.add(ref.id)
            else:
                self._unavailable_inputs.add(ref.id)
        return exists

    def markInputUnused(self, ref: DatasetRef) -> None:
        # Docstring inherited.
        self._actual_inputs.discard(ref.getCheckedId())

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        return self._dimensions

    def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited.
        if ref.id not in self._predicted_outputs:
            raise RuntimeError("Cannot `put` a dataset that was not predicted as an output.")
        self.datastore.put(obj, ref)
        self._actual_output_refs.add(ref)
        return ref

    def extract_provenance_data(self) -> QuantumProvenanceData:
        """Extract provenance information and datastore records from this
        butler.

        Returns
        -------
        provenance : `QuantumProvenanceData`
            A serializable struct containing input/output dataset IDs and
            datastore records. This assumes all dataset IDs are UUIDs (just
            to make it easier for `pydantic` to reason about the struct's
            types); the rest of this class makes no such assumption, but the
            approach to processing in which it's useful effectively requires
            UUIDs anyway.

        Notes
        -----
        `QuantumBackedButler` records this provenance information when its
        methods are used, which mostly saves `~lsst.pipe.base.PipelineTask`
        authors from having to worry about it while still recording very
        detailed information. But it has two small weaknesses:

        - Calling `getDirectDeferred` or `getDirect` is enough to mark a
          dataset as an "actual input", which may mark some datasets that
          aren't actually used. We rely on task authors to use
          `markInputUnused` to address this.

        - We assume that the execution system will call ``datasetExistsDirect``
          on all predicted inputs prior to execution, in order to populate the
          "available inputs" set. This is what I envision
          `~lsst.ctrl.mpexec.SingleQuantumExecutor` doing after we update it
          to use this class, but it feels fragile for this class to make such
          a strong assumption about how it will be used, even if I can't think
          of any other executor behavior that would make sense.
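
        Examples
        --------
        A sketch of the execution-harness call sequence assumed above;
        ``some_input_ref`` is a hypothetical predicted-input `DatasetRef`::

            for ref in itertools.chain.from_iterable(quantum.inputs.values()):
                butler.datasetExistsDirect(ref)
            data = butler.getDirect(some_input_ref)
            butler.markInputUnused(some_input_ref)  # if it was not needed
            provenance = butler.extract_provenance_data()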

        """
        if not self._actual_inputs.isdisjoint(self._unavailable_inputs):
            _LOG.warning(
                "Inputs %s were marked as actually used (probably because a DeferredDatasetHandle "
                "was obtained) but did not actually exist. This task should be using "
                "markInputUnused directly to clarify its provenance.",
                self._actual_inputs & self._unavailable_inputs,
            )
            self._actual_inputs -= self._unavailable_inputs
        checked_inputs = self._available_inputs | self._unavailable_inputs
        if self._predicted_inputs != checked_inputs:
            _LOG.warning(
                "Execution harness did not check predicted inputs %s for existence; available inputs "
                "recorded in provenance may be incomplete.",
                self._predicted_inputs - checked_inputs,
            )
        datastore_records = self.datastore.export_records(self._actual_output_refs)
        locations: Dict[str, Set[DatasetId]] = defaultdict(set)
        records: Dict[str, List[StoredDatastoreItemInfo]] = defaultdict(list)
        for datastore_name, record_data in datastore_records.items():
            locations[datastore_name].update(ref.getCheckedId() for ref in record_data.refs)
            for table_name, table_records in record_data.records.items():
                records[table_name].extend(table_records)

        return QuantumProvenanceData(
            predicted_inputs=self._predicted_inputs,
            available_inputs=self._available_inputs,
            actual_inputs=self._actual_inputs,
            predicted_outputs=self._predicted_outputs,
            actual_outputs={ref.getCheckedId() for ref in self._actual_output_refs},
            locations=dict(locations),
            records=dict(records),
        )


@dataclasses.dataclass(frozen=True)
class QuantumProvenanceData:
    """A serializable struct for per-quantum provenance information and
    datastore records.

    Notes
    -----
    This class slightly duplicates information from the `Quantum` class itself
    (the `predicted_inputs` and `predicted_outputs` sets should have the same
    IDs present in `Quantum.inputs` and `Quantum.outputs`), but overall it
    assumes the original `Quantum` is also available to reconstruct the
    complete provenance (e.g. by associating dataset IDs with data IDs,
    dataset types, and `~CollectionType.RUN` names).
    """

    # This class probably should have information about its execution
    # environment (anything not controlled and recorded at the
    # `~CollectionType.RUN` level, such as the compute node ID), but adding it
    # now is out of scope for this prototype.

    predicted_inputs: Set[DatasetId]
    """Unique IDs of datasets that were predicted as inputs to this quantum
    when the QuantumGraph was built.
    """

    available_inputs: Set[DatasetId]
    """Unique IDs of input datasets that were actually present in the
    datastore when this quantum was executed.

    This is a subset of `predicted_inputs`, with the difference generally
    being datasets that were `predicted_outputs` but not `actual_outputs` of
    some upstream task.
    """

    actual_inputs: Set[DatasetId]
    """Unique IDs of datasets that were actually used as inputs by this task.

    This is a subset of `available_inputs`.

    Notes
    -----
    The criterion for marking an input as used is that rerunning the quantum
    with only these `actual_inputs` available must yield identical outputs.
    This means that (for example) even just using an input to help determine
    an output rejection criterion and then rejecting it as an outlier
    qualifies that input as actually used.
    """

    predicted_outputs: Set[DatasetId]
    """Unique IDs of datasets that were predicted as outputs of this quantum
    when the QuantumGraph was built.
    """

    actual_outputs: Set[DatasetId]
    """Unique IDs of datasets that were actually written when this quantum
    was executed.
    """

    locations: Dict[str, Set[DatasetId]]
    """Mapping from datastore name to the set of `actual_outputs` dataset IDs
    written by this quantum.
    """

    records: Dict[str, List[StoredDatastoreItemInfo]]
    """Rows from the opaque tables used by datastores for the `actual_outputs`
    datasets written by this quantum, indexed by opaque table name.
    """

    def to_simple(self, minimal: bool = False) -> Dict[str, Any]:
        """Make a representation of the provenance suitable for serialization.

        Implements the `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If `True`, produce a minimal representation; not used by this
            method.

        Returns
        -------
        simple : `dict`
            Representation of this instance as a simple dictionary.
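
        Examples
        --------
        A sketch of a JSON round trip through `to_simple` and `from_simple`;
        the filename is hypothetical::

            import json

            with open("provenance.json", "w") as stream:
                json.dump(provenance.to_simple(), stream)
            with open("provenance.json") as stream:
                restored = QuantumProvenanceData.from_simple(json.load(stream))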

        """
        # dataclasses.asdict does not know how to handle some types, have to
        # do it manually. Also have to replace sets with lists as some
        # serializers do not support set type.
        def _serialize_dataset_id(id: DatasetId) -> Union[int, str]:
            return id if isinstance(id, int) else f"urn:uuid:{id}"

        def _serialize_dataset_ids(ids: Set[DatasetId]) -> List[Union[int, str]]:
            return [_serialize_dataset_id(id) for id in ids]

        records: Dict[str, List[Dict[str, Any]]] = {}
        for table_name, table_records in self.records.items():
            records[table_name] = []
            for record in table_records:
                record_dict = record.to_record()
                # Have to remember the actual class name of the record.
                record_dict["__class__"] = get_full_type_name(record)
                if "dataset_id" in record_dict:
                    record_dict["dataset_id"] = _serialize_dataset_id(record_dict["dataset_id"])
                records[table_name].append(record_dict)
        locations = {datastore: _serialize_dataset_ids(ids) for datastore, ids in self.locations.items()}
        return dict(
            predicted_inputs=_serialize_dataset_ids(self.predicted_inputs),
            available_inputs=_serialize_dataset_ids(self.available_inputs),
            actual_inputs=_serialize_dataset_ids(self.actual_inputs),
            predicted_outputs=_serialize_dataset_ids(self.predicted_outputs),
            actual_outputs=_serialize_dataset_ids(self.actual_outputs),
            locations=locations,
            records=records,
        )

    @classmethod
    def from_simple(
        cls,
        simple: Dict[str, Any],
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> QuantumProvenanceData:
        """Make an instance of this class from serialized data.

        Implements the `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `dict`
            Serialized representation returned from the `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe; not used by this method.
        registry : `Registry`, optional
            Registry instance; not used by this method.

        Returns
        -------
        provenance : `QuantumProvenanceData`
            Deserialized instance of `QuantumProvenanceData`.
        """

        def _deserialize_dataset_id(id: Union[int, str]) -> DatasetId:
            return id if isinstance(id, int) else uuid.UUID(id)

        def _deserialize_dataset_ids(ids: List[Union[int, str]]) -> Set[DatasetId]:
            return {_deserialize_dataset_id(id) for id in ids}

        @functools.lru_cache(maxsize=None)
        def _get_class(class_name: str) -> Type:
            """Get the class type for a given fully-qualified class name."""
            return doImportType(class_name)

        # Unpack records; use ``record_class`` rather than rebinding ``cls``,
        # which would shadow the classmethod argument.
        records: Dict[str, List[StoredDatastoreItemInfo]] = {}
        for table_name, table_records in simple["records"].items():
            records[table_name] = []
            for record in table_records:
                record_class = _get_class(record.pop("__class__"))
                if "dataset_id" in record:
                    record["dataset_id"] = _deserialize_dataset_id(record["dataset_id"])
                records[table_name].append(record_class.from_record(record))
        locations = {
            datastore: _deserialize_dataset_ids(ids) for datastore, ids in simple["locations"].items()
        }

        return QuantumProvenanceData(
            predicted_inputs=_deserialize_dataset_ids(simple["predicted_inputs"]),
            available_inputs=_deserialize_dataset_ids(simple["available_inputs"]),
            actual_inputs=_deserialize_dataset_ids(simple["actual_inputs"]),
            predicted_outputs=_deserialize_dataset_ids(simple["predicted_outputs"]),
            actual_outputs=_deserialize_dataset_ids(simple["actual_outputs"]),
            locations=locations,
            records=records,
        )

    @staticmethod
    def collect_and_transfer(
        butler: Butler, quanta: Iterable[Quantum], provenance: Iterable[QuantumProvenanceData]
    ) -> None:
        """Transfer output datasets from multiple quanta to a more permanent
        `Butler` repository.

        Parameters
        ----------
        butler : `Butler`
            Full butler representing the data repository to transfer datasets
            to.
        quanta : `Iterable` [ `Quantum` ]
            Iterable of `Quantum` objects that carry information about
            predicted outputs. May be a single-pass iterator.
        provenance : `Iterable` [ `QuantumProvenanceData` ]
            Provenance and datastore data for each of the given quanta, in the
            same order. May be a single-pass iterator.

        Notes
        -----
        Input-output provenance data is not actually transferred yet, because
        `Registry` has no place to store it.

        This method probably works most efficiently if run on all quanta for a
        single task label at once, because this will gather all datasets of
        a particular type together into a single vectorized `Registry` import.
        It should still behave correctly if run on smaller groups of quanta
        or even on quanta from multiple tasks.

        Currently this method transfers datastore record data unchanged, with
        no possibility of actually moving (e.g.) files. Datastores that are
        present only in execution or only in the more permanent butler are
        ignored.
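
        Examples
        --------
        A sketch assuming per-quantum provenance was saved as JSON files in
        the same order as ``quanta``; the filenames are hypothetical::

            import json

            provenance = []
            for filename in provenance_filenames:
                with open(filename) as stream:
                    provenance.append(QuantumProvenanceData.from_simple(json.load(stream)))
            QuantumProvenanceData.collect_and_transfer(butler, quanta, provenance)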

        """
        grouped_refs = defaultdict(list)
        datastore_records: Dict[str, DatastoreRecordData] = defaultdict(DatastoreRecordData)
        for quantum, provenance_for_quantum in zip(quanta, provenance):
            quantum_refs_by_id = {
                ref.getCheckedId(): ref
                for ref in itertools.chain.from_iterable(quantum.outputs.values())
                if ref.getCheckedId() in provenance_for_quantum.actual_outputs
            }
            for ref in quantum_refs_by_id.values():
                grouped_refs[ref.datasetType, ref.run].append(ref)
            for datastore_name in set(butler.datastore.names) & provenance_for_quantum.locations.keys():
                datastore_records[datastore_name].refs.extend(
                    quantum_refs_by_id[id] for id in provenance_for_quantum.locations[datastore_name]
                )
                for opaque_table_name, records_for_table in provenance_for_quantum.records.items():
                    datastore_records[datastore_name].records[opaque_table_name].extend(records_for_table)
        for refs in grouped_refs.values():
            butler.registry._importDatasets(refs)
        butler.datastore.import_records(datastore_records)