Coverage for python/lsst/obs/base/gen2to3/repoConverter.py: 18%

213 statements  

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RepoConverter"]

import fnmatch
import os.path
import re
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Optional, Set, Tuple, Union

from lsst.daf.butler import DataCoordinate, DatasetType, FileDataset, Progress
from lsst.sphgeom import RangeSet, Region
from lsst.utils import doImport

from ..ingest import _log_msg_counter
from .repoWalker import RepoWalker

if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter, Registry, SkyPixDimension, StorageClass

    from .._instrument import Instrument
    from ..mapping import Mapping as CameraMapperMapping  # disambiguate from collections.abc.Mapping
    from .convertRepo import ConvertRepoTask
    from .scanner import PathElementHandler


@dataclass
class ConversionSubset:
    """A helper class for `ConvertRepoTask` and `RepoConverter` that maintains
    lists of related data ID values that should be included in the conversion.

    Parameters
    ----------
    instrument : `str`
        Instrument name used in Gen3 data IDs.
    visits : `set` of `int`
        Visit IDs that define the filter.
    """

    def __init__(self, instrument: str, visits: Set[int]):
        self.instrument = instrument
        self.visits = visits
        self.regions = None
        self.tracts = {}
        self.skypix = {}

    def addSkyMap(self, registry: Registry, name: str):
        """Populate the included tract IDs for the given skymap from those
        that overlap the visits the `ConversionSubset` was initialized with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit/tract overlaps.
        name : `str`
            SkyMap name used in Gen3 data IDs.
        """
        tracts = set()
        self.tracts[name] = tracts
        for visit in self.visits:
            for dataId in registry.queryDataIds(
                ["tract"], dataId={"skymap": name, "instrument": self.instrument, "visit": visit}
            ):
                tracts.add(dataId["tract"])

    def addSkyPix(self, registry: Registry, dimension: SkyPixDimension):
        """Populate the included skypix IDs for the given dimension from
        those that overlap the visits the `ConversionSubset` was initialized
        with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit regions.
        dimension : `lsst.daf.butler.SkyPixDimension`
            SkyPix dimension whose pixelization defines the ID ranges to
            include.
        """
        if self.regions is None:
            self.regions = []
            for visit in self.visits:
                dataId = registry.expandDataId(instrument=self.instrument, visit=visit)
                self.regions.append(dataId.region)
        ranges = RangeSet()
        for region in self.regions:
            ranges = ranges.union(dimension.pixelization.envelope(region))
        self.skypix[dimension] = ranges

    def isRelated(self, dataId: DataCoordinate) -> bool:
        """Test whether the given data ID is related to this subset and hence
        should be included in a repository conversion.

        Parameters
        ----------
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID to test.

        Returns
        -------
        related : `bool`
            `True` if this data ID should be included in a repository
            conversion.

        Notes
        -----
        More formally, this tests that the given data ID is not unrelated;
        if a data ID does not involve tracts, visits, or skypix dimensions,
        we always include it.
        """
        if self.visits is None:
            # We're not filtering at all.
            return True
        if "visit" in dataId.graph and dataId["visit"] not in self.visits:
            return False
        if "tract" in dataId.graph and dataId["tract"] not in self.tracts[dataId["skymap"]]:
            return False
        for dimension, ranges in self.skypix.items():
            if dimension in dataId.graph and not ranges.intersects(dataId[dimension]):
                return False
        return True

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    instrument: str
    """The name of the instrument, as used in Gen3 data IDs (`str`).
    """

    visits: Set[int]
    """The set of visit IDs that should be included in the conversion (`set`
    of `int`).
    """

    regions: Optional[List[Region]]
    """Regions for all visits (`list` of `lsst.sphgeom.Region`).

    Set to `None` before it has been initialized. Any code that attempts to
    use it when it is `None` has a logic bug.
    """

    tracts: Dict[str, Set[int]]
    """Tracts that should be included in the conversion, grouped by skymap
    name (`dict` mapping `str` to `set` of `int`).
    """

    skypix: Dict[SkyPixDimension, RangeSet]
    """SkyPix ranges that should be included in the conversion, grouped by
    dimension (`dict` mapping `SkyPixDimension` to `lsst.sphgeom.RangeSet`).
    """
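
# A minimal usage sketch for `ConversionSubset` (illustrative only; the
# registry object, instrument name, visit IDs, and skymap name below are
# assumptions, not values defined in this package):
#
#     subset = ConversionSubset(instrument="HSC", visits={903334, 903336})
#     subset.addSkyMap(registry, "hsc_rings_v1")
#     dataId = registry.expandDataId(instrument="HSC", visit=903334)
#     if subset.isRelated(dataId):
#         ...  # this data ID overlaps the subset and should be converted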


class RepoConverter(ABC):
    """An abstract base class for objects that help `ConvertRepoTask` convert
    datasets from a single Gen2 repository.

    Parameters
    ----------
    task : `ConvertRepoTask`
        Task instance that is using this helper object.
    root : `str`
        Root of the Gen2 repo being converted. Will be converted to an
        absolute path, resolving symbolic links and ``~``, if necessary.
    instrument : `Instrument`
        Gen3 instrument class to use for this conversion.
    run : `str` or `None`
        Name of the `~lsst.daf.butler.CollectionType.RUN` collection that
        converted datasets are inserted into by default (see `getRun`).
    subset : `ConversionSubset`, optional
        Helper object that implements a filter that restricts the data IDs
        that are converted.

    Notes
    -----
    `RepoConverter` defines the only public API users of its subclasses should
    use (`prep`, `ingest`, and `finish`). These delegate to several abstract
    methods that subclasses must implement. In some cases, subclasses may
    reimplement the public methods as well, but are expected to delegate to
    ``super()`` either at the beginning or end of their own implementation.
    """

    def __init__(
        self,
        *,
        task: ConvertRepoTask,
        root: str,
        instrument: Instrument,
        run: Optional[str],
        subset: Optional[ConversionSubset] = None,
    ):
        self.task = task
        self.root = os.path.realpath(os.path.expanduser(root))
        self.instrument = instrument
        self.subset = subset
        self.progress = Progress("obs.base.gen2to3")
        self._run = run
        self._repoWalker = None  # Created in prep
        self._fileDatasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] = defaultdict(
            lambda: defaultdict(list)
        )
        self._fileDatasetCount = 0
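        # Bookkeeping sketch: ``self._fileDatasets`` is a nested mapping of
        # the form (values below are illustrative, not real data)
        #
        #     {DatasetType("flat", ...): {"2013-11-03": [FileDataset(...), ...],
        #                                 None: [FileDataset(...), ...]}}
        #
        # where the inner key is the Gen2 ``calibDate`` (or `None` for
        # datasets that are not calibrations), and ``self._fileDatasetCount``
        # tracks the total number of `FileDataset` instances across all keys.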

    @abstractmethod
    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        """Test whether the given dataset is handled specially by this
        converter and hence should be ignored by generic base-class logic
        that searches for dataset types to convert.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type to test.

        Returns
        -------
        special : `bool`
            `True` if the dataset type is special.
        """
        raise NotImplementedError()

    @abstractmethod
    def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
        """Iterate over all `CameraMapper` `Mapping` objects that should be
        considered for conversion by this repository.

        This should include any datasets that may appear in the repository,
        including those that are special (see `isDatasetTypeSpecial`) and
        those that are being ignored (see
        `ConvertRepoTask.isDatasetTypeIncluded`); this allows the converter
        to identify and hence skip these datasets quietly instead of warning
        about them as unrecognized.

        Yields
        ------
        datasetTypeName : `str`
            Name of the dataset type.
        mapping : `lsst.obs.base.mapping.Mapping`
            Mapping object used by the Gen2 `CameraMapper` to describe the
            dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeRepoWalkerTarget(
        self,
        datasetTypeName: str,
        template: str,
        keys: Dict[str, type],
        storageClass: StorageClass,
        formatter: FormatterParameter = None,
        targetHandler: Optional[PathElementHandler] = None,
    ) -> RepoWalker.Target:
        """Make a struct that identifies a dataset type to be extracted by
        walking the repo directory structure.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type (the same in both Gen2 and Gen3).
        template : `str`
            The full Gen2 filename template.
        keys : `dict` [`str`, `type`]
            A dictionary mapping Gen2 data ID key to the type of its value.
        storageClass : `lsst.daf.butler.StorageClass`
            Gen3 storage class for this dataset type.
        formatter : `lsst.daf.butler.Formatter` or `str`, optional
            A Gen 3 formatter class or fully-qualified name.
        targetHandler : `PathElementHandler`, optional
            Specialist target handler to use for this dataset type.

        Returns
        -------
        target : `RepoWalker.Target`
            A struct containing information about the target dataset (much of
            it simply forwarded from the arguments).
        """
        raise NotImplementedError()

    def getSpecialDirectories(self) -> List[str]:
        """Return a list of directory paths that should not be searched for
        files.

        These may be directories that simply do not contain datasets (or
        contain datasets in another repository), or directories whose
        datasets are handled specially by a subclass.

        Returns
        -------
        directories : `list` [`str`]
            The full paths of directories to skip, relative to the repository
            root.
        """
        return []

    def prep(self):
        """Perform preparatory work associated with the dataset types to be
        converted from this repository (but not the datasets themselves).

        Notes
        -----
        This should be a relatively fast operation that should not depend on
        the size of the repository.

        Subclasses may override this method, but must delegate to the base
        class implementation at some point in their own logic.
        More often, subclasses will specialize the behavior of `prep` by
        overriding other methods to which the base class implementation
        delegates. These include:

        - `iterMappings`
        - `isDatasetTypeSpecial`
        - `getSpecialDirectories`
        - `makeRepoWalkerTarget`

        This should not perform any write operations to the Gen3 repository.
        It is guaranteed to be called before `ingest`.
        """
        self.task.log.info("Preparing other dataset types from root %s.", self.root)
        walkerInputs: List[Union[RepoWalker.Target, RepoWalker.Skip]] = []
        for datasetTypeName, mapping in self.iterMappings():
            try:
                template = self.task.config.datasetTemplateOverrides.get(datasetTypeName, mapping.template)
            except RuntimeError:
                # No template for this dataset in this mapper, so there's no
                # way there should be instances of this dataset in this repo.
                continue
            extensions = [""]
            skip = False
            message = None
            storageClass = None
            if not self.task.isDatasetTypeIncluded(datasetTypeName) or self.isDatasetTypeSpecial(
                datasetTypeName
            ):
                # User indicated not to include this data, but we still want
                # to recognize files of that type to avoid warning about them.
                skip = True
            else:
                storageClass = self._guessStorageClass(datasetTypeName, mapping)
                if storageClass is None:
                    # This may be a problem, but only if we actually encounter
                    # any files corresponding to this dataset. Of course, we
                    # need to be able to parse those files in order to
                    # recognize that situation.
                    message = f"no storage class found for {datasetTypeName}"
                    skip = True
            # Handle files that are compressed on disk, but the gen2 template
            # is just `.fits`.
            if template.endswith(".fits"):
                extensions.extend((".gz", ".fz"))
            for extension in extensions:
                if skip:
                    walkerInput = RepoWalker.Skip(
                        template=template + extension,
                        keys=mapping.keys(),
                        message=message,
                    )
                    self.task.log.debug("Skipping template in walker: %s", template)
                else:
                    assert message is None
                    targetHandler = self.task.config.targetHandlerClasses.get(datasetTypeName)
                    if targetHandler is not None:
                        targetHandler = doImport(targetHandler)
                    walkerInput = self.makeRepoWalkerTarget(
                        datasetTypeName=datasetTypeName,
                        template=template + extension,
                        keys=mapping.keys(),
                        storageClass=storageClass,
                        formatter=self.task.config.formatterClasses.get(datasetTypeName),
                        targetHandler=targetHandler,
                    )
                    self.task.log.debug(
                        "Adding template to walker: %s + %s, for %s",
                        template,
                        extension,
                        walkerInput.datasetType,
                    )
                walkerInputs.append(walkerInput)

        for dirPath in self.getSpecialDirectories():
            walkerInputs.append(
                RepoWalker.Skip(
                    template=dirPath,  # not really a template, but that's fine; it's relative to root.
                    keys={},
                    message=None,
                    isForFiles=True,
                )
            )
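        # Translate the configured glob-style ignore patterns into a single
        # alternation regex below; ``fnmatch.translate`` converts each glob
        # into a regular-expression term. Illustrative example (output shown
        # for recent Python versions; the pattern itself is an assumption):
        #
        #     fnmatch.translate("*.log")  ->  '(?s:.*\\.log)\\Z'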

        fileIgnoreRegExTerms = []
        for pattern in self.task.config.fileIgnorePatterns:
            fileIgnoreRegExTerms.append(fnmatch.translate(pattern))
        if fileIgnoreRegExTerms:
            fileIgnoreRegEx = re.compile("|".join(fileIgnoreRegExTerms))
        else:
            fileIgnoreRegEx = None
        self._repoWalker = RepoWalker(
            walkerInputs,
            fileIgnoreRegEx=fileIgnoreRegEx,
            log=self.task.log.getChild("repoWalker"),
            progress=self.progress,
        )

    def iterDatasets(self) -> Iterator[FileDataset]:
        """Iterate over datasets in the repository that should be ingested
        into the Gen3 repository.

        The base class implementation yields nothing; the datasets handled by
        the `RepoConverter` base class itself are read directly in
        `findDatasets`.

        Subclasses should override this method if they support additional
        datasets that are handled some other way.

        Yields
        ------
        dataset : `FileDataset`
            Structures representing datasets to be ingested. Paths should be
            absolute.
        """
        yield from ()

    def findDatasets(self):
        assert self._repoWalker, "prep() must be called before findDatasets."
        self.task.log.info("Adding special datasets in repo %s.", self.root)
        for dataset in self.iterDatasets():
            assert len(dataset.refs) == 1
            # None index below is for calibDate, which is only relevant for
            # CalibRepoConverter.
            self._fileDatasets[dataset.refs[0].datasetType][None].append(dataset)
        self.task.log.info("Finding datasets from files in repo %s.", self.root)
        datasetsByTypeAndCalibDate = self._repoWalker.walk(
            self.root, predicate=(self.subset.isRelated if self.subset is not None else None)
        )
        for datasetType, datasetsByCalibDate in datasetsByTypeAndCalibDate.items():
            for calibDate, datasets in datasetsByCalibDate.items():
                self._fileDatasets[datasetType][calibDate].extend(datasets)
                self._fileDatasetCount += len(datasets)

    def expandDataIds(self):
        """Expand the data IDs for all datasets to be inserted.

        Subclasses may override this method, but must delegate to the base
        class implementation if they do.

        This involves queries to the registry, but not writes. It is
        guaranteed to be called between `findDatasets` and `ingest`.
        """
        import itertools

        with self.progress.bar(desc="Expanding data IDs", total=self._fileDatasetCount) as progressBar:
            for datasetType, datasetsByCalibDate in self._fileDatasets.items():
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    if calibDate is not None:
                        self.task.log.info(
                            "Expanding data IDs for %d dataset%s of type %s at calibDate %s.",
                            *_log_msg_counter(datasetsForCalibDate),
                            datasetType.name,
                            calibDate,
                        )
                    else:
                        self.task.log.info(
                            "Expanding data IDs for %d non-calibration dataset%s of type %s.",
                            *_log_msg_counter(datasetsForCalibDate),
                            datasetType.name,
                        )
                    expanded = []
                    for dataset in datasetsForCalibDate:
                        for i, ref in enumerate(dataset.refs):
                            self.task.log.debug("Expanding data ID %s.", ref.dataId)
                            try:
                                dataId = self.task.registry.expandDataId(ref.dataId)
                                dataset.refs[i] = ref.expanded(dataId)
                            except LookupError as err:
                                self.task.log.warning("Skipping ingestion for '%s': %s", dataset.path, err)
                                # Remove skipped datasets from multi-extension
                                # FileDatasets.
                                dataset.refs[i] = None  # We will strip off the `None`s after the loop.
                        dataset.refs[:] = itertools.filterfalse(lambda x: x is None, dataset.refs)
                        if dataset.refs:
                            expanded.append(dataset)
                        progressBar.update()
                    datasetsForCalibDate[:] = expanded

    def ingest(self):
        """Insert converted datasets into the Gen3 repository.

        Subclasses may override this method, but must delegate to the base
        class implementation at some point in their own logic.

        This method is guaranteed to be called after `expandDataIds`.
        """
        with self.progress.bar(
            desc="Ingesting converted datasets", total=self._fileDatasetCount
        ) as progressBar:
            for datasetType, datasetsByCalibDate in self._fileDatasets.items():
                self.task.registry.registerDatasetType(datasetType)
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    try:
                        run = self.getRun(datasetType.name, calibDate)
                    except LookupError:
                        self.task.log.warning(f"No run configured for dataset type {datasetType.name}.")
                        continue
                    self.task.log.info(
                        "Ingesting %d dataset%s into run %s of type %s.",
                        *_log_msg_counter(datasetsForCalibDate),
                        run,
                        datasetType.name,
                    )
                    try:
                        self.task.registry.registerRun(run)
                        self.task.butler3.ingest(
                            *datasetsForCalibDate, transfer=self.task.config.transfer, run=run
                        )
                        progressBar.update(len(datasetsForCalibDate))
                    except LookupError as err:
                        raise LookupError(
                            f"Error expanding data ID for dataset type {datasetType.name}."
                        ) from err

    def finish(self) -> None:
        """Finish conversion of a repository.

        This is run after ``ingest``, and delegates to `_finish`, which should
        be overridden by derived classes instead of this method.
        """
        self._finish(self._fileDatasets, self._fileDatasetCount)

    def _finish(
        self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], count: int
    ) -> None:
        """Subclass implementation hook for `finish`.

        The default implementation does nothing. This is generally the best
        place to define and populate non-``RUN`` collections that may contain
        some of the datasets that have just been ingested.

        Parameters
        ----------
        datasets : `Mapping`
            Nested mapping containing all converted datasets. The outer
            mapping keys are `DatasetType` instances. Values are mappings
            from ``calibDate`` or `None` to a `list` of `FileDataset`
            instances.
        count : `int`
            Total number of `FileDataset` instances in ``datasets``.
        """
        pass

    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
        """Return the name of the run into which instances of the given
        dataset type should be inserted by this converter.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        calibDate : `str`, optional
            If not `None`, the "CALIBDATE" associated with this (calibration)
            dataset in the Gen2 data repository.

        Returns
        -------
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection.
        """
        assert self._run is not None, "Method must be overridden if self._run is allowed to be None"
        assert calibDate is None, "Method must be overridden if calibDate is allowed to be not None"
        return self._run
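
    # Hypothetical override sketch (not part of this class): a calibration
    # converter could route each calibDate to its own RUN collection, e.g.
    #
    #     def getRun(self, datasetTypeName, calibDate=None):
    #         if calibDate is None:
    #             return super().getRun(datasetTypeName)
    #         return f"{self._run}/{calibDate}"
    #
    # The collection-name pattern above is illustrative only.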

    def _guessStorageClass(
        self, datasetTypeName: str, mapping: CameraMapperMapping
    ) -> Optional[StorageClass]:
        """Infer the Gen3 `StorageClass` for a dataset type from a
        combination of configuration and Gen2 dataset type information.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        mapping : `lsst.obs.base.mapping.Mapping`
            Mapping object used by the Gen2 `CameraMapper` to describe the
            dataset type.

        Returns
        -------
        storageClass : `lsst.daf.butler.StorageClass` or `None`
            The storage class to use for this dataset type, or `None` if one
            could not be determined.
        """
        storageClassName = self.task.config.storageClasses.get(datasetTypeName)
        if storageClassName is None and mapping.python is not None:
            storageClassName = self.task.config.storageClasses.get(mapping.python, None)
        if storageClassName is None and mapping.persistable is not None:
            storageClassName = self.task.config.storageClasses.get(mapping.persistable, None)
        if storageClassName is None and mapping.python is not None:
            unqualified = mapping.python.split(".")[-1]
            storageClassName = self.task.config.storageClasses.get(unqualified, None)
        if storageClassName is not None:
            storageClass = self.task.butler3.storageClasses.getStorageClass(storageClassName)
        else:
            try:
                storageClass = self.task.butler3.storageClasses.getStorageClass(mapping.persistable)
            except KeyError:
                storageClass = None
            if storageClass is None and mapping.python is not None:
                try:
                    storageClass = self.task.butler3.storageClasses.getStorageClass(unqualified)
                except KeyError:
                    pass
            if storageClass is None:
                self.task.log.debug("No StorageClass found for %s; skipping.", datasetTypeName)
            else:
                self.task.log.debug("Using StorageClass %s for %s.", storageClass.name, datasetTypeName)
        return storageClass

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    task: ConvertRepoTask
    """The parent task that constructed and uses this converter
    (`ConvertRepoTask`).
    """

    root: str
    """Root path to the Gen2 repository this converter manages (`str`).

    This is a complete path, not relative to some other repository root.
    """

    subset: Optional[ConversionSubset]
    """An object that represents a filter to be applied to the datasets that
    are converted (`ConversionSubset` or `None`).
    """
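
# Conversion lifecycle sketch for a concrete subclass (the subclass name,
# task, instrument, path, and run name below are assumptions): the call
# order follows the guarantees documented above, with `prep` before
# `findDatasets`, `expandDataIds` between `findDatasets` and `ingest`, and
# `finish` last.
#
#     converter = MyRepoConverter(task=task, root="/path/to/gen2/repo",
#                                 instrument=instrument, run="imported/raw")
#     converter.prep()
#     converter.findDatasets()
#     converter.expandDataIds()
#     converter.ingest()
#     converter.finish()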