Coverage for python/lsst/obs/base/gen2to3/repoConverter.py: 19%

220 statements  


# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RepoConverter"]

import fnmatch
import os.path
import re
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Optional, Set, Tuple, Union

from lsst.daf.butler import DataCoordinate, DatasetType, FileDataset, Progress
from lsst.daf.butler.registry import DataIdError
from lsst.sphgeom import RangeSet, Region
from lsst.utils import doImportType

from ..ingest import _log_msg_counter
from .repoWalker import RepoWalker

if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter, Registry, SkyPixDimension, StorageClass

    from .._instrument import Instrument
    from ..mapping import Mapping as CameraMapperMapping  # disambiguate from collections.abc.Mapping
    from .convertRepo import ConvertRepoTask
    from .scanner import PathElementHandler


@dataclass
class ConversionSubset:
    """A helper class for `ConvertRepoTask` and `RepoConverter` that maintains
    lists of related data ID values that should be included in the conversion.

    Parameters
    ----------
    instrument : `str`
        Instrument name used in Gen3 data IDs.
    visits : `set` of `int`
        Visit IDs that define the filter.
    """

    def __init__(self, instrument: str, visits: Set[int]):
        self.instrument = instrument
        self.visits = visits
        self.regions = None
        self.tracts = {}
        self.skypix = {}

    def addSkyMap(self, registry: Registry, name: str):
        """Populate the included tract IDs for the given skymap from those
        that overlap the visits the `ConversionSubset` was initialized with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit/tract overlaps.
        name : `str`
            SkyMap name used in Gen3 data IDs.
        """
        tracts = set()
        self.tracts[name] = tracts
        for visit in self.visits:
            for dataId in registry.queryDataIds(
                ["tract"], dataId={"skymap": name, "instrument": self.instrument, "visit": visit}
            ):
                tracts.add(dataId["tract"])

    def addSkyPix(self, registry: Registry, dimension: SkyPixDimension):
        """Populate the included skypix IDs for the given dimension from those
        that overlap the visits the `ConversionSubset` was initialized with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit regions.
        dimension : `lsst.daf.butler.SkyPixDimension`
            SkyPix dimension whose pixelization is used to compute the
            included ID ranges.
        """
        if self.regions is None:
            self.regions = []
            for visit in self.visits:
                dataId = registry.expandDataId(instrument=self.instrument, visit=visit)
                self.regions.append(dataId.region)
        ranges = RangeSet()
        for region in self.regions:
            ranges = ranges.union(dimension.pixelization.envelope(region))
        self.skypix[dimension] = ranges

    def isRelated(self, dataId: DataCoordinate) -> bool:
        """Test whether the given data ID is related to this subset and hence
        should be included in a repository conversion.

        Parameters
        ----------
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID to test.

        Returns
        -------
        related : `bool`
            `True` if this data ID should be included in a repository
            conversion.

        Notes
        -----
        More formally, this tests that the given data ID is not unrelated;
        if a data ID does not involve tracts, visits, or skypix dimensions,
        we always include it.
        """
        if self.visits is None:
            # We're not filtering at all.
            return True
        if "visit" in dataId.graph and dataId["visit"] not in self.visits:
            return False
        if "tract" in dataId.graph and dataId["tract"] not in self.tracts[dataId["skymap"]]:
            return False
        for dimension, ranges in self.skypix.items():
            if dimension in dataId.graph and not ranges.intersects(dataId[dimension]):
                return False
        return True

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    instrument: str
    """The name of the instrument, as used in Gen3 data IDs (`str`).
    """

    visits: Set[int]
    """The set of visit IDs that should be included in the conversion (`set`
    of `int`).
    """

    regions: Optional[List[Region]]
    """Regions for all visits (`list` of `lsst.sphgeom.Region`).

    Set to `None` before it has been initialized.  Any code that attempts to
    use it when it is `None` has a logic bug.
    """

    tracts: Dict[str, Set[int]]
    """Tracts that should be included in the conversion, grouped by skymap
    name (`dict` mapping `str` to `set` of `int`).
    """

    skypix: Dict[SkyPixDimension, RangeSet]
    """SkyPix ranges that should be included in the conversion, grouped by
    dimension (`dict` mapping `SkyPixDimension` to `lsst.sphgeom.RangeSet`).
    """
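
# A minimal usage sketch for the helper above (all identifiers and values are
# hypothetical, not part of this module): a subset is constructed from the
# visits to convert, optionally narrowed per skymap or skypix dimension, and
# ``isRelated`` is then used as a predicate while walking the Gen2 repository:
#
#     subset = ConversionSubset(instrument="HSC", visits={903334, 903336})
#     subset.addSkyMap(registry, "hsc_rings_v1")
#     if subset.isRelated(dataId):
#         ...  # include this dataset in the conversion
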

class RepoConverter(ABC):
    """An abstract base class for objects that help `ConvertRepoTask` convert
    datasets from a single Gen2 repository.

    Parameters
    ----------
    task : `ConvertRepoTask`
        Task instance that is using this helper object.
    root : `str`
        Root of the Gen2 repo being converted.  Will be converted to an
        absolute path, resolving symbolic links and ``~``, if necessary.
    instrument : `Instrument`
        Gen3 instrument class to use for this conversion.
    run : `str`, optional
        Name of the default `~lsst.daf.butler.CollectionType.RUN` collection
        that converted datasets are inserted into.  May be `None` if `getRun`
        is overridden to provide a run for every dataset type.
    subset : `ConversionSubset`, optional
        Helper object that implements a filter that restricts the data IDs
        that are converted.

    Notes
    -----
    `RepoConverter` defines the only public API users of its subclasses should
    use (`prep`, `ingest`, and `finish`).  These delegate to several abstract
    methods that subclasses must implement.  In some cases, subclasses may
    reimplement the public methods as well, but are expected to delegate to
    ``super()`` either at the beginning or end of their own implementation.
    """

    def __init__(
        self,
        *,
        task: ConvertRepoTask,
        root: str,
        instrument: Instrument,
        run: Optional[str],
        subset: Optional[ConversionSubset] = None,
    ):
        self.task = task
        self.root = os.path.realpath(os.path.expanduser(root))
        self.instrument = instrument
        self.subset = subset
        self.progress = Progress("obs.base.gen2to3")
        self._run = run
        self._repoWalker = None  # Created in prep
        self._fileDatasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] = defaultdict(
            lambda: defaultdict(list)
        )
        self._fileDatasetCount = 0
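
    # Layout of the ``_fileDatasets`` mapping populated above (the concrete
    # values shown here are hypothetical): the outer key is a `DatasetType`,
    # the inner key is a calibDate string or `None` for non-calibration
    # datasets, and the leaves are lists of `FileDataset`, e.g.
    #
    #     {<DatasetType "flat">: {"2014-01-01": [FileDataset(...), ...]},
    #      <DatasetType "calexp">: {None: [FileDataset(...), ...]}}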

    @abstractmethod
    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        """Test whether the given dataset type is handled specially by this
        converter and hence should be ignored by generic base-class logic that
        searches for dataset types to convert.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type to test.

        Returns
        -------
        special : `bool`
            `True` if the dataset type is special.
        """
        raise NotImplementedError()

    @abstractmethod
    def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
        """Iterate over all `CameraMapper` `Mapping` objects that should be
        considered for conversion by this repository.

        This should include any datasets that may appear in the repository,
        including those that are special (see `isDatasetTypeSpecial`) and
        those that are being ignored (see
        `ConvertRepoTask.isDatasetTypeIncluded`); this allows the converter
        to identify and hence skip these datasets quietly instead of warning
        about them as unrecognized.

        Yields
        ------
        datasetTypeName : `str`
            Name of the dataset type.
        mapping : `lsst.obs.base.mapping.Mapping`
            Mapping object used by the Gen2 `CameraMapper` to describe the
            dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeRepoWalkerTarget(
        self,
        datasetTypeName: str,
        template: str,
        keys: Dict[str, type],
        storageClass: StorageClass,
        formatter: FormatterParameter = None,
        targetHandler: Optional[PathElementHandler] = None,
    ) -> RepoWalker.Target:
        """Make a struct that identifies a dataset type to be extracted by
        walking the repo directory structure.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type (the same in both Gen2 and Gen3).
        template : `str`
            The full Gen2 filename template.
        keys : `dict` [`str`, `type`]
            A dictionary mapping Gen2 data ID key to the type of its value.
        storageClass : `lsst.daf.butler.StorageClass`
            Gen3 storage class for this dataset type.
        formatter : `lsst.daf.butler.Formatter` or `str`, optional
            A Gen3 formatter class or fully-qualified name.
        targetHandler : `PathElementHandler`, optional
            Specialist target handler to use for this dataset type.

        Returns
        -------
        target : `RepoWalker.Target`
            A struct containing information about the target dataset (much of
            it simply forwarded from the arguments).
        """
        raise NotImplementedError()
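
    # The default implementation below skips nothing; subclasses typically
    # return nested directories whose contents are handled elsewhere.  As a
    # hypothetical example, a converter for a Gen2 root that nests a
    # calibration repo and reruns might return ["CALIB", "rerun"].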

    def getSpecialDirectories(self) -> List[str]:
        """Return a list of directory paths that should not be searched for
        files.

        These may be directories that simply do not contain datasets (or
        contain datasets in another repository), or directories whose datasets
        are handled specially by a subclass.

        Returns
        -------
        directories : `list` [`str`]
            The full paths of directories to skip, relative to the repository
            root.
        """
        return []
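
    # A sketch of what ``prep`` (below) assembles for the repo walker (the
    # template shown is hypothetical): for a Gen2 template such as
    # "calexp/v%(visit)d_f%(filter)s.fits", one walker input is created for
    # the template itself and one each for its ".gz" and ".fz" compressed
    # variants.  Each becomes a `RepoWalker.Target` when the dataset type is
    # included and a storage class was found, or a `RepoWalker.Skip`
    # otherwise, so that matching files are recognized without warnings.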

    def prep(self):
        """Perform preparatory work associated with the dataset types to be
        converted from this repository (but not the datasets themselves).

        Notes
        -----
        This should be a relatively fast operation that should not depend on
        the size of the repository.

        Subclasses may override this method, but must delegate to the base
        class implementation at some point in their own logic.
        More often, subclasses will specialize the behavior of `prep` by
        overriding other methods to which the base class implementation
        delegates.  These include:

        - `iterMappings`
        - `isDatasetTypeSpecial`
        - `getSpecialDirectories`
        - `makeRepoWalkerTarget`

        This should not perform any write operations to the Gen3 repository.
        It is guaranteed to be called before `ingest`.
        """
        self.task.log.info("Preparing other dataset types from root %s.", self.root)
        walkerInputs: List[Union[RepoWalker.Target, RepoWalker.Skip]] = []
        for datasetTypeName, mapping in self.iterMappings():
            try:
                template = self.task.config.datasetTemplateOverrides.get(datasetTypeName, mapping.template)
            except RuntimeError:
                # No template for this dataset in this mapper, so there's no
                # way there should be instances of this dataset in this repo.
                continue
            extensions = [""]
            skip = False
            message = None
            storageClass = None
            if not self.task.isDatasetTypeIncluded(datasetTypeName) or self.isDatasetTypeSpecial(
                datasetTypeName
            ):
                # User indicated not to include this data, but we still want
                # to recognize files of that type to avoid warning about them.
                skip = True
            else:
                storageClass = self._guessStorageClass(datasetTypeName, mapping)
                if storageClass is None:
                    # This may be a problem, but only if we actually encounter
                    # any files corresponding to this dataset.  Of course, we
                    # need to be able to parse those files in order to
                    # recognize that situation.
                    message = f"no storage class found for {datasetTypeName}"
                    skip = True
            # Handle files that are compressed on disk, but the gen2 template
            # is just `.fits`.
            if template.endswith(".fits"):
                extensions.extend((".gz", ".fz"))
            for extension in extensions:
                if skip:
                    walkerInput = RepoWalker.Skip(
                        template=template + extension,
                        keys=mapping.keys(),
                        message=message,
                    )
                    self.task.log.debug("Skipping template in walker: %s", template)
                else:
                    assert message is None
                    targetHandler = self.task.config.targetHandlerClasses.get(datasetTypeName)
                    if targetHandler is not None:
                        targetHandler = doImportType(targetHandler)
                    walkerInput = self.makeRepoWalkerTarget(
                        datasetTypeName=datasetTypeName,
                        template=template + extension,
                        keys=mapping.keys(),
                        storageClass=storageClass,
                        formatter=self.task.config.formatterClasses.get(datasetTypeName),
                        targetHandler=targetHandler,
                    )
                    self.task.log.debug(
                        "Adding template to walker: %s + %s, for %s",
                        template,
                        extension,
                        walkerInput.datasetType,
                    )
                walkerInputs.append(walkerInput)

        for dirPath in self.getSpecialDirectories():
            walkerInputs.append(
                RepoWalker.Skip(
                    template=dirPath,  # not really a template, but that's fine; it's relative to root.
                    keys={},
                    message=None,
                    isForFiles=True,
                )
            )
        fileIgnoreRegExTerms = []
        for pattern in self.task.config.fileIgnorePatterns:
            fileIgnoreRegExTerms.append(fnmatch.translate(pattern))
        if fileIgnoreRegExTerms:
            fileIgnoreRegEx = re.compile("|".join(fileIgnoreRegExTerms))
        else:
            fileIgnoreRegEx = None
        self._repoWalker = RepoWalker(
            walkerInputs,
            fileIgnoreRegEx=fileIgnoreRegEx,
            log=self.task.log.getChild("repoWalker"),
            progress=self.progress,
        )

    def iterDatasets(self) -> Iterator[FileDataset]:
        """Iterate over datasets in the repository that should be ingested
        into the Gen3 repository.

        The base class implementation yields nothing; the datasets handled by
        the `RepoConverter` base class itself are read directly in
        `findDatasets`.

        Subclasses should override this method if they support additional
        datasets that are handled some other way.

        Yields
        ------
        dataset : `FileDataset`
            Structures representing datasets to be ingested.  Paths should be
            absolute.
        """
        yield from ()

    def findDatasets(self):
        assert self._repoWalker, "prep() must be called before findDatasets."
        self.task.log.info("Adding special datasets in repo %s.", self.root)
        for dataset in self.iterDatasets():
            assert len(dataset.refs) == 1
            # None index below is for calibDate, which is only relevant for
            # CalibRepoConverter.
            self._fileDatasets[dataset.refs[0].datasetType][None].append(dataset)
        self.task.log.info("Finding datasets from files in repo %s.", self.root)
        datasetsByTypeAndCalibDate = self._repoWalker.walk(
            self.root, predicate=(self.subset.isRelated if self.subset is not None else None)
        )
        for datasetType, datasetsByCalibDate in datasetsByTypeAndCalibDate.items():
            for calibDate, datasets in datasetsByCalibDate.items():
                self._fileDatasets[datasetType][calibDate].extend(datasets)
                self._fileDatasetCount += len(datasets)

    def expandDataIds(self):
        """Expand the data IDs for all datasets to be inserted.

        Subclasses may override this method, but must delegate to the base
        class implementation if they do.

        This involves queries to the registry, but not writes.  It is
        guaranteed to be called between `findDatasets` and `ingest`.
        """
        import itertools

        with self.progress.bar(desc="Expanding data IDs", total=self._fileDatasetCount) as progressBar:
            for datasetType, datasetsByCalibDate in self._fileDatasets.items():
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    if calibDate is not None:
                        self.task.log.info(
                            "Expanding data IDs for %d dataset%s of type %s at calibDate %s.",
                            *_log_msg_counter(datasetsForCalibDate),
                            datasetType.name,
                            calibDate,
                        )
                    else:
                        self.task.log.info(
                            "Expanding data IDs for %d non-calibration dataset%s of type %s.",
                            *_log_msg_counter(datasetsForCalibDate),
                            datasetType.name,
                        )
                    expanded = []
                    for dataset in datasetsForCalibDate:
                        for i, ref in enumerate(dataset.refs):
                            self.task.log.debug("Expanding data ID %s.", ref.dataId)
                            try:
                                dataId = self.task.registry.expandDataId(ref.dataId)
                                dataset.refs[i] = ref.expanded(dataId)
                            except DataIdError as err:
                                self.task.log.warning("Skipping ingestion for '%s': %s", dataset.path, err)
                                # Remove skipped datasets from multi-extension
                                # FileDatasets.
                                dataset.refs[i] = None  # We will strip off the `None`s after the loop.
                        dataset.refs[:] = itertools.filterfalse(lambda x: x is None, dataset.refs)
                        if dataset.refs:
                            expanded.append(dataset)
                        progressBar.update()
                    datasetsForCalibDate[:] = expanded

    def ingest(self):
        """Insert converted datasets into the Gen3 repository.

        Subclasses may override this method, but must delegate to the base
        class implementation at some point in their own logic.

        This method is guaranteed to be called after `expandDataIds`.
        """
        with self.progress.bar(
            desc="Ingesting converted datasets", total=self._fileDatasetCount
        ) as progressBar:
            for datasetType, datasetsByCalibDate in self._fileDatasets.items():
                self.task.registry.registerDatasetType(datasetType)
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    try:
                        run = self.getRun(datasetType.name, calibDate)
                    except LookupError:
                        self.task.log.warning(f"No run configured for dataset type {datasetType.name}.")
                        continue
                    self.task.log.info(
                        "Ingesting %d dataset%s into run %s of type %s.",
                        *_log_msg_counter(datasetsForCalibDate),
                        run,
                        datasetType.name,
                    )
                    try:
                        self.task.registry.registerRun(run)
                        self.task.butler3.ingest(
                            *datasetsForCalibDate, transfer=self.task.config.transfer, run=run
                        )
                        progressBar.update(len(datasetsForCalibDate))
                    except LookupError as err:
                        raise LookupError(
                            f"Error expanding data ID for dataset type {datasetType.name}."
                        ) from err

    def finish(self) -> None:
        """Finish conversion of a repository.

        This is run after ``ingest``, and delegates to `_finish`, which should
        be overridden by derived classes instead of this method.
        """
        self._finish(self._fileDatasets, self._fileDatasetCount)

    def _finish(
        self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], count: int
    ) -> None:
        """Subclass implementation hook for `finish`.

        The default implementation does nothing.  This is generally the best
        place to define and populate non-``RUN`` collections that may contain
        some of the datasets that have just been ingested.

        Parameters
        ----------
        datasets : `Mapping`
            Nested mapping containing all converted datasets.  The outer
            mapping keys are `DatasetType` instances.  Values are mappings
            from ``calibDate`` or `None` to a `list` of `FileDataset`
            instances.
        count : `int`
            Total number of `FileDataset` instances in ``datasets``.
        """
        pass

    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
        """Return the name of the run into which instances of the given
        dataset type should be inserted.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        calibDate : `str`, optional
            If not `None`, the "CALIBDATE" associated with this (calibration)
            dataset in the Gen2 data repository.

        Returns
        -------
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection.
        """
        assert self._run is not None, "Method must be overridden if self._run is allowed to be None"
        assert calibDate is None, "Method must be overridden if calibDate is allowed to be not None"
        return self._run
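
    # The helper below resolves a storage class by trying, in order: the
    # task's ``storageClasses`` config keyed by dataset type name, by the
    # Gen2 mapping's ``python`` type, by its ``persistable`` name, and by the
    # unqualified python type name.  Failing all of those, it falls back to
    # looking the ``persistable`` and unqualified names up directly in the
    # butler's storage-class registry.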

    def _guessStorageClass(
        self, datasetTypeName: str, mapping: CameraMapperMapping
    ) -> Optional[StorageClass]:
        """Infer the Gen3 `StorageClass` for a dataset type from a combination
        of configuration and Gen2 dataset type information.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        mapping : `lsst.obs.base.mapping.Mapping`
            Mapping object used by the Gen2 `CameraMapper` to describe the
            dataset type.

        Returns
        -------
        storageClass : `lsst.daf.butler.StorageClass` or `None`
            Storage class to use for this dataset type, or `None` if one
            could not be determined.
        """
        storageClassName = self.task.config.storageClasses.get(datasetTypeName)
        if storageClassName is None and mapping.python is not None:
            storageClassName = self.task.config.storageClasses.get(mapping.python, None)
        if storageClassName is None and mapping.persistable is not None:
            storageClassName = self.task.config.storageClasses.get(mapping.persistable, None)
        if storageClassName is None and mapping.python is not None:
            unqualified = mapping.python.split(".")[-1]
            storageClassName = self.task.config.storageClasses.get(unqualified, None)
        if storageClassName is not None:
            storageClass = self.task.butler3.storageClasses.getStorageClass(storageClassName)
        else:
            try:
                storageClass = self.task.butler3.storageClasses.getStorageClass(mapping.persistable)
            except KeyError:
                storageClass = None
            if storageClass is None and mapping.python is not None:
                try:
                    storageClass = self.task.butler3.storageClasses.getStorageClass(unqualified)
                except KeyError:
                    pass
            if storageClass is None:
                self.task.log.debug("No StorageClass found for %s; skipping.", datasetTypeName)
            else:
                self.task.log.debug("Using StorageClass %s for %s.", storageClass.name, datasetTypeName)
        return storageClass

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    task: ConvertRepoTask
    """The parent task that constructed and uses this converter
    (`ConvertRepoTask`).
    """

    root: str
    """Root path to the Gen2 repository this converter manages (`str`).

    This is a complete path, not relative to some other repository root.
    """

    subset: Optional[ConversionSubset]
    """An object that represents a filter to be applied to the datasets that
    are converted (`ConversionSubset` or `None`).
    """
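

# A minimal usage sketch of the public API, assuming a hypothetical concrete
# subclass ``ExampleRepoConverter`` and an already-configured ``task`` and
# ``instrument``; the call order follows the guarantees documented on each
# method (``prep`` before ``findDatasets``, ``expandDataIds`` between
# ``findDatasets`` and ``ingest``, and ``finish`` after ``ingest``):
#
#     converter = ExampleRepoConverter(
#         task=task,
#         root="/path/to/gen2/repo",
#         instrument=instrument,
#         run="HSC/runs/example",
#     )
#     converter.prep()
#     converter.findDatasets()
#     converter.expandDataIds()
#     converter.ingest()
#     converter.finish()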