Coverage for python/lsst/analysis/tools/interfaces/datastore/_dispatcher.py: 13%
275 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 03:20 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 03:20 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SasquatchDispatchPartialFailure", "SasquatchDispatchFailure", "SasquatchDispatcher")
26"""Sasquatch datastore"""
27import calendar
28import datetime
29import json
30import logging
31import math
32import re
33from collections.abc import Mapping, MutableMapping, Sequence
34from dataclasses import dataclass
35from typing import TYPE_CHECKING, Any, cast
36from uuid import UUID, uuid4
38import requests
39from lsst.daf.butler import DatasetRef
40from lsst.resources import ResourcePath
41from lsst.utils.packages import getEnvironmentPackages
43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 from .. import MetricMeasurementBundle
# Module-level logger used by everything in this file.
log = logging.getLogger(__name__)

# Constants associated with SasquatchDispatcher

# Value sent as ``partitions_count`` when a kafka topic is created.
PARTITIONS = 1

# Value sent as ``replication_factor`` when a kafka topic is created.
REPLICATION_FACTOR = 3

# Identifier fields written into every dispatched record. A key that is not
# supplied by the caller is recorded as an empty string.
IDENTIFIER_KEYS = [
    "detector",
    "patch",
    "skymap",
    "visit",
    "tract",
    "physical_filter",
    "instrument",
    "band",
    "exposure",
]
class SasquatchDispatchPartialFailure(RuntimeError):
    """Indicate that a Sasquatch dispatch was only partially successful.

    Raised by `SasquatchDispatcher.dispatch` when one or more records may
    not have been uploaded entirely.
    """
class SasquatchDispatchFailure(RuntimeError):
    """Indicate that dispatching a
    `~lsst.analysis.tools.interfaces.MetricMeasurementBundle` failed.
    """
80def _tag2VersionTime(productStr: str) -> tuple[str, float]:
81 """Determine versions and dates from the string returned from
82 getEnvironmentPackages.
84 The `~lsst.utils.packages.genEnvironmentPackages` function returns the
85 setup version associated with a product, along with a list of tags that
86 have been added to it.
88 This method splits up that return string, and determines the earliest date
89 associated with the setup package version.
91 Parameters
92 ----------
93 productStr : `str`
94 The product string returned from a lookup on the result of a call to
95 `~lsst.utils.packages.getEnvironmentPackages`.
97 Returns
98 -------
99 result : `tuple` of `str`, `datetime.datetime`
100 The first `str` is the version of the package, and the second is the
101 datetime object associated with that released version.
103 Raises
104 ------
105 ValueError
106 Raised if there are no tags which correspond to dates.
107 """
108 times: list[datetime.datetime] = []
109 version = productStr.split()[0]
110 tags: str = re.findall("[(](.*)[)]", productStr)[0]
111 for tag in tags.split():
112 numDots = tag.count(".")
113 numUnder = tag.count("_")
114 separator = "_"
115 if numDots > numUnder:
116 separator = "."
117 match tag.split(separator):
118 # Daily tag branch.
119 case ("d", year, month, day):
120 dt = datetime.datetime(year=int(year), month=int(month), day=int(day))
121 # Weekly tag branch.
122 case ("w", year, week):
123 iyear = int(year)
124 iweek = int(week)
125 # Use 4 as the day because releases are available starting
126 # on Thursday
127 dayOfWeek = 4
129 # Find the first week to contain a thursday in it
130 cal = calendar.Calendar()
131 cal.setfirstweekday(6)
132 i = 0
133 for i, iterWeek in enumerate(cal.monthdatescalendar(iyear, 1)):
134 if iterWeek[dayOfWeek].month == 1:
135 break
136 # Handle fromisocalendar not being able to handle week 53
137 # in the case were the date was going to subtract 7 days anyway
138 if i and iweek == 53:
139 i = 0
140 iweek = 52
141 delta = datetime.timedelta(days=7 * i)
143 # Correct for a weekly being issued in the last week of the
144 # previous year, as Thursdays don't always line up evenly in
145 # a week / year split.
146 dt = datetime.datetime.fromisocalendar(iyear, iweek, dayOfWeek) - delta
147 # Skip tags that can't be understood.
148 case _:
149 continue
150 times.append(dt)
151 if len(times) == 0:
152 raise ValueError("Could not find any tags corresponding to dates")
153 minTime = min(times)
154 minTime.replace(tzinfo=datetime.timezone.utc)
155 return version, minTime.timestamp()
158@dataclass
159class SasquatchDispatcher:
160 """This class mediates the transfer of MetricMeasurementBundles to a
161 Sasquatch http kafka proxy server.
162 """
164 url: str
165 """Url of the Sasquatch proxy server"""
167 token: str
168 """Authentication token used in communicating with the proxy server"""
170 namespace: str = "lsst.dm"
171 """The namespace in Sasquatch in which to write the uploaded metrics"""
173 def __post_init__(self) -> None:
174 match ResourcePath(self.url).scheme:
175 case "http" | "https":
176 pass
177 case _:
178 raise ValueError("Proxy server must be locatable with either http or https")
180 self._cluster_id: str | None = None
182 @property
183 def clusterId(self) -> str:
184 """ClusterId of the Kafka proxy
186 Notes
187 -----
188 The cluster Id will be fetched with a network call if it is not
189 already cached.
190 """
191 if self._cluster_id is None:
192 self._populateClusterId()
193 return cast(str, self._cluster_id)
195 def _populateClusterId(self) -> None:
196 """Get Sasquatch kafka cluster ID."""
198 headers = {"content-type": "application/json"}
199 r = requests.get(f"{self.url}/v3/clusters", headers=headers)
201 if r.status_code == requests.codes.ok:
202 cluster_id = r.json()["data"][0]["cluster_id"]
204 self._cluster_id = str(cluster_id)
205 else:
206 log.error("Could not retrieve the cluster id for the specified url")
207 raise SasquatchDispatchFailure("Could not retrieve the cluster id for the specified url")
209 def _create_topic(self, topic_name: str) -> bool:
210 """Create a kafka topic in Sasquatch.
212 Parameters
213 ----------
214 topic_name : `str`
215 The name of the kafka topic to create
217 returns : `bool`
218 If this does not encounter an error it will return a True success
219 code, else it will return a False code.
221 """
223 headers = {"content-type": "application/json"}
225 topic_config = {
226 "topic_name": f"{self.namespace}.{topic_name}",
227 "partitions_count": PARTITIONS,
228 "replication_factor": REPLICATION_FACTOR,
229 }
231 r = requests.post(
232 f"{self.url}/v3/clusters/{self.clusterId}/topics", json=topic_config, headers=headers
233 )
235 if r.status_code == requests.codes.created:
236 log.debug("Created topic %s.%s", self.namespace, topic_name)
237 return True
238 elif r.status_code == requests.codes.bad_request:
239 log.debug("Topic %s.%s already exists.", self.namespace, topic_name)
240 return True
241 else:
242 log.error("Uknown error occured creating kafka topic %s %s", r.status_code, r.json())
243 return False
245 def _generateAvroSchema(self, metric: str, record: MutableMapping[str, Any]) -> tuple[str, bool]:
246 """Infer the Avro schema from the record payload.
248 Parameters
249 ----------
250 metric : `str`
251 The name of the metric
252 record : `MutableMapping`
253 The prepared record for which a schema is to be generated
255 Returns
256 -------
257 resultSchema : `str`
258 A json encoded string of the resulting avro schema
259 errorCode : bool
260 A boolean indicating if any record fields had to be trimmed because
261 a suitable schema could not be generated. True if records were
262 removed, False otherwise.
263 """
264 schema: dict[str, Any] = {"type": "record", "namespace": self.namespace, "name": metric}
266 # Record if any records needed to be trimmed
267 resultsTrimmed = False
269 fields = list()
270 # If avro schemas cant be generated for values, they should be removed
271 # from the records.
272 keysToRemove: list[str] = []
273 for key in record:
274 value = record[key]
275 avroType: Mapping[str, Any]
276 if "timestamp" in key:
277 avroType = {"type": "double"}
278 else:
279 avroType = self._python2Avro(value)
280 if len(avroType) == 0:
281 continue
282 if avroType.get("error_in_conversion"):
283 keysToRemove.append(key)
284 resultsTrimmed = True
285 continue
286 fields.append({"name": key, **avroType})
288 # remove any key that failed to have schema generated
289 for key in keysToRemove:
290 record.pop(key)
292 schema["fields"] = fields
294 return json.dumps(schema), resultsTrimmed
296 def _python2Avro(self, value: Any) -> Mapping:
297 """Map python type to avro schema
299 Parameters
300 ----------
301 value : `Any`
302 Any python parameter.
304 Returns
305 -------
306 result : `Mapping`
307 Return a mapping that represents an entry in an avro schema.
308 """
309 match value:
310 case float() | None:
311 return {"type": "float", "default": 0.0}
312 case str():
313 return {"type": "string", "default": ""}
314 case int():
315 return {"type": "int", "default": 0}
316 case Sequence():
317 tmp = {self._python2Avro(item)["type"] for item in value}
318 if len(tmp) == 0:
319 return {}
320 if len(tmp) > 1:
321 log.error(
322 "Sequence contains mixed types: %s, must be homogeneous for avro conversion "
323 "skipping record",
324 tmp,
325 )
326 return {"error_in_conversion": True}
327 return {"type": "array", "items": tmp.pop()}
328 case _:
329 log.error("Unsupported type %s, skipping record", type(value))
330 return {}
332 def _handleReferencePackage(self, meta: MutableMapping, bundle: MetricMeasurementBundle) -> None:
333 """Check to see if there is a reference package.
335 if there is a reference package, determine the datetime associated with
336 this reference package. Save the package, the version, and the date to
337 the common metric fields.
339 Parameters
340 ----------
341 meta : `MutableMapping`
342 A mapping which corresponds to fields which should be encoded in
343 all records.
344 bundle : `MetricMeasurementBundle`
345 The bundled metrics
346 """
347 package_version, package_timestamp = "", 0.0
348 if ref_package := getattr(bundle, "reference_package", ""):
349 ref_package = bundle.reference_package
350 packages = getEnvironmentPackages(True)
351 if package_info := packages.get(ref_package):
352 try:
353 package_version, package_timestamp = _tag2VersionTime(package_info)
354 except ValueError:
355 # Could not extract package timestamp leaving empty
356 pass
357 # explicit handle if None was set in the bundle for the package
358 meta["reference_package"] = ref_package or ""
359 meta["reference_package_version"] = package_version
360 meta["reference_package_timestamp"] = package_timestamp
362 def _handleTimes(self, meta: MutableMapping, bundle: MetricMeasurementBundle, run: str) -> None:
363 """Add times to the meta fields mapping.
365 Add all appropriate timestamp fields to the meta field mapping. These
366 will be added to all records.
368 This method will also look at the bundle to see if it defines a
369 preferred time. It so it sets that time as the main time stamp to be
370 used for this record.
372 Parameters
373 ----------
374 meta : `MutableMapping`
375 A mapping which corresponds to fields which should be encoded in
376 all records.
377 bundle : `MetricMeasurementBundle`
378 The bundled metrics
379 run : `str`
380 The `~lsst.daf.butler.Butler` collection where the
381 `MetricMeasurementBundle` is stored.
382 """
383 # Determine the timestamp associated with the run, if someone abused
384 # the run collection, use the current timestamp
385 if re.match(r"\d{8}T\d{6}Z", stamp := run.split("/")[-1]):
386 run_timestamp = datetime.datetime.strptime(stamp, r"%Y%m%dT%H%M%S%z")
387 else:
388 run_timestamp = datetime.datetime.now()
389 meta["run_timestamp"] = run_timestamp.timestamp()
391 # If the bundle supports supplying timestamps, dispatch on the type
392 # specified.
393 if hasattr(bundle, "timestamp_version") and bundle.timestamp_version:
394 match bundle.timestamp_version:
395 case "reference_package_timestamp":
396 if not meta["reference_package_timestamp"]:
397 log.error("Reference package timestamp is empty, using run_timestamp")
398 meta["timestamp"] = meta["run_timestamp"]
399 else:
400 meta["timestamp"] = meta["reference_package_timestamp"]
401 case "run_timestamp":
402 meta["timestamp"] = meta["run_timestamp"]
403 case "current_timestamp":
404 timeStamp = datetime.datetime.now()
405 meta["timestamp"] = timeStamp.timestamp()
406 case "dataset_timestamp":
407 log.error("dataset timestamps are not yet supported, run_timestamp will be used")
408 meta["timestamp"] = meta["run_timestamp"]
409 case _:
410 log.error(
411 "Timestamp version %s is not supported, run_timestamp will be used",
412 bundle.timestamp_version,
413 )
414 meta["timestamp"] = meta["run_timestamp"]
415 # Default to using the run_timestamp.
416 else:
417 meta["timestamp"] = meta["run_timestamp"]
419 def _handleIdentifier(
420 self,
421 meta: MutableMapping,
422 identifierFields: Mapping[str, Any] | None,
423 datasetIdentifier: str | None,
424 bundle: MetricMeasurementBundle,
425 ) -> None:
426 """Add an identifier to the meta record mapping.
428 If the bundle declares a dataset identifier to use add that to the
429 record, otherwise use 'Generic' as the identifier. If the
430 datasetIdentifier parameter is specified, that is used instead of
431 anything specified by the bundle.
433 This will also add any identifier fields supplied to the meta record
434 mapping.
436 Together these values (in addition to the timestamp and topic) should
437 uniquely identify an upload to the Sasquatch system.
439 Parameters
440 ----------
441 meta : `MutableMapping`
442 A mapping which corresponds to fields which should be encoded in
443 all records.
444 identifierFields: `Mapping` or `None`
445 The keys and values in this mapping will be both added as fields
446 in the record, and used in creating a unique tag for the uploaded
447 dataset type. I.e. the timestamp, and the tag will be unique, and
448 each record will belong to one combination of such.
449 datasetIdentifier : `str`
450 A string which will be used in creating unique identifier tags.
451 bundle : `MetricMeasurementBundle`
452 The bundle containing metric values to upload.
453 """
454 identifier: str
455 if datasetIdentifier is not None:
456 identifier = datasetIdentifier
457 elif hasattr(bundle, "datasetIdentifier") and bundle.datasetIdentifier is not None:
458 identifier = bundle.datasetIdentifier
459 else:
460 identifier = "Generic"
462 meta["dataset_tag"] = identifier
464 if identifierFields is None:
465 identifierFields = {}
466 for key in IDENTIFIER_KEYS:
467 value = identifierFields.get(key, "")
468 meta[key] = f"{value}"
470 def _prepareBundle(
471 self,
472 bundle: MetricMeasurementBundle,
473 run: str,
474 datasetType: str,
475 timestamp: datetime.datetime | None = None,
476 id: UUID | None = None,
477 identifierFields: Mapping | None = None,
478 datasetIdentifier: str | None = None,
479 extraFields: Mapping | None = None,
480 ) -> tuple[Mapping[str, list[Any]], bool]:
481 """Encode all of the inputs into a format that can be sent to the
482 kafka proxy server.
484 Parameters
485 ----------
486 bundle : `MetricMeasurementBundle`
487 The bundle containing metric values to upload.
488 run : `str`
489 The run name to associate with these metric values. If this bundle
490 is also stored in the butler, this should be the butler run
491 collection the bundle is stored in the butler.
492 datasetType : `str`
493 The dataset type name associated with this
494 `MetricMeasurementBundle`
495 timestamp : `str` or `None`
496 The timestamp to be associated with the measurements in the ingress
497 database. If this value is None, timestamp will be set by the run
498 time or current time.
499 id : `UUID` or `None`
500 The UUID of the `MetricMeasurementBundle` within the butler. If
501 `None`, a new random UUID will be generated so that each record in
502 Sasquatch will have a unique value.
503 datasetIdentifier : `str`
504 A string which will be used in creating unique identifier tags.
505 identifierFields: `Mapping` or `None`
506 The keys and values in this mapping will be both added as fields
507 in the record, and used in creating a unique tag for the uploaded
508 dataset type. I.e. the timestamp, and the tag will be unique, and
509 each record will belong to one combination of such.
510 extraFields: `Mapping`
511 Extra mapping keys and values that will be added as fields to the
512 dispatched record.
514 Returns
515 -------
516 result : `Mapping` of `str` to `list`
517 A mapping of metric name of list of metric measurement records.
518 status : `bool`
519 A status boolean indicating if some records had to be skipped due
520 to a problem parsing the bundle.
521 """
522 if id is None:
523 id = uuid4()
524 sid = str(id)
525 meta: dict[str, Any] = dict()
527 # Add other associated common fields
528 meta["id"] = sid
529 meta["run"] = run
530 meta["dataset_type"] = datasetType
532 # Check to see if the bundle declares a reference package
533 self._handleReferencePackage(meta, bundle)
535 # Handle the various timestamps that could be associated with a record
536 self._handleTimes(meta, bundle, run)
538 # Always use the supplied timestamp if one was passed to use.
539 if timestamp is not None:
540 meta["timestamp"] = timestamp.timestamp()
542 self._handleIdentifier(meta, identifierFields, datasetIdentifier, bundle)
544 # Add in any other fields that were supplied to the function call.
545 if extraFields is not None:
546 meta.update(extraFields)
548 metricRecords: dict[str, list[Any]] = dict()
550 # Record if any records needed skipped
551 resultsTrimmed = False
553 # Look at each of the metrics in the bundle (name, values)
554 for metric, measurements in bundle.items():
555 # Create a list which will contain the records for each measurement
556 # associated with metric.
557 metricRecordList = metricRecords.setdefault(metric, list())
559 record: dict[str, Any] = meta.copy()
561 # loop over each metric measurement within the metric
562 for measurement in measurements:
563 # need to extract any tags, package info, etc
564 note_key = f"{measurement.metric_name.metric}.metric_tags"
565 record["tags"] = dict(measurement.notes.items()).get(note_key, list())
567 # Missing values are replaced by 0 in sasquatch, see RFC-763.
568 name = ""
569 value = 0.0
570 match measurement.json:
571 case {"metric": name, "value": None}:
572 pass
573 case {"metric": name, "value": value}:
574 if math.isnan(value):
575 log.error(
576 "Measurement %s had a value that is a NaN, dispatch will be skipped",
577 measurement,
578 )
579 resultsTrimmed = True
580 continue
581 pass
582 case {"value": _}:
583 log.error("Measurement %s does not contain the key 'metric'", measurement)
584 resultsTrimmed = True
585 continue
586 case {"metric": _}:
587 log.error("Measurement %s does not contain the key 'value'", measurement)
588 resultsTrimmed = True
589 continue
590 record[name] = value
592 metricRecordList.append({"value": record})
593 return metricRecords, resultsTrimmed
595 def dispatch(
596 self,
597 bundle: MetricMeasurementBundle,
598 run: str,
599 datasetType: str,
600 timestamp: datetime.datetime | None = None,
601 id: UUID | None = None,
602 datasetIdentifier: str | None = None,
603 identifierFields: Mapping | None = None,
604 extraFields: Mapping | None = None,
605 ) -> None:
606 """Dispatch a `MetricMeasurementBundle` to Sasquatch.
608 Parameters
609 ----------
610 bundle : `MetricMeasurementBundle`
611 The bundle containing metric values to upload.
612 run : `str`
613 The run name to associate with these metric values. If this bundle
614 is also stored in the butler, this should be the butler run
615 collection the bundle is stored in the butler. This will be used
616 in generating uniqueness constraints in Sasquatch.
617 datasetType : `str`
618 The dataset type name associated with this
619 `MetricMeasurementBundle`.
620 timestamp : `str` or `None`
621 The timestamp to be associated with the measurements in the ingress
622 database. If this value is None, timestamp will be set by the run
623 time or current time.
624 id : `UUID` or `None`
625 The UUID of the `MetricMeasurementBundle` within the Butler. If
626 `None`, a new random UUID will be generated so that each record in
627 Sasquatch will have a unique value.
628 datasetIdentifier : `str` or `None`
629 A string which will be used in creating unique identifier tags. If
630 `None`, a default value will be inserted.
631 identifierFields: `Mapping` or `None`
632 The keys and values in this mapping will be both added as fields
633 in the record, and used in creating a unique tag for the uploaded
634 dataset type. I.e. the timestamp, and the tag will be unique, and
635 each record will belong to one combination of such. Examples of
636 entries would be things like visit or tract.
637 extraFields: `Mapping`
638 Extra mapping keys and values that will be added as fields to the
639 dispatched record.
641 Raises
642 ------
643 SasquatchDispatchPartialFailure
644 Raised if there were any errors in dispatching a bundle.
645 """
646 if id is None:
647 id = uuid4()
649 # Prepare the bundle by transforming it to a list of metric records
650 metricRecords, recordsTrimmed = self._prepareBundle(
651 bundle=bundle,
652 run=run,
653 datasetType=datasetType,
654 timestamp=timestamp,
655 id=id,
656 datasetIdentifier=datasetIdentifier,
657 identifierFields=identifierFields,
658 extraFields=extraFields,
659 )
661 headers = {"content-type": "application/vnd.kafka.avro.v2+json"}
662 data: dict[str, Any] = dict()
663 partialUpload = False
664 uploadFailed = []
666 for metric, record in metricRecords.items():
667 # create the kafka topic if it does not already exist
668 if not self._create_topic(metric):
669 log.error("Topic not created, skipping dispatch of %s", metric)
670 continue
671 recordValue = record[0]["value"]
672 # Generate schemas for each record
673 data["value_schema"], schemaTrimmed = self._generateAvroSchema(metric, recordValue)
674 data["records"] = record
676 if schemaTrimmed:
677 partialUpload = True
679 r = requests.post(f"{self.url}/topics/{self.namespace}.{metric}", json=data, headers=headers)
681 if r.status_code == requests.codes.ok:
682 log.debug("Succesfully sent data for metric %s", metric)
683 uploadFailed.append(False)
684 else:
685 log.error(
686 "There was a problem submitting the metric %s: %s, %s", metric, r.status_code, r.json()
687 )
688 uploadFailed.append(True)
689 partialUpload = True
691 # There may be no metrics to try to upload, and thus the uploadFailed
692 # list may be empty, check before issuing failure
693 if len(uploadFailed) > 0 and all(uploadFailed):
694 raise SasquatchDispatchFailure("All records were unable to be uploaded.")
696 if partialUpload or recordsTrimmed:
697 raise SasquatchDispatchPartialFailure("One or more records may not have been uploaded entirely")
699 def dispatchRef(
700 self,
701 bundle: MetricMeasurementBundle,
702 ref: DatasetRef,
703 timestamp: datetime.datetime | None = None,
704 extraFields: Mapping | None = None,
705 datasetIdentifier: str | None = None,
706 ) -> None:
707 """Dispatch a `MetricMeasurementBundle` to Sasquatch with a known
708 `DatasetRef`.
710 Parameters
711 ----------
712 bundle : `MetricMeasurementBundle`
713 The bundle containing metric values to upload.
714 ref : `DatasetRef`
715 The `Butler` dataset ref corresponding to the input
716 `MetricMeasurementBundle`.
717 timestamp : `str` or `None`
718 The timestamp to be associated with the measurements in the ingress
719 database. If this value is None, timestamp will be set by the run
720 time or current time.
721 extraFields: `Mapping` or `None`
722 Extra mapping keys and values that will be added as fields to the
723 dispatched record if not None.
724 datasetIdentifier : `str` or `None`
725 A string which will be used in creating unique identifier tags. If
726 None, a default value will be inserted.
728 Raises
729 ------
730 SasquatchDispatchPartialFailure
731 Raised if there were any errors in dispatching a bundle.
732 """
733 # Parse the relevant info out of the dataset ref.
734 serializedRef = ref.to_simple()
735 id = serializedRef.id
736 if serializedRef.run is None:
737 run = "<unknown>"
738 else:
739 run = serializedRef.run
740 dstype = serializedRef.datasetType
741 datasetType = dstype.name if dstype is not None else ""
742 dataRefMapping = serializedRef.dataId.dataId if serializedRef.dataId else None
744 self.dispatch(
745 bundle,
746 run=run,
747 timestamp=timestamp,
748 datasetType=datasetType,
749 id=id,
750 identifierFields=dataRefMapping,
751 extraFields=extraFields,
752 datasetIdentifier=datasetIdentifier,
753 )