Coverage for python/lsst/summit/utils/efdUtils.py: 15%
184 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 05:37 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 05:37 -0700
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import asyncio
23import datetime
24import logging
25import re
27import pandas as pd
28from astropy import units as u
29from astropy.time import Time, TimeDelta
30from deprecated.sphinx import deprecated
32from lsst.utils.iteration import ensure_iterable
34from .utils import getSite
36HAS_EFD_CLIENT = True
37try:
38 from lsst_efd_client import EfdClient
39except ImportError:
40 HAS_EFD_CLIENT = False
42__all__ = [
43 "getEfdData",
44 "getMostRecentRowWithDataBefore",
45 "makeEfdClient",
46 "expRecordToTimespan",
47 "efdTimestampToAstropy",
48 "astropyToEfdTimestamp",
49 "clipDataToEvent",
50 "calcNextDay",
51 "getDayObsStartTime",
52 "getDayObsEndTime",
53 "getDayObsForTime",
54 "getSubTopics", # deprecated, being removed in w_2023_50
55 "getTopics",
56 "getCommands",
57]
60COMMAND_ALIASES = {
61 "raDecTarget": "lsst.sal.MTPtg.command_raDecTarget",
62 "moveToTarget": "lsst.sal.MTMount.command_moveToTarget",
63 "startTracking": "lsst.sal.MTMount.command_startTracking",
64 "stopTracking": "lsst.sal.MTMount.command_stopTracking",
65 "trackTarget": "lsst.sal.MTMount.command_trackTarget", # issued at 20Hz - don't plot
66}
68# When looking backwards in time to find the most recent state event, look back
69# in chunks of this size. Too small, and there will be too many queries; too
70# large, and there will be too much data returned unnecessarily, as we only need
71# one row by definition. This parameter will be tuned in consultation with SQuaRE.
72TIME_CHUNKING = datetime.timedelta(minutes=15)
75def _getBeginEnd(dayObs=None, begin=None, end=None, timespan=None, event=None, expRecord=None):
76 """Calculate the begin and end times to pass to _getEfdData, given the
77 kwargs passed to getEfdData.
79 Parameters
80 ----------
81 dayObs : `int`
82 The dayObs to query. If specified, this is used to determine the begin
83 and end times.
84 begin : `astropy.Time`
85 The begin time for the query. If specified, either an end time or a
86 timespan must be supplied.
87 end : `astropy.Time`
88 The end time for the query. If specified, a begin time must also be
89 supplied.
90 timespan : `astropy.TimeDelta`
91 The timespan for the query. If specified, a begin time must also be
92 supplied.
93 event : `lsst.summit.utils.efdUtils.TmaEvent`
94 The event to query. If specified, this is used to determine the begin
95 and end times, and all other options are disallowed.
96 expRecord : `lsst.daf.butler.dimensions.DimensionRecord`
97 The exposure record containing the timespan to query. If specified, all
98 other options are disallowed.
100 Returns
101 -------
102 begin : `astropy.Time`
103 The begin time for the query.
104 end : `astropy.Time`
105 The end time for the query.
106 """
107 if expRecord is not None:
108 forbiddenOpts = [event, begin, end, timespan, dayObs]
109 if any(x is not None for x in forbiddenOpts):
110 raise ValueError("You can't specify both an expRecord and a begin/end or timespan or dayObs")
111 begin = expRecord.timespan.begin
112 end = expRecord.timespan.end
113 return begin, end
115 if event is not None:
116 forbiddenOpts = [begin, end, timespan, dayObs]
117 if any(x is not None for x in forbiddenOpts):
118 raise ValueError("You can't specify both an event and a begin/end or timespan or dayObs")
119 begin = event.begin
120 end = event.end
121 return begin, end
123 # check for dayObs, and that other options aren't inconsistently specified
124 if dayObs is not None:
125 forbiddenOpts = [begin, end, timespan]
126 if any(x is not None for x in forbiddenOpts):
127 raise ValueError("You can't specify both a dayObs and a begin/end or timespan")
128 begin = getDayObsStartTime(dayObs)
129 end = getDayObsEndTime(dayObs)
130 return begin, end
131 # can now disregard dayObs entirely
133 if begin is None:
134 raise ValueError("You must specify either a dayObs or a begin/end or begin/timespan")
135 # can now rely on begin, so just need to deal with end/timespan
137 if end is None and timespan is None:
138 raise ValueError("If you specify a begin, you must specify either a end or a timespan")
139 if end is not None and timespan is not None:
140 raise ValueError("You can't specify both a end and a timespan")
141 if end is None:
142 if timespan > datetime.timedelta(minutes=0):
143 end = begin + timespan # the normal case
144 else:
145 end = begin # the case where timespan is negative
146 begin = begin + timespan # adding the negative to the start, i.e. subtracting it to bring back
148 assert begin is not None
149 assert end is not None
150 return begin, end
def getEfdData(
    client,
    topic,
    *,
    columns=None,
    prePadding=0,
    postPadding=0,
    dayObs=None,
    begin=None,
    end=None,
    timespan=None,
    event=None,
    expRecord=None,
    warn=True,
):
    """Fetch the data for an EFD topic over a time range, synchronously.

    The time range can be given in any one of these ways:
        * a dayObs, in which case the whole dayObs is queried,
        * a begin point and an end point,
        * a begin point and a timespan,
        * a mount event,
        * an exposure record.
    To anchor the range on its end time instead, pass that time as ``begin``
    together with a negative ``timespan``.

    Parameters
    ----------
    client : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use.
    topic : `str`
        The topic to query.
    columns : `list` of `str`, optional
        The columns to query. If not specified, all columns are queried.
    prePadding : `float`
        Extra time, in seconds, to include before the nominal start of the
        range.
    postPadding : `float`
        Extra time, in seconds, to include after the nominal end of the
        range.
    dayObs : `int`, optional
        The dayObs to query. If specified, this is used to determine the begin
        and end times.
    begin : `astropy.Time`, optional
        The begin time for the query. If specified, either an end time or a
        timespan must be supplied.
    end : `astropy.Time`, optional
        The end time for the query. If specified, a begin time must also be
        supplied.
    timespan : `astropy.TimeDelta`, optional
        The timespan for the query. If specified, a begin time must also be
        supplied.
    event : `lsst.summit.utils.efdUtils.TmaEvent`, optional
        The event to query. If specified, this is used to determine the begin
        and end times, and all other range options are disallowed.
    expRecord : `lsst.daf.butler.dimensions.DimensionRecord`, optional
        The exposure record containing the timespan to query. If specified,
        all other range options are disallowed.
    warn : `bool`, optional
        If ``True``, log a warning when the query returns no data. Exists so
        that utility code probing for data can silence the warning.

    Returns
    -------
    data : `pd.DataFrame`
        The data returned by the query for the topic.

    Raises
    ------
    ValueError
        Raised if the topic is not in the EFD schema.
    ValueError
        Raised if the range options are inconsistent, e.g. both a dayObs and
        a begin/end or timespan, or a begin without an end or timespan.
    """
    # TODO: DM-40100 ideally should call mpts as necessary so that users
    # needn't care if things are packed

    # NOTE(review): the topic is passed to the query exactly as supplied; no
    # alias lookup happens in this function.

    # TODO: RFC-948 Move this import back to top of file once is implemented.
    import nest_asyncio

    begin, end = _getBeginEnd(dayObs, begin, end, timespan, event, expRecord)
    # widen the nominal range by the requested padding on either side
    begin -= TimeDelta(prePadding, format="sec")
    end += TimeDelta(postPadding, format="sec")

    # patch asyncio so the query can be driven to completion from here even
    # when an event loop is already running
    nest_asyncio.apply()
    query = _getEfdData(client=client, topic=topic, begin=begin, end=end, columns=columns)
    result = asyncio.get_event_loop().run_until_complete(query)

    if warn and result.empty:
        logging.getLogger(__name__).warning(
            f"Topic {topic} is in the schema, but no data was returned by the query for the specified"
            " time range"
        )
    return result
async def _getEfdData(client, topic, begin, end, columns=None):
    """Query the EFD for one topic between two times.

    Parameters
    ----------
    client : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use.
    topic : `str`
        The topic to query.
    begin : `astropy.Time`
        The begin time for the query.
    end : `astropy.Time`
        The end time for the query.
    columns : `list` of `str`, optional
        The columns to query. If not specified, all columns are returned.

    Returns
    -------
    data : `pd.DataFrame`
        The data from the query.

    Raises
    ------
    ValueError
        Raised if the topic is not among those reported by the client.
    """
    # "*" selects every column; a bare string is wrapped into a list
    wanted = ["*"] if columns is None else columns
    wanted = list(ensure_iterable(wanted))

    # check the schema first so an unknown topic fails with a clear error
    knownTopics = await client.get_topics()
    if topic not in knownTopics:
        raise ValueError(f"Topic {topic} not in EFD schema")

    return await client.select_time_series(topic, wanted, begin.utc, end.utc)
def getMostRecentRowWithDataBefore(client, topic, timeToLookBefore, warnStaleAfterNMinutes=60 * 12):
    """Get the most recent row of data for a topic before a given time.

    The EFD is searched backwards from ``timeToLookBefore`` in chunks of
    ``TIME_CHUNKING`` until a chunk containing data is found, or until the
    search passes the start of dayObs 20190101.

    Parameters
    ----------
    client : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use.
    topic : `str`
        The topic to query.
    timeToLookBefore : `astropy.Time`
        The time to look before.
    warnStaleAfterNMinutes : `float`, optional
        The number of minutes after which to consider the data stale and issue
        a warning.

    Returns
    -------
    row : `pd.Series`
        The row of data from the EFD containing the most recent data before the
        specified time.

    Raises
    ------
    ValueError
        Raised if the topic is not in the EFD schema.
    ValueError
        Raised if the requested time is before the earliest time searched, or
        if no data is found anywhere in the searched range.
    """
    # the first positional argument of timedelta is *days*, so the unit must
    # be given explicitly for the threshold to really be in minutes
    staleAge = datetime.timedelta(minutes=warnStaleAfterNMinutes)

    firstDayPossible = getDayObsStartTime(20190101)

    if timeToLookBefore < firstDayPossible:
        raise ValueError(f"Requested time {timeToLookBefore} is before any data was put in the EFD")

    df = pd.DataFrame()
    beginTime = timeToLookBefore
    while df.empty and beginTime > firstDayPossible:
        # a negative timespan makes beginTime the *end* of the queried chunk
        df = getEfdData(client, topic, begin=beginTime, timespan=-TIME_CHUNKING, warn=False)
        beginTime -= TIME_CHUNKING

    # The loop only exits with an empty frame once the whole range has been
    # searched, including when beginTime lands exactly on firstDayPossible,
    # so emptiness alone is the right condition to raise on.
    if df.empty:
        raise ValueError(
            f"The entire EFD was searched backwards from {timeToLookBefore} and no data was "
            f"found in {topic=}"
        )

    lastRow = df.iloc[-1]
    commandTime = efdTimestampToAstropy(lastRow["private_efdStamp"])

    commandAge = timeToLookBefore - commandTime
    if commandAge > staleAge:
        log = logging.getLogger(__name__)
        log.warning(
            f"Component {topic} was last set {commandAge.sec / 60:.1f} minutes before the requested time"
        )

    return lastRow
def makeEfdClient(testing=False):
    """Create the EFD client appropriate for the site the code is running at.

    Parameters
    ----------
    testing : `bool`, optional
        Set to ``True`` if running in a test suite. This will default to using
        the USDF EFD, for which data has been recorded for replay by the
        ``vcr`` package. Note data must be re-recorded to ``vcr`` from both
        inside and outside the USDF when the package/data changes, due to the
        use of a proxy meaning that the web requests are different depending
        on whether the EFD is being contacted from inside or outside the USDF.

    Returns
    -------
    efdClient : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use for the current site.

    Raises
    ------
    RuntimeError
        Raised if ``lsst_efd_client`` could not be imported, if the site could
        not be determined, or if the site is not one of the recognized ones.
    """
    if not HAS_EFD_CLIENT:
        raise RuntimeError("Could not create EFD client because importing lsst_efd_client failed.")

    if testing:
        return EfdClient("usdf_efd")

    try:
        site = getSite()
    except ValueError as e:
        raise RuntimeError("Could not create EFD client as the site could not be determined") from e

    # which EFD instance serves each known site
    efdNameForSite = {
        "summit": "summit_efd",
        "tucson": "tucson_teststand_efd",
        "base": "base_efd",
        "staff-rsp": "usdf_efd",
        "rubin-devl": "usdf_efd",
        "usdf-k8s": "usdf_efd",
        "jenkins": "usdf_efd",
    }
    efdName = efdNameForSite.get(site)
    if efdName is None:
        raise RuntimeError(f"Could not create EFD client as the {site=} is not recognized")
    return EfdClient(efdName)
def expRecordToTimespan(expRecord):
    """Extract the begin and end times of an exposure record as a dict.

    The returned dict has the keys ``begin`` and ``end`` so that it can be
    unpacked directly into an efdClient.select_time_series() call.

    Parameters
    ----------
    expRecord : `lsst.daf.butler.dimensions.ExposureRecord`
        The exposure record.

    Returns
    -------
    timespanDict : `dict`
        The ``utc`` form of the record's timespan begin and end, keyed by
        ``begin`` and ``end``.
    """
    recordSpan = expRecord.timespan
    return dict(begin=recordSpan.begin.utc, end=recordSpan.end.utc)
def efdTimestampToAstropy(timestamp):
    """Convert an EFD timestamp into an astropy.time.Time object.

    Parameters
    ----------
    timestamp : `float`
        The timestamp, as a float. It is interpreted using astropy's ``unix``
        time format.

    Returns
    -------
    time : `astropy.time.Time`
        The timestamp as an astropy.time.Time object.
    """
    asTime = Time(timestamp, format="unix")
    return asTime
def astropyToEfdTimestamp(time):
    """Convert an astropy Time object into an EFD timestamp.

    Parameters
    ----------
    time : `astropy.time.Time`
        The time as an astropy.time.Time object.

    Returns
    -------
    timestamp : `float`
        The timestamp, in UTC, in unix seconds.
    """
    utcTime = time.utc
    return utcTime.unix
def clipDataToEvent(df, event, prePadding=0, postPadding=0, logger=None):
    """Trim a padded dataframe down to the time window of an event.

    Rows are kept when their ``private_efdStamp`` lies within the window,
    inclusive at both ends. A warning is logged if the window extends beyond
    the data at either end.

    Parameters
    ----------
    df : `pd.DataFrame`
        The dataframe to clip. Must contain a ``private_efdStamp`` column.
    event : `lsst.summit.utils.efdUtils.TmaEvent`
        The event to clip to.
    prePadding : `float`, optional
        The amount of time before the nominal start of the event to include, in
        seconds.
    postPadding : `float`, optional
        The amount of extra time after the nominal end of the event to include,
        in seconds.
    logger : `logging.Logger`, optional
        The logger to use. If not specified, the module's logger is used.

    Returns
    -------
    clipped : `pd.DataFrame`
        A copy of the rows which fall inside the window.
    """
    # NOTE(review): this compares event.begin.value/event.end.value directly
    # against private_efdStamp, so presumably the event times are held in a
    # format whose value is unix seconds - confirm against TmaEvent.
    windowStart = event.begin.value - prePadding
    windowEnd = event.end.value + postPadding

    log = logger if logger is not None else logging.getLogger(__name__)

    stamps = df["private_efdStamp"]
    if windowStart < stamps.min():
        log.warning(f"Requested begin time {windowStart} is before the start of the data")
    if windowEnd > stamps.max():
        log.warning(f"Requested end time {windowEnd} is after the end of the data")

    insideWindow = (stamps >= windowStart) & (stamps <= windowEnd)
    return df.loc[insideWindow].copy()
def offsetDayObs(dayObs, nDays):
    """Shift a dayObs forwards or backwards by a number of days.

    Parameters
    ----------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20231225
    nDays : `int`
        The number of days to offset the dayObs by. Negative values move
        backwards in time.

    Returns
    -------
    newDayObs : `int`
        The new dayObs, as an integer, e.g. 20231225
    """
    # go via a real date so that month and year boundaries are handled
    startDate = datetime.datetime.strptime(str(dayObs), "%Y%m%d")
    shiftedDate = startDate + datetime.timedelta(days=nDays)
    return int(f"{shiftedDate:%Y%m%d}")
def calcNextDay(dayObs):
    """Return the integer dayObs which follows the given one.

    Because dayObs values are integers they are not contiguous across month
    and year ends, so simply adding one is not safe; this goes via
    ``offsetDayObs`` instead.

    Parameters
    ----------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20231231

    Returns
    -------
    nextDayObs : `int`
        The next dayObs, as an integer, e.g. 20240101
    """
    return offsetDayObs(dayObs, nDays=1)
def calcPreviousDay(dayObs):
    """Given an integer dayObs, calculate the previous integer dayObs.

    Integers are used for dayObs, but dayObs values are therefore not
    contiguous due to month/year ends etc, so this utility provides a robust
    way to get the integer dayObs which precedes the one specified.

    Parameters
    ----------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20240101

    Returns
    -------
    previousDayObs : `int`
        The previous dayObs, as an integer, e.g. 20231231
    """
    return offsetDayObs(dayObs, -1)
def getDayObsStartTime(dayObs):
    """Return the moment at which the given dayObs begins.

    The observatory rolls the date over at UTC-12, so the start is taken to
    be twelve hours after midnight on the dayObs calendar date.

    Parameters
    ----------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20231225

    Returns
    -------
    time : `astropy.time.Time`
        The start of the dayObs as an astropy.time.Time object.
    """
    midnight = datetime.datetime.strptime(str(dayObs), "%Y%m%d")
    rolloverOffset = 12 * u.hour
    return Time(midnight) + rolloverOffset
def getDayObsEndTime(dayObs):
    """Return the moment at which the given dayObs ends.

    This is 24 hours after the start of the dayObs.

    Parameters
    ----------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20231225

    Returns
    -------
    time : `astropy.time.Time`
        The end of the dayObs as an astropy.time.Time object.
    """
    dayStart = getDayObsStartTime(dayObs)
    return dayStart + 24 * u.hour
def getDayObsForTime(time):
    """Work out which dayObs a given time belongs to.

    Parameters
    ----------
    time : `astropy.time.Time`
        The time.

    Returns
    -------
    dayObs : `int`
        The dayObs, as an integer, e.g. 20231225
    """
    # shifting back by twelve hours makes the calendar date of the shifted
    # time equal to the dayObs
    shift = TimeDelta(datetime.timedelta(hours=-12), format="datetime")
    isoDate = (time + shift).utc.isot[:10]  # the YYYY-MM-DD part
    return int(isoDate.replace("-", ""))
@deprecated(
    reason="getSubTopics() has been replaced by getTopics() and using wildcards. "
    "Will be removed after w_2023_50.",
    version="w_2023_40",
    category=FutureWarning,
)
def getSubTopics(client, topic):
    """List every EFD topic whose name starts with the given string.

    The string need not be a complete topic name: for example
    `getSubTopics(client, 'lsst.sal.ATMCS')` gives all the topics for the
    AuxTel Mount Control System, while `getSubTopics(client, 'lsst.sal.AT')`
    gives all those which relate to the AuxTel in general.

    Parameters
    ----------
    client : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use.
    topic : `str`
        The leading part of the topic names to find.

    Returns
    -------
    subTopics : `list` of `str`
        The matching topics, sorted.
    """
    allTopics = asyncio.get_event_loop().run_until_complete(client.get_topics())
    return sorted(name for name in allTopics if name.startswith(topic))
def getTopics(client, toFind, caseSensitive=False):
    """Return all the topics which match the topic query string.

    Supports wildcards, which are denoted as ``*``, as per shell globs. All
    other characters in the query, including ``.``, are matched literally.
    The query is anchored at the start of the topic name but not at the end,
    so a query without a trailing wildcard still matches any topic which
    begins with it.

    Example:
    >>> # assume topics are ['apple', 'banana', 'grape']
    >>> getTopics(client, 'a*p*')
    ['apple']
    >>> getTopics(client, '*a*p*')
    ['apple', 'grape']

    Parameters
    ----------
    client : `lsst_efd_client.efd_helper.EfdClient`
        The EFD client to use.
    toFind : `str`
        The query string, with optional wildcards denoted as *.
    caseSensitive : `bool`, optional
        If ``True``, the query is case sensitive. Defaults to ``False``.

    Returns
    -------
    matches : `list` of `str`
        The list of matching topics, in the order the client returned them.
    """
    loop = asyncio.get_event_loop()
    topics = loop.run_until_complete(client.get_topics())

    # Escape the literal pieces between the wildcards so that regex
    # metacharacters in the query (most importantly the dots in topic names)
    # only match themselves, then join the pieces with the regex wildcard.
    pattern = ".*".join(re.escape(piece) for piece in toFind.split("*"))
    flags = 0 if caseSensitive else re.IGNORECASE
    matcher = re.compile(pattern, flags)

    return [topic for topic in topics if matcher.match(topic)]
def getCommands(client, commands, begin, end, prePadding, postPadding, timeFormat="python"):
    """Find when each of a set of commands was issued within a time range.

    Parameters
    ----------
    client : `EfdClient`
        The client object used to retrieve EFD data.
    commands : `list`
        A list of commands to retrieve. Each is used as the topic of an EFD
        query.
    begin : `astropy.time.Time`
        The start time of the time range.
    end : `astropy.time.Time`
        The end time of the time range.
    prePadding : `float`
        The amount of time to pad before the begin time.
    postPadding : `float`
        The amount of time to pad after the end time.
    timeFormat : `str`
        One of 'pandas' or 'astropy' or 'python'. If 'pandas', the dictionary
        keys will be pandas timestamps, if 'astropy' they will be astropy times
        and if 'python' they will be python datetimes.

    Returns
    -------
    commandTimes : `dict` [`time`, `str`]
        A dictionary mapping the times at which the commands were issued to
        the command issued at that time. The type that `time` takes is
        determined by ``timeFormat``, and defaults to python datetime.

    Raises
    ------
    ValueError
        Raised if ``timeFormat`` is not one of the supported options.
    ValueError
        Raised if there is already a command at a timestamp in the dictionary,
        i.e. there is a collision.
    """
    if timeFormat not in ("pandas", "astropy", "python"):
        raise ValueError(f"format must be one of 'pandas', 'astropy' or 'python', not {timeFormat=}")

    # choose once how a row's index value is turned into a dictionary key
    keyConverters = {
        "pandas": lambda stamp: stamp,
        "astropy": Time,
        "python": lambda stamp: stamp.to_pydatetime(),
    }
    toKey = keyConverters[timeFormat]

    commandTimes = {}
    for command in list(ensure_iterable(commands)):
        data = getEfdData(
            client,
            command,
            begin=begin,
            end=end,
            prePadding=prePadding,
            postPadding=postPadding,
            warn=False,  # most commands will not be issued so we expect many empty queries
        )
        # Keying on the time alone is by far the simplest data structure, and
        # the chance of two commands being *exactly* simultaneous is minimal,
        # so collisions are simply treated as an error for now.
        for rowTime in data.index:
            timeKey = toKey(rowTime)
            if timeKey in commandTimes:
                raise ValueError(
                    f"There is already a command at {timeKey=} -" " make a better data structure!"
                )
            commandTimes[timeKey] = command
    return commandTimes