Coverage for python/lsst/summit/utils/blockUtils.py: 25%
168 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-15 03:27 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-15 03:27 -0700
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21from __future__ import annotations
23import logging
24import re
25import time
26from dataclasses import dataclass
27from typing import TYPE_CHECKING
29import numpy as np
30import pandas as pd
31from astropy.time import Time
33from .efdUtils import efdTimestampToAstropy, getEfdData, makeEfdClient
34from .enums import ScriptState
36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true
37 from .tmaUtils import TMAEvent
39 try:
40 from lsst_efd_client import EfdClient
41 except ImportError:
42 EfdClient = None # this is currently just for mypy
44__all__ = ("BlockParser", "BlockInfo", "ScriptStatePoint")
47@dataclass(kw_only=True, frozen=True)
48class BlockInfo:
49 """Information about the execution of a "block".
51 Each BlockInfo instance contains information about a single block
52 execution. This is identified by the block number and sequence number,
53 which, when combined with the dayObs, define the block ID.
55 Each BlockInfo instance contains the following information:
56 * The block ID - this is the primary identifier, as a string, for
57 example "BL52_20230615_02", which is parsed into:
58 * The block number, as an integer, for example 52, for "BLOCK-52".
59 * The dayObs, as an integer, for example 20230615.
60 * The seqNum - the execution number of that block on that day.
61 * The begin and end times of the block execution, as astropy.time.Time
62 * The SAL indices which were involved in the block execution, as a list
63 * The SITCOM tickets which were involved in the block execution, as a
64 list of strings, including the SITCOM- prefix.
65 * The states of the script during the block execution, as a list of
66 ``ScriptStatePoint`` instances.
68 Parameters
69 ----------
70 blockNumber : `int`
71 The block number, as an integer.
72 blockId : `str`
73 The block ID, as a string.
74 dayObs : `int`
75 The dayObs the block was run on.
76 seqNum : `int`
77 The sequence number of the block.
78 begin : `astropy.time.Time`
79 The time the block execution began.
80 end : `astropy.time.Time`
81 The time the block execution ended.
82 salIndices : `list` of `int`
83 One or more SAL indices, relating to the block.
84 tickets : `list` of `str`
85 One or more SITCOM tickets, relating to the block.
86 states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint`
87 The states of the script during the block. Each element is a
88 ``ScriptStatePoint`` which contains:
89 - the time, as an astropy.time.Time
90 - the state, as a ``ScriptState`` enum
91 - the reason for state change, as a string, if present
92 """
94 blockNumber: int
95 blockId: str
96 dayObs: int
97 seqNum: int
98 begin: Time
99 end: Time
100 salIndices: list
101 tickets: list
102 states: list
104 def __repr__(self) -> str:
105 return (
106 f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices},"
107 f" tickets={self.tickets}, states={self.states!r}"
108 )
110 def _ipython_display_(self) -> None:
111 """This is the function which runs when someone executes a cell in a
112 notebook with just the class instance on its own, without calling
113 print() or str() on it.
114 """
115 print(self.__str__())
117 def __str__(self) -> str:
118 # no literal \n allowed inside {} portion of f-strings until python
119 # 3.12, but it can go in via a variable
120 newline = " \n"
121 return (
122 f"dayObs: {self.dayObs}\n"
123 f"seqNum: {self.seqNum}\n"
124 f"blockNumber: {self.blockNumber}\n"
125 f"blockId: {self.blockId}\n"
126 f"begin: {self.begin.isot}\n"
127 f"end: {self.end.isot}\n"
128 f"salIndices: {self.salIndices}\n"
129 f"tickets: {self.tickets}\n"
130 f"states: \n{newline.join([str(state) for state in self.states])}"
131 )
134@dataclass(kw_only=True, frozen=True)
135class ScriptStatePoint:
136 """The execution state of a script at a point in time.
138 Parameters
139 ----------
140 time : `astropy.time.Time`
141 The time of the state change.
142 state : `lsst.summit.utils.enums.ScriptState`
143 The state of the script at this point in time.
144 reason : `str`
145 The reason for the state change, if given.
146 """
148 time: Time
149 state: ScriptState
150 reason: str
152 def __repr__(self) -> str:
153 return f"ScriptStatePoint(time={self.time!r}, state={self.state!r}, reason={self.reason!r})"
155 def _ipython_display_(self) -> None:
156 """This is the function which runs when someone executes a cell in a
157 notebook with just the class instance on its own, without calling
158 print() or str() on it.
159 """
160 print(self.__str__())
162 def __str__(self) -> str:
163 reasonStr = f" - {self.reason}" if self.reason else ""
164 return f"{self.state.name:>10} @ {self.time.isot}{reasonStr}"
class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.

    Notes
    -----
    Construction queries the EFD and parses the result straight away. The
    parsed dataframe is held in ``self.data`` with four extra columns:
    ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``.
    """

    def __init__(self, dayObs: int, client: EfdClient | None = None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self) -> None:
        """Retrieve the data for the specified dayObs from the EFD.

        The result is stored in ``self.data``.
        """
        # Tiago thinks no individual block seqNums should take more than an
        # hour to run, so pad the dayObs by 1.5 hours to make sure we catch
        # any blocks which might span the end of the day.
        padding = 1.5 * 60 * 60
        data = getEfdData(
            self.client, "lsst.sal.Script.logevent_state", dayObs=self.dayObs, postPadding=padding
        )
        self.data = data

    def augmentDataSlow(self) -> None:
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added. If the data has no ``lastCheckpoint``
        column a warning is logged and the new columns are left unfilled.
        Rows whose checkpoint is not a string are left unfilled too, as are
        the dayObs/seqNum of block IDs with fewer than four "_"-separated
        fields.
        """
        data = self.data
        blockRegex = re.compile(r"BLOCK-(\d+)")
        blockIdRegex = re.compile(r"BL\d+(?:_\w+)+")

        data["blockNum"] = pd.Series()
        data["blockId"] = pd.Series()
        data["blockDayObs"] = pd.Series()
        data["blockSeqNum"] = pd.Series()

        if "lastCheckpoint" not in data.columns:
            nRows = len(data)
            self.log.warning(
                f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                " so block data cannot be parsed."
            )
            # nothing to parse; falling through would raise a KeyError on the
            # first row
            return

        for index, row in data.iterrows():
            rowStr = row["lastCheckpoint"]
            if not isinstance(rowStr, str):
                # null checkpoints can't be searched; leave the row unfilled,
                # as the vectorized version does
                continue

            blockMatch = blockRegex.search(rowStr)
            blockNumber = int(blockMatch.group(1)) if blockMatch else None
            data.loc[index, "blockNum"] = blockNumber

            blockIdMatch = blockIdRegex.search(rowStr)
            blockId = blockIdMatch.group(0) if blockIdMatch else None
            data.loc[index, "blockId"] = blockId
            if blockId is None:
                continue

            # dayObs and seqNum are read from the third and fourth fields,
            # so both must be present
            parts = blockId.split("_")
            if len(parts) > 3:
                data.loc[index, "blockDayObs"] = int(parts[2])
                data.loc[index, "blockSeqNum"] = int(parts[3])

    def augmentData(self) -> None:
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added. If the data has no ``lastCheckpoint``
        column a warning is logged and the new columns are left unfilled.
        """
        if "lastCheckpoint" not in self.data.columns:
            nRows = len(self.data)
            self.log.warning(
                f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                " so block data cannot be parsed."
            )
            # add the columns that would have been added for consistency
            self.data["blockNum"] = pd.Series()
            self.data["blockId"] = pd.Series()
            self.data["blockDayObs"] = pd.Series()
            self.data["blockSeqNum"] = pd.Series()
            return

        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data["lastCheckpoint"]
        # go via float so that missing matches become <NA> in the nullable
        # integer column rather than failing the cast
        data["blockNum"] = col.str.extract(blockPattern, expand=False).astype(float).astype(pd.Int64Dtype())
        data["blockId"] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data["blockId"].str.split("_", expand=True)
        # Columns 2 and 3 are both read below, so at least four fields are
        # needed. A looser check raises KeyError for three-field block IDs.
        if blockIdSplit.shape[1] > 3:  # parsing the blockId succeeded
            data["blockDayObs"] = blockIdSplit[2].astype(float).astype(pd.Int64Dtype())
            data["blockSeqNum"] = blockIdSplit[3].astype(float).astype(pd.Int64Dtype())
        else:  # make nan filled columns for these
            data["blockDayObs"] = pd.Series(np.nan, index=data.index, dtype=float)
            data["blockSeqNum"] = pd.Series(np.nan, index=data.index, dtype=float)

    def _listColumnValues(self, column: str, removeNone: bool = True) -> list:
        """Get all the different values for the specified column, as a list.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Whether to remove None from the list of values. Null entries are
            always dropped before the values are collected, so this currently
            makes no difference to the result.

        Returns
        -------
        values : `list`
            The sorted, de-duplicated values for the specified column.
        """
        values = set(self.data[column].dropna())
        if removeNone:
            values.discard(None)
        return sorted(values)

    def getBlockNums(self) -> list[int]:
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues("blockNum")

    def getSeqNums(self, block: int) -> list[int]:
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sorted sequence numbers for the specified block.
        """
        seqNums = self.data[self.data["blockNum"] == block]["blockSeqNum"]
        # block header rows have no blockId or seqNum, but do have a blockNum
        # so appear here, so drop the nans as they don't relate to an actual
        # run of a block
        seqNums = seqNums.dropna()
        return sorted(set(seqNums))

    def getRows(self, block: int, seqNum: int | None = None) -> pd.DataFrame:
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        # Because we query for a whole dayObs, but BLOCKs can overlap the day
        # start/end, it's possible for the block's blockDayObs not to be the
        # same as self.dayObs around the beginning or end of the day, so filter
        # with an extra `& (self.data['blockDayObs'] == self.dayObs` when
        # getting the relevant rows.
        rowsForBlock = self.data[
            np.logical_and(self.data["blockNum"] == block, self.data["blockDayObs"] == self.dayObs)
        ]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock["blockSeqNum"] == seqNum]

    def printBlockEvolution(self, block: int, seqNum: int | None = None) -> None:
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        if seqNum is None:
            seqNums = self.getSeqNums(block)
        else:
            seqNums = [seqNum]

        print(f"Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:")
        # use a separate loop variable so the argument isn't rebound
        for currentSeqNum in seqNums:
            blockInfo = self.getBlockInfo(block, currentSeqNum)
            print(blockInfo, "\n")

    def getBlockInfo(self, block: int, seqNum: int) -> BlockInfo | None:
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` (after printing a message) if there
            are no rows for this block and seqNum on this dayObs.

        Raises
        ------
        RuntimeError
            Raised if the matching rows carry more than one block ID.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f"No {seqNum=} on dayObs={self.dayObs} for {block=}")
            return None

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = re.compile(r"SITCOM-(\d+)")

        for _, row in rows.iterrows():
            salIndices.add(row["salIndex"])
            blockIds.add(row["blockId"])

            tickets.update(sitcomPattern.findall(row["lastCheckpoint"]))

            # named to avoid shadowing the module-level ``time`` import
            stateTime = efdTimestampToAstropy(row["private_efdStamp"])
            statePoints.append(
                ScriptStatePoint(time=stateTime, state=ScriptState(row["state"]), reason=row["reason"])
            )

        # a single block execution must map onto exactly one blockId
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        # begin and end are taken from the first and last matching rows
        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            begin=efdTimestampToAstropy(rows.iloc[0]["private_efdStamp"]),
            end=efdTimestampToAstropy(rows.iloc[-1]["private_efdStamp"]),
            salIndices=sorted(salIndices),
            tickets=[f"SITCOM-{ticket}" for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events: list[TMAEvent], block: int, seqNum: int) -> list[TMAEvent]:
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the block/seqNum combination has no data on
            this dayObs.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such block execution, so nothing can overlap it
            return []
        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]