Coverage for python/lsst/summit/utils/blockUtils.py: 24%
160 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 05:01 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 05:01 -0700
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import logging
23import re
24import time
25from dataclasses import dataclass
27import numpy as np
28import pandas as pd
29from astropy.time import Time
31from .efdUtils import efdTimestampToAstropy, getEfdData, makeEfdClient
32from .enums import ScriptState
34__all__ = ("BlockParser", "BlockInfo", "ScriptStatePoint")
@dataclass(kw_only=True, frozen=True)
class BlockInfo:
    """Information about the execution of a "block".

    Each BlockInfo instance contains information about a single block
    execution. This is identified by the block number and sequence number,
    which, when combined with the dayObs, define the block ID.

    Each BlockInfo instance contains the following information:
        * The block ID - this is the primary identifier, as a string, for
          example "BL52_20230615_02", which is parsed into:
            * The block number, as an integer, for example 52, for "BLOCK-52".
            * The dayObs, as an integer, for example 20230615.
            * The seqNum - the execution number of that block on that day.
        * The begin and end times of the block execution, as astropy.time.Time
        * The SAL indices which were involved in the block execution, as a list
        * The SITCOM tickets which were involved in the block execution, as a
          list of strings, including the SITCOM- prefix.
        * The states of the script during the block execution, as a list of
          ``ScriptStatePoint`` instances.

    Parameters
    ----------
    blockNumber : `int`
        The block number, as an integer.
    blockId : `str`
        The block ID, as a string.
    dayObs : `int`
        The dayObs the block was run on.
    seqNum : `int`
        The sequence number of the block.
    begin : `astropy.time.Time`
        The time the block execution began.
    end : `astropy.time.Time`
        The time the block execution ended.
    salIndices : `list` of `int`
        One or more SAL indices, relating to the block.
    tickets : `list` of `str`
        One or more SITCOM tickets, relating to the block.
    states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint`
        The states of the script during the block. Each element is a
        ``ScriptStatePoint`` which contains:
            - the time, as an astropy.time.Time
            - the state, as a ``ScriptState`` enum
            - the reason for state change, as a string, if present
    """

    blockNumber: int
    blockId: str
    dayObs: int
    seqNum: int
    begin: Time
    end: Time
    salIndices: list
    tickets: list
    states: list

    def __repr__(self):
        # Abbreviated on purpose: only the identifying fields and the states
        # are shown. The closing parenthesis was previously missing, which
        # left the repr unbalanced.
        return (
            f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices},"
            f" tickets={self.tickets}, states={self.states!r})"
        )

    def _ipython_display_(self):
        """This is the function which runs when someone executes a cell in a
        notebook with just the class instance on its own, without calling
        print() or str() on it.
        """
        print(self.__str__())

    def __str__(self):
        # no literal \n allowed inside {} portion of f-strings until python
        # 3.12, but it can go in via a variable
        newline = " \n"
        return (
            f"dayObs: {self.dayObs}\n"
            f"seqNum: {self.seqNum}\n"
            f"blockNumber: {self.blockNumber}\n"
            f"blockId: {self.blockId}\n"
            f"begin: {self.begin.isot}\n"
            f"end: {self.end.isot}\n"
            f"salIndices: {self.salIndices}\n"
            f"tickets: {self.tickets}\n"
            f"states: \n{newline.join([str(state) for state in self.states])}"
        )
@dataclass(kw_only=True, frozen=True)
class ScriptStatePoint:
    """A single script state transition, stamped with the time it happened.

    Parameters
    ----------
    time : `astropy.time.Time`
        When the script changed state.
    state : `lsst.summit.utils.enums.ScriptState`
        The state the script was in at ``time``.
    reason : `str`
        Why the state changed, if a reason was given; may be empty.
    """

    time: Time
    state: ScriptState
    reason: str

    def __repr__(self):
        fields = f"time={self.time!r}, state={self.state!r}, reason={self.reason!r}"
        return f"ScriptStatePoint({fields})"

    def _ipython_display_(self):
        """Notebook display hook: evaluating a bare instance in a cell shows
        the same text as calling print() on it.
        """
        print(str(self))

    def __str__(self):
        # state name right-aligned to 10 characters so consecutive points
        # line up when printed one per line
        summary = f"{self.state.name:>10} @ {self.time.isot}"
        if self.reason:
            summary += f" - {self.reason}"
        return summary
class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.
    """

    # The columns which the augment methods add to ``self.data``.
    _blockColumns = ("blockNum", "blockId", "blockDayObs", "blockSeqNum")

    def __init__(self, dayObs, client=None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self):
        """Retrieve the data for the specified dayObs from the EFD.

        The result is stored as ``self.data``.
        """
        # Tiago thinks no individual block seqNums should take more than an
        # hour to run, so pad the dayObs by 1.5 hours to make sure we catch
        # any blocks which might span the end of the day.
        padding = 1.5 * 60 * 60
        data = getEfdData(
            self.client, "lsst.sal.Script.logevent_state", dayObs=self.dayObs, postPadding=padding
        )
        self.data = data

    def _hasLastCheckpointColumn(self):
        """Check that ``self.data`` has a 'lastCheckpoint' column, logging a
        warning if it does not.

        Returns
        -------
        hasColumn : `bool`
            Whether the 'lastCheckpoint' column is present.
        """
        if "lastCheckpoint" in self.data.columns:
            return True

        nRows = len(self.data)
        self.log.warning(
            f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
            " so block data cannot be parsed."
        )
        return False

    def augmentDataSlow(self):
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added. They are left empty if there is no
        'lastCheckpoint' column to parse, and for any row whose checkpoint is
        not a string or does not contain the relevant pattern.
        """
        data = self.data
        blockPattern = re.compile(r"BLOCK-(\d+)")
        blockIdPattern = re.compile(r"BL\d+(?:_\w+)+")

        # object dtype so that ints, strings and missing values can all be
        # written row-by-row without any dtype coercion
        for column in self._blockColumns:
            data[column] = pd.Series(index=data.index, dtype=object)

        if not self._hasLastCheckpointColumn():
            # nothing to parse; previously this fell through and raised a
            # KeyError on the first row
            return

        for index, rowStr in data["lastCheckpoint"].items():
            if not isinstance(rowStr, str):
                # missing checkpoint: nothing to parse, as in augmentData
                continue

            blockMatch = blockPattern.search(rowStr)
            blockNumber = int(blockMatch.group(1)) if blockMatch else None
            data.loc[index, "blockNum"] = blockNumber

            blockIdMatch = blockIdPattern.search(rowStr)
            blockId = blockIdMatch.group(0) if blockIdMatch else None
            data.loc[index, "blockId"] = blockId
            if blockId is None:
                continue

            # the dayObs and seqNum are read from the third and fourth
            # underscore-separated fields, so only parse them when present
            parts = blockId.split("_")
            if len(parts) >= 4:
                data.loc[index, "blockDayObs"] = int(parts[2])
                data.loc[index, "blockSeqNum"] = int(parts[3])

    def augmentData(self):
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added, and are filled with missing values where
        the information could not be parsed.
        """
        if not self._hasLastCheckpointColumn():
            # add the columns that would have been added for consistency
            for column in self._blockColumns:
                self.data[column] = np.nan
            return

        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data["lastCheckpoint"]
        # go via float so that rows with no match (NaN) survive the cast to
        # the nullable integer type
        data["blockNum"] = col.str.extract(blockPattern, expand=False).astype(float).astype(pd.Int64Dtype())
        data["blockId"] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data["blockId"].str.split("_", expand=True)
        # Columns 2 and 3 are read below, so at least four fields are needed.
        # The previous ``columns.max() > 1`` check let three-field IDs
        # through, which then raised a KeyError on column 3.
        if blockIdSplit.shape[1] >= 4:  # parsing the blockId succeeded
            data["blockDayObs"] = blockIdSplit[2].astype(float).astype(pd.Int64Dtype())
            data["blockSeqNum"] = blockIdSplit[3].astype(float).astype(pd.Int64Dtype())
        else:  # make all-missing columns, with the same dtype as above
            for column in ("blockDayObs", "blockSeqNum"):
                data[column] = pd.Series(pd.NA, index=data.index, dtype=pd.Int64Dtype())

    def _listColumnValues(self, column, removeNone=True):
        """Get all the different values for the specified column, as a list.

        Missing values (None/NaN) are always dropped before the values are
        collected, so they never appear in the result.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Retained for backwards compatibility. It has no effect, because
            missing values are always dropped.

        Returns
        -------
        values : `list`
            The sorted, unique values for the specified column.
        """
        return sorted(set(self.data[column].dropna()))

    def getBlockNums(self):
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues("blockNum")

    def getSeqNums(self, block):
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sequence numbers for the specified block.
        """
        seqNums = self.data[self.data["blockNum"] == block]["blockSeqNum"]
        # block header rows have no blockId or seqNum, but do have a blockNum
        # so appear here, so drop the nans as they don't relate to an actual
        # run of a block
        seqNums = seqNums.dropna()
        return sorted(set(seqNums))

    def getRows(self, block, seqNum=None):
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        # Because we query for a whole dayObs, but BLOCKs can overlap the day
        # start/end, it's possible for the block's blockDayObs not to be the
        # same as self.dayObs around the beginning or end of the day, so also
        # require the blockDayObs to match when selecting the relevant rows.
        data = self.data
        isThisBlock = data["blockNum"] == block
        isThisDay = data["blockDayObs"] == self.dayObs
        rowsForBlock = data[isThisBlock & isThisDay]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock["blockSeqNum"] == seqNum]

    def printBlockEvolution(self, block, seqNum=None):
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        seqNums = self.getSeqNums(block) if seqNum is None else [seqNum]

        print(f"Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:")
        for num in seqNums:
            blockInfo = self.getBlockInfo(block, num)
            if blockInfo is None:
                # getBlockInfo has already reported that there is no such
                # execution, so don't also print a literal "None"
                continue
            print(blockInfo, "\n")

    def getBlockInfo(self, block, seqNum):
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` if there are no rows for the specified
            block and sequence number on this dayObs.

        Raises
        ------
        RuntimeError
            Raised if the rows for this execution contain more than one
            blockId.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f"No {seqNum=} on dayObs={self.dayObs} for {block=}")
            return None

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = re.compile(r"SITCOM-(\d+)")

        for _, row in rows.iterrows():
            salIndices.add(row["salIndex"])
            blockIds.add(row["blockId"])
            tickets.update(sitcomPattern.findall(row["lastCheckpoint"]))

            statePoints.append(
                ScriptStatePoint(
                    time=efdTimestampToAstropy(row["private_efdStamp"]),
                    state=ScriptState(row["state"]),
                    reason=row["reason"],
                )
            )

        # a single execution must map onto exactly one blockId
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            # the first and last rows bound the execution
            begin=efdTimestampToAstropy(rows.iloc[0]["private_efdStamp"]),
            end=efdTimestampToAstropy(rows.iloc[-1]["private_efdStamp"]),
            salIndices=sorted(salIndices),
            tickets=[f"SITCOM-{ticket}" for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events, block, seqNum):
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the specified block execution was not found.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such execution, so no event can have occurred during it
            return []

        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]