Coverage for python/lsst/summit/utils/blockUtils.py: 25%
157 statements
« prev ^ index » next coverage.py v7.3.3, created at 2023-12-20 20:59 +0000
« prev ^ index » next coverage.py v7.3.3, created at 2023-12-20 20:59 +0000
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import re
23import time
24import logging
25import pandas as pd
26import numpy as np
27from dataclasses import dataclass
28from astropy.time import Time
30from .enums import ScriptState
31from .efdUtils import (getEfdData,
32 makeEfdClient,
33 efdTimestampToAstropy,
34 )
# Names exported by a star-import of this module.
__all__ = ("BlockParser", "BlockInfo", "ScriptStatePoint")
@dataclass(kw_only=True, frozen=True)
class BlockInfo:
    """Information about the execution of a "block".

    Each BlockInfo instance contains information about a single block
    execution. This is identified by the block number and sequence number,
    which, when combined with the dayObs, define the block ID.

    Each BlockInfo instance contains the following information:
        * The block ID - this is the primary identifier, as a string, for
          example "BL52_20230615_02", which is parsed into:
            * The block number, as an integer, for example 52, for "BLOCK-52".
            * The dayObs, as an integer, for example 20230615.
            * The seqNum - the execution number of that block on that day.
        * The begin and end times of the block execution, as astropy.time.Time
        * The SAL indices which were involved in the block execution, as a list
        * The SITCOM tickets which were involved in the block execution, as a
          list of strings, including the SITCOM- prefix.
        * The states of the script during the block execution, as a list of
          ``ScriptStatePoint`` instances.

    Parameters
    ----------
    blockNumber : `int`
        The block number, as an integer.
    blockId : `str`
        The block ID, as a string.
    dayObs : `int`
        The dayObs the block was run on.
    seqNum : `int`
        The sequence number of the block.
    begin : `astropy.time.Time`
        The time the block execution began.
    end : `astropy.time.Time`
        The time the block execution ended.
    salIndices : `list` of `int`
        One or more SAL indices, relating to the block.
    tickets : `list` of `str`
        One or more SITCOM tickets, relating to the block.
    states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint`
        The states of the script during the block. Each element is a
        ``ScriptStatePoint`` which contains:
            - the time, as an astropy.time.Time
            - the state, as a ``ScriptState`` enum
            - the reason for state change, as a string, if present
    """
    blockNumber: int
    blockId: str
    dayObs: int
    seqNum: int
    begin: Time
    end: Time
    # A list of ints, matching the documented parameter type.
    salIndices: list
    tickets: list
    states: list

    def __repr__(self):
        # Keep the parentheses balanced so the repr reads as a single call.
        return (
            f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices},"
            f" tickets={self.tickets}, states={self.states!r})"
        )

    def _ipython_display_(self):
        """This is the function which runs when someone executes a cell in a
        notebook with just the class instance on its own, without calling
        print() or str() on it.
        """
        print(self.__str__())

    def __str__(self):
        # no literal \n allowed inside {} portion of f-strings until python
        # 3.12, but it can go in via a variable
        newline = ' \n'
        return (
            f"dayObs: {self.dayObs}\n"
            f"seqNum: {self.seqNum}\n"
            f"blockNumber: {self.blockNumber}\n"
            f"blockId: {self.blockId}\n"
            f"begin: {self.begin.isot}\n"
            f"end: {self.end.isot}\n"
            f"salIndices: {self.salIndices}\n"
            f"tickets: {self.tickets}\n"
            f"states: \n{newline.join([str(state) for state in self.states])}"
        )
@dataclass(kw_only=True, frozen=True)
class ScriptStatePoint:
    """A script's execution state at one moment in time.

    Parameters
    ----------
    time : `astropy.time.Time`
        When the state change happened.
    state : `lsst.summit.utils.enums.ScriptState`
        The state of the script at that time.
    reason : `str`
        The reason for the state change, if one was given.
    """
    time: Time
    state: ScriptState
    reason: str

    def __repr__(self):
        timeRepr = repr(self.time)
        stateRepr = repr(self.state)
        reasonRepr = repr(self.reason)
        return f"ScriptStatePoint(time={timeRepr}, state={stateRepr}, reason={reasonRepr})"

    def _ipython_display_(self):
        """Notebook hook: called when a cell evaluates to a bare instance,
        i.e. without an explicit print() or str() around it.
        """
        print(str(self))

    def __str__(self):
        summary = f"{self.state.name:>10} @ {self.time.isot}"
        # only append the reason when there is one to show
        if self.reason:
            summary += f" - {self.reason}"
        return summary
class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.
    """

    def __init__(self, dayObs, client=None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self):
        """Retrieve the data for the specified dayObs from the EFD.

        The result is stored on the instance as ``self.data``.
        """
        data = getEfdData(self.client, 'lsst.sal.Script.logevent_state', dayObs=self.dayObs)
        self.data = data

    def _addEmptyBlockColumns(self):
        """Add the four parsed block columns to ``self.data``, filled with
        missing values, so that the columns always exist.
        """
        for column in ('blockNum', 'blockId', 'blockDayObs', 'blockSeqNum'):
            # an explicit dtype avoids the warning which older pandas
            # versions emit for a bare ``pd.Series()``
            self.data[column] = pd.Series(dtype=object)

    def augmentDataSlow(self):
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added. Rows whose ``lastCheckpoint`` is not a
        string, or whose block ID has fewer than four underscore-separated
        fields, keep missing values in the columns which could not be parsed.
        """
        data = self.data
        self._addEmptyBlockColumns()

        if 'lastCheckpoint' not in data.columns:
            nRows = len(data)
            self.log.warning(f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                             " so block data cannot be parsed.")
            # nothing to parse, and iterating would raise a KeyError
            return

        blockPattern = re.compile(r"BLOCK-(\d+)")
        blockIdPattern = re.compile(r"BL\d+(?:_\w+)+")

        for index, row in data.iterrows():
            rowStr = row['lastCheckpoint']
            if not isinstance(rowStr, str):
                # missing checkpoint (None/NaN): leave the placeholders
                continue

            blockMatch = blockPattern.search(rowStr)
            blockNumber = int(blockMatch.group(1)) if blockMatch else None
            data.loc[index, 'blockNum'] = blockNumber

            blockIdMatch = blockIdPattern.search(rowStr)
            blockId = blockIdMatch.group(0) if blockIdMatch else None
            data.loc[index, 'blockId'] = blockId
            if blockId is None:
                continue

            # the dayObs and seqNum are the third and fourth fields
            idParts = blockId.split('_')
            if len(idParts) > 3:
                data.loc[index, 'blockDayObs'] = int(idParts[2])
                data.loc[index, 'blockSeqNum'] = int(idParts[3])

    def augmentData(self):
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added. If no block ID splits into at least four
        underscore-separated fields, the latter two are filled with NaN.
        """
        if 'lastCheckpoint' not in self.data.columns:
            nRows = len(self.data)
            self.log.warning(f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                             " so block data cannot be parsed.")
            # add the columns that would have been added for consistency
            self._addEmptyBlockColumns()
            return

        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data['lastCheckpoint']
        # go via float so that rows without a match become missing values in
        # the nullable integer column
        data['blockNum'] = col.str.extract(blockPattern, expand=False).astype(float).astype(pd.Int64Dtype())
        data['blockId'] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data['blockId'].str.split('_', expand=True)
        # columns 2 and 3 are both read below, so at least four are needed
        if blockIdSplit.shape[1] > 3:  # parsing the blockId succeeded
            data['blockDayObs'] = blockIdSplit[2].astype(float).astype(pd.Int64Dtype())
            data['blockSeqNum'] = blockIdSplit[3].astype(float).astype(pd.Int64Dtype())
        else:  # make nan filled columns for these
            data['blockDayObs'] = np.nan
            data['blockSeqNum'] = np.nan

    def _listColumnValues(self, column, removeNone=True):
        """Get all the different values for the specified column, as a list.

        Missing values are always dropped before the distinct values are
        collected, so ``None`` does not appear in the result whether or not
        ``removeNone`` is set.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Whether to remove None from the list of values.

        Returns
        -------
        values : `list`
            The sorted, distinct values for the specified column.
        """
        values = set(self.data[column].dropna())
        if removeNone:
            values.discard(None)
        return sorted(values)

    def getBlockNums(self):
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues('blockNum')

    def getSeqNums(self, block):
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sequence numbers for the specified block.
        """
        seqNums = self.data[self.data['blockNum'] == block]['blockSeqNum']
        # rows can carry a block number without a block ID, and so have no
        # seqNum; drop those, as missing values cannot be sorted
        return sorted(set(seqNums.dropna()))

    def getRows(self, block, seqNum=None):
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        rowsForBlock = self.data[self.data['blockNum'] == block]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock['blockSeqNum'] == seqNum]

    def printBlockEvolution(self, block, seqNum=None):
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        if seqNum is None:
            seqNums = self.getSeqNums(block)
        else:
            seqNums = [seqNum]

        print(f'Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:')
        # a separate loop name keeps the ``seqNum`` argument intact
        for num in seqNums:
            blockInfo = self.getBlockInfo(block, num)
            print(blockInfo, '\n')

    def getBlockInfo(self, block, seqNum):
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` if there are no rows for the specified
            block and sequence number.

        Raises
        ------
        RuntimeError
            Raised if the rows for this execution contain more than one
            distinct block ID.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f'No {seqNum=} on dayObs={self.dayObs} for {block=}')
            return

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = r"SITCOM-(\d+)"

        for index, row in rows.iterrows():
            salIndices.add(row['salIndex'])
            blockIds.add(row['blockId'])

            lastCheckpoint = row['lastCheckpoint']
            sitcomMatches = re.findall(sitcomPattern, lastCheckpoint)
            tickets.update(sitcomMatches)

            # not called ``time``, to avoid shadowing the time module
            stateTime = efdTimestampToAstropy(row['private_efdStamp'])
            state = ScriptState(row['state'])
            reason = row['reason']
            statePoint = ScriptStatePoint(time=stateTime, state=state, reason=reason)
            statePoints.append(statePoint)

        # a single execution of a block must map onto a single block ID
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            # the first and last rows bound the execution
            begin=efdTimestampToAstropy(rows.iloc[0]['private_efdStamp']),
            end=efdTimestampToAstropy(rows.iloc[-1]['private_efdStamp']),
            salIndices=sorted(salIndices),
            tickets=[f'SITCOM-{ticket}' for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events, block, seqNum):
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the specified block execution was not found.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such block execution, so no event can overlap it
            return []
        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]