Coverage for python/lsst/summit/utils/blockUtils.py: 25%

168 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-03 04:43 -0700

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import logging 

24import re 

25import time 

26from dataclasses import dataclass 

27from typing import TYPE_CHECKING 

28 

29import numpy as np 

30import pandas as pd 

31from astropy.time import Time 

32 

33from .efdUtils import efdTimestampToAstropy, getEfdData, makeEfdClient 

34from .enums import ScriptState 

35 

36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true

37 from .tmaUtils import TMAEvent 

38 

39 try: 

40 from lsst_efd_client import EfdClient 

41 except ImportError: 

42 EfdClient = None # this is currently just for mypy 

43 

44__all__ = ("BlockParser", "BlockInfo", "ScriptStatePoint") 

45 

46 

47@dataclass(kw_only=True, frozen=True) 

48class BlockInfo: 

49 """Information about the execution of a "block". 

50 

51 Each BlockInfo instance contains information about a single block 

52 execution. This is identified by the block number and sequence number, 

53 which, when combined with the dayObs, define the block ID. 

54 

55 Each BlockInfo instance contains the following information: 

56 * The block ID - this is the primary identifier, as a string, for 

57 example "BL52_20230615_02", which is parsed into: 

58 * The block number, as an integer, for example 52, for "BLOCK-52". 

59 * The dayObs, as an integer, for example 20230615. 

60 * The seqNum - the execution number of that block on that day. 

61 * The begin and end times of the block execution, as astropy.time.Time 

62 * The SAL indices which were involved in the block execution, as a list 

63 * The SITCOM tickets which were involved in the block execution, as a 

64 list of strings, including the SITCOM- prefix. 

65 * The states of the script during the block execution, as a list of 

66 ``ScriptStatePoint`` instances. 

67 

68 Parameters 

69 ---------- 

70 blockNumber : `int` 

71 The block number, as an integer. 

72 blockId : `str` 

73 The block ID, as a string. 

74 dayObs : `int` 

75 The dayObs the block was run on. 

76 seqNum : `int` 

77 The sequence number of the block. 

78 begin : `astropy.time.Time` 

79 The time the block execution began. 

80 end : `astropy.time.Time` 

81 The time the block execution ended. 

82 salIndices : `list` of `int` 

83 One or more SAL indices, relating to the block. 

84 tickets : `list` of `str` 

85 One or more SITCOM tickets, relating to the block. 

86 states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint` 

87 The states of the script during the block. Each element is a 

88 ``ScriptStatePoint`` which contains: 

89 - the time, as an astropy.time.Time 

90 - the state, as a ``ScriptState`` enum 

91 - the reason for state change, as a string, if present 

92 """ 

93 

94 blockNumber: int 

95 blockId: str 

96 dayObs: int 

97 seqNum: int 

98 begin: Time 

99 end: Time 

100 salIndices: list 

101 tickets: list 

102 states: list 

103 

104 def __repr__(self) -> str: 

105 return ( 

106 f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices}," 

107 f" tickets={self.tickets}, states={self.states!r}" 

108 ) 

109 

110 def _ipython_display_(self) -> None: 

111 """This is the function which runs when someone executes a cell in a 

112 notebook with just the class instance on its own, without calling 

113 print() or str() on it. 

114 """ 

115 print(self.__str__()) 

116 

117 def __str__(self) -> str: 

118 # no literal \n allowed inside {} portion of f-strings until python 

119 # 3.12, but it can go in via a variable 

120 newline = " \n" 

121 return ( 

122 f"dayObs: {self.dayObs}\n" 

123 f"seqNum: {self.seqNum}\n" 

124 f"blockNumber: {self.blockNumber}\n" 

125 f"blockId: {self.blockId}\n" 

126 f"begin: {self.begin.isot}\n" 

127 f"end: {self.end.isot}\n" 

128 f"salIndices: {self.salIndices}\n" 

129 f"tickets: {self.tickets}\n" 

130 f"states: \n{newline.join([str(state) for state in self.states])}" 

131 ) 

132 

133 

134@dataclass(kw_only=True, frozen=True) 

135class ScriptStatePoint: 

136 """The execution state of a script at a point in time. 

137 

138 Parameters 

139 ---------- 

140 time : `astropy.time.Time` 

141 The time of the state change. 

142 state : `lsst.summit.utils.enums.ScriptState` 

143 The state of the script at this point in time. 

144 reason : `str` 

145 The reason for the state change, if given. 

146 """ 

147 

148 time: Time 

149 state: ScriptState 

150 reason: str 

151 

152 def __repr__(self) -> str: 

153 return f"ScriptStatePoint(time={self.time!r}, state={self.state!r}, reason={self.reason!r})" 

154 

155 def _ipython_display_(self) -> None: 

156 """This is the function which runs when someone executes a cell in a 

157 notebook with just the class instance on its own, without calling 

158 print() or str() on it. 

159 """ 

160 print(self.__str__()) 

161 

162 def __str__(self) -> str: 

163 reasonStr = f" - {self.reason}" if self.reason else "" 

164 return f"{self.state.name:>10} @ {self.time.isot}{reasonStr}" 

165 

166 

class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.
    """

    def __init__(self, dayObs: int, client: EfdClient | None = None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        # Fetch and parse eagerly so the instance is ready to query as soon
        # as it has been constructed; both steps are timed for debugging.
        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self) -> None:
        """Retrieve the data for the specified dayObs from the EFD.

        The result is stored on ``self.data``.
        """
        # Tiago thinks no individual block seqNums should take more than an
        # hour to run, so pad the dayObs by 1.5 hours to make sure we catch
        # any blocks which might span the end of the day.
        padding = 1.5 * 60 * 60
        data = getEfdData(
            self.client, "lsst.sal.Script.logevent_state", dayObs=self.dayObs, postPadding=padding
        )
        self.data = data

    def augmentDataSlow(self) -> None:
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added to ``self.data``. If there is no
        ``lastCheckpoint`` column to parse, a warning is logged and the new
        columns are left empty.
        """
        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"BL\d+(?:_\w+)+"

        data["blockNum"] = pd.Series()
        data["blockId"] = pd.Series()
        data["blockDayObs"] = pd.Series()
        data["blockSeqNum"] = pd.Series()

        if "lastCheckpoint" not in data.columns:
            nRows = len(data)
            self.log.warning(
                f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                " so block data cannot be parsed."
            )
            # Nothing to parse: iterating the rows would fail looking up the
            # missing column, so stop with the empty columns in place.
            return

        for index, row in data.iterrows():
            rowStr = row["lastCheckpoint"]

            blockMatch = re.search(blockPattern, rowStr)
            blockNumber = int(blockMatch.group(1)) if blockMatch else None
            data.loc[index, "blockNum"] = blockNumber

            blockIdMatch = re.search(blockIdPattern, rowStr)
            blockId = blockIdMatch.group(0) if blockIdMatch else None
            data.loc[index, "blockId"] = blockId
            if blockId is None:
                continue

            # The dayObs and seqNum are the third and fourth "_"-separated
            # fields of the blockId; shorter ids carry neither, so leave the
            # columns unset for those rows rather than failing on the index.
            idParts = blockId.split("_")
            if len(idParts) > 3:
                data.loc[index, "blockDayObs"] = int(idParts[2])
                data.loc[index, "blockSeqNum"] = int(idParts[3])

    def augmentData(self) -> None:
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.

        The ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum``
        columns are always added to ``self.data``.
        """
        if "lastCheckpoint" not in self.data.columns:
            nRows = len(self.data)
            self.log.warning(
                f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                " so block data cannot be parsed."
            )
            # add the columns that would have been added for consistency
            self.data["blockNum"] = pd.Series()
            self.data["blockId"] = pd.Series()
            self.data["blockDayObs"] = pd.Series()
            self.data["blockSeqNum"] = pd.Series()
            return

        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data["lastCheckpoint"]
        # Go via float so that rows with no match (NaN) survive the
        # conversion to the nullable integer type.
        data["blockNum"] = col.str.extract(blockPattern, expand=False).astype(float).astype(pd.Int64Dtype())
        data["blockId"] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data["blockId"].str.split("_", expand=True)
        # Columns 2 and 3 are both read below, so at least four fields must
        # have come out of the split for the parsing to have succeeded.
        if blockIdSplit.shape[1] > 3:
            data["blockDayObs"] = blockIdSplit[2].astype(float).astype(pd.Int64Dtype())
            data["blockSeqNum"] = blockIdSplit[3].astype(float).astype(pd.Int64Dtype())
        else:  # make nan filled columns for these
            data["blockDayObs"] = pd.Series(np.nan, index=data.index)
            data["blockSeqNum"] = pd.Series(np.nan, index=data.index)

    def _listColumnValues(self, column: str, removeNone: bool = True) -> list:
        """Get all the different values for the specified column, as a list.

        Missing values are always dropped before the unique values are
        collected.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Whether to remove None from the list of values.

        Returns
        -------
        values : `list`
            The sorted, unique values for the specified column.
        """
        values = set(self.data[column].dropna())
        if None in values and removeNone:
            values.remove(None)
        return sorted(values)

    def getBlockNums(self) -> list[int]:
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues("blockNum")

    def getSeqNums(self, block: int) -> list[int]:
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sequence numbers for the specified block.
        """
        seqNums = self.data[self.data["blockNum"] == block]["blockSeqNum"]
        # block header rows have no blockId or seqNum, but do have a blockNum
        # so appear here, so drop the nans as they don't relate to an actual
        # run of a block
        seqNums = seqNums.dropna()
        return sorted(set(seqNums))

    def getRows(self, block: int, seqNum: int | None = None) -> pd.DataFrame:
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        # Because we query for a whole dayObs, but BLOCKs can overlap the day
        # start/end, it's possible for the block's blockDayObs not to be the
        # same as self.dayObs around the beginning or end of the day, so
        # filter on the blockDayObs as well as the block number when getting
        # the relevant rows.
        isBlock = self.data["blockNum"] == block
        isDayObs = self.data["blockDayObs"] == self.dayObs
        rowsForBlock = self.data[isBlock & isDayObs]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock["blockSeqNum"] == seqNum]

    def printBlockEvolution(self, block: int, seqNum: int | None = None) -> None:
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        if seqNum is None:
            seqNums = self.getSeqNums(block)
        else:
            seqNums = [seqNum]
        print(f"Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:")
        for blockSeqNum in seqNums:
            blockInfo = self.getBlockInfo(block, blockSeqNum)
            if blockInfo is None:
                # getBlockInfo has already reported the missing execution
                continue
            print(blockInfo, "\n")

    def getBlockInfo(self, block: int, seqNum: int) -> BlockInfo | None:
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` if there are no rows for the specified
            block and seqNum on this dayObs.

        Raises
        ------
        RuntimeError
            Raised if the rows for this execution contain more than one
            distinct blockId.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f"No {seqNum=} on dayObs={self.dayObs} for {block=}")
            return None

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = r"SITCOM-(\d+)"

        for _, row in rows.iterrows():
            salIndices.add(row["salIndex"])
            blockIds.add(row["blockId"])

            lastCheckpoint = row["lastCheckpoint"]
            sitcomMatches = re.findall(sitcomPattern, lastCheckpoint)
            tickets.update(sitcomMatches)

            # named stateTime so as not to shadow the time module
            stateTime = efdTimestampToAstropy(row["private_efdStamp"])
            state = ScriptState(row["state"])
            reason = row["reason"]
            statePoint = ScriptStatePoint(time=stateTime, state=state, reason=reason)
            statePoints.append(statePoint)

        # a single execution of a block must map to exactly one blockId
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            # the first and last rows bound the execution
            begin=efdTimestampToAstropy(rows.iloc[0]["private_efdStamp"]),
            end=efdTimestampToAstropy(rows.iloc[-1]["private_efdStamp"]),
            salIndices=sorted(salIndices),
            tickets=[f"SITCOM-{ticket}" for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events: list[TMAEvent], block: int, seqNum: int) -> list[TMAEvent]:
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the specified block execution was not found.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such block execution, so no event can have occurred in it
            return []
        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]