Coverage for python/lsst/summit/utils/blockUtils.py: 24%

160 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-17 04:43 -0700

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import logging 

23import re 

24import time 

25from dataclasses import dataclass 

26 

27import numpy as np 

28import pandas as pd 

29from astropy.time import Time 

30 

31from .efdUtils import efdTimestampToAstropy, getEfdData, makeEfdClient 

32from .enums import ScriptState 

33 

34__all__ = ("BlockParser", "BlockInfo", "ScriptStatePoint") 

35 

36 

37@dataclass(kw_only=True, frozen=True) 

38class BlockInfo: 

39 """Information about the execution of a "block". 

40 

41 Each BlockInfo instance contains information about a single block 

42 execution. This is identified by the block number and sequence number, 

43 which, when combined with the dayObs, define the block ID. 

44 

45 Each BlockInfo instance contains the following information: 

46 * The block ID - this is the primary identifier, as a string, for 

47 example "BL52_20230615_02", which is parsed into: 

48 * The block number, as an integer, for example 52, for "BLOCK-52". 

49 * The dayObs, as an integer, for example 20230615. 

50 * The seqNum - the execution number of that block on that day. 

51 * The begin and end times of the block execution, as astropy.time.Time 

52 * The SAL indices which were involved in the block execution, as a list 

53 * The SITCOM tickets which were involved in the block execution, as a 

54 list of strings, including the SITCOM- prefix. 

55 * The states of the script during the block execution, as a list of 

56 ``ScriptStatePoint`` instances. 

57 

58 Parameters 

59 ---------- 

60 blockNumber : `int` 

61 The block number, as an integer. 

62 blockId : `str` 

63 The block ID, as a string. 

64 dayObs : `int` 

65 The dayObs the block was run on. 

66 seqNum : `int` 

67 The sequence number of the block. 

68 begin : `astropy.time.Time` 

69 The time the block execution began. 

70 end : `astropy.time.Time` 

71 The time the block execution ended. 

72 salIndices : `list` of `int` 

73 One or more SAL indices, relating to the block. 

74 tickets : `list` of `str` 

75 One or more SITCOM tickets, relating to the block. 

76 states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint` 

77 The states of the script during the block. Each element is a 

78 ``ScriptStatePoint`` which contains: 

79 - the time, as an astropy.time.Time 

80 - the state, as a ``ScriptState`` enum 

81 - the reason for state change, as a string, if present 

82 """ 

83 

84 blockNumber: int 

85 blockId: str 

86 dayObs: int 

87 seqNum: int 

88 begin: Time 

89 end: Time 

90 salIndices: list 

91 tickets: list 

92 states: list 

93 

94 def __repr__(self): 

95 return ( 

96 f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices}," 

97 f" tickets={self.tickets}, states={self.states!r}" 

98 ) 

99 

100 def _ipython_display_(self): 

101 """This is the function which runs when someone executes a cell in a 

102 notebook with just the class instance on its own, without calling 

103 print() or str() on it. 

104 """ 

105 print(self.__str__()) 

106 

107 def __str__(self): 

108 # no literal \n allowed inside {} portion of f-strings until python 

109 # 3.12, but it can go in via a variable 

110 newline = " \n" 

111 return ( 

112 f"dayObs: {self.dayObs}\n" 

113 f"seqNum: {self.seqNum}\n" 

114 f"blockNumber: {self.blockNumber}\n" 

115 f"blockId: {self.blockId}\n" 

116 f"begin: {self.begin.isot}\n" 

117 f"end: {self.end.isot}\n" 

118 f"salIndices: {self.salIndices}\n" 

119 f"tickets: {self.tickets}\n" 

120 f"states: \n{newline.join([str(state) for state in self.states])}" 

121 ) 

122 

123 

124@dataclass(kw_only=True, frozen=True) 

125class ScriptStatePoint: 

126 """The execution state of a script at a point in time. 

127 

128 Parameters 

129 ---------- 

130 time : `astropy.time.Time` 

131 The time of the state change. 

132 state : `lsst.summit.utils.enums.ScriptState` 

133 The state of the script at this point in time. 

134 reason : `str` 

135 The reason for the state change, if given. 

136 """ 

137 

138 time: Time 

139 state: ScriptState 

140 reason: str 

141 

142 def __repr__(self): 

143 return f"ScriptStatePoint(time={self.time!r}, state={self.state!r}, reason={self.reason!r})" 

144 

145 def _ipython_display_(self): 

146 """This is the function which runs when someone executes a cell in a 

147 notebook with just the class instance on its own, without calling 

148 print() or str() on it. 

149 """ 

150 print(self.__str__()) 

151 

152 def __str__(self): 

153 reasonStr = f" - {self.reason}" if self.reason else "" 

154 return f"{self.state.name:>10} @ {self.time.isot}{reasonStr}" 

155 

156 

class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.
    """

    def __init__(self, dayObs, client=None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self):
        """Retrieve the data for the specified dayObs from the EFD."""
        # Tiago thinks no individual block seqNums should take more than an
        # hour to run, so pad the dayObs by 1.5 hours to make sure we catch
        # any blocks which might span the end of the day.
        padding = 1.5 * 60 * 60
        data = getEfdData(
            self.client, "lsst.sal.Script.logevent_state", dayObs=self.dayObs, postPadding=padding
        )
        self.data = data

    def _addEmptyBlockColumns(self):
        """Warn that block data cannot be parsed and add the block columns,
        filled with missing values, so later lookups do not fail on a missing
        column.
        """
        nRows = len(self.data)
        self.log.warning(
            f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
            " so block data cannot be parsed."
        )
        # An empty series is aligned to the frame's index on assignment, so
        # every existing row gets a missing value.
        for column in ("blockNum", "blockId", "blockDayObs", "blockSeqNum"):
            self.data[column] = pd.Series(dtype=object)

    def augmentDataSlow(self):
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        If there is no ``lastCheckpoint`` column a warning is logged and the
        block columns are added filled with missing values. Rows whose
        checkpoint is not a string, or whose block ID does not have a decimal
        dayObs and seqNum as its third and fourth underscore-separated
        fields, get missing values for the parts which could not be parsed.
        """
        if "lastCheckpoint" not in self.data.columns:
            self._addEmptyBlockColumns()
            return

        data = self.data
        blockPattern = re.compile(r"BLOCK-(\d+)")
        blockIdPattern = re.compile(r"BL\d+(?:_\w+)+")

        blockNums = []
        blockIds = []
        blockDayObs = []
        blockSeqNums = []
        for checkpoint in data["lastCheckpoint"]:
            blockNum = blockId = dayObs = seqNum = None
            # missing checkpoints are not strings, and cannot be searched
            if isinstance(checkpoint, str):
                blockMatch = blockPattern.search(checkpoint)
                if blockMatch:
                    blockNum = int(blockMatch.group(1))

                blockIdMatch = blockIdPattern.search(checkpoint)
                if blockIdMatch:
                    blockId = blockIdMatch.group(0)
                    parts = blockId.split("_")
                    if len(parts) > 3 and parts[2].isdecimal() and parts[3].isdecimal():
                        dayObs = int(parts[2])
                        seqNum = int(parts[3])

            blockNums.append(blockNum)
            blockIds.append(blockId)
            blockDayObs.append(dayObs)
            blockSeqNums.append(seqNum)

        # Build whole columns at once, using the same nullable integer type
        # as augmentData so both parsers produce comparable frames.
        intType = pd.Int64Dtype()
        data["blockNum"] = pd.Series(blockNums, index=data.index, dtype=intType)
        data["blockId"] = pd.Series(blockIds, index=data.index, dtype=object)
        data["blockDayObs"] = pd.Series(blockDayObs, index=data.index, dtype=intType)
        data["blockSeqNum"] = pd.Series(blockSeqNums, index=data.index, dtype=intType)

    def augmentData(self):
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.

        If there is no ``lastCheckpoint`` column a warning is logged and the
        block columns are added filled with missing values.
        """
        if "lastCheckpoint" not in self.data.columns:
            # add the columns that would have been added for consistency
            self._addEmptyBlockColumns()
            return

        data = self.data
        intType = pd.Int64Dtype()
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data["lastCheckpoint"]
        data["blockNum"] = col.str.extract(blockPattern, expand=False).astype(float).astype(intType)
        data["blockId"] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data["blockId"].str.split("_", expand=True)
        # The dayObs and seqNum are fields 2 and 3 of the split ID, so both
        # are only available when at least one ID produced a column 3.
        if 3 in blockIdSplit.columns:
            # to_numeric with coerce turns unparseable and absent fields into
            # NaN rather than raising.
            data["blockDayObs"] = pd.to_numeric(blockIdSplit[2], errors="coerce").astype(intType)
            data["blockSeqNum"] = pd.to_numeric(blockIdSplit[3], errors="coerce").astype(intType)
        else:  # make missing-value filled columns for these
            data["blockDayObs"] = pd.array([pd.NA] * len(data), dtype=intType)
            data["blockSeqNum"] = pd.array([pd.NA] * len(data), dtype=intType)

    def _listColumnValues(self, column, removeNone=True):
        """Get all the different values for the specified column, as a list.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Retained for backwards compatibility. Missing values are always
            dropped before the values are collected, so this has no effect.

        Returns
        -------
        values : `list`
            The sorted, distinct, non-missing values for the specified column.
        """
        return sorted(set(self.data[column].dropna()))

    def getBlockNums(self):
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues("blockNum")

    def getSeqNums(self, block):
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sequence numbers for the specified block.
        """
        seqNums = self.data[self.data["blockNum"] == block]["blockSeqNum"]
        # block header rows have no blockId or seqNum, but do have a blockNum
        # so appear here, so drop the nans as they don't relate to an actual
        # run of a block
        seqNums = seqNums.dropna()
        return sorted(set(seqNums))

    def getRows(self, block, seqNum=None):
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        # Because we query for a whole dayObs, but BLOCKs can overlap the day
        # start/end, it's possible for the block's blockDayObs not to be the
        # same as self.dayObs around the beginning or end of the day, so
        # require the blockDayObs to match as well as the block number.
        isBlock = self.data["blockNum"] == block
        isDayObs = self.data["blockDayObs"] == self.dayObs
        rowsForBlock = self.data[isBlock & isDayObs]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock["blockSeqNum"] == seqNum]

    def printBlockEvolution(self, block, seqNum=None):
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        if seqNum is None:
            seqNums = self.getSeqNums(block)
        else:
            seqNums = [seqNum]
        print(f"Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:")
        for executionSeqNum in seqNums:
            blockInfo = self.getBlockInfo(block, executionSeqNum)
            if blockInfo is None:
                # getBlockInfo has already reported that there were no rows
                continue
            print(blockInfo, "\n")

    def getBlockInfo(self, block, seqNum):
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` if there are no rows for this block and
            sequence number on this dayObs.

        Raises
        ------
        RuntimeError
            Raised if the rows for this execution contain more than one
            block ID.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f"No {seqNum=} on dayObs={self.dayObs} for {block=}")
            return None

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = r"SITCOM-(\d+)"

        for _, row in rows.iterrows():
            salIndices.add(row["salIndex"])
            blockIds.add(row["blockId"])

            lastCheckpoint = row["lastCheckpoint"]
            sitcomMatches = re.findall(sitcomPattern, lastCheckpoint)
            tickets.update(sitcomMatches)

            # named so as not to shadow the ``time`` module
            stateTime = efdTimestampToAstropy(row["private_efdStamp"])
            state = ScriptState(row["state"])
            reason = row["reason"]
            statePoint = ScriptStatePoint(time=stateTime, state=state, reason=reason)
            statePoints.append(statePoint)

        # a single execution must map onto exactly one block ID
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            begin=efdTimestampToAstropy(rows.iloc[0]["private_efdStamp"]),
            end=efdTimestampToAstropy(rows.iloc[-1]["private_efdStamp"]),
            salIndices=sorted(salIndices),
            tickets=[f"SITCOM-{ticket}" for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events, block, seqNum):
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the block execution was not found.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such block execution, so no events can lie within it
            return []
        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]