Coverage for python/lsst/summit/utils/blockUtils.py: 25%

157 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-21 15:15 +0000

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import re 

23import time 

24import logging 

25import pandas as pd 

26import numpy as np 

27from dataclasses import dataclass 

28from astropy.time import Time 

29 

30from .enums import ScriptState 

31from .efdUtils import (getEfdData, 

32 makeEfdClient, 

33 efdTimestampToAstropy, 

34 ) 

35 

36__all__ = ( 

37 'BlockParser', 

38 'BlockInfo', 

39 'ScriptStatePoint' 

40) 

41 

42 

43@dataclass(kw_only=True, frozen=True) 

44class BlockInfo: 

45 """Information about the execution of a "block". 

46 

47 Each BlockInfo instance contains information about a single block 

48 execution. This is identified by the block number and sequence number, 

49 which, when combined with the dayObs, define the block ID. 

50 

51 Each BlockInfo instance contains the following information: 

52 * The block ID - this is the primary identifier, as a string, for 

53 example "BL52_20230615_02", which is parsed into: 

54 * The block number, as an integer, for example 52, for "BLOCK-52". 

55 * The dayObs, as an integer, for example 20230615. 

56 * The seqNum - the execution number of that block on that day. 

57 * The begin and end times of the block execution, as astropy.time.Time 

58 * The SAL indices which were involved in the block execution, as a list 

59 * The SITCOM tickets which were involved in the block execution, as a 

60 list of strings, including the SITCOM- prefix. 

61 * The states of the script during the block execution, as a list of 

62 ``ScriptStatePoint`` instances. 

63 

64 Parameters 

65 ---------- 

66 blockNumber : `int` 

67 The block number, as an integer. 

68 blockId : `str` 

69 The block ID, as a string. 

70 dayObs : `int` 

71 The dayObs the block was run on. 

72 seqNum : `int` 

73 The sequence number of the block. 

74 begin : `astropy.time.Time` 

75 The time the block execution began. 

76 end : `astropy.time.Time` 

77 The time the block execution ended. 

78 salIndices : `list` of `int` 

79 One or more SAL indices, relating to the block. 

80 tickets : `list` of `str` 

81 One or more SITCOM tickets, relating to the block. 

82 states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint` 

83 The states of the script during the block. Each element is a 

84 ``ScriptStatePoint`` which contains: 

85 - the time, as an astropy.time.Time 

86 - the state, as a ``ScriptState`` enum 

87 - the reason for state change, as a string, if present 

88 """ 

89 blockNumber: int 

90 blockId: str 

91 dayObs: int 

92 seqNum: int 

93 begin: Time 

94 end: Time 

95 salIndices: int 

96 tickets: list 

97 states: list 

98 

99 def __repr__(self): 

100 return ( 

101 f"BlockInfo(blockNumber={self.blockNumber}, blockId={self.blockId}, salIndices={self.salIndices}," 

102 f" tickets={self.tickets}, states={self.states!r}" 

103 ) 

104 

105 def _ipython_display_(self): 

106 """This is the function which runs when someone executes a cell in a 

107 notebook with just the class instance on its own, without calling 

108 print() or str() on it. 

109 """ 

110 print(self.__str__()) 

111 

112 def __str__(self): 

113 # no literal \n allowed inside {} portion of f-strings until python 

114 # 3.12, but it can go in via a variable 

115 newline = ' \n' 

116 return ( 

117 f"dayObs: {self.dayObs}\n" 

118 f"seqNum: {self.seqNum}\n" 

119 f"blockNumber: {self.blockNumber}\n" 

120 f"blockId: {self.blockId}\n" 

121 f"begin: {self.begin.isot}\n" 

122 f"end: {self.end.isot}\n" 

123 f"salIndices: {self.salIndices}\n" 

124 f"tickets: {self.tickets}\n" 

125 f"states: \n{newline.join([str(state) for state in self.states])}" 

126 ) 

127 

128 

129@dataclass(kw_only=True, frozen=True) 

130class ScriptStatePoint: 

131 """The execution state of a script at a point in time. 

132 

133 Parameters 

134 ---------- 

135 time : `astropy.time.Time` 

136 The time of the state change. 

137 state : `lsst.summit.utils.enums.ScriptState` 

138 The state of the script at this point in time. 

139 reason : `str` 

140 The reason for the state change, if given. 

141 """ 

142 time: Time 

143 state: ScriptState 

144 reason: str 

145 

146 def __repr__(self): 

147 return ( 

148 f"ScriptStatePoint(time={self.time!r}, state={self.state!r}, reason={self.reason!r})" 

149 ) 

150 

151 def _ipython_display_(self): 

152 """This is the function which runs when someone executes a cell in a 

153 notebook with just the class instance on its own, without calling 

154 print() or str() on it. 

155 """ 

156 print(self.__str__()) 

157 

158 def __str__(self): 

159 reasonStr = f" - {self.reason}" if self.reason else "" 

160 return (f"{self.state.name:>10} @ {self.time.isot}{reasonStr}") 

161 

162 

class BlockParser:
    """A class to parse BLOCK data from the EFD.

    Information on executed blocks is stored in the EFD (Electronic Facilities
    Database) in the ``lsst.sal.Script.logevent_state`` topic. This class
    parses that topic and provides methods to get information on the blocks
    which were run on a given dayObs. It also provides methods to get the
    events which occurred during a given block, and also to get the block in
    which a specified event occurred, if any.

    The data is fetched and parsed on construction. Parsing adds the columns
    ``blockNum``, ``blockId``, ``blockDayObs`` and ``blockSeqNum`` to
    ``self.data``.

    Parameters
    ----------
    dayObs : `int`
        The dayObs to get the block data for.
    client : `lsst_efd_client.efd_client.EfdClient`, optional
        The EFD client to use. If not specified, a new one is created.
    """

    def __init__(self, dayObs, client=None):
        self.log = logging.getLogger("lsst.summit.utils.blockUtils.BlockParser")
        self.dayObs = dayObs

        self.client = client
        if client is None:
            self.client = makeEfdClient()

        t0 = time.time()
        self.getDataForDayObs()
        self.log.debug(f"Getting data took {(time.time()-t0):.2f} seconds")
        t0 = time.time()
        self.augmentData()
        self.log.debug(f"Parsing data took {(time.time()-t0):.5f} seconds")

    def getDataForDayObs(self):
        """Retrieve the data for the specified dayObs from the EFD.

        The result is stored as ``self.data``.
        """
        data = getEfdData(self.client, 'lsst.sal.Script.logevent_state', dayObs=self.dayObs)
        self.data = data

    def _hasCheckpointColumn(self):
        """Check whether ``self.data`` can be parsed for block information.

        If the ``lastCheckpoint`` column is missing, a warning is logged and
        the four block columns are added, filled with NaN, so that the other
        methods of this class still find the columns they expect.

        Returns
        -------
        hasColumn : `bool`
            `True` if the ``lastCheckpoint`` column is present.
        """
        if 'lastCheckpoint' in self.data.columns:
            return True

        nRows = len(self.data)
        self.log.warning(f"Found {nRows} rows of data and no 'lastCheckpoint' column was in the data,"
                         " so block data cannot be parsed.")
        # add the columns that would have been added for consistency
        for column in ('blockNum', 'blockId', 'blockDayObs', 'blockSeqNum'):
            self.data[column] = np.nan
        return False

    def augmentDataSlow(self):
        """Parse each row in the data frame individually, pulling the
        information out into its own columns.

        The block number is taken from ``BLOCK-<n>`` and the block ID from
        the first ``BL<n>_...`` token in ``lastCheckpoint``. The dayObs and
        seqNum are the third and fourth underscore-separated parts of the
        block ID. Anything which cannot be parsed is left as NaN.
        """
        if not self._hasCheckpointColumn():
            # nothing to parse; indexing 'lastCheckpoint' below would raise
            return

        data = self.data
        blockPattern = re.compile(r"BLOCK-(\d+)")
        blockIdPattern = re.compile(r"BL\d+(?:_\w+)+")

        blockNums = []
        blockIds = []
        blockDayObses = []
        blockSeqNums = []

        for rowStr in data['lastCheckpoint']:
            blockNumber = blockId = blockDayObs = blockSeqNum = np.nan

            # missing checkpoints are not strings and cannot be searched
            if isinstance(rowStr, str):
                blockMatch = blockPattern.search(rowStr)
                if blockMatch:
                    blockNumber = int(blockMatch.group(1))

                blockIdMatch = blockIdPattern.search(rowStr)
                if blockIdMatch:
                    blockId = blockIdMatch.group(0)
                    parts = blockId.split('_')
                    if len(parts) > 3:
                        try:
                            blockDayObs, blockSeqNum = int(parts[2]), int(parts[3])
                        except ValueError:
                            # non-numeric parts: leave both unparsed
                            blockDayObs = blockSeqNum = np.nan

            blockNums.append(blockNumber)
            blockIds.append(blockId)
            blockDayObses.append(blockDayObs)
            blockSeqNums.append(blockSeqNum)

        # Assign whole columns positionally, rather than via .loc[index], so
        # that rows which share an index label cannot overwrite each other.
        data['blockNum'] = pd.Series(blockNums, index=data.index, dtype=object)
        data['blockId'] = pd.Series(blockIds, index=data.index, dtype=object)
        data['blockDayObs'] = pd.Series(blockDayObses, index=data.index, dtype=object)
        data['blockSeqNum'] = pd.Series(blockSeqNums, index=data.index, dtype=object)

    def augmentData(self):
        """Parse the dataframe using vectorized methods, pulling the
        information out into its own columns.

        This method is much faster for large dataframes than augmentDataSlow,
        but is also much harder to maintain/debug, as the vectorized regexes
        are hard to work with, and to know which row is causing problems.
        """
        if not self._hasCheckpointColumn():
            return

        data = self.data
        blockPattern = r"BLOCK-(\d+)"
        blockIdPattern = r"(BL\d+(?:_\w+)+)"

        col = data['lastCheckpoint']
        data['blockNum'] = col.str.extract(blockPattern, expand=False).astype(float).astype(pd.Int64Dtype())
        data['blockId'] = col.str.extract(blockIdPattern, expand=False)

        blockIdSplit = data['blockId'].str.split('_', expand=True)
        # Parts 2 and 3 are both read below, so at least four parts are
        # needed for the parse to have succeeded.
        if blockIdSplit.shape[1] > 3:
            for column, part in (('blockDayObs', 2), ('blockSeqNum', 3)):
                # rows with no, or a shorter, block ID become <NA>
                numeric = pd.to_numeric(blockIdSplit[part], errors='coerce')
                data[column] = numeric.astype(pd.Int64Dtype())
        else:  # make nan filled columns for these
            data['blockDayObs'] = np.nan
            data['blockSeqNum'] = np.nan

    def _listColumnValues(self, column, removeNone=True):
        """Get all the different values for the specified column, as a list.

        Missing values (NaN, None, NA) are always dropped before the values
        are collected.

        Parameters
        ----------
        column : `str`
            The column to get the values for.
        removeNone : `bool`
            Whether to remove None from the list of values.

        Returns
        -------
        values : `list`
            The sorted, unique values for the specified column.
        """
        values = set(self.data[column].dropna())
        if removeNone:
            values.discard(None)
        return sorted(values)

    def getBlockNums(self):
        """Get the block numbers which were run on the specified dayObs.

        Returns
        -------
        blockNums : `list` of `int`
            The blocks which were run on the specified dayObs.
        """
        return self._listColumnValues('blockNum')

    def getSeqNums(self, block):
        """Get the seqNums for the specified block.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.

        Returns
        -------
        seqNums : `list` of `int`
            The sequence numbers for the specified block.
        """
        seqNums = self.data[self.data['blockNum'] == block]['blockSeqNum']
        # Rows can have a block number without a parsed block ID, so drop the
        # missing values: they cannot be sorted.
        return sorted({int(seqNum) for seqNum in seqNums.dropna()})

    def getRows(self, block, seqNum=None):
        """Get all rows of data which relate to the specified block.

        If the seqNum is specified, only the rows for that sequence number are
        returned, otherwise all the rows relating to any block execution that
        day are returned. If the specified seqNum doesn't occur on the current
        day, an empty dataframe is returned.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to get the row data for. If not
            specified, all data for the specified block is returned.

        Returns
        -------
        data : `pandas.DataFrame`
            The row data.
        """
        rowsForBlock = self.data[self.data['blockNum'] == block]
        if rowsForBlock.empty:
            self.log.warning(f"No rows found for {block=} on dayObs={self.dayObs}")
        if seqNum is None:
            return rowsForBlock
        return rowsForBlock[rowsForBlock['blockSeqNum'] == seqNum]

    def printBlockEvolution(self, block, seqNum=None):
        """Display the evolution of the specified block.

        If the seqNum is specified, the evolution of that specific block
        execution is displayed, otherwise all executions of that block are
        printed.

        Parameters
        ----------
        block : `int`
            The block number to get the events for.
        seqNum : `int`, optional
            The sequence number, if specified, to print the evolution of. If
            not specified, all sequence numbers for the block are printed.
        """
        if seqNum is None:
            seqNums = self.getSeqNums(block)
        else:
            seqNums = [seqNum]
        print(f'Evolution of BLOCK {block} for dayObs={self.dayObs} {seqNum=}:')
        for thisSeqNum in seqNums:
            blockInfo = self.getBlockInfo(block, thisSeqNum)
            if blockInfo is None:
                # getBlockInfo has already reported that nothing was found
                continue
            print(blockInfo, '\n')

    def getBlockInfo(self, block, seqNum):
        """Get the block info for the specified block.

        Parses the rows relating to this block execution, and returns
        the information as a ``BlockInfo`` instance.

        Parameters
        ----------
        block : `int`
            The block number.
        seqNum : `int`
            The sequence number.

        Returns
        -------
        blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` or `None`
            The block info, or `None` if there are no rows for this block and
            sequence number, in which case a message is printed.

        Raises
        ------
        RuntimeError
            Raised if the rows contain more than one distinct block ID.
        """
        rows = self.getRows(block, seqNum=seqNum)
        if rows.empty:
            print(f'No {seqNum=} on dayObs={self.dayObs} for {block=}')
            return None

        blockIds = set()
        tickets = set()
        salIndices = set()
        statePoints = []
        sitcomPattern = re.compile(r"SITCOM-(\d+)")

        for _, row in rows.iterrows():
            salIndices.add(row['salIndex'])
            blockIds.add(row['blockId'])

            tickets.update(sitcomPattern.findall(row['lastCheckpoint']))

            statePoints.append(
                ScriptStatePoint(
                    time=efdTimestampToAstropy(row['private_efdStamp']),
                    state=ScriptState(row['state']),
                    reason=row['reason'],
                )
            )

        # a single block execution must map onto exactly one block ID
        if len(blockIds) > 1:
            raise RuntimeError(f"Found multiple blockIds ({blockIds}) for {seqNum=}")
        blockId = blockIds.pop()

        blockInfo = BlockInfo(
            blockNumber=block,
            blockId=blockId,
            dayObs=self.dayObs,
            seqNum=seqNum,
            # the first and last rows are taken as the begin and end times
            begin=efdTimestampToAstropy(rows.iloc[0]['private_efdStamp']),
            end=efdTimestampToAstropy(rows.iloc[-1]['private_efdStamp']),
            salIndices=sorted(salIndices),
            tickets=[f'SITCOM-{ticket}' for ticket in sorted(tickets)],
            states=statePoints,
        )

        return blockInfo

    def getEventsForBlock(self, events, block, seqNum):
        """Get the events which occurred during the specified block.

        Parameters
        ----------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The list of candidate events.
        block : `int`
            The block number to get the events for.
        seqNum : `int`
            The sequence number to get the events for.

        Returns
        -------
        events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent`
            The events. Empty if the block execution was not found.
        """
        blockInfo = self.getBlockInfo(block, seqNum)
        if blockInfo is None:
            # no such block execution, so no event can overlap it
            return []
        begin = blockInfo.begin
        end = blockInfo.end

        # each event's end being past the begin time and their
        # starts being before the end time means we get all the
        # events in the window and also those that overlap the
        # start/end too
        return [e for e in events if e.end >= begin and e.begin <= end]