mirror of
https://github.com/xcat2/confluent.git
synced 2026-06-10 09:55:07 +00:00
Fix issues with IPMI session management
Do not continue waiting when the session is broken. Do not call _timedout without releasing the lock first. Properly await _relog on a bad RAKP4. If an accounting issue pushes logontries past zero without it ever being exactly zero, still recognize that retries were exhausted. Time out on a missing RAKP2 if retries were already exhausted.
This commit is contained in:
@@ -549,7 +549,7 @@ class Session(object):
|
||||
self.socketchecking.release()
|
||||
await self.login()
|
||||
if not self.async_:
|
||||
while self.logging:
|
||||
while self.logging and not self.broken:
|
||||
await Session.wait_for_rsp()
|
||||
if self.broken:
|
||||
raise exc.IpmiException(self.errormsg)
|
||||
@@ -792,9 +792,9 @@ class Session(object):
|
||||
self.waiting_sessions.pop(self, None)
|
||||
if not self.lastpayload and not self.logging:
|
||||
return
|
||||
await self._timedout()
|
||||
finally:
|
||||
WAITING_SESSIONS.release()
|
||||
await self._timedout()
|
||||
finally:
|
||||
self.awaitingresponse = False
|
||||
|
||||
@@ -1668,7 +1668,7 @@ class Session(object):
|
||||
if data[1] == 2 and self.logontries: # if we retried RAKP3 because
|
||||
# RAKP4 got dropped, BMC can consider it done and we must
|
||||
# restart
|
||||
self._relog()
|
||||
await self._relog()
|
||||
return
|
||||
# ignore 15 value if we are retrying.
|
||||
# xCAT did but I can't recall why exactly
|
||||
@@ -1804,7 +1804,7 @@ class Session(object):
|
||||
self.nowait = True
|
||||
self.timeout += 1
|
||||
if self.timeout > self.maxtimeout:
|
||||
if not self.logontries:
|
||||
if self.logontries <= 0:
|
||||
response = {'error': 'timeout', 'code': 0xffff}
|
||||
if self.ipmicallback:
|
||||
await self.ipmicallback(response)
|
||||
@@ -1834,8 +1834,20 @@ class Session(object):
|
||||
or self.sessioncontext == 'EXPECTINGRAKP4'):
|
||||
# If we can't be sure which RAKP was dropped or if RAKP3/4 was just
|
||||
# delayed, the most reliable thing to do is rewind and start over
|
||||
# bmcs do not take kindly to receiving RAKP1 or RAKP3 twice
|
||||
await self._relog()
|
||||
# bmcs do not take kindly to receiving RAKP1 or RAKP3 twice.
|
||||
# Only do this while we still have login attempts budgeted;
|
||||
# otherwise each lost RAKP would spawn a fresh RAKP1 forever
|
||||
# (_relog resets self.timeout, so the timeout budget above never
|
||||
# trips during the RAKP phase).
|
||||
if self.logontries > 0:
|
||||
await self._relog()
|
||||
else:
|
||||
response = {'error': 'timeout', 'code': 0xffff}
|
||||
if self.ipmicallback:
|
||||
await self.ipmicallback(response)
|
||||
self.nowait = False
|
||||
await self._mark_broken()
|
||||
return
|
||||
else: # in IPMI case, the only recourse is to act as if the packet is
|
||||
# idempotent. SOL has more sophisticated retry handling
|
||||
# the biggest risks are reset sp which is often fruitless to retry
|
||||
|
||||
Reference in New Issue
Block a user