Skip to content

Commit

Permalink
Reduce sleep() in CAP library code (microsoft#2189)
Browse files Browse the repository at this point in the history
* 1) Removed most framework sleeps 2) refactored connection code

* pre-commit fixes

* pre-commit

* ignore protobuf files in pre-commit checks

* Fix duplicate actor registration

* refactor change

* Nicer printing of Actors

* 1) Report recv_multipart errors 4) Always send 4 parts

* AutoGen generate_reply expects to wait indefinitely for an answer.  CAP can wait a certain amount and give up.   In order to reconcile the two, AutoGenConnector is set to wait indefinitely.

* pre-commit formatting fixes

* pre-commit format changes

* don't check autogenerated proto py files
  • Loading branch information
rajan-chari authored Apr 2, 2024
1 parent c3193f8 commit db30ec8
Show file tree
Hide file tree
Showing 16 changed files with 242 additions and 86 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ exclude = [
# This file needs to be either upgraded or removed and therefore should be
# ignore from type checking for now
"math_utils\\.py$",
"samples\\apps\\cap\\py\\autogencap\\proto\\.*\\.py",
"**/cap/py/autogencap/proto/*",
]
ignore-init-module-imports = true
unfixable = ["F401"]
Expand Down
27 changes: 17 additions & 10 deletions samples/apps/cap/py/autogencap/Actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import threading
import traceback
import time
from .DebugLog import Debug, Info
from .DebugLog import Debug, Info, Error
from .Config import xpub_url


Expand All @@ -11,6 +11,7 @@ def __init__(self, agent_name: str, description: str):
self.actor_name: str = agent_name
self.agent_description: str = description
self.run = False
self._start_event = threading.Event()

def connect_network(self, network):
Debug(self.actor_name, f"is connecting to {network}")
Expand All @@ -25,14 +26,15 @@ def _process_bin_msg(self, msg: bytes, msg_type: str, topic: str, sender: str) -
return True

def _recv_thread(self):
Debug(self.actor_name, "recv thread started")
self._socket: zmq.Socket = self._context.socket(zmq.SUB)
self._socket.setsockopt(zmq.RCVTIMEO, 500)
self._socket.connect(xpub_url)
str_topic = f"{self.actor_name}"
Debug(self.actor_name, f"subscribe to: {str_topic}")
self._socket.setsockopt_string(zmq.SUBSCRIBE, f"{str_topic}")
try:
Debug(self.actor_name, "recv thread started")
self._socket: zmq.Socket = self._context.socket(zmq.SUB)
self._socket.setsockopt(zmq.RCVTIMEO, 500)
self._socket.connect(xpub_url)
str_topic = f"{self.actor_name}"
Debug(self.actor_name, f"subscribe to: {str_topic}")
self._socket.setsockopt_string(zmq.SUBSCRIBE, f"{str_topic}")
self._start_event.set()
while self.run:
try:
topic, msg_type, sender_topic, msg = self._socket.recv_multipart()
Expand All @@ -41,7 +43,9 @@ def _recv_thread(self):
sender_topic = sender_topic.decode("utf-8") # Convert bytes to string
except zmq.Again:
continue # No message received, continue to next iteration
except Exception:
except Exception as e:
Error(self.actor_name, f"recv thread encountered an error: {e}")
traceback.print_exc()
continue
if msg_type == "text":
msg = msg.decode("utf-8") # Convert bytes to string
Expand All @@ -57,14 +61,17 @@ def _recv_thread(self):
traceback.print_exc()
finally:
self.run = False
# In case there was an exception at startup signal
# the main thread.
self._start_event.set()
Debug(self.actor_name, "recv thread ended")

def start(self, context: zmq.Context):
self._context = context
self.run: bool = True
self._thread = threading.Thread(target=self._recv_thread)
self._thread.start()
time.sleep(0.01)
self._start_event.wait()

def disconnect_network(self, network):
Debug(self.actor_name, f"is disconnecting from {network}")
Expand Down
140 changes: 101 additions & 39 deletions samples/apps/cap/py/autogencap/ActorConnector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,80 +5,142 @@
from zmq.utils.monitor import recv_monitor_message
import time
import uuid
from .DebugLog import Debug, Error
from .DebugLog import Debug, Error, Info
from .Config import xsub_url, xpub_url, router_url
from typing import Any, Dict


class ActorConnector:
class ActorSender:
def __init__(self, context, topic):
self._context = context

self._resp_socket = self._context.socket(zmq.SUB)
self._resp_socket.setsockopt(zmq.LINGER, 0)
self._resp_socket.setsockopt(zmq.RCVTIMEO, 250)
self._resp_socket.connect(xpub_url)
self._resp_topic = str(uuid.uuid4())
Debug("AgentConnector", f"subscribe to: {self._resp_topic}")
self._resp_socket.setsockopt_string(zmq.SUBSCRIBE, f"{self._resp_topic}")
self._topic = topic

self._connect_pub_socket()

def _send_recv_router_msg(self):
# Send a request to the router and wait for a response
req_socket = self._context.socket(zmq.REQ)
req_socket.connect(router_url)
try:
Debug("ActorConnector", "Broker Check Request Sent")
req_socket.send_string("Request")
_ = req_socket.recv_string()
Debug("ActorConnector", "Broker Check Response Received")
finally:
req_socket.close()

def _connect_pub_socket(self):
Debug("ActorSender", f"Connecting pub socket {self._topic}")
self._pub_socket = self._context.socket(zmq.PUB)
self._pub_socket.setsockopt(zmq.LINGER, 0)
monitor = self._pub_socket.get_monitor_socket()
self._pub_socket.setsockopt(zmq.LINGER, 0)
self._pub_socket.connect(xsub_url)
# Monitor handshake on the pub socket
while monitor.poll():
evt: Dict[str, Any] = {}
mon_evt = recv_monitor_message(monitor)
evt.update(mon_evt)
if evt["event"] == zmq.EVENT_MONITOR_STOPPED or evt["event"] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
Debug("ActorConnector", "Handshake received (Or Monitor stopped)")
Debug("ActorSender", "Handshake received (Or Monitor stopped)")
break
self._pub_socket.disable_monitor()
monitor.close()
self._send_recv_router_msg()

def _send_recv_router_msg(self):
# Send a request to the router and wait for a response
req_socket = self._context.socket(zmq.REQ)
req_socket.connect(router_url)
try:
Debug("ActorSender", "Broker Check Request Sent")
req_socket.send_string("Request")
_ = req_socket.recv_string()
Debug("ActorSender", "Broker Check Response Received")
finally:
req_socket.close()

def send_txt_msg(self, msg):
Debug("ActorSender", f"[{self._topic}] send_txt_msg: {msg}")
self._pub_socket.send_multipart(
[self._topic.encode("utf8"), "text".encode("utf8"), self._resp_topic.encode("utf8"), msg.encode("utf8")]
[self._topic.encode("utf8"), "text".encode("utf8"), "no_resp".encode("utf8"), msg.encode("utf8")]
)

def send_bin_msg(self, msg_type: str, msg):
Debug("ActorSender", f"[{self._topic}] send_bin_msg: {msg_type}")
self._pub_socket.send_multipart(
[self._topic.encode("utf8"), msg_type.encode("utf8"), self._resp_topic.encode("utf8"), msg]
[self._topic.encode("utf8"), msg_type.encode("utf8"), "no_resp".encode("utf8"), msg]
)

def binary_request(self, msg_type: str, msg, retry=5):
def send_bin_request_msg(self, msg_type: str, msg, resp_topic: str):
Debug("ActorSender", f"[{self._topic}] send_bin_request_msg: {msg_type}")
self._pub_socket.send_multipart(
[self._topic.encode("utf8"), msg_type.encode("utf8"), self._resp_topic.encode("utf8"), msg]
[self._topic.encode("utf8"), msg_type.encode("utf8"), resp_topic.encode("utf8"), msg]
)
for i in range(retry + 1):
try:
resp_topic, resp_msg_type, resp_sender_topic, resp = self._resp_socket.recv_multipart()
return resp_topic, resp_msg_type, resp_sender_topic, resp
except zmq.Again:
Debug("ActorConnector", f"binary_request: No response received. retry_count={i}, max_retry={retry}")
time.sleep(0.01) # Wait a bit before retrying
continue
Error("ActorConnector", "binary_request: No response received. Giving up.")
return None, None, None, None

def close(self):
self._pub_socket.close()


class ActorConnector:
def __init__(self, context, topic):
self._context = context
self._topic = topic
self._connect_sub_socket()
self._sender = ActorSender(context, topic)
time.sleep(0.1) # Wait for the socket to connect

def _connect_sub_socket(self):
self._resp_socket = self._context.socket(zmq.SUB)
monitor = self._resp_socket.get_monitor_socket()
self._resp_socket.setsockopt(zmq.LINGER, 0)
self._resp_socket.setsockopt(zmq.RCVTIMEO, 250)
self._resp_socket.connect(xpub_url)
self._resp_topic = str(uuid.uuid4())
Debug("ActorConnector", f"subscribe to: {self._resp_topic}")
self._resp_socket.setsockopt_string(zmq.SUBSCRIBE, f"{self._resp_topic}")
while monitor.poll():
evt: Dict[str, Any] = {}
mon_evt = recv_monitor_message(monitor)
evt.update(mon_evt)
Debug("ActorConnector", evt)
if evt["event"] == zmq.EVENT_MONITOR_STOPPED or evt["event"] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
Debug("ActorConnector", "Handshake received (Or Monitor stopped)")
break
self._resp_socket.disable_monitor()
monitor.close()
self._send_recv_router_msg()

def _send_recv_router_msg(self):
# Send a request to the router and wait for a response
req_socket = self._context.socket(zmq.REQ)
req_socket.connect(router_url)
try:
Debug("ActorConnector", "Broker Check Request Sent")
req_socket.send_string("Request")
_ = req_socket.recv_string()
Debug("ActorConnector", "Broker Check Response Received")
finally:
req_socket.close()

def send_txt_msg(self, msg):
self._sender.send_txt_msg(msg)

def send_bin_msg(self, msg_type: str, msg):
self._sender.send_bin_msg(msg_type, msg)

def binary_request(self, msg_type: str, msg, retry=5):
original_timeout: int = 0
if retry == -1:
original_timeout = self._resp_socket.getsockopt(zmq.RCVTIMEO)
self._resp_socket.setsockopt(zmq.RCVTIMEO, 1000)

try:
self._sender.send_bin_request_msg(msg_type, msg, self._resp_topic)
while retry == -1 or retry > 0:
try:
topic, resp_msg_type, _, resp = self._resp_socket.recv_multipart()
return topic, resp_msg_type, resp
except zmq.Again:
Debug(
"ActorConnector", f"{self._topic}: No response received. retry_count={retry}, max_retry={retry}"
)
time.sleep(0.01)
if retry != -1:
retry -= 1
finally:
if retry == -1:
self._resp_socket.setsockopt(zmq.RCVTIMEO, original_timeout)

Error("ActorConnector", f"{self._topic}: No response received. Giving up.")
return None, None, None

def close(self):
self._sender.close()
self._resp_socket.close()
11 changes: 9 additions & 2 deletions samples/apps/cap/py/autogencap/Broker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def __init__(self, context: zmq.Context = zmq.Context()):
self._xpub: zmq.Socket = None
self._xsub: zmq.Socket = None
self._router: zmq.Socket = None
self._start_event = threading.Event()

def _init_sockets(self):
try:
Expand Down Expand Up @@ -44,8 +45,9 @@ def start(self) -> bool:
self._run = True
self._broker_thread: threading.Thread = threading.Thread(target=self.thread_fn)
self._broker_thread.start()
time.sleep(0.01)
return True
self._start_event.wait()
# this will be false if the thread is not running
return self._run

def stop(self):
if not self._run:
Expand All @@ -67,6 +69,7 @@ def thread_fn(self):
if not self._init_sockets():
Debug("BROKER", "Receive thread not started since sockets were not initialized")
self._run = False
self._start_event.set()
return

# Poll sockets for events
Expand All @@ -76,6 +79,8 @@ def thread_fn(self):
self._poller.register(self._router, zmq.POLLIN)

Info("BROKER", "Started. Waiting for events")
# signal to the main thread that Broker has started
self._start_event.set()
# Receive msgs, forward and process
while self._run:
events = dict(self._poller.poll(500))
Expand Down Expand Up @@ -131,6 +136,8 @@ def main():
Info("BROKER", "Running.")
last_time = current_time
try:
# Hang out for a while and print out
# status every now and then
time.sleep(0.5)
except KeyboardInterrupt:
Info("BROKER", "KeyboardInterrupt. Stopping the broker.")
Expand Down
Loading

0 comments on commit db30ec8

Please sign in to comment.