[kafka] Extended features.

- Added partition, key, offset, timestamp and headers to
  `KafkaMessageEvent`.

- Added `group_id` to the consumer configuration.

- Added `key`, `timeout` and `compression_type` to `kafka.publish`.
This commit is contained in:
Fabio Manganiello 2024-12-15 20:51:39 +01:00
parent f712537673
commit f3aa245c0e
Signed by untrusted user: blacklight
GPG key ID: D90FBA7F76362774
2 changed files with 86 additions and 15 deletions

View file

@ -1,4 +1,5 @@
from typing import Union from typing import Iterable, Optional, Union
from platypush.message.event import Event from platypush.message.event import Event
@ -15,6 +16,11 @@ class KafkaMessageEvent(Event):
topic: str, topic: str,
host: str, host: str,
port: int, port: int,
partition: int,
offset: int,
timestamp: float,
key: Optional[str] = None,
headers: Optional[Iterable] = None,
**kwargs **kwargs
): ):
""" """
@ -24,8 +30,25 @@ class KafkaMessageEvent(Event):
:param topic: Topic where the message was received. :param topic: Topic where the message was received.
:param host: Host where the message was received. :param host: Host where the message was received.
:param port: Port where the message was received. :param port: Port where the message was received.
:param partition: Partition where the message was received.
:param offset: Offset of the message.
:param timestamp: Timestamp of the message.
:param key: Optional message key.
:param headers: Optional message headers.
""" """
super().__init__(*args, msg=msg, topic=topic, host=host, port=port, **kwargs) super().__init__(
*args,
msg=msg,
topic=topic,
host=host,
port=port,
partition=partition,
offset=offset,
timestamp=timestamp,
key=key,
headers=headers,
**kwargs,
)
# vim:sw=4:ts=4:et: # vim:sw=4:ts=4:et:

View file

@ -22,6 +22,7 @@ class KafkaPlugin(RunnablePlugin):
port: int = 9092, port: int = 9092,
listeners: Optional[Iterable[dict]] = None, listeners: Optional[Iterable[dict]] = None,
connection_retry_secs: float = 5.0, connection_retry_secs: float = 5.0,
group_id: str = 'platypush',
**kwargs, **kwargs,
): ):
""" """
@ -30,6 +31,7 @@ class KafkaPlugin(RunnablePlugin):
:param port: Default Kafka server port (default: 9092). :param port: Default Kafka server port (default: 9092).
:param connection_retry_secs: Seconds to wait before retrying to :param connection_retry_secs: Seconds to wait before retrying to
connect to the Kafka server after a connection error (default: 5). connect to the Kafka server after a connection error (default: 5).
:param group_id: Default Kafka consumer group ID (default: platypush).
:param listeners: If specified, the Kafka plugin will listen for :param listeners: If specified, the Kafka plugin will listen for
messages on these topics. Use this parameter if you also want to messages on these topics. Use this parameter if you also want to
listen on other Kafka brokers other than the primary one. This listen on other Kafka brokers other than the primary one. This
@ -54,18 +56,20 @@ class KafkaPlugin(RunnablePlugin):
port: 19200 port: 19200
username: myuser username: myuser
password: secret password: secret
group_id: mygroup
compression_type: gzip
topics: topics:
- topic4 - topic4
- topic5 - topic5
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self.host = host self.host = host
self.port = port self.port = port
self.group_id = group_id
self._conn_retry_secs = connection_retry_secs self._conn_retry_secs = connection_retry_secs
self._listeners = listeners or [] self._listeners = list(self._convert_listeners(listeners))
# `server:port` -> KafkaProducer mapping # `server:port` -> KafkaProducer mapping
self.producers: Dict[str, KafkaProducer] = {} self.producers: Dict[str, KafkaProducer] = {}
@ -80,6 +84,13 @@ class KafkaPlugin(RunnablePlugin):
# Kafka can be very noisy # Kafka can be very noisy
logging.getLogger('kafka').setLevel(logging.ERROR) logging.getLogger('kafka').setLevel(logging.ERROR)
def _convert_listeners(self, listeners: Optional[Iterable[dict]]) -> Iterable[dict]:
for listener in listeners or []:
listener['host'] = listener.get('host', self.host)
listener['port'] = listener.get('port', self.port)
listener['topics'] = listener.get('topics', [])
yield listener
def _get_srv_str( def _get_srv_str(
self, host: Optional[str] = None, port: Optional[int] = None self, host: Optional[str] = None, port: Optional[int] = None
) -> str: ) -> str:
@ -104,7 +115,10 @@ class KafkaPlugin(RunnablePlugin):
return self.producers[srv_str] return self.producers[srv_str]
def _get_consumer( def _get_consumer(
self, host: Optional[str] = None, port: Optional[int] = None, **kwargs self,
host: Optional[str] = None,
port: Optional[int] = None,
**kwargs,
): ):
srv_str = self._get_srv_str(host, port) srv_str = self._get_srv_str(host, port)
with self._consumers_locks[srv_str]: with self._consumers_locks[srv_str]:
@ -112,6 +126,7 @@ class KafkaPlugin(RunnablePlugin):
self.consumers[srv_str] = KafkaConsumer( self.consumers[srv_str] = KafkaConsumer(
bootstrap_servers=srv_str, **kwargs bootstrap_servers=srv_str, **kwargs
) )
return self.consumers[srv_str] return self.consumers[srv_str]
def _on_msg(self, record, host: str, port: int): def _on_msg(self, record, host: str, port: int):
@ -125,8 +140,24 @@ class KafkaPlugin(RunnablePlugin):
except (TypeError, ValueError): except (TypeError, ValueError):
pass pass
try:
key = record.key.decode()
except Exception:
key = record.key
self._bus.post( self._bus.post(
KafkaMessageEvent(msg=msg, topic=record.topic, host=host, port=port) KafkaMessageEvent(
msg=msg,
topic=record.topic,
host=host,
port=port,
partition=record.partition,
offset=record.offset,
timestamp=float(record.timestamp / 1000) if record.timestamp else None,
key=key,
headers=record.headers,
size=record.serialized_value_size,
)
) )
def _consumer_monitor(self, consumer: KafkaConsumer, host: str, port: int): def _consumer_monitor(self, consumer: KafkaConsumer, host: str, port: int):
@ -149,10 +180,23 @@ class KafkaPlugin(RunnablePlugin):
self.wait_stop(self._conn_retry_secs) self.wait_stop(self._conn_retry_secs)
@action @action
def publish(self, msg: Union[str, list, dict, tuple, bytes], topic: str, **kwargs): def publish(
self,
msg: Union[str, list, dict, tuple, bytes],
topic: str,
key: Optional[str] = None,
timeout: Optional[float] = 60.0,
compression_type: Optional[str] = None,
**kwargs,
):
""" """
:param msg: Message to send. :param msg: Message to send.
:param topic: Topic to send the message to. :param topic: Topic to send the message to.
:param key: For hashed-based partitioning, the key to use to determine
the partition.
:param timeout: Timeout in seconds for the message to be sent.
:param compression_type: Compression type to use for the message (e.g.
``gzip``).
:param kwargs: Additional arguments to pass to the KafkaConsumer, :param kwargs: Additional arguments to pass to the KafkaConsumer,
including ``host`` and ``port``. including ``host`` and ``port``.
""" """
@ -163,9 +207,10 @@ class KafkaPlugin(RunnablePlugin):
if not isinstance(msg, bytes): if not isinstance(msg, bytes):
msg = str(msg).encode() msg = str(msg).encode()
producer = self._get_producer(**kwargs) encoded_key = key.encode() if key else None
producer.send(topic, msg) producer = self._get_producer(compression_type=compression_type, **kwargs)
producer.flush() future = producer.send(topic, key=encoded_key, value=msg)
future.get(timeout=timeout)
@action @action
def send_message( def send_message(
@ -177,14 +222,17 @@ class KafkaPlugin(RunnablePlugin):
return self.send_message(msg=msg, topic=topic, **kwargs) return self.send_message(msg=msg, topic=topic, **kwargs)
@action @action
def subscribe(self, topic: str, **kwargs): def subscribe(self, topic: str, group_id: Optional[str] = None, **kwargs):
""" """
Subscribe to a topic. Subscribe to a topic.
:param topic: Topic to subscribe to. :param topic: Topic to subscribe to.
:param group_id: Group ID to use for the consumer. If None, then the
group ID from the plugin configuration will be used.
:param kwargs: Additional arguments to pass to the KafkaConsumer, :param kwargs: Additional arguments to pass to the KafkaConsumer,
including ``host`` and ``port``. including ``host`` and ``port``.
""" """
kwargs['group_id'] = kwargs.get('group_id', group_id or self.group_id)
consumer = self._get_consumer(**kwargs) consumer = self._get_consumer(**kwargs)
consumer.subscribe([topic]) consumer.subscribe([topic])
@ -201,16 +249,16 @@ class KafkaPlugin(RunnablePlugin):
def main(self): def main(self):
for listener in self._listeners: for listener in self._listeners:
host = listener.get('host', self.host) host = listener['host']
port = listener.get('port', self.port) port = listener['port']
topics = listener.get('topics') topics = listener['topics']
if not topics: if not topics:
continue continue
consumer = self._get_consumer( consumer = self._get_consumer(
host=host, host=host,
port=port, port=port,
group_id='platypush', group_id=self.group_id,
auto_offset_reset='earliest', auto_offset_reset='earliest',
) )