commit 6b59bbeb41743e66e06d593201b33c66c61e5b3d Author: Aykhan Shahsuvarov Date: Thu Jul 11 02:32:31 2024 +0400 first commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..d748da1 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# Implementation of the Wikimedia Project in Golang + +This project provides a Golang implementation of the Wikimedia project as discussed by [Stephane Maarek](https://github.com/simplesteph) in his Kafka beginners course. The original Java consumer and producer repositories can be found here: + +- [Consumer](https://github.com/conduktor/kafka-beginners-course/tree/main/kafka-consumer-opensearch) +- [Producer](https://github.com/conduktor/kafka-beginners-course/tree/main/kafka-producer-wikimedia) + +## Prerequisites + +Before you begin, ensure the following software is installed on your system: + +- [Docker](https://docs.docker.com/engine/install/) +- [Apache Kafka](https://kafka.apache.org/downloads) +- [Golang](https://go.dev/doc/install) + +## Running the Project + +### 1. Start Kafka in Kraft Mode + +Start Kafka by executing the following command in your terminal: + +```bash +docker-compose -f docker-compose-kafka-kraft.yml up --build +``` + +### 2. Start OpenSearch + +Open a new terminal window and execute: + +```bash +docker-compose -f docker-compose-opensearch.yml up --build +``` + +### 3. Create Kafka Topic + +Once Kafka is up, create the Kafka topic with the following command: + +```bash +kafka-topics.sh --bootstrap-server localhost:9092 --topic wikimedia.recentchange --create --partitions 3 --replication-factor 2 +``` + +### 4. Start Wikimedia Kafka Producer + +Navigate to the `producer-wikimedia` directory in a new terminal window and run: + +```bash +cd producer-wikimedia +go run ./producer.go +``` + +### 5. Start OpenSearch Kafka Consumer + +Navigate to the `consumer-opensearch` directory in another terminal window and run: + +```bash +cd consumer-opensearch +go run ./consumer.go +``` + +## Viewing Consumed Data in OpenSearch Dashboard + +To view the consumed data in the OpenSearch dashboard, go to [OpenSearch Dev Tools](http://localhost:5601/app/dev_tools#/console). + +- **Username:** admin +- **Password:** Toor1234_ + +### Retrieve Consumed Data + +Execute the following command to retrieve data consumed and sent to OpenSearch by the Kafka consumer: + +```bash +GET /wikimedia/_search +``` \ No newline at end of file diff --git a/consumer-opensearch/consumer.go b/consumer-opensearch/consumer.go new file mode 100644 index 0000000..69a2760 --- /dev/null +++ b/consumer-opensearch/consumer.go @@ -0,0 +1,253 @@ +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "log" + "net/http" + "os" + "os/signal" + "strings" + "sync" + "syscall" + + "github.com/IBM/sarama" + "github.com/opensearch-project/opensearch-go" +) + +var ( + kafkaBootstrapServers = []string{"localhost:9092"} + kafkaTopic = "wikimedia.recentchange" + opensearchAddresses = []string{"https://localhost:9200"} + opensearchUsername = "admin" + opensearchPassword = "Toor1234_" + opensearchIndex = "wikimedia" +) + +type OpensearchMessage struct { + Message []byte + ID string +} + +type WikiData struct { + Schema string `json:"$schema"` + ID int `json:"id"` + Type string `json:"type"` + Namespace int `json:"namespace"` + Title string `json:"title"` + TitleURL string `json:"title_url"` + Comment string `json:"comment"` + Timestamp int `json:"timestamp"` + User string `json:"user"` + Bot bool `json:"bot"` + NotifyURL string `json:"notify_url"` + Minor bool `json:"minor"` + Patrolled bool `json:"patrolled"` + ServerURL string `json:"server_url"` + ServerName string `json:"server_name"` + ServerScriptPath string `json:"server_script_path"` + Wiki string `json:"wiki"` + ParsedComment string `json:"parsedcomment"` + Meta struct { + URI string `json:"uri"` + RequestID string `json:"request_id"` + ID string `json:"id"` + DT string `json:"dt"` + Domain string `json:"domain"` + Stream string `json:"stream"` + Topic string `json:"topic"` + Partition int `json:"partition"` + Offset int `json:"offset"` + } `json:"meta"` + Length struct { + Old int `json:"old"` + New int `json:"new"` + } `json:"length"` + Revision struct { + Old int `json:"old"` + New int `json:"new"` + } `json:"revision"` +} + +// IMessageHandler represents an interface for handling messages in the Kafka consumer. +type IMessageHandler interface { + // Setup performs any necessary setup tasks before starting message processing to Opensearch. + Setup() + + // Cleanup performs any necessary cleanup tasks after message consumption is complete. + Cleanup() + + // OnMessage is called for each incoming message from Kafka. + // It takes a single parameter, `messages`, which represents the Opensearch message to be processed. + OnMessage(messages OpensearchMessage) +} + +type opensearchHandler struct { + client *opensearch.Client +} + +// Setup initializes the OpenSearch client and creates the necessary index if it doesn't exist. +func (h *opensearchHandler) Setup() { + log.Println("Setting up OpenSearch client") + var err error + client := newOpensearchClient() + _, err = client.Info() + if err != nil { + log.Fatal(err) + } + + response, err := client.Indices.Exists([]string{opensearchIndex}) + if err != nil { + log.Fatal(err) + } + response.Body.Close() + if response.StatusCode == 404 { + response, err = client.Indices.Create(opensearchIndex) + if err != nil { + log.Fatal(err) + } + response.Body.Close() + } + h.client = client +} + +func (h *opensearchHandler) Cleanup() { + log.Println("Closing OpenSearch client") +} + +// OnMessage is a method that handles incoming Opensearch messages. +// It indexes the message content into Opensearch and logs the response status code. +func (h *opensearchHandler) OnMessage(messages OpensearchMessage) { + response, err := h.client.Index( + opensearchIndex, + strings.NewReader(string(messages.Message)), + h.client.Index.WithDocumentID(messages.ID), + ) + if err != nil { + log.Fatal(err) + } + log.Println("Index document:", response.StatusCode) + response.Body.Close() +} + +// newOpensearchClient creates a new instance of the opensearch.Client. +// It configures the client with the provided opensearch addresses, username, and password. +// It also sets up a custom transport with TLS configuration to skip certificate verification. +// If any error occurs during the creation of the client, it logs the error and exits the program. +func newOpensearchClient() *opensearch.Client { + client, err := opensearch.NewClient(opensearch.Config{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + Addresses: opensearchAddresses, + Username: opensearchUsername, + Password: opensearchPassword, + }) + if err != nil { + log.Fatal(err) + } + return client +} + +type kafkaConsumerGroupHandler struct { + messageHandler IMessageHandler +} + +// Setup initializes the consumer group handler. +// It sets up any necessary resources or configurations required for the handler to function properly. +// This method is called by the Sarama library when a new consumer group session is started. +// It returns an error if there was a problem setting up the handler. +func (h *kafkaConsumerGroupHandler) Setup(sarama.ConsumerGroupSession) error { + // h.messageHandler.Setup() + return nil +} + +// Cleanup is called when the consumer group session is ending. +// It is responsible for cleaning up any resources used by the consumer group handler. +func (h *kafkaConsumerGroupHandler) Cleanup(sarama.ConsumerGroupSession) error { + // h.messageHandler.Cleanup() + return nil +} + +// ConsumeClaim consumes messages from a Kafka consumer group claim. +// It processes each message by unmarshaling it into a WikiData struct, +// calling the message handler's OnMessage method with the OpensearchMessage, +// marking the message as processed, and committing the session. +// If there is an error during unmarshaling, it returns the error. +// It returns nil if all messages are consumed successfully. +func (h *kafkaConsumerGroupHandler) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error { + for message := range claim.Messages() { + wikiData := &WikiData{} + err := json.Unmarshal(message.Value, wikiData) + if err != nil { + return err + } + h.messageHandler.OnMessage(OpensearchMessage{Message: message.Value, ID: wikiData.Meta.ID}) + sess.MarkMessage(message, "") + sess.Commit() + } + return nil +} + +// Consume consumes messages from a Kafka topic using a consumer group. +// It takes an IMessageHandler as a parameter to handle the consumed messages. +// The function creates a new consumer group with the specified group ID and configuration. +// It then starts consuming messages from the Kafka topic using the specified message handler. +// The function blocks until the consumer is closed or an error occurs. +func Consume(messageHandler IMessageHandler) { + groupID := "consumer-opensearch-demo" + + config := sarama.NewConfig() + config.Consumer.Offsets.Initial = sarama.OffsetNewest + config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategySticky()} + config.Consumer.Offsets.AutoCommit.Enable = false + // config.Consumer.Offsets.AutoCommit.Interval = time.Millisecond * 5000 + // config.RackID = "rack1" + + consumer, err := sarama.NewConsumerGroup(kafkaBootstrapServers, groupID, config) + if err != nil { + log.Fatalf("Error creating consumer group: %v", err) + } + defer func() { + if err := consumer.Close(); err != nil { + log.Fatalf("Error closing consumer group: %v", err) + } + }() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle SIGINT and SIGTERM signals to gracefully shut down the consumer + go func() { + sigterm := make(chan os.Signal, 1) + signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM) + <-sigterm + cancel() + }() + + handler := kafkaConsumerGroupHandler{messageHandler: messageHandler} + handler.messageHandler.Setup() + defer handler.messageHandler.Cleanup() + wg := &sync.WaitGroup{} + wg.Add(1) + + go func() { + defer wg.Done() + for { + if err := consumer.Consume(ctx, []string{kafkaTopic}, &handler); err != nil { + log.Fatalf("Error consuming messages: %v", err) + } + if ctx.Err() != nil { + log.Println("Consumer closed") + return + } + } + }() + + wg.Wait() +} + +func main() { + Consume(&opensearchHandler{}) +} diff --git a/consumer-opensearch/go.mod b/consumer-opensearch/go.mod new file mode 100644 index 0000000..ea96048 --- /dev/null +++ b/consumer-opensearch/go.mod @@ -0,0 +1,29 @@ +module consumer-opensearch + +go 1.22.3 + +require ( + github.com/IBM/sarama v1.43.2 + github.com/opensearch-project/opensearch-go v1.1.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/eapache/go-resiliency v1.6.0 // indirect + github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect + github.com/eapache/queue v1.1.0 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/go-uuid v1.0.3 // indirect + github.com/jcmturner/aescts/v2 v2.0.0 // indirect + github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect + github.com/jcmturner/gofork v1.7.6 // indirect + github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect + github.com/jcmturner/rpc/v2 v2.0.3 // indirect + github.com/klauspost/compress v1.17.8 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect + golang.org/x/crypto v0.22.0 // indirect + golang.org/x/net v0.24.0 // indirect +) diff --git a/consumer-opensearch/go.sum b/consumer-opensearch/go.sum new file mode 100644 index 0000000..54ae6f8 --- /dev/null +++ b/consumer-opensearch/go.sum @@ -0,0 +1,105 @@ +github.com/IBM/sarama v1.43.2 h1:HABeEqRUh32z8yzY2hGB/j8mHSzC/HA9zlEjqFNCzSw= +github.com/IBM/sarama v1.43.2/go.mod h1:Kyo4WkF24Z+1nz7xeVUFWIuKVV8RS3wM8mkvPKMdXFQ= +github.com/aws/aws-sdk-go v1.42.27/go.mod h1:OGr6lGMAKGlG9CVrYnWYDKIyb829c6EVBRjxqjmPepc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/eapache/go-resiliency v1.6.0 h1:CqGDTLtpwuWKn6Nj3uNUdflaq+/kIPsg0gfNzHton30= +github.com/eapache/go-resiliency v1.6.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 h1:Oy0F4ALJ04o5Qqpdz8XLIpNA3WM/iSIXqxtqo7UGVws= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3/go.mod h1:YvSRo5mw33fLEx1+DlK6L2VV43tJt5Eyel9n9XBcR+0= +github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= +github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/opensearch-project/opensearch-go v1.1.0 h1:eG5sh3843bbU1itPRjA9QXbxcg8LaZ+DjEzQH9aLN3M= +github.com/opensearch-project/opensearch-go v1.1.0/go.mod h1:+6/XHCuTH+fwsMJikZEWsucZ4eZMma3zNSeLrTtVGbo= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/docker-compose-kafka-kraft.yml b/docker-compose-kafka-kraft.yml new file mode 100644 index 0000000..9b11fef --- /dev/null +++ b/docker-compose-kafka-kraft.yml @@ -0,0 +1,43 @@ +services: + kafka-1: + image: confluentinc/cp-kafka:latest + container_name: kafka-1 + hostname: kafka-1 + ports: + - "9092:9092" + environment: + KAFKA_NODE_ID: 1 + KAFKA_PROCESS_ROLES: broker,controller + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_LISTENERS: PLAINTEXT://kafka-1:29092,PLAINTEXT_HOST://0.0.0.0:9092,CONTROLLER://kafka-1:29093 + KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-1:29093,2@kafka-2:29093 + KAFKA_LOG_DIRS: /var/lib/kafka/data + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 2 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + CLUSTER_ID: NeEcW4ddRIWWZOxkgFaC4w + # https://docs.confluent.io/platform/current/multi-dc-deployments/multi-region.html + # KAFKA_REPLICA_SELECTOR_CLASS: org.apache.kafka.common.replica.RackAwareReplicaSelector + # KAFKA_BROKER_RACK: rack-1 + + kafka-2: + image: confluentinc/cp-kafka:latest + container_name: kafka-2 + hostname: kafka-2 + ports: + - "9093:9092" + environment: + KAFKA_NODE_ID: 2 + KAFKA_PROCESS_ROLES: broker,controller + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:29092,PLAINTEXT_HOST://localhost:9093 + KAFKA_LISTENERS: PLAINTEXT://kafka-2:29092,PLAINTEXT_HOST://0.0.0.0:9092,CONTROLLER://kafka-2:29093 + KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-1:29093,2@kafka-2:29093 + KAFKA_LOG_DIRS: /var/lib/kafka/data + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 2 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + CLUSTER_ID: NeEcW4ddRIWWZOxkgFaC4w diff --git a/docker-compose-opensearch.yml b/docker-compose-opensearch.yml new file mode 100644 index 0000000..e300ff4 --- /dev/null +++ b/docker-compose-opensearch.yml @@ -0,0 +1,71 @@ +--- +services: + opensearch-node1: + image: opensearchproject/opensearch:latest + container_name: opensearch-node1 + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-node1 + - discovery.seed_hosts=opensearch-node1,opensearch-node2 + - cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2 + - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m # minimum and maximum Java heap size, recommend setting both to 50% of system RAM + - compatibility.override_main_response_version=true + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=Toor1234_ # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and higher + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems + hard: 65536 + volumes: + - opensearch-data1:/usr/share/opensearch/data + ports: + - 9200:9200 + - 9600:9600 # required for Performance Analyzer + networks: + - opensearch-net + + opensearch-node2: + image: opensearchproject/opensearch:latest + container_name: opensearch-node2 + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-node2 + - discovery.seed_hosts=opensearch-node1,opensearch-node2 + - cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2 + - bootstrap.memory_lock=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m + - compatibility.override_main_response_version=true + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=Toor1234_ + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-data2:/usr/share/opensearch/data + networks: + - opensearch-net + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:latest + container_name: opensearch-dashboards + ports: + - 5601:5601 + expose: + - '5601' + environment: + OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200"]' + networks: + - opensearch-net + +volumes: + opensearch-data1: + opensearch-data2: + +networks: + opensearch-net: diff --git a/producer-wikimedia/go.mod b/producer-wikimedia/go.mod new file mode 100644 index 0000000..ad7cac4 --- /dev/null +++ b/producer-wikimedia/go.mod @@ -0,0 +1,26 @@ +module producer-wikimedia + +go 1.22.3 + +require github.com/IBM/sarama v1.43.2 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/eapache/go-resiliency v1.6.0 // indirect + github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect + github.com/eapache/queue v1.1.0 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/go-uuid v1.0.3 // indirect + github.com/jcmturner/aescts/v2 v2.0.0 // indirect + github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect + github.com/jcmturner/gofork v1.7.6 // indirect + github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect + github.com/jcmturner/rpc/v2 v2.0.3 // indirect + github.com/klauspost/compress v1.17.8 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect + golang.org/x/crypto v0.22.0 // indirect + golang.org/x/net v0.24.0 // indirect +) diff --git a/producer-wikimedia/go.sum b/producer-wikimedia/go.sum new file mode 100644 index 0000000..636b5b3 --- /dev/null +++ b/producer-wikimedia/go.sum @@ -0,0 +1,94 @@ +github.com/IBM/sarama v1.43.2 h1:HABeEqRUh32z8yzY2hGB/j8mHSzC/HA9zlEjqFNCzSw= +github.com/IBM/sarama v1.43.2/go.mod h1:Kyo4WkF24Z+1nz7xeVUFWIuKVV8RS3wM8mkvPKMdXFQ= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/eapache/go-resiliency v1.6.0 h1:CqGDTLtpwuWKn6Nj3uNUdflaq+/kIPsg0gfNzHton30= +github.com/eapache/go-resiliency v1.6.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 h1:Oy0F4ALJ04o5Qqpdz8XLIpNA3WM/iSIXqxtqo7UGVws= +github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3/go.mod h1:YvSRo5mw33fLEx1+DlK6L2VV43tJt5Eyel9n9XBcR+0= +github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= +github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM= +github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/producer-wikimedia/producer.go b/producer-wikimedia/producer.go new file mode 100644 index 0000000..7b568e5 --- /dev/null +++ b/producer-wikimedia/producer.go @@ -0,0 +1,227 @@ +package main + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "os/signal" + "strings" + "sync" + "syscall" + "time" + + "github.com/IBM/sarama" +) + +var ( + kafkaBootstrapServers = []string{"localhost:9092"} + kafkaTopic = "wikimedia.recentchange" + wikimediaStreamURL = "https://stream.wikimedia.org/v2/stream/recentchange" +) + +// WikiData represents the structure of the data received from Wikimedia. +type WikiData struct { + Schema string `json:"$schema"` + ID int `json:"id"` + Type string `json:"type"` + Namespace int `json:"namespace"` + Title string `json:"title"` + TitleURL string `json:"title_url"` + Comment string `json:"comment"` + Timestamp int `json:"timestamp"` + User string `json:"user"` + Bot bool `json:"bot"` + NotifyURL string `json:"notify_url"` + Minor bool `json:"minor"` + Patrolled bool `json:"patrolled"` + ServerURL string `json:"server_url"` + ServerName string `json:"server_name"` + ServerScriptPath string `json:"server_script_path"` + Wiki string `json:"wiki"` + ParsedComment string `json:"parsedcomment"` + Meta struct { + URI string `json:"uri"` + RequestID string `json:"request_id"` + ID string `json:"id"` + DT string `json:"dt"` + Domain string `json:"domain"` + Stream string `json:"stream"` + Topic string `json:"topic"` + Partition int `json:"partition"` + Offset int `json:"offset"` + } `json:"meta"` + Length struct { + Old int `json:"old"` + New int `json:"new"` + } `json:"length"` + Revision struct { + Old int `json:"old"` + New int `json:"new"` + } `json:"revision"` +} + +func (wikiData *WikiData) Marshal() []byte { + data, _ := json.Marshal(wikiData) + return data +} + +// IMessageHandler is an interface that defines the methods for handling messages. +type IMessageHandler interface { + // Setup is called to set up any necessary resources before starting message handling. + Setup() + + // Cleanup is called to clean up any resources after message handling is complete. + Cleanup() + + // OnMessage is called when a new message is received. + // It takes a pointer to a WikiData object as a parameter. + OnMessage(wikiData *WikiData) +} + +type kafkaMessageHandler struct { + producer sarama.SyncProducer +} + +// Setup initializes the Kafka message handler by creating a new producer. +func (h *kafkaMessageHandler) Setup() { + h.producer = newProducer() +} + +// Cleanup closes the Kafka producer and performs any necessary cleanup operations. +func (h *kafkaMessageHandler) Cleanup() { + if err := h.producer.Close(); err != nil { + log.Fatalf("Failed to close Kafka producer: %v", err) + } +} + +// OnMessage is a method that handles incoming WikiData messages. +// It sends the message to a Kafka topic and logs the result. +func (h *kafkaMessageHandler) OnMessage(wikiData *WikiData) { + message := &sarama.ProducerMessage{ + Topic: kafkaTopic, + Value: sarama.StringEncoder(wikiData.Marshal()), + } + partition, offset, err := h.producer.SendMessage(message) + if err != nil { + log.Fatalf("Failed to send message: %v", err) + } + log.Printf("Message is stored in: topic(%s) - partition(%d) - offset(%d)\n", kafkaTopic, partition, offset) +} + +// newProducer creates a new instance of a Kafka producer with the specified configuration. +// It returns a sarama.SyncProducer that can be used to send messages to Kafka topics. +func newProducer() sarama.SyncProducer { + config := sarama.NewConfig() + config.Producer.Return.Successes = true // enable message delivery reports + config.Producer.RequiredAcks = sarama.WaitForAll // require all in-sync replicas to acknowledge the message + config.Producer.Retry.Max = 5 // number of retries before giving up on sending a message to a partition + config.Producer.Retry.Backoff = time.Second * 60 // time to wait between retries + config.Producer.Partitioner = sarama.NewRoundRobinPartitioner // walks through the available partitions one at a time + config.Producer.Compression = sarama.CompressionSnappy // compress messages using Snappy + config.Producer.Idempotent = true // producer will ensure that messages are successfully sent and acknowledged + // linger.ms + config.Producer.Flush.Frequency = time.Millisecond * 20 // time to wait before sending a batch of messages + // batch.size + config.Producer.Flush.Bytes = 32 * 1024 // number of bytes to trigger a batch of messages + config.Net.MaxOpenRequests = 1 + + producer, err := sarama.NewSyncProducer(kafkaBootstrapServers, config) + if err != nil { + log.Fatalf("Failed to start Kafka producer: %v", err) + } + return producer +} + +// WikimediaEventHandler connects to the Wikimedia stream and handles incoming events. +// It takes an IMessageHandler as a parameter, which is responsible for setting up and cleaning up the message handling logic. +// The function reads events from the stream, parses the JSON data, and passes it to the message handler. +// It also counts the number of messages processed and prints the total count at the end. +func WikimediaEventHandler(messageHandler IMessageHandler) { + // Connect to the Wikimedia stream + resp, err := http.Get(wikimediaStreamURL) + if err != nil { + log.Fatalf("Failed to connect to SSE endpoint: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + log.Fatalf("Failed to connect to SSE endpoint: %s", resp.Status) + } + + reader := bufio.NewReader(resp.Body) + // Read the initial response from the stream to confirm the connection + line, err := reader.ReadString('\n') + if err != nil { + log.Fatalf("Failed to read from SSE endpoint: %v", err) + } + line = strings.TrimSpace(line) + if line != ":ok" { + log.Fatalf("Failed to connect to SSE endpoint: %s", line) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle SIGINT and SIGTERM signals to gracefully shut down the producer + go func() { + sigterm := make(chan os.Signal, 1) + signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM) + <-sigterm + cancel() + }() + + messageHandler.Setup() + defer messageHandler.Cleanup() + messageCnt := 0 + wg := &sync.WaitGroup{} + wg.Add(1) + + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + fmt.Println("Producer is shutting down") + return + default: + line, err := reader.ReadString('\n') + if err != nil { + log.Fatalf("Failed to read from SSE endpoint: %v", err) + } + // Trim leading and trailing whitespace from the line and ignore empty lines + line = strings.TrimSpace(line) + if len(line) == 0 { + continue + } + + wikiData := &WikiData{} + switch { + // Check for the event type and ignore any other events except "message" + case strings.HasPrefix(line, "event: "): + if line != "event: message" { + log.Fatalf("Failed to read from SSE endpoint: %s", line) + } + // Parse the JSON data and pass it to the message handler + case strings.HasPrefix(line, "data: "): + err = json.Unmarshal([]byte(line[6:]), &wikiData) + if err != nil { + log.Fatalf("Failed to unmarshal JSON: %v", err) + } + messageHandler.OnMessage(wikiData) + messageCnt++ + time.Sleep(1 * time.Second) + } + } + } + }() + wg.Wait() + fmt.Printf("Total messages: %d\n", messageCnt) +} + +func main() { + WikimediaEventHandler(&kafkaMessageHandler{}) +}