mirror of
https://github.com/aykhans/bsky-feedgen.git
synced 2025-07-17 13:24:01 +00:00
🦋
This commit is contained in:
17
cmd/consumer/Dockerfile
Normal file
17
cmd/consumer/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
# Build stage: compile a statically linked consumer binary.
FROM golang:1.24-alpine AS builder

WORKDIR /src

# Copy the module files first and fetch dependencies in their own layer, so
# the download is only repeated when go.mod or go.sum change.
COPY go.mod go.sum ./
RUN go mod download

# Source paths are relative to the build context, which is the repository root
# (go.mod and go.sum are copied from it above). COPY cannot reach outside the
# context, so the paths must not start with "../".
COPY pkg ./pkg
COPY cmd/consumer ./cmd/consumer

# CGO is disabled so the binary has no libc dependency and can run on the
# distroless static image; -s -w strips symbol and debug information.
RUN CGO_ENABLED=0 go build -ldflags "-s -w" -o consumer ./cmd/consumer/main.go

# Runtime stage: minimal image containing only the compiled binary.
FROM gcr.io/distroless/static-debian12:latest

WORKDIR /app

COPY --from=builder /src/consumer .

ENTRYPOINT ["/app/consumer"]
|
40
cmd/consumer/README.md
Normal file
40
cmd/consumer/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Consumer Service
|
||||
|
||||
## Overview
|
||||
|
||||
The Consumer service is responsible for connecting to the Bluesky firehose, processing incoming posts, and storing them in MongoDB for later use by the feed generator.
|
||||
|
||||
**Pre-Built Docker Image**: `git.aykhans.me/bsky/feedgen-consumer:latest`
|
||||
|
||||
## Features
|
||||
|
||||
- Connects to the Bluesky firehose websocket
|
||||
- Processes and filters incoming posts
|
||||
- Stores relevant post data in MongoDB
|
||||
- Includes data management via cron jobs
|
||||
- Implements collection size limits
|
||||
- Prunes older data to prevent storage issues
|
||||
|
||||
## Command Line Options
|
||||
|
||||
- `-cursor`: Specify the starting point for data consumption
|
||||
  - `last-consumed`: Resume from the last processed data (default)
  - `first-stream`: Start from the beginning of the firehose
  - `current-stream`: Start from the current position in the firehose
|
||||
|
||||
## Running the Service
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
docker build -f cmd/consumer/Dockerfile -t bsky-feedgen-consumer .
|
||||
docker run --env-file config/app/.consumer.env --env-file config/app/.mongodb.env bsky-feedgen-consumer
|
||||
```
|
||||
|
||||
### Local Development
|
||||
|
||||
```bash
|
||||
task run-consumer
|
||||
# or
|
||||
make run-consumer
|
||||
```
|
122
cmd/consumer/main.go
Normal file
122
cmd/consumer/main.go
Normal file
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/aykhans/bsky-feedgen/pkg/consumer"
|
||||
"github.com/aykhans/bsky-feedgen/pkg/types"
|
||||
|
||||
"github.com/aykhans/bsky-feedgen/pkg/config"
|
||||
"github.com/aykhans/bsky-feedgen/pkg/logger"
|
||||
"github.com/aykhans/bsky-feedgen/pkg/storage/mongodb"
|
||||
"github.com/aykhans/bsky-feedgen/pkg/storage/mongodb/collections"
|
||||
)
|
||||
|
||||
func main() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
go listenForTermination(func() { cancel() })
|
||||
|
||||
flag.Usage = func() {
|
||||
fmt.Println(
|
||||
`Usage:
|
||||
|
||||
consumer [flags]
|
||||
|
||||
Flags:
|
||||
-h, -help Display this help message
|
||||
-cursor string Specify the starting point for data consumption (default: last-consumed)
|
||||
Options:
|
||||
last-consumed: Resume from the last processed data in storage
|
||||
first-stream: Start from the beginning of the firehose
|
||||
current-stream: Start from the current position in the firehose stream`)
|
||||
}
|
||||
|
||||
var cursorOption types.ConsumerCursor
|
||||
flag.Var(&cursorOption, "cursor", "")
|
||||
flag.Parse()
|
||||
|
||||
if args := flag.Args(); len(args) > 0 {
|
||||
if len(args) == 1 {
|
||||
fmt.Printf("unexpected argument: %s\n\n", args[0])
|
||||
} else {
|
||||
fmt.Printf("unexpected arguments: %v\n\n", strings.Join(args, ", "))
|
||||
}
|
||||
flag.CommandLine.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if cursorOption == "" {
|
||||
_ = cursorOption.Set("")
|
||||
}
|
||||
|
||||
consumerConfig, errMap := config.NewConsumerConfig()
|
||||
if errMap != nil {
|
||||
logger.Log.Error("consumer ENV error", "error", errMap.ToStringMap())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
mongoDBConfig, errMap := config.NewMongoDBConfig()
|
||||
if errMap != nil {
|
||||
logger.Log.Error("mongodb ENV error", "error", errMap.ToStringMap())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
client, err := mongodb.NewDB(ctx, mongoDBConfig)
|
||||
if err != nil {
|
||||
logger.Log.Error("mongodb connection error", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
postCollection, err := collections.NewPostCollection(client)
|
||||
if err != nil {
|
||||
logger.Log.Error(err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
startCrons(ctx, consumerConfig, postCollection)
|
||||
logger.Log.Info("Cron jobs started")
|
||||
|
||||
err = consumer.ConsumeAndSaveToMongoDB(
|
||||
ctx,
|
||||
postCollection,
|
||||
"wss://bsky.network",
|
||||
cursorOption,
|
||||
consumerConfig.PostMaxDate, // Save only posts created before PostMaxDate
|
||||
10*time.Second, // Save consumed data to MongoDB every 10 seconds
|
||||
)
|
||||
if err != nil {
|
||||
logger.Log.Error(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func startCrons(ctx context.Context, consumerConfig *config.ConsumerConfig, postCollection *collections.PostCollection) {
|
||||
// Post collection cutoff
|
||||
go func() {
|
||||
for {
|
||||
startTime := time.Now()
|
||||
deleteCount, err := postCollection.CutoffByCount(ctx, consumerConfig.PostCollectionCutoffCronMaxDocument)
|
||||
if err != nil {
|
||||
logger.Log.Error("Post collection cutoff cron error", "error", err)
|
||||
}
|
||||
elapsedTime := time.Since(startTime)
|
||||
logger.Log.Info("Post collection cutoff cron completed", "count", deleteCount, "time", elapsedTime)
|
||||
|
||||
time.Sleep(consumerConfig.PostCollectionCutoffCronDelay)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// listenForTermination blocks until the process receives SIGINT or SIGTERM
// and then invokes do exactly once. It is meant to be run in its own
// goroutine.
func listenForTermination(do func()) {
	// Buffer of one so a signal delivered before the receive below is not lost.
	termination := make(chan os.Signal, 1)
	signal.Notify(
		termination,
		syscall.SIGINT,
		syscall.SIGTERM,
	)

	// Wait for the first termination signal, then hand control to the caller.
	<-termination
	do()
}
|
Reference in New Issue
Block a user