mirror of
https://github.com/aykhans/bsky-feedgen.git
synced 2025-06-06 20:49:02 +00:00
Compare commits
No commits in common. "bcd721e071134d0e161c00517b9498e0bdccb2c4" and "c4bd6affa6a2d06b8c9a34ff3a3d5d9ebdeb61d1" have entirely different histories.
bcd721e071
...
c4bd6affa6
@ -10,7 +10,7 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
feedgenAz "github.com/aykhans/bsky-feedgen/pkg/generator/az"
|
"github.com/aykhans/bsky-feedgen/pkg/generator"
|
||||||
"github.com/aykhans/bsky-feedgen/pkg/types"
|
"github.com/aykhans/bsky-feedgen/pkg/types"
|
||||||
|
|
||||||
"github.com/aykhans/bsky-feedgen/pkg/config"
|
"github.com/aykhans/bsky-feedgen/pkg/config"
|
||||||
@ -87,7 +87,7 @@ Flags:
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
feedGeneratorAz := feedgenAz.NewGenerator(postCollection, feedAzCollection)
|
feedGeneratorAz := generator.NewFeedGeneratorAz(postCollection, feedAzCollection)
|
||||||
|
|
||||||
startCrons(ctx, feedGenAzConfig, feedGeneratorAz, feedAzCollection, cursorOption)
|
startCrons(ctx, feedGenAzConfig, feedGeneratorAz, feedAzCollection, cursorOption)
|
||||||
logger.Log.Info("Cron jobs started")
|
logger.Log.Info("Cron jobs started")
|
||||||
@ -98,7 +98,7 @@ Flags:
|
|||||||
func startCrons(
|
func startCrons(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
feedGenAzConfig *config.FeedGenAzConfig,
|
feedGenAzConfig *config.FeedGenAzConfig,
|
||||||
feedGeneratorAz *feedgenAz.Generator,
|
feedGeneratorAz *generator.FeedGeneratorAz,
|
||||||
feedAzCollection *collections.FeedAzCollection,
|
feedAzCollection *collections.FeedAzCollection,
|
||||||
cursorOption types.GeneratorCursor,
|
cursorOption types.GeneratorCursor,
|
||||||
) {
|
) {
|
||||||
|
@ -1,74 +0,0 @@
|
|||||||
x-common-mongodb-environment: &common-mongodb-environment
|
|
||||||
MONGODB_HOST: mongodb
|
|
||||||
MONGODB_PORT: 27017
|
|
||||||
MONGODB_USERNAME: aykhan
|
|
||||||
MONGODB_PASSWORD: ch7278832gf99010hgbddewd2y28982v3dvbv28vv2dv2d2gbvby2
|
|
||||||
|
|
||||||
services:
|
|
||||||
mongodb:
|
|
||||||
image: mongo
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- 27017:27017
|
|
||||||
volumes:
|
|
||||||
- mongodb_data:/data/db
|
|
||||||
environment:
|
|
||||||
MONGO_INITDB_ROOT_USERNAME: aykhan
|
|
||||||
MONGO_INITDB_ROOT_PASSWORD: ch7278832gf99010hgbddewd2y28982v3dvbv28vv2dv2d2gbvby2
|
|
||||||
healthcheck:
|
|
||||||
test: echo 'db.runCommand("ping").ok' | mongosh --quiet
|
|
||||||
interval: 10s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
start_period: 20s
|
|
||||||
|
|
||||||
consumer:
|
|
||||||
image: git.aykhans.me/bsky/feedgen-consumer:latest
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
<<: *common-mongodb-environment
|
|
||||||
POST_MAX_DATE: 720h # Save only posts created in the last month
|
|
||||||
POST_COLLECTION_CUTOFF_CRON_DELAY: 10m # 10 minutes
|
|
||||||
POST_COLLECTION_CUTOFF_CRON_MAX_DOCUMENT: 2900000 # Delete post documents after 2.9 million
|
|
||||||
depends_on:
|
|
||||||
mongodb:
|
|
||||||
condition: service_healthy
|
|
||||||
|
|
||||||
feedgen_az:
|
|
||||||
image: git.aykhans.me/bsky/feedgen-generator-az:latest
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
<<: *common-mongodb-environment
|
|
||||||
FEED_AZ_GENERATER_CRON_DELAY: 1m # 1 minute
|
|
||||||
FEED_AZ_COLLECTION_CUTOFF_CRON_DELAY: 30m # 30 minutes
|
|
||||||
FEED_AZ_COLLECTION_CUTOFF_CRON_MAX_DOCUMENT: 10000
|
|
||||||
depends_on:
|
|
||||||
mongodb:
|
|
||||||
condition: service_healthy
|
|
||||||
|
|
||||||
api:
|
|
||||||
image: git.aykhans.me/bsky/feedgen-api:latest
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- 8421:8421
|
|
||||||
environment:
|
|
||||||
<<: *common-mongodb-environment
|
|
||||||
FEEDGEN_HOSTNAME: https://feeds.bsky.aykhans.me
|
|
||||||
FEEDGEN_PUBLISHER_DID: did:plc:cs2cbzojm6hmx5lfxiuft3mq
|
|
||||||
API_PORT: 8421
|
|
||||||
depends_on:
|
|
||||||
mongodb:
|
|
||||||
condition: service_healthy
|
|
||||||
|
|
||||||
caddy:
|
|
||||||
image: caddy:2.10.0-alpine
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- 80:80
|
|
||||||
- 443:443
|
|
||||||
- 443:443/udp
|
|
||||||
volumes:
|
|
||||||
- ./Caddyfile:/etc/caddy/Caddyfile
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
mongodb_data:
|
|
@ -264,7 +264,7 @@ func ConsumeAndSaveToMongoDB(
|
|||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
if len(postBatch) > 0 {
|
if len(postBatch) > 0 {
|
||||||
consumerLastFlushingTime = time.Now()
|
consumerLastFlushingTime = time.Now()
|
||||||
// logger.Log.Info("flushing post batch", "count", len(postBatch))
|
logger.Log.Info("flushing post batch", "count", len(postBatch))
|
||||||
err := postCollection.Insert(ctx, true, postBatch...)
|
err := postCollection.Insert(ctx, true, postBatch...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("mongodb post insert error: %v", err)
|
return fmt.Errorf("mongodb post insert error: %v", err)
|
||||||
@ -272,7 +272,7 @@ func ConsumeAndSaveToMongoDB(
|
|||||||
postBatch = []*collections.Post{} // Clear batch after insert
|
postBatch = []*collections.Post{} // Clear batch after insert
|
||||||
} else {
|
} else {
|
||||||
// If we haven't seen any data for 25 seconds, cancel the consumer connection
|
// If we haven't seen any data for 25 seconds, cancel the consumer connection
|
||||||
if consumerLastFlushingTime.Add(time.Second * 25).Before(time.Now()) {
|
if consumerLastFlushingTime.Add(time.Second*25).Before(time.Now()) {
|
||||||
cancel()
|
cancel()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package az
|
package generator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
@ -13,24 +13,39 @@ import (
|
|||||||
"go.mongodb.org/mongo-driver/mongo/options"
|
"go.mongodb.org/mongo-driver/mongo/options"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Generator struct {
|
var azInvalidUser []string = []string{
|
||||||
|
"did:plc:5zww7zorx2ajw7hqrhuix3ba",
|
||||||
|
"did:plc:c4vhz47h566t2ntgd7gtawen",
|
||||||
|
}
|
||||||
|
|
||||||
|
var azValidUsers []string = []string{
|
||||||
|
"did:plc:jbt4qi6psd7rutwzedtecsq7",
|
||||||
|
"did:plc:yzgdpxsklrmfgqmjghdvw3ti",
|
||||||
|
"did:plc:g7ebgiai577ln3avsi2pt3sn",
|
||||||
|
"did:plc:phtq2rhgbwipyx5ie3apw44j",
|
||||||
|
"did:plc:jfdvklrs5n5qv7f25v6swc5h",
|
||||||
|
"did:plc:u5ez5w6qslh6advti4wyddba",
|
||||||
|
"did:plc:cs2cbzojm6hmx5lfxiuft3mq",
|
||||||
|
}
|
||||||
|
|
||||||
|
type FeedGeneratorAz struct {
|
||||||
postCollection *collections.PostCollection
|
postCollection *collections.PostCollection
|
||||||
feedAzCollection *collections.FeedAzCollection
|
feedAzCollection *collections.FeedAzCollection
|
||||||
textRegex *regexp.Regexp
|
textRegex *regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewGenerator(
|
func NewFeedGeneratorAz(
|
||||||
postCollection *collections.PostCollection,
|
postCollection *collections.PostCollection,
|
||||||
feedAzCollection *collections.FeedAzCollection,
|
feedAzCollection *collections.FeedAzCollection,
|
||||||
) *Generator {
|
) *FeedGeneratorAz {
|
||||||
return &Generator{
|
return &FeedGeneratorAz{
|
||||||
postCollection: postCollection,
|
postCollection: postCollection,
|
||||||
feedAzCollection: feedAzCollection,
|
feedAzCollection: feedAzCollection,
|
||||||
textRegex: regexp.MustCompile("(?i)(azerbaijan|azərbaycan|aзербайджан|azerbaycan)"),
|
textRegex: regexp.MustCompile("(?i)(azerbaijan|azərbaycan|aзербайджан|azerbaycan)"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (generator *Generator) Start(ctx context.Context, cursorOption types.GeneratorCursor, batchSize int) error {
|
func (generator *FeedGeneratorAz) Start(ctx context.Context, cursorOption types.GeneratorCursor, batchSize int) error {
|
||||||
var mongoCursor *mongo.Cursor
|
var mongoCursor *mongo.Cursor
|
||||||
switch cursorOption {
|
switch cursorOption {
|
||||||
case types.GeneratorCursorLastGenerated:
|
case types.GeneratorCursorLastGenerated:
|
||||||
@ -109,16 +124,17 @@ func (generator *Generator) Start(ctx context.Context, cursorOption types.Genera
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (generator *Generator) IsValid(post *collections.Post) bool {
|
func (generator *FeedGeneratorAz) IsValid(post *collections.Post) bool {
|
||||||
if post.Reply != nil && post.Reply.RootURI != post.Reply.ParentURI {
|
if post.Reply != nil && post.Reply.RootURI != post.Reply.ParentURI {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if isValidUser := users.IsValid(post.DID); isValidUser != nil {
|
if slices.Contains(azInvalidUser, post.DID) {
|
||||||
return *isValidUser
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slices.Contains(post.Langs, "az") && len(post.Langs) < 3) || // Posts in Azerbaijani language with fewer than 3 languages
|
if slices.Contains(azValidUsers, post.DID) || // Posts from always-valid users
|
||||||
|
(slices.Contains(post.Langs, "az") && len(post.Langs) < 3) || // Posts in Azerbaijani language with fewer than 3 languages
|
||||||
generator.textRegex.MatchString(post.Text) { // Posts containing Azerbaijan-related keywords
|
generator.textRegex.MatchString(post.Text) { // Posts containing Azerbaijan-related keywords
|
||||||
return true
|
return true
|
||||||
}
|
}
|
@ -1,21 +0,0 @@
|
|||||||
package az
|
|
||||||
|
|
||||||
import "github.com/aykhans/bsky-feedgen/pkg/generator"
|
|
||||||
|
|
||||||
var users = generator.Users{
|
|
||||||
// Invalid
|
|
||||||
"did:plc:5zww7zorx2ajw7hqrhuix3ba": false,
|
|
||||||
"did:plc:c4vhz47h566t2ntgd7gtawen": false,
|
|
||||||
"did:plc:lc7j7xdq67gn7vc6vzmydfqk": false,
|
|
||||||
"did:plc:msian4dqa2rqalf3biilnf3m": false,
|
|
||||||
"did:plc:gtosalycg7snvodjhsze35jm": false,
|
|
||||||
|
|
||||||
// Valid
|
|
||||||
"did:plc:jbt4qi6psd7rutwzedtecsq7": true,
|
|
||||||
"did:plc:yzgdpxsklrmfgqmjghdvw3ti": true,
|
|
||||||
"did:plc:g7ebgiai577ln3avsi2pt3sn": true,
|
|
||||||
"did:plc:phtq2rhgbwipyx5ie3apw44j": true,
|
|
||||||
"did:plc:jfdvklrs5n5qv7f25v6swc5h": true,
|
|
||||||
"did:plc:u5ez5w6qslh6advti4wyddba": true,
|
|
||||||
"did:plc:cs2cbzojm6hmx5lfxiuft3mq": true,
|
|
||||||
}
|
|
@ -1,14 +0,0 @@
|
|||||||
package generator
|
|
||||||
|
|
||||||
import "github.com/aykhans/bsky-feedgen/pkg/utils"
|
|
||||||
|
|
||||||
type Users map[string]bool
|
|
||||||
|
|
||||||
func (u Users) IsValid(did string) *bool {
|
|
||||||
isValid, ok := u[did]
|
|
||||||
if ok == false {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return utils.ToPtr(isValid)
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user