mirror of
https://github.com/aykhans/bsky-feedgen.git
synced 2025-05-31 02:50:03 +00:00
131 lines
3.3 KiB
Go
131 lines
3.3 KiB
Go
package generator
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"regexp"
|
||
"slices"
|
||
|
||
"github.com/aykhans/bsky-feedgen/pkg/storage/mongodb/collections"
|
||
"github.com/aykhans/bsky-feedgen/pkg/types"
|
||
"go.mongodb.org/mongo-driver/bson"
|
||
"go.mongodb.org/mongo-driver/mongo"
|
||
"go.mongodb.org/mongo-driver/mongo/options"
|
||
)
|
||
|
||
// azValidUsers lists the DIDs of accounts whose posts IsValid accepts without
// looking at the post's languages or text. The reply filter in IsValid still
// applies to these accounts.
var azValidUsers = []string{
	"did:plc:jbt4qi6psd7rutwzedtecsq7",
	"did:plc:yzgdpxsklrmfgqmjghdvw3ti",
	"did:plc:cs2cbzojm6hmx5lfxiuft3mq",
}
|
||
|
||
type FeedGeneratorAz struct {
|
||
postCollection *collections.PostCollection
|
||
feedAzCollection *collections.FeedAzCollection
|
||
textRegex *regexp.Regexp
|
||
}
|
||
|
||
func NewFeedGeneratorAz(
|
||
postCollection *collections.PostCollection,
|
||
feedAzCollection *collections.FeedAzCollection,
|
||
) *FeedGeneratorAz {
|
||
return &FeedGeneratorAz{
|
||
postCollection: postCollection,
|
||
feedAzCollection: feedAzCollection,
|
||
textRegex: regexp.MustCompile("(?i)(azerbaijan|azərbaycan|aзербайджан|azerbaycan)"),
|
||
}
|
||
}
|
||
|
||
func (generator *FeedGeneratorAz) Start(ctx context.Context, cursorOption types.GeneratorCursor, batchSize int) error {
|
||
var mongoCursor *mongo.Cursor
|
||
switch cursorOption {
|
||
case types.GeneratorCursorLastGenerated:
|
||
sequenceCursor, err := generator.feedAzCollection.GetMaxSequence(ctx)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if sequenceCursor == nil {
|
||
mongoCursor, err = generator.postCollection.Collection.Find(
|
||
ctx,
|
||
bson.D{},
|
||
options.Find().SetSort(bson.D{{Key: "sequence", Value: 1}}),
|
||
)
|
||
} else {
|
||
mongoCursor, err = generator.postCollection.Collection.Find(
|
||
ctx,
|
||
bson.M{"sequence": bson.M{"$gt": *sequenceCursor}},
|
||
)
|
||
}
|
||
if err != nil {
|
||
return err
|
||
}
|
||
case types.GeneratorCursorFirstPost:
|
||
var err error
|
||
mongoCursor, err = generator.postCollection.Collection.Find(
|
||
ctx,
|
||
bson.D{},
|
||
options.Find().SetSort(bson.D{{Key: "sequence", Value: 1}}),
|
||
)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
defer func() { _ = mongoCursor.Close(ctx) }()
|
||
|
||
feedAzBatch := []*collections.FeedAz{}
|
||
for mongoCursor.Next(ctx) {
|
||
var doc *collections.Post
|
||
if err := mongoCursor.Decode(&doc); err != nil {
|
||
return fmt.Errorf("mongodb cursor decode error: %v", err)
|
||
}
|
||
|
||
if generator.IsValid(doc) == false {
|
||
continue
|
||
}
|
||
|
||
feedAzBatch = append(
|
||
feedAzBatch,
|
||
&collections.FeedAz{
|
||
ID: doc.ID,
|
||
Sequence: doc.Sequence,
|
||
DID: doc.DID,
|
||
RecordKey: doc.RecordKey,
|
||
CreatedAt: doc.CreatedAt,
|
||
},
|
||
)
|
||
|
||
if len(feedAzBatch)%batchSize == 0 {
|
||
err := generator.feedAzCollection.Insert(ctx, true, feedAzBatch...)
|
||
if err != nil {
|
||
return fmt.Errorf("insert FeedAz error: %v", err)
|
||
}
|
||
feedAzBatch = []*collections.FeedAz{}
|
||
}
|
||
}
|
||
|
||
if len(feedAzBatch) > 0 {
|
||
err := generator.feedAzCollection.Insert(ctx, true, feedAzBatch...)
|
||
if err != nil {
|
||
return fmt.Errorf("insert FeedAz error: %v", err)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (generator *FeedGeneratorAz) IsValid(post *collections.Post) bool {
|
||
if post.Reply != nil && post.Reply.RootURI != post.Reply.ParentURI {
|
||
return false
|
||
}
|
||
|
||
if slices.Contains(azValidUsers, post.DID) || // Posts from always-valid users
|
||
(slices.Contains(post.Langs, "az") && len(post.Langs) < 3) || // Posts in Azerbaijani language with fewer than 3 languages
|
||
generator.textRegex.MatchString(post.Text) { // Posts containing Azerbaijan-related keywords
|
||
return true
|
||
}
|
||
|
||
return false
|
||
}
|