diff --git a/pkg/generator/az/base.go b/pkg/generator/az/base.go
index 2f7d5b9..4245d50 100644
--- a/pkg/generator/az/base.go
+++ b/pkg/generator/az/base.go
@@ -110,16 +110,24 @@ func (generator *Generator) Start(ctx context.Context, cursorOption types.Genera
 }
 
 func (generator *Generator) IsValid(post *collections.Post) bool {
+	// Skip nested replies: only non-reply posts and direct replies to the root post go on.
 	if post.Reply != nil && post.Reply.RootURI != post.Reply.ParentURI {
 		return false
 	}
 
+	// A verdict from the pre-defined user list, when there is one, overrides
+	// every content-based check below (explicit inclusion/exclusion of users).
 	if isValidUser := Users.IsValid(post.DID); isValidUser != nil {
 		return *isValidUser
 	}
 
-	if (slices.Contains(post.Langs, "az") && len(post.Langs) < 3) || // Posts in Azerbaijani language with fewer than 3 languages
-		generator.textRegex.MatchString(post.Text) { // Posts containing Azerbaijan-related keywords
+	// A post is accepted when either of the following holds:
+	//  1. It is tagged "az" and carries fewer than 3 language tags in total
+	//     (to filter out multi-language spam).
+	//  2. Its text matches the Azerbaijan-related keyword regex AND at least
+	//     one of its language tags is in the approved Langs list.
+	if (slices.Contains(post.Langs, "az") && len(post.Langs) < 3) ||
+		(generator.textRegex.MatchString(post.Text) && Langs.IsExistsAny(post.Langs)) {
 		return true
 	}
 
diff --git a/pkg/generator/az/lists.go b/pkg/generator/az/lists.go
index 1a9ceed..a15f413 100644
--- a/pkg/generator/az/lists.go
+++ b/pkg/generator/az/lists.go
@@ -42,3 +42,12 @@ var Users = generator.Users{
 	"did:plc:ftoopigdpuzqt2kpeyqxsofx": true,
 	"did:plc:cs2cbzojm6hmx5lfxiuft3mq": true,
 }
+
+// Langs lists the language codes of which a keyword-matched post must carry
+// at least one in order to be accepted by Generator.IsValid.
+var Langs = generator.Langs{
+	"az": true,
+	"en": true,
+	"tr": true,
+	"ru": true,
+}
diff --git a/pkg/generator/base.go b/pkg/generator/base.go
index 16bbcb9..da795d0 100644
--- a/pkg/generator/base.go
+++ b/pkg/generator/base.go
@@ -67,3 +67,25 @@ func (u Users) GetAll() []string {
 
 	return allUsers
 }
+
+// Langs is a set of language codes. A code counts as approved only when it
+// is present in the map with the value true.
+type Langs map[string]bool
+
+// IsExistsAny reports whether at least one of the given language codes is
+// approved in l.
+//
+// Map values are honored: a code that is absent, or present with the value
+// false, does not count. A nil or empty langs slice yields false, and so does
+// a nil Langs map.
+func (l Langs) IsExistsAny(langs []string) bool {
+	for _, lang := range langs {
+		// Indexing yields false for missing keys, so a single lookup covers
+		// both "absent" and "explicitly disabled".
+		if l[lang] {
+			return true
+		}
+	}
+
+	return false
+}