Optimize mongodb collections Cutoff methods

This commit is contained in:
2025-05-19 16:59:50 +04:00
parent 259c139d92
commit 16a2a62ba1
2 changed files with 92 additions and 41 deletions

View File

@@ -163,7 +163,8 @@ func (f FeedAzCollection) CutoffByCount(
findOpts := options.Find().
SetSort(bson.D{{Key: "created_at", Value: 1}}).
SetLimit(deleteCount)
SetLimit(deleteCount).
SetProjection(bson.M{"_id": 1})
cursor, err := f.Collection.Find(ctx, bson.M{}, findOpts)
if err != nil {
@@ -171,24 +172,46 @@ func (f FeedAzCollection) CutoffByCount(
}
defer func() { _ = cursor.Close(ctx) }()
var docsToDelete []bson.M
if err = cursor.All(ctx, &docsToDelete); err != nil {
return 0, err
// Process documents in batches to avoid potential memory issues
const batchSize = 10000
var totalDeleted int64 = 0
for {
batch := make([]string, 0, batchSize)
batchCount := 0
for cursor.Next(ctx) && batchCount < batchSize {
var doc struct {
ID string `bson:"_id"`
}
if err = cursor.Decode(&doc); err != nil {
return totalDeleted, err
}
batch = append(batch, doc.ID)
batchCount++
}
if len(batch) == 0 {
break
}
// Delete the batch
result, err := f.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": batch}})
if err != nil {
return totalDeleted, err
}
totalDeleted += result.DeletedCount
if cursor.Err() != nil {
return totalDeleted, cursor.Err()
}
// If we didn't fill the batch, we're done
if batchCount < batchSize {
break
}
}
if len(docsToDelete) == 0 {
return 0, nil
}
ids := make([]any, len(docsToDelete))
for i := range docsToDelete {
ids[i] = docsToDelete[i]["_id"]
}
result, err := f.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": ids}})
if err != nil {
return 0, err
}
return result.DeletedCount, nil
return totalDeleted, nil
}