Optimize MongoDB collections' Cutoff methods

This commit is contained in:
2025-05-19 16:59:50 +04:00
parent 259c139d92
commit 16a2a62ba1
2 changed files with 92 additions and 41 deletions

View File

@@ -163,7 +163,8 @@ func (f FeedAzCollection) CutoffByCount(
findOpts := options.Find(). findOpts := options.Find().
SetSort(bson.D{{Key: "created_at", Value: 1}}). SetSort(bson.D{{Key: "created_at", Value: 1}}).
SetLimit(deleteCount) SetLimit(deleteCount).
SetProjection(bson.M{"_id": 1})
cursor, err := f.Collection.Find(ctx, bson.M{}, findOpts) cursor, err := f.Collection.Find(ctx, bson.M{}, findOpts)
if err != nil { if err != nil {
@@ -171,24 +172,46 @@ func (f FeedAzCollection) CutoffByCount(
} }
defer func() { _ = cursor.Close(ctx) }() defer func() { _ = cursor.Close(ctx) }()
var docsToDelete []bson.M // Process documents in batches to avoid potential memory issues
if err = cursor.All(ctx, &docsToDelete); err != nil { const batchSize = 10000
return 0, err var totalDeleted int64 = 0
for {
batch := make([]string, 0, batchSize)
batchCount := 0
for cursor.Next(ctx) && batchCount < batchSize {
var doc struct {
ID string `bson:"_id"`
}
if err = cursor.Decode(&doc); err != nil {
return totalDeleted, err
}
batch = append(batch, doc.ID)
batchCount++
} }
if len(docsToDelete) == 0 { if len(batch) == 0 {
return 0, nil break
} }
ids := make([]any, len(docsToDelete)) // Delete the batch
for i := range docsToDelete { result, err := f.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": batch}})
ids[i] = docsToDelete[i]["_id"]
}
result, err := f.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": ids}})
if err != nil { if err != nil {
return 0, err return totalDeleted, err
} }
return result.DeletedCount, nil totalDeleted += result.DeletedCount
if cursor.Err() != nil {
return totalDeleted, cursor.Err()
}
// If we didn't fill the batch, we're done
if batchCount < batchSize {
break
}
}
return totalDeleted, nil
} }

View File

@@ -17,11 +17,16 @@ type PostCollection struct {
func NewPostCollection(client *mongo.Client) (*PostCollection, error) { func NewPostCollection(client *mongo.Client) (*PostCollection, error) {
client.Database(config.MongoDBBaseDB).Collection("") client.Database(config.MongoDBBaseDB).Collection("")
coll := client.Database(config.MongoDBBaseDB).Collection("post") coll := client.Database(config.MongoDBBaseDB).Collection("post")
_, err := coll.Indexes().CreateOne( _, err := coll.Indexes().CreateMany(
context.Background(), context.Background(),
mongo.IndexModel{ []mongo.IndexModel{
{
Keys: bson.D{{Key: "sequence", Value: -1}}, Keys: bson.D{{Key: "sequence", Value: -1}},
}, },
{
Keys: bson.D{{Key: "created_at", Value: 1}},
},
},
) )
if err != nil { if err != nil {
return nil, err return nil, err
@@ -71,7 +76,8 @@ func (p PostCollection) CutoffByCount(
findOpts := options.Find(). findOpts := options.Find().
SetSort(bson.D{{Key: "created_at", Value: 1}}). SetSort(bson.D{{Key: "created_at", Value: 1}}).
SetLimit(deleteCount) SetLimit(deleteCount).
SetProjection(bson.M{"_id": 1})
cursor, err := p.Collection.Find(ctx, bson.M{}, findOpts) cursor, err := p.Collection.Find(ctx, bson.M{}, findOpts)
if err != nil { if err != nil {
@@ -79,26 +85,48 @@ func (p PostCollection) CutoffByCount(
} }
defer func() { _ = cursor.Close(ctx) }() defer func() { _ = cursor.Close(ctx) }()
var docsToDelete []bson.M // Process documents in batches to avoid potential memory issues
if err = cursor.All(ctx, &docsToDelete); err != nil { const batchSize = 10000
return 0, err var totalDeleted int64 = 0
for {
batch := make([]string, 0, batchSize)
batchCount := 0
for cursor.Next(ctx) && batchCount < batchSize {
var doc struct {
ID string `bson:"_id"`
}
if err = cursor.Decode(&doc); err != nil {
return totalDeleted, err
}
batch = append(batch, doc.ID)
batchCount++
} }
if len(docsToDelete) == 0 { if len(batch) == 0 {
return 0, nil break
} }
ids := make([]any, len(docsToDelete)) // Delete the batch
for i := range docsToDelete { result, err := p.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": batch}})
ids[i] = docsToDelete[i]["_id"]
}
result, err := p.Collection.DeleteMany(ctx, bson.M{"_id": bson.M{"$in": ids}})
if err != nil { if err != nil {
return 0, err return totalDeleted, err
} }
return result.DeletedCount, nil totalDeleted += result.DeletedCount
if cursor.Err() != nil {
return totalDeleted, cursor.Err()
}
// If we didn't fill the batch, we're done
if batchCount < batchSize {
break
}
}
return totalDeleted, nil
} }
func (p PostCollection) GetMaxSequence(ctx context.Context) (*int64, error) { func (p PostCollection) GetMaxSequence(ctx context.Context) (*int64, error) {