Cluster Reliability / Troubleshooting Replication Issues
Code Recap: Identifying Replication Lag Issues
Resize the oplog with replSetResizeOplog
replSetResizeOplog
Admin Command
Use the replSetResizeOplog
admin command to resize the oplog or its minimum retention period dynamically without restarting the mongod
process.
db.adminCommand(
{
replSetResizeOplog: <int>,
size: <double>,
minRetentionHours: <double>
}
)
Set minimum retention hours for the oplog with replSetResizeOplog
Below is an example of a mongod.conf file that has been edited to specify.
Configure the storage.oplogMinRetentionHours
setting in mongod.conf
You may also use the storage.oplogMinRetentionHours
setting in your mongod.conf
file to set a minimum oplog retention period (in hours):
# mongod.conf
# where to write logging data.
systemLog:
destination: file
logAppend: true
path: /var/log/mongodb/mongod.log
# Where and how to store data.
storage:
dbPath: /var/lib/mongo
size: <double>
oplogMinRetentionHours: <double>
}
)
Check replica set health and replication lag with rs.status()
and rs.printSecondaryReplicationInfo()
rs.status()
Method
Use the rs.status()
method to return the replica set status from the point of view of the member where the method is run.
rs.status()
Example Output:
{
"set" : "replset",
"date" : ISODate("2024-08-15T23:06:13.978Z"),
"myState" : 1,
"term" : Long(3),
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : Long(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"votingMembersCount" : 3,
"writableVotingMembersCount" : 3,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"lastCommittedWallTime" : ISODate("2024-08-15T23:06:13.978Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"readConcernMajorityWallTime" : ISODate("2024-08-15T23:06:13.978Z"),
"appliedOpTime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"durableOpTime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"lastAppliedWallTime" : ISODate("2020-03-05T05:24:38.122Z"),
"lastDurableWallTime" : ISODate("2020-03-05T05:24:38.122Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1723763173, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "stepUpRequestSkipDryRun",
"lastElectionDate" : ISODate("2024-08-15T23:06:13.978Z"),
"electionTerm" : Long(3),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(2)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(2)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 1,
"electionTimeoutMillis" : Long(10000),
"priorPrimaryMemberId" : 1,
"numCatchUpOps" : Long(0),
"newTermStartDate" : ISODate("2024-08-15T23:06:13.978Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2024-08-15T23:06:13.978Z")
},
"electionParticipantMetrics" : {
"votedForCandidate" : true,
"electionTerm" : Long(2),
"lastVoteDate" : ISODate("2024-08-15T23:06:13.978Z"),
"electionCandidateMemberId" : 1,
"voteReason" : "",
"lastAppliedOpTimeAtElection" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(1)
},
"maxAppliedOpTimeInSet" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(1)
},
"priorityAtElection" : 1
},
…
"members" : [
{
"_id" : 0,
"name" : "m1.example.net:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 269,
"optime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDurable" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDate" : ISODate("2024-08-15T23:06:13.978Z"),
"optimeWrittenDate" : ISODate("2024-08-15T23:06:13.978Z"),
"lastAppliedWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"lastDurableWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1723763173, 1),
"electionDate" : ISODate("2024-08-15T23:06:13.978Z"),
"configVersion" : 1,
"configTerm" : 0,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "m2.example.net:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 266,
"optime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDurable" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDate" : ISODate("2024-08-15T23:06:13.978Z"),
"optimeDurableDate" : ISODate("2024-08-15T23:06:13.978Z"),
"lastAppliedWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"lastDurableWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"lastHeartbeat" : ISODate("2024-08-15T23:06:13.978Z"),
"lastHeartbeatRecv" : ISODate("2024-08-15T23:06:13.978Z"),
"pingMs" : Long(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "m3.example.net:27017",
"syncSourceId" : 2,
"infoMessage" : "",
"configVersion" : 1
},
{
"_id" : 2,
"name" : "m3.example.net:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 266,
"optime" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDurable" : {
"ts" : Timestamp(1723763173, 1),
"t" : Long(3)
},
"optimeDate" : ISODate("2024-08-15T23:06:13.978Z"),
"optimeDurableDate" : ISODate("2024-08-15T23:06:13.978Z"),
"lastAppliedWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"lastDurableWallTime": ISODate("2024-08-15T23:06:13.978Z"),
"lastHeartbeat" : ISODate("2024-08-15T23:06:13.978Z"),
"lastHeartbeatRecv" : ISODate("2024-08-15T23:06:13.978Z"),
"pingMs" : Long(0),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "m1.example.net:27017",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 1
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1723763173, 1),
"signature" : {
"hash" : BinData(0,"9C2qcGVkipEGJW3iF90qxb/gIwc="),
"keyId" : Long("6800589497806356482")
}
},
"operationTime" : Timestamp(1723763173, 1)
}
rs.printSecondaryReplicationInfo()
Method
Use the rs.printSecondaryReplicationInfo()
method to return a formatted report of the replica set status from the perspective of the secondary member of the set. The output is identical to db.printSecondaryReplicationInfo()
.
rs.printSecondaryReplicationInfo()
Example Output:
source: m1.example.net:27002
syncedTo: Mon Mar 01 2021 16:30:50 GMT-0800 (PST)
0 secs (0 hrs) behind the primary
source: m2.example.net:27003
syncedTo: Mon Mar 01 2021 16:30:50 GMT-0800 (PST)
0 secs (0 hrs) behind the primary