Replication Bumper

Purpose: This script resumes ("bumps") Hyper-V replication for any VM on the cluster whose replication health is Critical, i.e. replication that has paused because of a replication error. Failed attempts to resume replication are recorded in a daily log file, and log files older than 5 days are deleted on each run.

# Directory where the daily log files are kept; it is created on first run.
$logDir = "C:\ClusterStorage\Volume1\Scripts\Logs"
if (-not (Test-Path $logDir)) {
    # Out-Null keeps the DirectoryInfo object returned by New-Item out of the script's output.
    New-Item -Path $logDir -ItemType Directory | Out-Null
}

# One log file per calendar day, named ReplicationLog_yyyyMMdd.txt
$today = Get-Date -Format "yyyyMMdd"
$logFile = Join-Path -Path $logDir -ChildPath "ReplicationLog_$today.txt"

# Create today's log file up front if it does not exist yet
if (-not (Test-Path $logFile)) {
    Write-Host "Log file does not exist. Attempting creation..."
    try {
        # New-Item reports failures as non-terminating errors by default, which
        # try/catch never sees. -ErrorAction Stop makes them catchable, so the
        # success message below is no longer printed after a failed creation.
        New-Item -Path $logFile -ItemType File -ErrorAction Stop | Out-Null
        Write-Host "Log file $logFile created successfully."
    } catch {
        Write-Error "Failed to create log file. Error: $_"
    }
}

# Log rotation: remove every ReplicationLog_*.txt in the log directory whose
# creation time lies more than 5 days in the past.
$existingLogs = Get-ChildItem -Path $logDir -Filter "ReplicationLog_*.txt"
foreach ($oldLog in $existingLogs) {
    if ($oldLog.CreationTime -lt (Get-Date).AddDays(-5)) {
        $oldLog | Remove-Item
    }
}

# Enumerate every node of the cluster; each one is queried for its own VMs below.
$clusterNodes = Get-ClusterNode

# For each node: find the VMs whose replication health is Critical and try to
# resume their replication. Failures are written to $logFile with a timestamp.
foreach ($node in $clusterNodes) {
    try {
        # -ErrorAction Stop turns the cmdlet's non-terminating errors (e.g. an
        # unreachable node) into terminating ones; without it the catch block
        # below never runs and the failure is never logged.
        $vmsInCriticalState = Get-VMReplication -ComputerName $node.Name -ErrorAction Stop |
            Where-Object { $_.ReplicationHealth -eq 'Critical' }
    } catch {
        Write-Error "Failed to retrieve VMs from Node: $($node.Name). Error: $_"
        # Log the error and continue to the next node
        Add-Content -Path $logFile -Value "Failed to retrieve VMs from Node: $($node.Name) at $(Get-Date)"
        continue
    }

    foreach ($vm in $vmsInCriticalState) {
        Write-Host "Checking VM: $($vm.Name) on Node: $($node.Name) for replication issues."
        Write-Host "Replication State for VM: $($vm.Name) is $($vm.ReplicationState)"

        # VMs in either of these two states are not resumed; they are only
        # reported and logged.
        # NOTE(review): 'Resynchronization required' looks like a display string rather
        # than an enum member name, so this first comparison may never match - confirm
        # against the values ReplicationState actually reports.
        if ($vm.ReplicationState -eq 'Resynchronization required' -or $vm.ReplicationState -eq 'WaitingForStartResynchronize') {
            Write-Warning "Replication for VM: $($vm.Name) on Node: $($node.Name) is in '$($vm.ReplicationState)' state. Skipping..."
            # Log the VM that is in 'Resynchronization required' or 'WaitingForStartResynchronize' state
            Add-Content -Path $logFile -Value "Replication for VM: $($vm.Name) on Node: $($node.Name) is in '$($vm.ReplicationState)' state at $(Get-Date)"
            continue
        }

        try {
            # Try to resume replication for the VM. -ErrorAction Stop is required
            # for a failed resume to reach the catch block; otherwise the success
            # message is printed and nothing is logged.
            Resume-VMReplication -VMName $vm.Name -ComputerName $node.Name -ErrorAction Stop
            Write-Host "Resumed replication for VM: $($vm.Name) on Node: $($node.Name)"
        } catch {
            Write-Error "Failed to resume replication for VM: $($vm.Name) on Node: $($node.Name) - $_"
            # Write the failed VM name to the log file
            Add-Content -Path $logFile -Value "Failed to resume replication for VM: $($vm.Name) on Node: $($node.Name) at $(Get-Date)"
        }
    }
}