#!/bin/bash

# Expect base collection path as an argument
BASE_COLLECTION_PATH=$1

# Expect time option as an argument
SINCE_TIME=$2

gather_common_ceph_resources "${BASE_COLLECTION_PATH}" "${SINCE_TIME}"

# Use PWD as base path if no argument is passed
if [ "${BASE_COLLECTION_PATH}" = "" ]; then
    BASE_COLLECTION_PATH=$(pwd)
fi

CEPH_COLLECTION_PATH="${BASE_COLLECTION_PATH}/ceph"

# Ceph resources
ceph_resources=()
ceph_resources+=(cephblockpools)
ceph_resources+=(cephfilesystems)

# Ceph commands
ceph_commands=()
ceph_commands+=("ceph auth list")
ceph_commands+=("ceph balancer dump")
ceph_commands+=("ceph balancer pool ls")
ceph_commands+=("ceph balancer status")
ceph_commands+=("ceph config dump")
ceph_commands+=("ceph config-key ls")
ceph_commands+=("ceph crash ls")
ceph_commands+=("ceph crash stat")
ceph_commands+=("ceph device ls")
ceph_commands+=("ceph df detail")
ceph_commands+=("ceph fs dump")
ceph_commands+=("ceph fs ls")
ceph_commands+=("ceph fs status")
ceph_commands+=("ceph fs subvolumegroup ls ocs-storagecluster-cephfilesystem")
ceph_commands+=("ceph fs subvolume ls ocs-storagecluster-cephfilesystem csi")
ceph_commands+=("ceph health detail")
ceph_commands+=("ceph mds stat")
ceph_commands+=("ceph mgr dump")
ceph_commands+=("ceph mgr module ls")
ceph_commands+=("ceph mgr services")
ceph_commands+=("ceph mon stat")
ceph_commands+=("ceph mon dump")
ceph_commands+=("ceph osd df tree")
ceph_commands+=("ceph osd tree")
ceph_commands+=("ceph osd stat")
ceph_commands+=("ceph osd dump")
ceph_commands+=("ceph osd utilization")
ceph_commands+=("ceph osd crush show-tunables")
ceph_commands+=("ceph osd crush dump")
ceph_commands+=("ceph osd crush weight-set ls")
ceph_commands+=("ceph osd crush weight-set dump")
ceph_commands+=("ceph osd crush rule dump")
ceph_commands+=("ceph osd crush rule ls")
ceph_commands+=("ceph osd crush class ls")
ceph_commands+=("ceph osd perf")
ceph_commands+=("ceph osd numa-status")
ceph_commands+=("ceph osd getmaxosd")
ceph_commands+=("ceph osd drain status")
ceph_commands+=("ceph osd pool ls detail")
ceph_commands+=("ceph osd lspools")
ceph_commands+=("ceph osd df")
ceph_commands+=("ceph osd blocked-by")
ceph_commands+=("ceph osd blacklist ls")
ceph_commands+=("ceph pg dump")
ceph_commands+=("ceph pg stat")
ceph_commands+=("ceph pool autoscale-status")
ceph_commands+=("ceph progress")
ceph_commands+=("ceph progress json")
ceph_commands+=("ceph quorum_status")
ceph_commands+=("ceph report")
ceph_commands+=("ceph service dump")
ceph_commands+=("ceph status")
ceph_commands+=("ceph time-sync-status")
ceph_commands+=("ceph versions")

# Ceph volume commands
ceph_volume_commands+=()
ceph_volume_commands+=("ceph-volume lvm list")
ceph_volume_commands+=("ceph-volume raw list")

# Inspecting ceph related custom resources for all namespaces
for resource in "${ceph_resources[@]}"; do
    echo "collecting dump ${resource}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
    { oc adm --dest-dir="${CEPH_COLLECTION_PATH}" inspect "${resource}" --all-namespaces --"${SINCE_TIME}"; } >> "${BASE_COLLECTION_PATH}"/gather-debug.log 2>&1
done

namespaces=$(oc get deploy --all-namespaces -o go-template --template='{{range .items}}{{if .metadata.labels}}{{printf "%s %v" .metadata.namespace (index .metadata.labels "olm.owner")}} {{printf "\n"}}{{end}}{{end}}' | grep ocs-operator | awk '{print $1}' | uniq)
# Inspecting the namespace where ocs-cluster is installed
for ns in $namespaces; do
    ceph_collection(){
        COMMAND_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands
        COMMAND_JSON_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands_json_output
        COMMAND_ERR_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/logs
        mkdir -p "${COMMAND_OUTPUT_DIR}"
        mkdir -p "${COMMAND_JSON_OUTPUT_DIR}"
        mkdir -p "${COMMAND_ERR_OUTPUT_DIR}"

        pids_ceph=()

        # Collecting output of ceph osd config
        for i in $(timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph osd tree --connect-timeout=15 |  grep up "| awk '{print $4}'); do
            { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph config show $i" >> "${COMMAND_OUTPUT_DIR}/config_$i"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-config-"$i"-debug.log 2>&1 &
            pids_ceph+=($!)
        done

        # Collecting output of ceph commands
        for ((i = 0; i < ${#ceph_commands[@]}; i++)); do
             printf "collecting command output for: %s\n"  "${ceph_commands[$i]}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
             COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_commands[$i]// /_}
             JSON_COMMAND_OUTPUT_FILE=${COMMAND_JSON_OUTPUT_DIR}/${ceph_commands[$i]// /_}_--format_json-pretty
             { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "${ceph_commands[$i]} --connect-timeout=15" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands[$i]}"-debug.log 2>&1 &
             pids_ceph+=($!)
             { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "${ceph_commands[$i]} --connect-timeout=15 --format json-pretty" >> "${JSON_COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands[$i]}"-json-debug.log 2>&1 &
             pids_ceph+=($!)
        done
        for i in $(timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph osd lspools --connect-timeout=15"|awk '{print $2}'); do
             { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd ls -p $i" >> "${COMMAND_OUTPUT_DIR}/pools_rbd_$i"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-"$i"-debug.log 2>&1 &
             pids_ceph+=($!)
        done

        # Collecting snapshot info for ceph rbd volumes
        printf "collecting snapshot info for ceph rbd volumes \n" | tee -a  "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
        COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/rbd_vol_and_snap_info
        # Inspecting ceph block pools for ceph rbd
        blockpools=$(timeout 60 oc get cephblockpools.ceph.rook.io -n openshift-storage -o jsonpath="{range .items[*]}{@.metadata.name}{'\n'}{end}")
        for bp in $blockpools; do
            { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd mirror snapshot schedule list --pool=$bp" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-mirror-snap-schedule-list-"${bp}"-debug.log 2>&1 &
            pids_ceph+=($!)
            { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd mirror pool status $bp --format=json" >> "${JSON_COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-mirror-pool-status-"${bp}"-debug.log 2>&1 &
            pids_ceph+=($!)
            { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd mirror pool info $bp --format=json" >> "${JSON_COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-mirror-pool-info-"${bp}"-debug.log 2>&1 &
            pids_ceph+=($!)
            images=$(timeout 60 oc -n openshift-storage exec "${HOSTNAME}"-helper -- bash -c "rbd ls -p $bp")
            for image in $images; do
                { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd info $image --pool $bp" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-image-"${image}"-debug.log 2>&1 &
                pids_ceph+=($!)
                { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd snap ls --all $image --pool $bp" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-snap-"${image}"-debug.log 2>&1 &
                pids_ceph+=($!)
                { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "rbd mirror image status $bp/$image --format=json" >> "${JSON_COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-rbd-mirror-image-status-"${bp}"-"${image}"-debug.log 2>&1 &
                pids_ceph+=($!)
            done
        done

        # Collecting snapshot information for ceph subvolumes
        printf "collecting snapshot info for cephFS subvolumes \n" | tee -a "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
        COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/cephfs_subvol_and_snap_info

        # Inspecting CephFS filesystems
        filesystems=$(timeout 60 oc get cephfilesystems.ceph.rook.io -n openshift-storage -o jsonpath="{range .items[*]}{@.metadata.name}{'\n'}{end}")
        # Default subvolumegroup in OCS is 'csi'
        svg="csi"
        for fs in $filesystems; do
             subvols=$(timeout 60 oc -n openshift-storage exec "${HOSTNAME}"-helper -- bash -c "ceph fs subvolume ls $fs $svg | jq --raw-output '.[].name' ")
             for subvol in $subvols; do
                 { printf "Information for subvolume: %s\n" "${subvol}" >> "${COMMAND_OUTPUT_FILE}"; }
                 { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph fs subvolume info $fs $subvol $svg --connect-timeout=15" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-ceph-fs-"${subvol}"-debug.log 2>&1 &
                 pids_ceph+=($!)
                 snaps=$(timeout 60 oc -n openshift-storage exec "${HOSTNAME}"-helper -- bash -c "ceph fs subvolume snapshot ls $fs $subvol $svg | jq --raw-output '.[].name'")
                 count=$(timeout 60 oc -n openshift-storage exec "${HOSTNAME}"-helper -- bash -c "ceph fs subvolume snapshot ls $fs $subvol $svg | jq --raw-output '.[].name' | wc -l")
                 { printf "Snapshot count in subvolume: %s=%s\n" "${subvol}" "${count}" >> "${COMMAND_OUTPUT_FILE}"; }
                 for snap in $snaps; do
                     { printf "Information for snapshot: %s\n" "${snap}" >> "${COMMAND_OUTPUT_FILE}"; }
                     { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph fs subvolume snapshot info $fs $subvol $snap $svg --connect-timeout=15" >> "${COMMAND_OUTPUT_FILE}"; } >> "${COMMAND_ERR_OUTPUT_DIR}"/gather-ceph-fs-"${subvol}"-"${snap}"-debug.log 2>&1 &
                     pids_ceph+=($!)
                 done
             done
        done
        printf "waiting for pids to finish \n" | tee -a  "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
        wait "${pids_ceph[@]}"
    }
    if [ "$(oc get pods --no-headers -n openshift-storage -l  must-gather-helper-pod='' | awk '{print $2}')" == "1/1" ] ; then
        ceph_collection
    else
        echo "skipping the ceph collection" | tee -a  "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
    fi
    # Collecting output of ceph volume commands
    for ((i = 0; i < ${#ceph_volume_commands[@]}; i++)); do
        printf "collecting command output for: %s\n"  "${ceph_volume_commands[$i]}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
        for osdPod in $(oc get pods -n "${ns}" -l app=rook-ceph-osd --no-headers | awk '{print $1}'); do
            pod_status=$(oc get po "${osdPod}" -n "${ns}" -o jsonpath='{.status.phase}')
            if [ "${pod_status}" != "Running" ]; then
                continue
            fi
            COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_volume_commands[$i]// /_}
            { timeout 120 oc -n "${ns}" exec "${osdPod}" -- bash -c "${ceph_volume_commands[$i]}" >> "${COMMAND_OUTPUT_FILE}"; } >> "${BASE_COLLECTION_PATH}"/gather-debug.log 2>&1
        done
    done

    for i in $(timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph crash ls --connect-timeout=15"| awk '{print $1}'); do
        { timeout 120 oc -n "${ns}" exec "${HOSTNAME}"-helper -- bash -c "ceph crash info $i --connect-timeout=15" >> "${COMMAND_OUTPUT_DIR}"/crash_"${i}"; } >> "${BASE_COLLECTION_PATH}"/gather-debug.log 2>&1;
    done

    # Add Ready nodes to the list
    nodes=$(oc get nodes -l cluster.ocs.openshift.io/openshift-storage='' --no-headers | awk '/\yReady\y/{print $1}')

    # Collecting ceph prepare volume logs
    volume_collection(){
        printf "collecting prepare volume logs from node %s \n"  "${node}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
        oc rsync -n "${ns}" "$(oc get pods -n "${ns}"| grep "${node//./}-debug"| awk '{print $1}')":/host/var/lib/rook/openshift-storage/log "${NODE_OUTPUT_DIR}"
    }

    crash_collection(){
        printf "collecting crash core dump from node %s \n" "${node}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
        oc rsync -n "${ns}" "$(oc get pods -n "${ns}" -l "${node//./}"-debug='ready' --no-headers | awk '{print $1}')":/host/var/lib/rook/openshift-storage/crash/ "${CRASH_OUTPUT_DIR}"
    }

    # creating a counter variable for collecting PID in array
    pids=()
    # Collecting ceph crash dump
    for node in ${nodes}; do
        printf "collecting crash and volume logs from node %s \n"  "${node}" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
        CRASH_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/crash_${node}
        VOLUME_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/volume_collection_${node}
        mkdir -p "${CRASH_OUTPUT_DIR}"
        mkdir -p "${VOLUME_OUTPUT_DIR}"
        volume_collection &
        pids+=($!)
        crash_collection &
        pids+=($!)
    done

    if [ -n "${pids[*]}" ]; then
        # wait for all pids
        echo "waiting for ${pids[*]} to terminate" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
        wait "${pids[@]}"
    fi

    echo "ceph core dump collection completed" | tee -a  "${BASE_COLLECTION_PATH}"/gather-debug.log
done

cat "${BASE_COLLECTION_PATH}"/gather-ceph-debug.log >> "${BASE_COLLECTION_PATH}"/gather-debug.log 2>&1
rm -rf "${BASE_COLLECTION_PATH}/"gather-ceph-debug.log >> "${BASE_COLLECTION_PATH}"/gather-debug.log 2>&1
