#! /usr/bin/env bash

# Mirror repositories. For example:
#
# mrrepo -s git.example.org /var/scm
#
# Will first download (via http, or via https if -s is specified) the manifest
# file from git.example.org, which should list all publicly available
# repositories. It will then pull-mirror each remote repository locally in
# /var/scm using the git protocol.
#
# Afterwards it may push-mirror them as well as local repositories (specified
# in the local manifest) further to a remote repository that can be specified
# in the manifest files. If mirroring via https, then you also most likely
# need to provide credentials for the remote https URLs in the mrrepo-config
# file. This file should be placed next to and will be sourced by the mrrepo
# script (remember to adjust its permissions).
#
# The manifest file line format (lines starting with # are ignored):
#
# <repository-path>[ <remote-mirror-url>]
#
# To specify another credential for a URL add the following line to
# mrrepo-config:
#
# credentials['<https-url-prefix>']='<user>:<password>'
#
# -v
#  Run verbose.
#
# -s
#  Use https rather than http to download the manifest (git protocol is still
#  used for mirroring).
#
# Notes:
#   - needs curl
#   - run from cron as user scm (which belongs to the group scm).
#
# To test, run:
#
# runuser -u scm -- /var/scm/mrrepo -s -v git.example.org /var/scm
#
# Usage synopsis that is printed when the command line is malformed.
#
usage="usage: $0 [-v] [-s] <host> <path>"

# Remember the original working directory so that we can return to it both on
# failure (the ERR trap below) and on normal completion.
#
owd="$(pwd)"

# If any command fails, return to the original directory and exit with 1.
#
# Note that the trap body is single-quoted so that $owd is expanded when the
# trap fires rather than being spliced into the command text here (which
# would produce a broken command for a directory name containing a single
# quote).
#
trap 'cd "$owd"; exit 1' ERR
set -o errtrace # Trap in functions.

# Print the arguments, joined with spaces, as a single line to stderr.
#
# Note that printf is used instead of echo so that a message which happens to
# be a valid echo option (for example, -n or -e) is printed literally rather
# than being consumed.
#
function info () { printf '%s\n' "$*" 1>&2; }
# Print the arguments as a diagnostics line (see info) and terminate the
# script with exit code 1.
#
function error ()
{
  info "$*"
  exit 1
}

# Command line settings and their defaults.
#
prot="http" # Protocol used to download the manifest (https if -s).
host=       # Remote host (first positional argument).
path=       # Local mirror directory (second positional argument).
verb=0      # Verbosity (1 if -v).

# Parse the command line. Options and positional arguments can be specified
# in any order.
#
while [ "$#" -gt 0 ]; do
  case "$1" in
    -v)
      verb=1
      ;;
    -s)
      prot="https"
      ;;
    -*)
      # Reject an unknown option (for example, a mistyped one) rather than
      # silently taking it for the host or path.
      #
      info "unknown option '$1'"
      error "$usage"
      ;;
    *)
      if [[ -z "$host" ]]; then
        host="$1"
      elif [[ -z "$path" ]]; then
        path="${1%/}" # Strip the trailing slash, if any.
      else
        error "$usage"
      fi
      ;;
  esac
  shift
done

# Both positional arguments are required.
#
if [[ -z "$host" || -z "$path" ]]; then
  error "$usage"
fi

if [[ ! -d "$path" ]]; then
  error "$path is not a directory"
fi

# Map of https URL prefixes to their '<user>:<password>' credentials. It is
# expected to be filled in by the optional configuration file that is named
# after this script with the -config suffix and is located next to it.
#
declare -A credentials
config="$(realpath "${BASH_SOURCE[0]}")-config"

if [[ -f "$config" ]]; then
  source "$config"

  # Verify that every configured URL prefix uses the https protocol.
  #
  for p in "${!credentials[@]}"; do
    [[ "$p" == https://* ]] ||
      error "https protocol is expected for '$p' in '$config'"
  done
fi

# From now on work in the mirror directory: the manifests and repositories
# below are all referred to with relative paths.
#
cd "$path"

# Common curl options: fail on HTTP errors (-f) and finish in 30 seconds.
#
curl_ops=(-f --max-time 30)

# Show the progress bar in the verbose mode. Otherwise stay silent but still
# show errors (-s -S).
#
if [ "$verb" -ge 1 ]; then
  curl_ops+=(--progress-bar)
else
  curl_ops+=(-s -S)
fi

# Run curl for <url>, passing the common options (curl_ops) followed by any
# extra <curl-options>. In the verbose mode first print the command line to
# stderr. The exit status is that of curl.
#
function fetch () # <url> [<curl-options>]
{
  local url="$1"
  shift

  local cmd=(curl "${curl_ops[@]}" "$@" "$url")

  if [ "$verb" -ge 1 ]; then
    info "${cmd[@]}"
  fi

  "${cmd[@]}"
}

# Download the remote manifest and save it as remote.manifest in the mirror
# directory. The -z option makes the download conditional on the modification
# time of the existing local copy.
#
fetch "$prot://$host/manifest" -z remote.manifest -o remote.manifest

# Print the contents of the manifest <file> with the comment and blank lines
# removed and with every run of whitespace replaced with a single space.
#
function manifest_filter () # <file>
{
  local f="$1"

  sed \
    -e '/^\s*#/d' \
    -e '/^\s*$/d' \
    -e 's/\s\s*/ /g' \
    "$f"
}

# Print the field number <num> (1-based) of the space-separated manifest
# <line>. If <name> is specified, then the field is mandatory and its absence
# is reported (using <name> in the diagnostics) as a fatal error. Otherwise,
# an empty line is printed for a missing field.
#
function manifest_field () # <line> <num> [<name>]
{
  local r

  # The trailing space makes sure the line always contains the delimiter:
  # cut prints a line without delimiters in full regardless of the field
  # number.
  #
  r="$(cut -d ' ' -f "$2" <<<"$1 ")"

  # Note that [[ ]] with && is used instead of the obsolescent -a operator
  # which makes the test ambiguous if <name> looks like a test operator.
  #
  if [[ -n "$3" && -z "$r" ]]; then
    error "field <$3> (#$2) missing in '$1'"
  fi

  printf '%s\n' "$r"
}

# The remote repositories are collected in the remote array. While at it, the
# push URLs are fixed up with credentials and saved in the push_auth map. The
# original push URLs are saved as well (in the push_orig map) and are used
# for diagnostics so that we don't expose credentials (think about cron job
# diagnostics sent by email).
#
declare -a remote=()
declare -A push_orig push_auth

# Register the push <url> for the repository <rep>: save the URL as is in the
# push_orig map and its version with the credentials embedded (if there are
# credentials configured for a prefix of this URL) in the push_auth map.
#
function push_add () # <rep> <url>
{
  # Note that currently we only support adding credentials for https URLs.
  #
  local r="$1"
  local u="$2"

  push_orig["$r"]="$u"

  # Use the credentials of the first configured prefix that matches the URL.
  #
  local p
  for p in "${!credentials[@]}"; do
    if [[ "$u" == "$p"* ]]; then

      # Insert <user>:<password>@ after the scheme. Note that this is done
      # with parameter expansion rather than sed so that characters that are
      # special in a sed substitution (%, &, \) are safe in credentials.
      #
      if [[ "$u" == https://* ]]; then
        u="https://${credentials[$p]}@${u#https://}"
      fi
      break
    fi
  done

  push_auth["$r"]="$u"
}

# Load the remote manifest: add every repository listed to the remote array
# and register its push URL, if any.
#
# Note that -r keeps backslashes in the manifest lines intact while the
# default IFS still makes read strip the leading and trailing whitespace. The
# second condition handles the last line without the trailing newline.
#
while read -r l || [ -n "$l" ]; do
  r=$(manifest_field "$l" 1 'path')
  u=$(manifest_field "$l" 2)

  remote+=("$r")

  # If the push URL is specified then add it to auth/orig maps.
  #
  if [ -n "$u" ]; then
    push_add "$r" "$u"
  fi
done < <(manifest_filter remote.manifest)

# Find all the existing repositories (directories that end with .git) and sort
# them out into mirrored and local public. Note that local private will end up
# in the mirrored array and will require ad hoc handling.
#
# Note also that the paths are read one per line (rather than relying on the
# word splitting of a command substitution) so that a directory name with
# whitespaces or glob characters ends up as a single, unmodified array
# element. The leading ./ is stripped from every path.
#
mapfile -t all < <(find . -type d -name '*.git' -print -prune |
                     sed -e 's%^\./%%')

mirror=()
local=()

# If we have local manifest, load its repositories and also verify they are
# not in remotes. Also add their push URLs similar to remotes.
#
if [ -f manifest ]; then

  # Note that -r keeps backslashes in the manifest lines intact. The second
  # condition handles the last line without the trailing newline.
  #
  while read -r l || [ -n "$l" ]; do
    r=$(manifest_field "$l" 1 'path')
    u=$(manifest_field "$l" 2)

    # A repository cannot be both local public and mirrored from the remote.
    #
    for i in "${remote[@]}"; do
      if [ "$i" = "$r" ]; then
        error "attempt to mirror into local public repository $r"
      fi
    done

    local+=("$r")

    # If the push URL is specified then add it to auth/orig maps.
    #
    if [ -n "$u" ]; then
      push_add "$r" "$u"
    fi
  done < <(manifest_filter manifest)

  # Everything that is not in local is mirrored (or local private).
  #
  for r in "${all[@]}"; do

    # Clear r if it is found among the local public repositories.
    #
    for i in "${local[@]}"; do
      if [ "$i" = "$r" ]; then
        if [ "$verb" -ge 1 ]; then
          info "local public repository $r"
        fi
        r=
        break
      fi
    done

    if [ -n "$r" ]; then
      mirror+=("$r")
    fi
  done
else
  # Without the local manifest every existing repository is either mirrored
  # or local private.
  #
  mirror=("${all[@]}")
fi

# Common git options: run quiet (-q) unless in the verbose mode.
#
if [ "$verb" -eq 0 ]; then
  git_ops=(-q)
else
  git_ops=()
fi

# Pull-mirror every repository listed in the remote manifest: fetch into the
# ones that already exist locally and clone the new ones.
#
for r in "${remote[@]}"; do

  # Zap an empty directory so that the repository is cloned afresh below.
  #
  if [ -d "$r" ] && [ -z "$(ls -A "$r")" ]; then
    rm -r "$r"
  fi

  if [ -d "$r" ]; then

    # An existing repository without the export marker is a local private
    # one which we must not mirror into.
    #
    if [ ! -f "$r/git-daemon-export-ok" ]; then
      error "attempt to mirror into local private repository $r"
    fi

    if [ "$verb" -ge 1 ]; then
      info "existing repository $r, fetching"
      info git -C "$r" fetch "${git_ops[@]}" --prune --tags
    fi
    git -C "$r" fetch "${git_ops[@]}" --prune --tags

    # Refresh the description file as well.
    #
    fetch "$prot://$host/$r/description" -z "$r/description" -o "$r/description"

  else

    if [ "$verb" -ge 1 ]; then
      info "new repository $r in remote manifest, cloning"
      info git clone "${git_ops[@]}" --mirror "git://$host/$r" "$r"
    fi

    mkdir -p "$r"
    git clone "${git_ops[@]}" --mirror "git://$host/$r" "$r"

    # Download the description file as well.
    #
    fetch "$prot://$host/$r/description" -o "$r/description"
  fi

  # Make sure the repository is marked as public.
  #
  [ -f "$r/git-daemon-export-ok" ] || touch "$r/git-daemon-export-ok"
done

# Remove the mirrored repositories that are no longer listed in the remote
# manifest.
#
for o in "${mirror[@]}"; do

  # A repository without the export marker is local private: leave it alone.
  #
  if [ ! -f "$o/git-daemon-export-ok" ]; then
    if [ "$verb" -ge 1 ]; then
      info "skipping local private repository $o"
    fi
    continue
  fi

  # Move on to the next repository if this one is still in the remote
  # manifest.
  #
  for i in "${remote[@]}"; do
    if [ "$i" = "$o" ]; then
      continue 2
    fi
  done

  if [ "$verb" -ge 1 ]; then
    info "repository $o is no longer in remote manifest, removing"
  fi
  rm -rf "$o"
done

# Mirror to the push URLs.
#
for r in "${!push_auth[@]}"; do

  au="${push_auth[$r]}"
  if [ -n "$au" ]; then
    cmd=( git -C "$r" push "${git_ops[@]}" --mirror "$au" )

    # Note that in the verbose mode, for troubleshooting, we still print the
    # URLs that possibly contain credentials.
    #
    if [ "$verb" -ge 1 ]; then
      info "remote URL $au for repository $r, pushing"
      info "${cmd[@]}"
    fi

    # Disable prompting for username/password if credentials are missing for
    # the remote URL and fail instead.
    #
    # If the remote URL differs from the original one then it contains
    # credentials. It may potentially appear in git's output, so we replace
    # all its occurrences with the original one, not containing credentials.
    #
    ou="${push_orig[$r]}"
    if [ "$au" != "$ou" ]; then

      # Escape special characters in sed pattern/substitution.
      #
      au_re="$(sed -e 's/[].*/\[]/\\&/g' <<<"$au")"
      ou_re="$(sed -e 's/[&/\]/\\&/g'    <<<"$ou")"

      GIT_TERMINAL_PROMPT=0 "${cmd[@]}" 2>&1 | sed "s/$au_re/$ou_re/g" >&2

      # The exit status of the above pipeline is that of sed, so the git
      # failure would not trigger the ERR trap. Thus, check the git exit
      # status explicitly (this must immediately follow the pipeline).
      #
      if [ "${PIPESTATUS[0]}" -ne 0 ]; then
        error "unable to push repository $r to $ou"
      fi
    else
      GIT_TERMINAL_PROMPT=0 "${cmd[@]}"
    fi
  fi
done

# Return to the original working directory.
#
cd "$owd"