Commit 86cedf33 authored by Ulf Andrae's avatar Ulf Andrae
Browse files

Ulf Andrae: Populate repo

parent 167faa57
#!/bin/bash
#
# Tar and store HARMONIE data on hsm using nfs or sftp connection
#
# Ulf Andrae SMHI, 2020
#
# Make the lftp client available; is_present and putw call lftp when ACCESS=sftp.
module load lftp/4.9.2-nsc1
# Print the currently loaded modules (presumably for the run log).
module list
#######################################
# Archive every directory listed in DIRS as one tar file named <dir>.tar.
#
# Must be called with the current directory set to BASE, since the entries
# of DIRS and the file lists produced by find are relative to it.
#
# Globals read:
#   DIRS            whitespace separated list of directories under BASE
#   BASE            input root, used to return to a known directory
#   TARGET          destination directory handed to is_present/putw
#   TARTMP          scratch directory for tar and checksum files
#   FILTER          extended regex selecting files, or "undef" for all files
#   REVERSE_FILTER  "-v" to invert FILTER, empty otherwise
#   REPLACE         "no_way" keeps an already archived tar file
#   CLEAN           "yes" removes the archived files from disk
#   MD5SUM          "yes" creates and stores <dir>.checksum
#   DRY_RUN         0 = archive, 1 = scan only, >1 = scan and list files
# Calls:   is_present, putw
# Exits:   with status 1 if tar, putw or a cd to BASE fails
#######################################
function simple_archive {
  local DIR FILE F ARCHIVED_FILES IS_PRESENT

  # DIRS is split on whitespace on purpose: it is a list of directory names.
  for DIR in $DIRS ; do
    echo "Found a suitable archive:$DIR"

    if [[ $( find "$DIR" -type f | wc -l ) -eq 0 ]] ; then
      # DRY_RUN is a counter (see -q), it never holds the string "yes".
      if [[ $DRY_RUN -gt 0 ]] ; then
        echo " no files in $DIR"
      fi
      continue
    fi

    is_present "$TARGET/$DIR.tar"
    IS_PRESENT=$?
    if [[ $IS_PRESENT -eq 0 && $REPLACE == "no_way" ]] ; then
      echo " already exists:$TARGET/$DIR.tar"
      #
      # This way of cleaning is not consistent with the filtering below
      [[ $CLEAN == "yes" ]] && echo " Cleaning is not done for already archived files "
      continue
    fi

    echo " Scan:$DIR"
    mkdir -p "$TARTMP/$DIR"
    if [[ $FILTER == "undef" ]] ; then
      ARCHIVED_FILES=$( find "$DIR" -type f )
    else
      # REVERSE_FILTER is left unquoted so that an empty value disappears.
      ARCHIVED_FILES=$( find "$DIR" -type f | grep $REVERSE_FILTER -E "$FILTER" )
    fi

    if [[ -n $ARCHIVED_FILES ]] ; then
      if [[ $DRY_RUN -gt 0 ]] ; then
        if [[ $DRY_RUN -gt 1 ]] ; then
          echo " FILES:"
          for F in $ARCHIVED_FILES ; do
            echo " $F"
          done
        fi
        cd "$BASE" || exit 1
        continue
      fi

      echo " Create tar file $TARTMP/$DIR.tar"
      # Feed the list through -T so that tar runs exactly once. With xargs a
      # long list is split over several tar invocations and each "tar c"
      # overwrites the archive written by the previous one.
      printf '%s\n' "$ARCHIVED_FILES" | tar cvf "$TARTMP/$DIR.tar" -T - \
        || { echo " Failed to create $TARTMP/$DIR.tar" ; exit 1 ; }
      # NOTE(review): the tar file is stored directly under TARGET while the
      # presence check above looks for $TARGET/$DIR.tar; these only agree for
      # DIR without a slash - confirm that nested names are not expected.
      putw "$TARTMP/$DIR.tar" "$TARGET/." || exit 1

      if [[ $MD5SUM == "yes" ]] ; then
        for FILE in $ARCHIVED_FILES ; do
          md5sum "$FILE" >> "$TARTMP/$DIR.checksum"
        done
        # Checksum the tar file from within TARTMP so that the recorded
        # name is relative; the subshell leaves our own directory untouched.
        ( cd "$TARTMP" && md5sum "$DIR.tar" ) >> "$TARTMP/$DIR.checksum"
        putw "$TARTMP/$DIR.checksum" "$TARGET/." || exit 1
      fi
      rm -f "$TARTMP/$DIR.tar"
    fi

    if [[ $CLEAN == "yes" ]] ; then
      cd "$BASE" || exit 1
      pwd
      # The names produced by find already start with $DIR/.
      for FILE in $ARCHIVED_FILES ; do
        rm -fv "$FILE"
      done
    fi
    cd "$BASE" || exit 1
  done
}
#######################################
# Check whether a file already exists on the archive.
#
# Globals read:
#   ACCESS    access method, "nfs" or "sftp"
#   SFTPHOST  host contacted when ACCESS=sftp
# Arguments:
#   $1  full path of the file to look for
# Outputs:
#   for nfs, "<path> already exists" on stdout when the file is found
# Returns:
#   0 if the file is present (nfs: exists with non-zero size), 1 otherwise
# Exits:
#   with status 1 for an unknown access method
#######################################
function is_present {
  local LFTP_TARGET

  case $ACCESS in
    sftp)
      # Drop the first 11 characters of the path, which is the length of
      # "/hsm/smhid/". Presumably the sftp server exposes that tree as its
      # root - TODO confirm; a path with another prefix is cut blindly.
      LFTP_TARGET=${1:11}
      lftp -c "open sftp://$SFTPHOST ; find $LFTP_TARGET" 1>/dev/null && return 0
      ;;
    nfs)
      if [[ -s $1 ]] ; then
        # Report the path that was actually tested rather than one rebuilt
        # from the caller's DD/HH variables, which not every caller sets.
        echo "$1 already exists"
        return 0
      fi
      ;;
    *)
      echo "Cannot handle this access method:$ACCESS" ; exit 1
      ;;
  esac
  return 1
}
#######################################
# Store one local file in a directory on the archive.
#
# Globals read:
#   ACCESS    access method, "nfs" or "sftp"
#   TARTMP    scratch directory, holds the temporary lftp command file
#   USER      user name used for the sftp login
#   SFTPHOST  host contacted when ACCESS=sftp
#   VERBOSE   "yes" prints the lftp command file before it is run
# Arguments:
#   $1  local file to store
#   $2  destination directory, created if it does not exist
# Exits:
#   with status 1 if the transfer or copy fails, or for an unknown
#   access method
#######################################
function putw {
  local LFTP_TARGET FOO

  echo " Archive $( basename "$1" ) "
  case $ACCESS in
    sftp)
      # Drop the first 11 characters of the path, which is the length of
      # "/hsm/smhid/". Presumably the sftp server exposes that tree as its
      # root - TODO confirm.
      LFTP_TARGET=${2:11}
      FOO=$TARTMP/foo
      # Command file for lftp: log in, make sure the directory exists,
      # upload the file and leave.
      printf '%s\n' \
        "open -u $USER, sftp://$SFTPHOST" \
        "mkdir -p -f $LFTP_TARGET" \
        "cd $LFTP_TARGET" \
        "put $1" \
        "bye" > "$FOO" || exit 1
      [[ $VERBOSE == "yes" ]] && cat "$FOO"
      lftp -f "$FOO" || exit 1
      rm -f "$FOO"
      ;;
    nfs)
      mkdir -p "$2" || exit 1
      cp -v "$1" "$2" || exit 1
      ;;
    *)
      echo "Cannot handle this access method:$ACCESS" ; exit 1
      ;;
  esac
}
#######################################
# Print the help text: purpose, options and examples.
#
# Globals read:
#   ACCESS    current access method, shown as the default for -a
#   HOSTNAME  a warning is added when it matches "bi1"
# Outputs:
#   the help text on stdout
#######################################
function usage {
  local ECHO="echo -e"

  $ECHO "\n $0 archives a typical HARMONIE experiment to hsm@james by creating tar files per cycle or member and "
  $ECHO " store them in the same structure as on disk. Data selection is possible by specifying a certain year/month/day/hour "
  $ECHO " and/or by selecting, or excluding, certain file names. Data consistency can be verified by optional checksums (md5sum)."
  $ECHO "\n Usage of $0:\n"
  $ECHO " -h Print this help and exit"
  $ECHO " -a ACCESS Set access method to nfs or sftp where $ACCESS is default"
  $ECHO " -b BASE Input directory for your data, e.g. /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive/ "
  $ECHO " -t TARGET Output hsm directory for your data, e.g. /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop"
  # The options below are still parsed but intentionally left out of the help.
  #$ECHO " -e EXP Experiment name, if -b and -t is not used. Assumes:"
  #$ECHO " BASE=/nobackup/smhid15/USER/hm_home/EXP/archive/"
  #$ECHO " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  #$ECHO " -d DISK Disk, like smhid15, if -b and -t is not used. Assumes:"
  #$ECHO " BASE=/nobackup/DISK/USER/hm_home/EXP/archive/"
  #$ECHO " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  #$ECHO " -s NOBACKUP Security level can be nobackup or dblcopy.
  # Applicable if -b and -t is not used. Assumes:"
  #$ECHO " BASE=/nobackup/DISK/USER/hm_home/EXP/archive/"
  #$ECHO " TARGET=/hsm/smhid/NOBACKUP/USER/hm_home/EXP"
  #$ECHO " -u EUSER User, like sm_uandr, if -b and -t is not used and you're archiving for someone else. Assumes:"
  #$ECHO " BASE=/nobackup/DISK/EUSER/hm_home/EXP/archive/"
  #$ECHO " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  $ECHO " -p YYYY[MM][DD][HH] select certain year/month/day/hour for data under BASE"
  $ECHO " -P \"some dirs\" select certain directories under BASE."
  $ECHO " -[f|F] FILTER Apply selection filter on files. -F implies inverse selection. See example below"
  $ECHO " -m Create md5sum of the archived files and tarfile, checksum will be located next to the tar files in the target directory. Default is off"
  $ECHO " -r Replace existing data on hsm, default is not to replace"
  $ECHO " -c Clean data on disk after archiving, default is not to clean. Note that only archived files are removed"
  $ECHO " -q Dry run, scan but do not archive. Add another -q to list files to be archived"
  $ECHO " -v Debug mode"
  $ECHO "\n Examples:"
  $ECHO "\n Archive all output under archive/YYYY from mcp40h12_preop"
  $ECHO " $0 -b /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive -t /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop/"
  $ECHO "\n Archive all output under archive/extract from mcp40h12_preop"
  $ECHO " $0 -b /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive -t /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop/ -P extract"
  #$ECHO "\n Archive to /hsm/smhid/dblcopy/sm_uandr/hm_home/43_mar with checksums, replace existing data and remove original files"
  #$ECHO " $0 -e 43_mar -d smhid15 -u sm_uandr -s dblcopy -c -m -r"
  $ECHO "\n Archive data for a single day"
  $ECHO " $0 -b /nobackup/smhid15/sm_uandr_sftp/incoming/boundaries/IFSENS/ -p 20190901 -t /hsm/smhid/nobackup/sm_uandr/boundaries/IFSENS"
  $ECHO "\n Archive only files matching 'grib' from one specific cycle"
  $ECHO " $0 -b /nobackup/smhid15/sm_mlind/hm_home/4DVBI/archive -p 2019082400 -t /hsm/smhid/nobackup/sm_uandr/hm_home/4DVBI -f 'grib'"
  $ECHO "\n Archive all files but those matching grib and odb from one specific month"
  $ECHO " $0 -b /nobackup/smhid15/sm_mlind/hm_home/4DVBI/archive -p 201908 -t /hsm/smhid/nobackup/sm_uandr/hm_home/4DVBI -F 'grib|odb'\n"

  # Highlight the warning in bold when running on a host matching "bi1".
  if [[ $HOSTNAME =~ bi1 ]] ; then
    tput bold
    $ECHO " WARNING: Do not run archiving from $HOSTNAME \n"
    tput sgr0
  fi
}
# --- Default settings, most of them can be changed by command line options ---

# Where the data comes from and where it goes
BASE="undef"                  # -b, input directory
TARGET="undef"                # -t, output directory on the archive
EXP="undef"                   # -e, experiment name used to build BASE/TARGET
DISK="smhid15"                # -d, disk used to build BASE
EUSER="$USER"                 # -u, owner of the data used to build BASE
NOBACKUP="nobackup"           # -s, storage class used to build TARGET

# How the archive is reached
ACCESS="nfs"                  # -a, nfs or sftp
SFTPHOST="james.nsc.liu.se"   # host used for sftp access

# What to select
PERIOD="undef"                # -p / -P
FILTER="undef"                # -f / -F, file name filter
REVERSE_FILTER=""             # set to -v by -F

# What to do with it
REPLACE="no_way"              # -r
CLEAN="no_way"                # -c
MD5SUM="no_way"               # -m
DRY_RUN=0                     # -q, counts the number of -q given
VERBOSE="nope"                # -v
# Without any arguments there is nothing to do, show the help text instead.
if [[ $# -eq 0 ]] ; then
  usage
  exit 0
fi

# Parse the command line. -h is a plain flag: declared as "h:" it demanded an
# argument, so a lone -h ended up in the error branch and exited with 1.
while getopts hF:f:a:e:d:u:b:t:p:P:s:mrcqv i
do
  case $i in
    h) usage ; exit 0 ;;
    a) ACCESS=$OPTARG ;;
    f) FILTER=$OPTARG ;;
    F) FILTER=$OPTARG ; REVERSE_FILTER="-v" ;;   # -v inverts the grep match
    e) EXP=$OPTARG ;;
    d) DISK=$OPTARG ;;
    u) EUSER=$OPTARG ;;
    b) BASE=$OPTARG ;;
    t) TARGET=$OPTARG ;;
    p) PERIOD=$OPTARG ;;
    P) PERIOD=dirs; DIRS=$OPTARG ;;              # archive named directories
    s) NOBACKUP=$OPTARG ;;
    m) MD5SUM="yes" ;;
    r) REPLACE="yes" ;;
    c) CLEAN="yes" ;;
    q) DRY_RUN=$(( DRY_RUN + 1 )) ;;             # -q -q also lists the files
    v) VERBOSE="yes" ;;
    *) usage ; exit 1 ;;
  esac
done
# Warn in bold when started on a host matching "bi1".
if [[ $HOSTNAME =~ bi1 ]] ; then
  tput bold
  echo -e "\n WARNING: Do not run archiving from $HOSTNAME \n"
  tput sgr0
fi

# Debug mode traces every command.
[[ $VERBOSE == "yes" ]] && set -x

# Make sure the archive can be reached with the selected access method.
case $ACCESS in
  nfs)
    [[ -d /hsm ]] || { echo " /hsm/ is not available for you" ; exit 1 ; }
    groups "$USER" | grep -q smhihsm || { echo "You are not a member of smhihsm" ; exit 1 ; }
    [[ -d /hsm/smhid/$NOBACKUP/$USER ]] || { echo " No user directory for $USER under /hsm/smhid " ; exit 1 ; }
    ;;
  sftp)
    lftp -c "open sftp://$SFTPHOST ; cd $NOBACKUP/$USER" || { echo "Could not access $SFTPHOST" ; exit 1 ; }
    ;;
  *)
    # Tell the user why we stop instead of exiting silently.
    echo "Cannot handle this access method:$ACCESS"
    exit 1
    ;;
esac

# Build BASE and TARGET from disk/user/experiment unless given explicitly.
[[ $BASE == undef ]] && \
  BASE=/nobackup/$DISK/$EUSER/hm_home/$EXP/archive
[[ $TARGET == undef ]] && \
  TARGET=/hsm/smhid/$NOBACKUP/$USER/hm_home/$EXP/

# An "undef" left in TARGET means no experiment was given; only a dry run
# may continue without a usable target.
[[ $TARGET =~ undef ]] && [[ $DRY_RUN -eq 0 ]] && { echo "Something is undefined in your target:$TARGET" ; exit 1 ; }
[[ -d $BASE ]] || { echo "Input directory $BASE does not exist" ; exit 1 ; }

echo "BASE=$BASE"
echo "TARGET=$TARGET"

cd "$BASE" || exit 1
# Keep BASE absolute: the script later returns with "cd $BASE" from within
# subdirectories, which cannot work with a relative path.
BASE=$PWD

# Scratch area for tar and checksum files, unique per process.
TARTMP=/nobackup/smhid17/users/$USER/tartmp_$$
mkdir -p "$TARTMP" || exit 1

# Remove the scratch area on any exit, except in debug mode where it is kept
# when the script stops early.
if [[ $VERBOSE == "nope" ]] ; then
  trap "rm -rf $TARTMP" 0
fi
# Translate the requested selection into
#   DIRS  the directories to scan, relative to BASE
#   YMDH  the regular expression a directory path has to match
case "$PERIOD" in
  undef)
    # No selection given: take every four digit directory found in BASE.
    YY="[0-9]{4}"
    MM="[0-9]{2}"
    DD="[0-9]{2}"
    DIRS=$( ls -1 | grep -E "^${YY}\$" )
    YMDH="^${YY}\/${MM}\/${DD}"
    ;;

  dirs)
    # Explicit directories were given with -P, these are handled separately.
    simple_archive
    rm -rf "$TARTMP"
    exit 0
    ;;

  *)
    # PERIOD is YYYY[MM][DD][HH], cut it into its components.
    YY=${PERIOD:0:4}
    MM=${PERIOD:4:2}
    DD=${PERIOD:6:2}
    HH=${PERIOD:8:2}

    # The year is mandatory.
    if ! [[ $YY =~ ^[0-9]{4}$ ]] ; then
      echo "YY=$YY is not a valid year"
      exit 1
    fi
    DIRS=$YY

    # A missing month or day is replaced by a pattern matching any value,
    # a given one narrows the directory to scan.
    if ! [[ $MM =~ ^[0-9]{2}$ ]] ; then
      echo "MM set to any month" ;
      MM="[0-9]{2}"
    else
      DIRS="$YY/$MM"
    fi

    if ! [[ $DD =~ ^[0-9]{2}$ ]] ; then
      echo "DD set to any day"
      DD="[0-9]{2}"
    else
      DIRS="$YY/$MM/$DD"
    fi

    echo "YY=$YY MM=$MM DD=$DD"

    # The hour is only added to the pattern when it was given.
    YMDH="^${YY}\/${MM}\/${DD}"
    if [[ $HH =~ ^[0-9]{2}$ ]] ; then
      YMDH="${YMDH}/${HH}"
    fi
    ;;
esac
# Main loop: walk through the selected directories and create one tar file
# for every directory that matches YMDH and holds files but no
# subdirectories. A directory D is split into
#   DD  its parent, used as sub directory on the archive
#   HH  its last component, used as name of the tar file
# so that D is stored as $TARGET/$DD/$HH.tar.
# NOTE: DD was used for the day pattern above and is reused here; YMDH has
# already been built at this point.
for DIR in $DIRS ; do
  echo "Found a suitable archive:$DIR"
  for D in $( find "$DIR" -type d | sort -n ) ; do
    DD=$( dirname "$D" )
    [[ $D =~ $YMDH ]] || continue

    # Exclude directories with subdirectories
    # (find lists D itself as well, hence the comparison against 1)
    if [[ $( find "$D" -maxdepth 1 -type d | wc -l ) -gt 1 ]] ; then
      continue
    fi

    if [[ $( find "$D" -maxdepth 1 -type f | wc -l ) -eq 0 ]] ; then
      # DRY_RUN is a counter (see -q), it never holds the string "yes".
      if [[ $DRY_RUN -gt 0 ]] ; then
        echo " no files in $D"
      fi
      continue
    fi

    HH=$( basename "$D" )
    is_present "$TARGET/$DD/$HH.tar"
    IS_PRESENT=$?
    if [[ $IS_PRESENT -eq 0 && $REPLACE == "no_way" ]] ; then
      echo " already exists:$TARGET/$DD/$HH.tar"
      #
      # This way of cleaning is not consistent with the filtering below
      [[ $CLEAN == "yes" ]] && echo " Cleaning is not done for already archived files "
      continue
    fi

    echo " Scan:$DD/$HH"
    mkdir -p "$TARTMP/$DD"
    # Work inside the directory so that the tar file holds plain file names.
    # Stop if this fails, everything below acts on the current directory.
    cd "$DD/$HH" || exit 1
    if [[ $FILTER == "undef" ]] ; then
      ARCHIVED_FILES=$( ls -1 )
    else
      # REVERSE_FILTER is left unquoted so that an empty value disappears.
      ARCHIVED_FILES=$( ls -1 | grep $REVERSE_FILTER -E "$FILTER" )
    fi

    if [[ -n $ARCHIVED_FILES ]] ; then
      if [[ $DRY_RUN -gt 0 ]] ; then
        if [[ $DRY_RUN -gt 1 ]] ; then
          echo " FILES:"
          for F in $ARCHIVED_FILES ; do
            echo " $F"
          done
        fi
        cd "$BASE" || exit 1
        continue
      fi

      echo " Create tar file $TARTMP/$DD/$HH.tar"
      # Feed the list through -T so that tar runs exactly once. With xargs a
      # long list is split over several tar invocations and each "tar c"
      # overwrites the archive written by the previous one.
      printf '%s\n' "$ARCHIVED_FILES" | tar cvf "$TARTMP/$DD/$HH.tar" -T - \
        || { echo " Failed to create $TARTMP/$DD/$HH.tar" ; exit 1 ; }
      putw "$TARTMP/$DD/$HH.tar" "$TARGET/$DD" || exit 1

      if [[ $MD5SUM == "yes" ]] ; then
        for FILE in $ARCHIVED_FILES ; do
          md5sum "$FILE" >> "$TARTMP/$DD/${HH}.checksum"
        done
        # Checksum the tar file from within its directory so that the
        # recorded name is relative; the subshell keeps our directory.
        ( cd "$TARTMP/$DD" && md5sum "$HH.tar" ) >> "$TARTMP/$DD/${HH}.checksum"
        putw "$TARTMP/$DD/$HH.checksum" "$TARGET/$DD" || exit 1
      fi
      rm -f "$TARTMP/$DD/$HH.tar"
    fi

    if [[ $CLEAN == "yes" ]] ; then
      # Only the files that went into the tar file are removed.
      cd "$BASE/$DD" || exit 1
      pwd
      ls "$HH"
      for FILE in $ARCHIVED_FILES ; do
        rm -fv "$HH/$FILE"
      done
    fi
    cd "$BASE" || exit 1
  done
done

# Remove the scratch area.
rm -rf "$TARTMP"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment