Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Ulf Andrae
Exp2HSM
Commits
86cedf33
Commit
86cedf33
authored
Dec 02, 2021
by
Ulf Andrae
Browse files
Ulf Andrae: Populate repo
parent
167faa57
Changes
1
Hide whitespace changes
Inline
Side-by-side
Exp2HSM
0 → 100755
View file @
86cedf33
#!/bin/bash
#
# Tar and store HARMONIE data on hsm using nfs or sftp connection
#
# Run without arguments, or with -h, to print the full usage text.
#
# Ulf Andrae SMHI, 2020
#

# lftp is the client used by the sftp access method further down.
# NOTE(review): "module" is presumably the site's Environment Modules
# command and the pinned version is site-specific - confirm.
module load lftp/4.9.2-nsc1
module list
#######################################
# Archive each directory listed in DIRS (option -P) as one tar file.
#
# For every DIR all regular files below it (optionally filtered) are put
# in $TARTMP/$DIR.tar and uploaded so that the result is $TARGET/$DIR.tar.
#
# Must be called with the current directory set to BASE; all paths are
# relative to it and the working directory is never changed.
#
# Globals read:
#   DIRS            whitespace separated list of directories
#   TARGET, TARTMP  destination and scratch directories
#   FILTER          extended regex, or "undef" for no filtering
#   REVERSE_FILTER  "" or "-v" (invert the filter)
#   REPLACE, CLEAN, MD5SUM  "yes" enables the feature
#   DRY_RUN         0 = archive, 1 = scan only, >1 = scan and list files
# Calls:   is_present, putw
# Exits:   with status 1 if tar or an upload fails
#######################################
function simple_archive {
  local DIR tarball checksum target_dir present line f
  local -a files

  # DIRS is a whitespace separated list by design, hence unquoted.
  for DIR in $DIRS; do
    echo "Found a suitable archive: $DIR"

    if [[ -z $(find "$DIR" -type f -print -quit) ]]; then
      # DRY_RUN is a counter, not a yes/no flag.
      if [[ $DRY_RUN -gt 0 ]]; then
        echo " no files in $DIR"
      fi
      continue
    fi

    tarball=$TARTMP/$DIR.tar
    checksum=$TARTMP/$DIR.checksum
    # Upload next to where is_present looks. For a plain DIR this is
    # "$TARGET/.", for a nested DIR (a/b) it is "$TARGET/a".
    target_dir=$TARGET/$(dirname "$DIR")

    is_present "$TARGET/$DIR.tar"
    present=$?
    if [[ $present -eq 0 && $REPLACE == "no_way" ]]; then
      echo " already exists: $TARGET/$DIR.tar"
      #
      # This way of cleaning is not consistent with the filtering below
      [[ $CLEAN == "yes" ]] && echo " Cleaning is not done for already archived files "
      continue
    fi

    echo " Scan: $DIR"
    mkdir -p "$TARTMP/$DIR"

    # Collect the files to archive, one path per line (paths start with DIR/).
    files=()
    while IFS= read -r line; do
      files+=("$line")
    done < <(
      if [[ $FILTER == "undef" ]]; then
        find "$DIR" -type f
      else
        # REVERSE_FILTER is unquoted on purpose: empty means "no option".
        find "$DIR" -type f | grep $REVERSE_FILTER -E -- "$FILTER"
      fi
    )

    if [[ ${#files[@]} -gt 0 ]]; then
      if [[ $DRY_RUN -gt 0 ]]; then
        if [[ $DRY_RUN -gt 1 ]]; then
          echo " FILES:"
          for f in "${files[@]}"; do
            echo " $f"
          done
        fi
        continue
      fi

      echo " Create tar file $tarball"
      # Feed the list on stdin: xargs may split a long list over several
      # tar invocations, each of which would overwrite the previous tar.
      printf '%s\0' "${files[@]}" | tar -cvf "$tarball" --null -T - \
        || { echo " Failed to create $tarball"; exit 1; }

      putw "$tarball" "$target_dir" || exit 1

      if [[ $MD5SUM == "yes" ]]; then
        for f in "${files[@]}"; do
          md5sum -- "$f" >> "$checksum"
        done
        # Checksum the tar by its relative name, as seen from TARTMP.
        (cd "$TARTMP" && md5sum "$DIR.tar") >> "$checksum"
        putw "$checksum" "$target_dir" || exit 1
      fi

      rm -f -- "$tarball"
    fi

    if [[ $CLEAN == "yes" ]]; then
      pwd
      # The collected paths already include DIR/, remove them as they are.
      for f in "${files[@]}"; do
        rm -fv -- "$f"
      done
    fi
  done
}
#######################################
# Check whether a file already exists on the archive.
#
# Arguments: $1 - full target path, e.g. $TARGET/<dir>/<name>.tar
# Globals:   ACCESS (nfs|sftp), SFTPHOST
# Outputs:   a notice on stdout when the file is found via nfs
# Returns:   0 if the file is present, 1 otherwise
# Exits:     with status 1 for an unknown access method
#######################################
function is_present {
  local remote_path

  case $ACCESS in
    sftp)
      # Drop the first 11 characters of the path.
      # NOTE(review): presumably the "/hsm/smhid/" prefix, which is not
      # part of the path on the sftp server - confirm.
      remote_path=${1:11}
      lftp -c "open sftp://$SFTPHOST; find $remote_path" 1> /dev/null && return 0
      ;;
    nfs)
      # -s: exists and is not empty. Report the path that was tested
      # rather than rebuilding it from the caller's globals.
      [[ -s $1 ]] && { echo "$1 already exists"; return 0; }
      ;;
    *)
      echo "Cannot handle this access method: $ACCESS"
      exit 1
      ;;
  esac

  return 1
}
#######################################
# Upload one file to the archive.
#
# Arguments: $1 - local file to upload
#            $2 - target directory (created if missing)
# Globals:   ACCESS (nfs|sftp), SFTPHOST, USER, TARTMP, VERBOSE
# Outputs:   progress on stdout
# Exits:     with status 1 if the transfer fails or the access method
#            is unknown
#######################################
function putw {
  local remote_dir script

  echo " Archive $(basename "$1")"

  case $ACCESS in
    sftp)
      # Drop the first 11 characters of the directory.
      # NOTE(review): presumably the "/hsm/smhid/" prefix - confirm.
      remote_dir=${2:11}
      # The lftp commands are written to a scratch file under TARTMP.
      script=$TARTMP/foo
      cat > "$script" << EOF
open -u $USER, sftp://$SFTPHOST
mkdir -p -f $remote_dir
cd $remote_dir
put $1
bye
EOF
      [[ $VERBOSE == "yes" ]] && cat "$script"
      lftp -f "$script" || exit 1
      rm -f -- "$script"
      ;;
    nfs)
      mkdir -p -- "$2" || exit 1
      cp -v -- "$1" "$2" || exit 1
      ;;
    *)
      echo "Cannot handle this access method: $ACCESS"
      exit 1
      ;;
  esac
}
#######################################
# Print the help text: purpose, options and examples.
#
# Globals: ACCESS (shown as the default access method), HOSTNAME
# Outputs: help text on stdout; an extra bold warning when the host
#          name contains "bi1"
# Returns: 0
#######################################
function usage {
  echo -e "\n $0 archives a typical HARMONIE experiment to hsm@james by creating tar files per cycle or member and "
  echo -e " store them in the same structure as on disk. Data selection is possible by specifying a certain year/month/day/hour "
  echo -e " and/or by selecting, or excluding, certain file names. Data consistency can be verified by optional checksums (md5sum)."
  echo -e "\n Usage of $0:\n"
  echo -e " -h Print this help and exit"
  echo -e " -a ACCESS Set access method to nfs or sftp where $ACCESS is default"
  echo -e " -b BASE Input directory for your data, e.g. /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive/ "
  echo -e " -t TARGET Output hsm directory for your data, e.g. /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop"
  # The options below are accepted by the option parser but intentionally
  # left out of the printed help.
  #echo -e " -e EXP Experiment name, if -b and -t is not used. Assumes:"
  #echo -e " BASE=/nobackup/smhid15/USER/hm_home/EXP/archive/"
  #echo -e " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  #echo -e " -d DISK Disk, like smhid15, if -b and -t is not used. Assumes:"
  #echo -e " BASE=/nobackup/DISK/USER/hm_home/EXP/archive/"
  #echo -e " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  #echo -e " -s NOBACKUP Security level can be nobackup or dblcopy.
  # Applicable if -b and -t is not used. Assumes:"
  #echo -e " BASE=/nobackup/DISK/USER/hm_home/EXP/archive/"
  #echo -e " TARGET=/hsm/smhid/NOBACKUP/USER/hm_home/EXP"
  #echo -e " -e EUSER User, like sm_uandr, if -b and -t is not used and you're archiving for someone else. Assumes:"
  #echo -e " BASE=/nobackup/DISK/EUSER/hm_home/EXP/archive/"
  #echo -e " TARGET=/hsm/smhid/nobackup/USER/hm_home/EXP"
  echo -e " -p YYYY[MM][DD][HH] select certain year/month/day/hour for data under BASE"
  echo -e " -P \"some dirs\" select certain directories under BASE."
  echo -e " -[f|F] FILTER Apply selection filter on files. -F implies inverse selection. See example below"
  echo -e " -m Create md5sum of the archived files and tarfile, checksum will be located next to the tar files in the target directory. Default is off"
  echo -e " -r Replace existing data on hsm, default is not to replace"
  echo -e " -c Clean data on disk after archiving, default is not to clean. Note that only archived files are removed"
  echo -e " -q Dry run, scan but do not archive. Add another -q to list files to be archived"
  echo -e " -v Debug mode"
  echo -e "\n Examples:"
  echo -e "\n Archive all output under archive/YYYY from mcp40h12_preop"
  echo -e " $0 -b /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive -t /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop/"
  echo -e "\n Archive all output under archive/extract from mcp40h12_preop"
  echo -e " $0 -b /nobackup/smhid15/sm_uandr/hm_home/mcp40h12_preop/archive -t /hsm/smhid/nobackup/sm_uandr/hm_home/mcp40h12_preop/ -P extract"
  #echo -e "\n Archive to /hsm/smhid/dblcopy/sm_uandr/hm_home/43_mar with checksums, replace existing data and remove original files"
  #echo -e " $0 -e 43_mar -d smhid15 -u sm_uandr -s dblcopy -c -m -r"
  echo -e "\n Archive data for a single day"
  echo -e " $0 -b /nobackup/smhid15/sm_uandr_sftp/incoming/boundaries/IFSENS/ -p 20190901 -t /hsm/smhid/nobackup/sm_uandr/boundaries/IFSENS"
  echo -e "\n Archive only files matching 'grib' from one specific cycle"
  echo -e " $0 -b /nobackup/smhid15/sm_mlind/hm_home/4DVBI/archive -p 2019082400 -t /hsm/smhid/nobackup/sm_uandr/hm_home/4DVBI -f 'grib'"
  echo -e "\n Archive all files but those matching grib and odb from one specific month"
  echo -e " $0 -b /nobackup/smhid15/sm_mlind/hm_home/4DVBI/archive -p 201908 -t /hsm/smhid/nobackup/sm_uandr/hm_home/4DVBI -F 'grib|odb' \n"

  # An "if" (not "&&") so that the function returns 0 on other hosts too.
  if [[ $HOSTNAME =~ bi1 ]]; then
    tput bold
    echo -e " WARNING: Do not run archiving from $HOSTNAME \n"
    tput sgr0
  fi
}
# ---------------------------------------------------------------------
# Default settings. Each of them, except SFTPHOST, can be changed by a
# command line option.
# ---------------------------------------------------------------------

# How and where to reach the archive
ACCESS="nfs"
SFTPHOST="james.nsc.liu.se"
NOBACKUP="nobackup"

# Pieces used to build BASE and TARGET when these are not given
DISK="smhid15"
EXP="undef"
EUSER="$USER"
BASE="undef"
TARGET="undef"

# Data selection; "undef" means no restriction
PERIOD="undef"
FILTER="undef"
REVERSE_FILTER=""

# Feature switches, turned on by setting them to "yes"
CLEAN="no_way"
REPLACE="no_way"
MD5SUM="no_way"
VERBOSE="nope"

# Counts the number of -q options given
DRY_RUN=0
# Without any argument there is nothing to do: show the help and stop.
if [[ $# -eq 0 ]]; then
  usage
  exit 0
fi

# Parse the command line. Options followed by ":" take an argument.
# -h must not take one, otherwise a plain "-h" is rejected by getopts
# and the help is printed with a failure status.
while getopts hF:f:a:e:d:u:b:t:p:P:s:mrcqv opt; do
  case $opt in
    h)
      usage
      exit 0
      ;;
    a) ACCESS=$OPTARG ;;
    f) FILTER=$OPTARG ;;
    F)
      # Same as -f, but keep the files that do NOT match
      FILTER=$OPTARG
      REVERSE_FILTER="-v"
      ;;
    e) EXP=$OPTARG ;;
    d) DISK=$OPTARG ;;
    u) EUSER=$OPTARG ;;
    b) BASE=$OPTARG ;;
    t) TARGET=$OPTARG ;;
    p) PERIOD=$OPTARG ;;
    P)
      # Archive the listed directories instead of a YYYY/MM/DD/HH tree
      PERIOD=dirs
      DIRS=$OPTARG
      ;;
    s) NOBACKUP=$OPTARG ;;
    m) MD5SUM="yes" ;;
    r) REPLACE="yes" ;;
    c) CLEAN="yes" ;;
    q) DRY_RUN=$((DRY_RUN + 1)) ;; # each -q raises the dry run level
    v) VERBOSE="yes" ;;
    *)
      usage
      exit 1
      ;;
  esac
done
# Print a bold warning when the host name contains "bi1".
if [[ $HOSTNAME =~ bi1 ]]; then
  tput bold
  echo -e "\n WARNING: Do not run archiving from $HOSTNAME \n"
  tput sgr0
fi

# Debug mode (-v): trace every command from here on.
if [[ $VERBOSE == "yes" ]]; then
  set -x
fi
# Verify that the archive can be reached with the chosen access method
# before any work is done.
case $ACCESS in
  nfs)
    # The archive must be mounted, the user must belong to the smhihsm
    # group and must have a personal directory on the archive.
    [[ -d /hsm ]] || { echo " /hsm/ is not available for you"; exit 1; }
    groups "$USER" | grep -q smhihsm \
      || { echo "You are not a member of smhihsm"; exit 1; }
    [[ -d /hsm/smhid/$NOBACKUP/$USER ]] \
      || { echo " No user directory for $USER under /hsm/smhid "; exit 1; }
    ;;
  sftp)
    # Try to enter the user's directory on the sftp server.
    lftp -c "open sftp://$SFTPHOST; cd $NOBACKUP/$USER" \
      || { echo "Could not access $SFTPHOST"; exit 1; }
    ;;
  *)
    # Say why we stop instead of exiting silently.
    echo "Cannot handle this access method: $ACCESS"
    exit 1
    ;;
esac
# Build BASE and TARGET from disk/user/experiment when not given with -b/-t.
if [[ $BASE == undef ]]; then
  BASE=/nobackup/$DISK/$EUSER/hm_home/$EXP/archive
fi
if [[ $TARGET == undef ]]; then
  TARGET=/hsm/smhid/$NOBACKUP/$USER/hm_home/$EXP/
fi

# A target still containing "undef" means that neither -t nor -e was
# given. This is tolerated for dry runs only.
if [[ $TARGET =~ undef ]] && [[ $DRY_RUN -eq 0 ]]; then
  echo "Something is undefined in your target: $TARGET"
  exit 1
fi

[[ -d $BASE ]] || { echo "Input directory $BASE does not exist"; exit 1; }

echo "BASE=$BASE"
echo "TARGET=$TARGET"

cd "$BASE" || exit 1
# The script returns to BASE with "cd $BASE" from other directories, so
# a relative -b path must be made absolute.
BASE=$PWD

# Scratch directory for tar and checksum files, unique per process.
TARTMP=/nobackup/smhid17/users/$USER/tartmp_$$
mkdir -p "$TARTMP" || exit 1

# Remove the scratch directory on exit, unless in debug mode (-v).
if [[ $VERBOSE == "nope" ]]; then
  trap 'rm -rf -- "$TARTMP"' EXIT
fi
# Translate PERIOD into
#   DIRS - the directories under BASE to scan
#   YMDH - a regex that the relative path of a directory must match
#          (YYYY/MM/DD, optionally followed by /HH) to be archived
if [[ $PERIOD == "undef" ]]; then

  # No period given: take every YYYY directory under BASE
  YY="[0-9]{4}"
  MM="[0-9]{2}"
  DD="[0-9]{2}"
  # The regex is quoted so that the shell cannot expand it as a glob.
  DIRS=$(ls -1 | grep -E "^${YY}\$")
  YMDH="^${YY}\/${MM}\/${DD}"

elif [[ $PERIOD == "dirs" ]]; then

  # -P: archive the listed directories as they are, then stop
  simple_archive
  rm -rf -- "$TARTMP"
  exit 0

else

  # -p YYYY[MM][DD][HH]. A field that is left out matches anything; a
  # field that is present but malformed is an error, so that a typo
  # cannot silently widen the selection.
  if [[ ${#PERIOD} -gt 10 ]]; then
    echo "PERIOD=$PERIOD is longer than YYYYMMDDHH"
    exit 1
  fi

  YY=${PERIOD:0:4}
  MM=${PERIOD:4:2}
  DD=${PERIOD:6:2}
  HH=${PERIOD:8:2}
  two_digits='^[0-9]{2}$'

  if [[ $YY =~ ^[0-9]{4}$ ]]; then
    DIRS=$YY
  else
    echo "YY=$YY is not a valid year"
    exit 1
  fi

  if [[ -z $MM ]]; then
    echo "MM set to any month"
    MM="[0-9]{2}"
  elif [[ $MM =~ $two_digits ]]; then
    DIRS="$YY/$MM"
  else
    echo "MM=$MM is not a valid month"
    exit 1
  fi

  if [[ -z $DD ]]; then
    echo "DD set to any day"
    DD="[0-9]{2}"
  elif [[ $DD =~ $two_digits ]]; then
    DIRS="$YY/$MM/$DD"
  else
    echo "DD=$DD is not a valid day"
    exit 1
  fi

  echo "YY=$YY MM=$MM DD=$DD"

  if [[ -z $HH ]]; then
    YMDH="^${YY}\/${MM}\/${DD}"
  elif [[ $HH =~ $two_digits ]]; then
    YMDH="^${YY}\/${MM}\/${DD}/${HH}"
  else
    echo "HH=$HH is not a valid hour"
    exit 1
  fi

fi
# Main loop: create one tar file per leaf directory below the selected
# DIRS whose relative path matches YMDH, and upload it to the same
# relative location under TARGET.
#
# The current directory is BASE at this point. Remember it by absolute
# path so that we can always get back, also when -b was relative.
start_dir=$PWD

# DIRS is a whitespace separated list by design, hence unquoted.
for DIR in $DIRS; do
  echo "Found a suitable archive: $DIR"

  # The directory list is read on fd 3 so that commands inside the loop
  # cannot consume it through stdin.
  while IFS= read -r D <&3; do

    # DD = parent path and HH = last component of the candidate. Both
    # are globals on purpose, is_present may refer to them.
    DD=$(dirname "$D")
    [[ $D =~ $YMDH ]] || continue

    # Exclude directories with subdirectories
    [[ -n $(find "$D" -mindepth 1 -maxdepth 1 -type d -print -quit) ]] && continue

    if [[ -z $(find "$D" -maxdepth 1 -type f -print -quit) ]]; then
      # DRY_RUN is a counter, not a yes/no flag.
      if [[ $DRY_RUN -gt 0 ]]; then
        echo " no files in $D"
      fi
      continue
    fi

    HH=$(basename "$D")
    is_present "$TARGET/$DD/$HH.tar"
    IS_PRESENT=$?

    if [[ $IS_PRESENT -eq 0 && $REPLACE == "no_way" ]]; then
      echo " already exists: $TARGET/$DD/$HH.tar"
      #
      # This way of cleaning is not consistent with the filtering below
      [[ $CLEAN == "yes" ]] && echo " Cleaning is not done for already archived files "
      continue
    fi

    echo " Scan: $DD/$HH"
    mkdir -p "$TARTMP/$DD" || exit 1
    tarball=$TARTMP/$DD/$HH.tar
    checksum=$TARTMP/$DD/$HH.checksum

    # Work inside the directory so that the tar file and the checksums
    # hold bare file names. Never list or remove files if this fails.
    cd "$D" || exit 1

    # File names are assumed not to contain newlines.
    files=()
    while IFS= read -r line; do
      files+=("$line")
    done < <(
      if [[ $FILTER == "undef" ]]; then
        ls -1
      else
        # REVERSE_FILTER is unquoted on purpose: empty means "no option".
        ls -1 | grep $REVERSE_FILTER -E -- "$FILTER"
      fi
    )

    if [[ ${#files[@]} -gt 0 ]]; then

      if [[ $DRY_RUN -gt 0 ]]; then
        if [[ $DRY_RUN -gt 1 ]]; then
          echo " FILES:"
          for f in "${files[@]}"; do
            echo " $f"
          done
        fi
        cd "$start_dir" || exit 1
        continue
      fi

      echo " Create tar file $tarball"
      # Feed the list on stdin: xargs may split a long list over several
      # tar invocations, each of which would overwrite the previous tar.
      printf '%s\0' "${files[@]}" | tar -cvf "$tarball" --null -T - \
        || { echo " Failed to create $tarball"; exit 1; }

      putw "$tarball" "$TARGET/$DD" || exit 1

      if [[ $MD5SUM == "yes" ]]; then
        for f in "${files[@]}"; do
          md5sum -- "$f" >> "$checksum"
        done
        # Checksum the tar by its bare name, as seen from its directory.
        (cd "$TARTMP/$DD" && md5sum "$HH.tar") >> "$checksum"
        putw "$checksum" "$TARGET/$DD" || exit 1
      fi

      rm -f -- "$tarball"
    fi

    if [[ $CLEAN == "yes" ]]; then
      cd "$start_dir/$DD" || exit 1
      pwd
      ls "$HH"
      for f in "${files[@]}"; do
        rm -fv -- "$HH/$f"
      done
    fi

    cd "$start_dir" || exit 1

  done 3< <(find "$DIR" -type d | sort -n)
done

# Remove the scratch directory
if [[ -n $TARTMP ]]; then
  rm -rf -- "$TARTMP"
fi
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment