al37919
19-04-2007, 21:10
Хотя в принципе в конфе вопрос уже не раз обсуждался и предлагались разные скрипты для этой цели (например, здесь: http://www.mtdev.com/2002/05/linux-wget/ ), тем не менее поделюсь скриптом собственной разработки, который управляет закачкой с помощью wget в 1 поток. Скрипт написан на tcl. Для его работы требуется установить tcl, а также внешний wget:
ipkg install tcl
ipkg install wget
Добавление закачки производится путем добавления URL в текстовый файл ( /opt/tmp/wget/wget.list ), содержащий список закачек.
А вот собственно код
/opt/local/bin/check.wget.tcl :
#!/bin/sh
# The next line is executed by /bin/sh, but not tcl \
exec tclsh8.4 "$0" ${1+"$@"}
#NOTE: the three lines above must stay exactly where they are. The trailing
#backslash on line 2 is what hides line 3 from tcl, and the TCLSH_BIN
#detection below reads line 3 of this very file.
#This script was created by me (al37919) in an effort that it will be useful for myself.
#Use it on your own risk. No warranty of any kind. No license of any kind. :)
#Version 2.0 { added support of pid file }
#Version 2.2 { bugfixes }
#REFRESH_TIME is measured in seconds
#It is the pause between two consecutive checks in the main loop.
set REFRESH_TIME 30
#Path of the wget executable that is launched for every download
set WGET_BIN [file join / opt bin wget]
#Base directory holding the lists, the log and the download directories
set WGET_BASE [file join / opt tmp wget]
#Queue of URLs, one per line; the first line is the one being downloaded
set WGET_LIST [file join $WGET_BASE wget.list]
#URLs are appended here once their download has been detected as finished
set WGET_LIST_COMPLETED [file join $WGET_BASE wget.completed.list]
#URLs are appended here when the wget log reports them as not retrievable
set WGET_LIST_NOT_FOUND [file join $WGET_BASE wget.not_found.list]
#Log file written by wget (passed to it with -o)
set WGET_LOG [file join $WGET_BASE wget.log]
#Directory wget downloads into (passed to it with -P)
set WGET_PARTIAL [file join $WGET_BASE partial]
#Directory finished files are moved to
set WGET_COMPLETED [file join $WGET_BASE completed]
#TCLSH_BIN receives the second word of line 3 of this script, i.e. the
#interpreter name from the exec line above. If sed or awk fails, catch stores
#the error text in TCLSH_BIN instead.
catch { exec sed -n 3p $argv0 | awk { { print $2 } } } TCLSH_BIN
#Reads FileName and returns its non-blank lines as a list.
#Every returned line has leading and trailing whitespace removed.
#If the file cannot be opened a message is printed and the script exits.
proc FileToList {FileName} {
    if {[catch {open $FileName r} chan]} {
        puts "Unable to open \"$FileName\" for reading; error $chan."
        exit
    }
    set result [list]
    #gets returns a negative count once the end of the file is reached
    while {[gets $chan rawLine] >= 0} {
        set cleaned [string trim $rawLine]
        if {$cleaned eq ""} {
            continue
        }
        lappend result $cleaned
    }
    close $chan
    return $result
}
#Overwrites FileName with the elements of lList, one element per line.
#Every element is stripped of leading and trailing whitespace before writing.
#If the file cannot be opened a message is printed and the script exits.
proc ListToFile {FileName lList} {
    if {[catch {open $FileName w} chan]} {
        puts "Unable to open \"$FileName\" for writing; error $chan."
        exit
    }
    set count [llength $lList]
    for {set idx 0} {$idx < $count} {incr idx} {
        puts $chan [string trim [lindex $lList $idx]]
    }
    close $chan
}
#Tells whether the pid stored on line LineNum of ::PIDFILE belongs to a
#running process called Name.
#The ::PIDFILE holds 2 lines:
#Line 1 is the pid of this daemon itself (started through ::TCLSH_BIN)
#Line 2 is the pid of wget instance started from the daemon
#Returns 1 when the stored pid is among the pids reported by pidof for Name,
#0 otherwise (including the case when ::PIDFILE does not exist).
proc CheckRunning { LineNum Name } {
    if { ![file exists $::PIDFILE] } {
        return 0
    }
    #catch keeps either the command output or its error text in the variable
    catch { exec sed -n ${LineNum}p $::PIDFILE } recordedPid
    catch { exec pidof $Name } livePids
    foreach candidate [split $livePids] {
        if {$recordedPid == $candidate} {
            return 1
        }
    }
    return 0
}
#Main part of the daemon.
#Usage: check.wget.tcl -p pidfile
#Every ::REFRESH_TIME seconds it checks whether the wget started earlier is
#still running. If it is not, the wget log is inspected to decide whether the
#current URL is completed or not retrievable, the lists are updated
#accordingly and wget is started for the first URL of ::WGET_LIST.
if { $argc != 2 || [lindex $argv 0] ne "-p" } {
    puts "USAGE:"
    puts "$argv0 -p pidfile"
    exit
}
set PIDFILE [lindex $argv 1]
if {[CheckRunning 1 $::TCLSH_BIN] > 0} {
    puts "$::TCLSH_BIN $argv0 is already running"
    exit
}

#Returns the output of "Tool -n3 ::WGET_LOG" (Tool is head or tail).
#Returns an empty string when the command fails, e.g. because wget has not
#written any log yet; an uncaught exec error would terminate the daemon.
proc ReadLogPart {Tool} {
    if {[catch {exec $Tool -n3 $::WGET_LOG} text]} {
        return ""
    }
    return $text
}

#Removes every occurrence of Url from lList, writes the remaining entries
#back to ::WGET_LIST and appends Url to ArchiveFile.
#Returns the remaining entries.
proc RetireURL {Url lList ArchiveFile} {
    set remaining {}
    foreach entry $lList {
        if {$entry ne $Url} {
            lappend remaining $entry
        }
    }
    ListToFile $::WGET_LIST $remaining
    set f [open $ArchiveFile a]
    puts $f $Url
    close $f
    return $remaining
}

set lList [FileToList $::WGET_LIST]
set CurrentURL [lindex $lList 0]
#This is just precaution. If these dirs exist nothing will happen.
file mkdir $WGET_PARTIAL $WGET_COMPLETED
#The following is useful if the $WGET_LIST is empty while starting of $argv0
#ListToFile reports a pid file that cannot be opened and exits.
ListToFile $PIDFILE [list [pid]]
while {1} {
    if {[CheckRunning 2 wget] == 0} {
        set lList [FileToList $::WGET_LIST]
        #Do nothing if the line is empty (this includes also empty input file)
        if {$CurrentURL ne ""} {
            set FileName [lindex [file split $CurrentURL] end]
            #Let's check that log corresponds to the top line of wget.list
            #Plain substring search is used: the file name may contain
            #characters that are special in a regular expression.
            if {[string first $FileName [ReadLogPart head]] >= 0} {
                set Out [ReadLogPart tail]
                #The file is completed if its name is followed by "saved"
                set namePos [string first $FileName $Out]
                set Saved 0
                if {$namePos >= 0} {
                    set afterName [expr {$namePos + [string length $FileName]}]
                    set Saved [expr {[string first saved $Out $afterName] >= 0}]
                }
                set rule "^.*ERROR 404.*Not Found.*|^.*Resolving.*failed: Name or service not known.*|^.*not an http or ftp url.*"
                if {$Saved || [regexp {The file is already fully retrieved; nothing to do} $Out]} {
                    #A failed move is reported but must not stop the daemon
                    if {[catch {file rename -force [file join $::WGET_PARTIAL $FileName] [file join $::WGET_COMPLETED]} err]} {
                        puts "Unable to move \"$FileName\" to \"$::WGET_COMPLETED\"; error $err."
                    }
                    set lList [RetireURL $CurrentURL $lList $::WGET_LIST_COMPLETED]
                    set CurrentURL ""
                } elseif {[regexp $rule $Out]} {
                    #URL is not found
                    set lList [RetireURL $CurrentURL $lList $::WGET_LIST_NOT_FOUND]
                    set CurrentURL ""
                }
            }
        }
        if {[llength $lList] != 0} {
            set CurrentURL [lindex $lList 0]
            #-c -P -o are the short forms of --continue --directory-prefix --output-file
            if {[catch {exec $::WGET_BIN $CurrentURL -c -P $::WGET_PARTIAL -o $::WGET_LOG &} WgetPid]} {
                puts "Unable to start \"$::WGET_BIN\"; error $WgetPid."
            } else {
                #Line 1: pid of the daemon, line 2: pid of the wget just started
                ListToFile $PIDFILE [list [pid] $WgetPid]
            }
        }
    }
    after [expr {$::REFRESH_TIME * 1000}]
}
Этот скрипт сидит в оперативке и каждые 30 сек проверяет, запущен ли wget. Если нет, то он запускает загрузку файла, который записан в первой строке файла /opt/tmp/wget/wget.list. Первая версия использовала cron, однако после достаточно длительного использования было обнаружено, что некоторые процессы не завершаются и остаются сидеть в памяти. Почему --- не знаю. Меня это несколько достало, и я переделал этот скрипт и превратил его в демона.
второй скрипт под названием /opt/etc/init.d/S99wget запускает check.wget.tcl при перезагрузках, а также служит для запуска/остановки его вручную.
#!/bin/sh
# Settings shared by the start/stop/reload functions below.
# NAME    - file name of the daemon script
# DAEMON  - full path of the daemon script
# PIDFILE - pid file handed to the daemon with -p
NAME=check.wget.tcl
DAEMON=/opt/local/bin/${NAME}
PIDFILE=/var/run/${NAME}.pid
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/opt/bin:/opt/sbin:/opt/local/bin
# Starts $DAEMON in the background with "-p $PIDFILE", unless the pid stored
# on line 1 of $PIDFILE is listed by ps.
# Prints "done." after launching and "already running." otherwise.
start() {
    echo -n "Starting $NAME... "
    daemon_pid=""
    if [ -e "$PIDFILE" ]
    then
        daemon_pid=$(sed -n 1p "$PIDFILE")
    fi
    # The pid is compared with the whole first column of ps. A substring
    # search would take stale pid 12 for live pid 123, and several matches
    # would break the test expression.
    if [ -n "$daemon_pid" ] && ps | awk -v pid="$daemon_pid" '$1 == pid { found = 1 } END { exit !found }'
    then
        echo "already running."
    else
        "$DAEMON" -p "$PIDFILE" &
        echo "done."
    fi
}
# Stops the daemon and the wget it started, then removes $PIDFILE.
# $PIDFILE line 1 is the daemon pid, line 2 the wget pid.
# Prints "done." when a pid file existed and "not started." otherwise.
stop() {
    echo -n "Shutting down $NAME... "
    if [ -e "$PIDFILE" ]
    then
        daemon_pid=$(sed -n 1p "$PIDFILE")
        wget_pid=$(sed -n 2p "$PIDFILE")
        # The daemon is killed first so that it cannot start a new wget
        # between the two kills. Missing lines are skipped.
        [ -n "$daemon_pid" ] && kill "$daemon_pid" 2> /dev/null
        [ -n "$wget_pid" ] && kill "$wget_pid" 2> /dev/null
        rm -f "$PIDFILE"
        echo "done."
    else
        echo "not started."
    fi
}
# Kills only the process whose pid is on line 2 of $PIDFILE; the process on
# line 1 is left alone and the pid file is kept.
# Prints "done." when a pid file exists and "not started." otherwise.
reload() {
    echo -n "Reloading $NAME... "
    if ! [ -e $PIDFILE ]
    then
        echo "not started."
        return
    fi
    kill "$(sed -n 2p $PIDFILE)" 2> /dev/null
    echo "done."
}
# Runs the action named by the first argument.
# start, stop and reload call the function of the same name; restart stops,
# waits one second and starts again. Anything else prints the usage line and
# exits with status 1. A handled action ends with exit status 0.
case "$1" in
    start|stop|reload)
        "$1"
        ;;
    restart)
        stop
        sleep 1
        start
        ;;
    *)
        echo "Usage: $0 (start|stop|reload|restart)"
        exit 1
        ;;
esac
exit 0
Продолжение см. в следующем посте --- этот стал слишком длинным и его форум не переваривает
ipkg install tcl
ipkg install wget
Добавление закачки производится путем добавления URL в текстовый файл ( /opt/tmp/wget/wget.list ), содержащий список закачек.
А вот собственно код
/opt/local/bin/check.wget.tcl :
#!/bin/sh
# The next line is executed by /bin/sh, but not tcl \
exec tclsh8.4 "$0" ${1+"$@"}
#NOTE: the three lines above must stay exactly where they are. The trailing
#backslash on line 2 is what hides line 3 from tcl, and the TCLSH_BIN
#detection below reads line 3 of this very file.
#This script was created by me (al37919) in an effort that it will be useful for myself.
#Use it on your own risk. No warranty of any kind. No license of any kind. :)
#Version 2.0 { added support of pid file }
#Version 2.2 { bugfixes }
#REFRESH_TIME is measured in seconds
#It is the pause between two consecutive checks in the main loop.
set REFRESH_TIME 30
#Path of the wget executable that is launched for every download
set WGET_BIN [file join / opt bin wget]
#Base directory holding the lists, the log and the download directories
set WGET_BASE [file join / opt tmp wget]
#Queue of URLs, one per line; the first line is the one being downloaded
set WGET_LIST [file join $WGET_BASE wget.list]
#URLs are appended here once their download has been detected as finished
set WGET_LIST_COMPLETED [file join $WGET_BASE wget.completed.list]
#URLs are appended here when the wget log reports them as not retrievable
set WGET_LIST_NOT_FOUND [file join $WGET_BASE wget.not_found.list]
#Log file written by wget (passed to it with -o)
set WGET_LOG [file join $WGET_BASE wget.log]
#Directory wget downloads into (passed to it with -P)
set WGET_PARTIAL [file join $WGET_BASE partial]
#Directory finished files are moved to
set WGET_COMPLETED [file join $WGET_BASE completed]
#TCLSH_BIN receives the second word of line 3 of this script, i.e. the
#interpreter name from the exec line above. If sed or awk fails, catch stores
#the error text in TCLSH_BIN instead.
catch { exec sed -n 3p $argv0 | awk { { print $2 } } } TCLSH_BIN
#Reads FileName and returns its non-blank lines as a list.
#Every returned line has leading and trailing whitespace removed.
#If the file cannot be opened a message is printed and the script exits.
proc FileToList {FileName} {
    if {[catch {open $FileName r} chan]} {
        puts "Unable to open \"$FileName\" for reading; error $chan."
        exit
    }
    set result [list]
    #gets returns a negative count once the end of the file is reached
    while {[gets $chan rawLine] >= 0} {
        set cleaned [string trim $rawLine]
        if {$cleaned eq ""} {
            continue
        }
        lappend result $cleaned
    }
    close $chan
    return $result
}
#Overwrites FileName with the elements of lList, one element per line.
#Every element is stripped of leading and trailing whitespace before writing.
#If the file cannot be opened a message is printed and the script exits.
proc ListToFile {FileName lList} {
    if {[catch {open $FileName w} chan]} {
        puts "Unable to open \"$FileName\" for writing; error $chan."
        exit
    }
    set count [llength $lList]
    for {set idx 0} {$idx < $count} {incr idx} {
        puts $chan [string trim [lindex $lList $idx]]
    }
    close $chan
}
#Tells whether the pid stored on line LineNum of ::PIDFILE belongs to a
#running process called Name.
#The ::PIDFILE holds 2 lines:
#Line 1 is the pid of this daemon itself (started through ::TCLSH_BIN)
#Line 2 is the pid of wget instance started from the daemon
#Returns 1 when the stored pid is among the pids reported by pidof for Name,
#0 otherwise (including the case when ::PIDFILE does not exist).
proc CheckRunning { LineNum Name } {
    if { ![file exists $::PIDFILE] } {
        return 0
    }
    #catch keeps either the command output or its error text in the variable
    catch { exec sed -n ${LineNum}p $::PIDFILE } recordedPid
    catch { exec pidof $Name } livePids
    foreach candidate [split $livePids] {
        if {$recordedPid == $candidate} {
            return 1
        }
    }
    return 0
}
#Main part of the daemon.
#Usage: check.wget.tcl -p pidfile
#Every ::REFRESH_TIME seconds it checks whether the wget started earlier is
#still running. If it is not, the wget log is inspected to decide whether the
#current URL is completed or not retrievable, the lists are updated
#accordingly and wget is started for the first URL of ::WGET_LIST.
if { $argc != 2 || [lindex $argv 0] ne "-p" } {
    puts "USAGE:"
    puts "$argv0 -p pidfile"
    exit
}
set PIDFILE [lindex $argv 1]
if {[CheckRunning 1 $::TCLSH_BIN] > 0} {
    puts "$::TCLSH_BIN $argv0 is already running"
    exit
}

#Returns the output of "Tool -n3 ::WGET_LOG" (Tool is head or tail).
#Returns an empty string when the command fails, e.g. because wget has not
#written any log yet; an uncaught exec error would terminate the daemon.
proc ReadLogPart {Tool} {
    if {[catch {exec $Tool -n3 $::WGET_LOG} text]} {
        return ""
    }
    return $text
}

#Removes every occurrence of Url from lList, writes the remaining entries
#back to ::WGET_LIST and appends Url to ArchiveFile.
#Returns the remaining entries.
proc RetireURL {Url lList ArchiveFile} {
    set remaining {}
    foreach entry $lList {
        if {$entry ne $Url} {
            lappend remaining $entry
        }
    }
    ListToFile $::WGET_LIST $remaining
    set f [open $ArchiveFile a]
    puts $f $Url
    close $f
    return $remaining
}

set lList [FileToList $::WGET_LIST]
set CurrentURL [lindex $lList 0]
#This is just precaution. If these dirs exist nothing will happen.
file mkdir $WGET_PARTIAL $WGET_COMPLETED
#The following is useful if the $WGET_LIST is empty while starting of $argv0
#ListToFile reports a pid file that cannot be opened and exits.
ListToFile $PIDFILE [list [pid]]
while {1} {
    if {[CheckRunning 2 wget] == 0} {
        set lList [FileToList $::WGET_LIST]
        #Do nothing if the line is empty (this includes also empty input file)
        if {$CurrentURL ne ""} {
            set FileName [lindex [file split $CurrentURL] end]
            #Let's check that log corresponds to the top line of wget.list
            #Plain substring search is used: the file name may contain
            #characters that are special in a regular expression.
            if {[string first $FileName [ReadLogPart head]] >= 0} {
                set Out [ReadLogPart tail]
                #The file is completed if its name is followed by "saved"
                set namePos [string first $FileName $Out]
                set Saved 0
                if {$namePos >= 0} {
                    set afterName [expr {$namePos + [string length $FileName]}]
                    set Saved [expr {[string first saved $Out $afterName] >= 0}]
                }
                set rule "^.*ERROR 404.*Not Found.*|^.*Resolving.*failed: Name or service not known.*|^.*not an http or ftp url.*"
                if {$Saved || [regexp {The file is already fully retrieved; nothing to do} $Out]} {
                    #A failed move is reported but must not stop the daemon
                    if {[catch {file rename -force [file join $::WGET_PARTIAL $FileName] [file join $::WGET_COMPLETED]} err]} {
                        puts "Unable to move \"$FileName\" to \"$::WGET_COMPLETED\"; error $err."
                    }
                    set lList [RetireURL $CurrentURL $lList $::WGET_LIST_COMPLETED]
                    set CurrentURL ""
                } elseif {[regexp $rule $Out]} {
                    #URL is not found
                    set lList [RetireURL $CurrentURL $lList $::WGET_LIST_NOT_FOUND]
                    set CurrentURL ""
                }
            }
        }
        if {[llength $lList] != 0} {
            set CurrentURL [lindex $lList 0]
            #-c -P -o are the short forms of --continue --directory-prefix --output-file
            if {[catch {exec $::WGET_BIN $CurrentURL -c -P $::WGET_PARTIAL -o $::WGET_LOG &} WgetPid]} {
                puts "Unable to start \"$::WGET_BIN\"; error $WgetPid."
            } else {
                #Line 1: pid of the daemon, line 2: pid of the wget just started
                ListToFile $PIDFILE [list [pid] $WgetPid]
            }
        }
    }
    after [expr {$::REFRESH_TIME * 1000}]
}
Этот скрипт сидит в оперативке и каждые 30 сек проверяет, запущен ли wget. Если нет, то он запускает загрузку файла, который записан в первой строке файла /opt/tmp/wget/wget.list. Первая версия использовала cron, однако после достаточно длительного использования было обнаружено, что некоторые процессы не завершаются и остаются сидеть в памяти. Почему --- не знаю. Меня это несколько достало, и я переделал этот скрипт и превратил его в демона.
второй скрипт под названием /opt/etc/init.d/S99wget запускает check.wget.tcl при перезагрузках, а также служит для запуска/остановки его вручную.
#!/bin/sh
# Settings shared by the start/stop/reload functions below.
# NAME    - file name of the daemon script
# DAEMON  - full path of the daemon script
# PIDFILE - pid file handed to the daemon with -p
NAME=check.wget.tcl
DAEMON=/opt/local/bin/${NAME}
PIDFILE=/var/run/${NAME}.pid
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/opt/bin:/opt/sbin:/opt/local/bin
# Starts $DAEMON in the background with "-p $PIDFILE", unless the pid stored
# on line 1 of $PIDFILE is listed by ps.
# Prints "done." after launching and "already running." otherwise.
start() {
    echo -n "Starting $NAME... "
    daemon_pid=""
    if [ -e "$PIDFILE" ]
    then
        daemon_pid=$(sed -n 1p "$PIDFILE")
    fi
    # The pid is compared with the whole first column of ps. A substring
    # search would take stale pid 12 for live pid 123, and several matches
    # would break the test expression.
    if [ -n "$daemon_pid" ] && ps | awk -v pid="$daemon_pid" '$1 == pid { found = 1 } END { exit !found }'
    then
        echo "already running."
    else
        "$DAEMON" -p "$PIDFILE" &
        echo "done."
    fi
}
# Stops the daemon and the wget it started, then removes $PIDFILE.
# $PIDFILE line 1 is the daemon pid, line 2 the wget pid.
# Prints "done." when a pid file existed and "not started." otherwise.
stop() {
    echo -n "Shutting down $NAME... "
    if [ -e "$PIDFILE" ]
    then
        daemon_pid=$(sed -n 1p "$PIDFILE")
        wget_pid=$(sed -n 2p "$PIDFILE")
        # The daemon is killed first so that it cannot start a new wget
        # between the two kills. Missing lines are skipped.
        [ -n "$daemon_pid" ] && kill "$daemon_pid" 2> /dev/null
        [ -n "$wget_pid" ] && kill "$wget_pid" 2> /dev/null
        rm -f "$PIDFILE"
        echo "done."
    else
        echo "not started."
    fi
}
# Kills only the process whose pid is on line 2 of $PIDFILE; the process on
# line 1 is left alone and the pid file is kept.
# Prints "done." when a pid file exists and "not started." otherwise.
reload() {
    echo -n "Reloading $NAME... "
    if ! [ -e $PIDFILE ]
    then
        echo "not started."
        return
    fi
    kill "$(sed -n 2p $PIDFILE)" 2> /dev/null
    echo "done."
}
# Runs the action named by the first argument.
# start, stop and reload call the function of the same name; restart stops,
# waits one second and starts again. Anything else prints the usage line and
# exits with status 1. A handled action ends with exit status 0.
case "$1" in
    start|stop|reload)
        "$1"
        ;;
    restart)
        stop
        sleep 1
        start
        ;;
    *)
        echo "Usage: $0 (start|stop|reload|restart)"
        exit 1
        ;;
esac
exit 0
Продолжение см. в следующем посте --- этот стал слишком длинным и его форум не переваривает