25 lines
1001 B
Bash
25 lines
1001 B
Bash
|
#!/usr/bin/env bash
|
||
|
|
||
|
./find-not-completed.sh > not-completed
|
||
|
# cat duplicate-data >> not-completed
|
||
|
#removes not truly finished in processed.txt
|
||
|
grep -v -f <(cat not-completed | sed -e 's@^@\t@' | sed -e 's@$@\$@') processed.txt | sponge processed.txt
|
||
|
|
||
|
#appends filenames from spider-commands.txt which are not in processed.txt
|
||
|
comm -13 <(cut -f2 processed.txt | sort -u) <(grep -o 'data/.*" 2>' spider-commands.txt | sed -Ee 's@data/|" 2>@@g' | sort) >> not-completed
|
||
|
|
||
|
sort -u not-completed | sponge not-completed
|
||
|
|
||
|
# remove data connected with not-completed e.g. logs/ data/
|
||
|
|
||
|
echo data directory file count: `ls -1 data | wc -l`
|
||
|
cd data && xargs rm -f < ../not-completed
|
||
|
cd ..
|
||
|
echo data directory file count: `ls -1 data | wc -l`
|
||
|
echo logs directory file count: `ls -1 logs | wc -l`
|
||
|
cd logs && xargs rm -f < ../not-completed
|
||
|
cd ..
|
||
|
echo logs directory file count: `ls -1 logs | wc -l`
|
||
|
|
||
|
grep -f <(cat not-completed | sed -e 's@^@"data/'@ | sed -e 's@$@"@') spider-commands.txt > spider-commands-add.txt
|