---
title: How to run the analysis?
layout: post
---
cd ~/git
git clone https://github.com/pkiraly/metadata-qa-api.git
git clone https://github.com/pkiraly/europeana-qa-api.git
git clone https://github.com/pkiraly/europeana-qa-spark.git
git clone https://github.com/pkiraly/europeana-qa-r.git
git clone https://github.com/pkiraly/europeana-qa-web.git /var/www/html/europeana-qa
cd ~/git/europeana-qa-spark
./build
hdfs dfs -mkdir /europeana
hdfs dfs -mkdir /join
hdfs dfs -put /path/to/sources/*.json /europeana
cd ~/git/europeana-qa-spark
nohup ./run-all resultXX.csv > run-all.log &
cd ~/git/europeana-qa-spark
hdfs dfs -put resultXX.csv /join
cd ~/git/europeana-qa-spark/scala
nohup ./cardinality.sh resultXX.csv > cardinality.log &
cd ~/git/europeana-qa-spark/scripts
php cardinality-csv2json.php
cp cardinality.json ~/git/europeana-qa-r/json2
cd ~/git/europeana-qa-spark/scripts
php frequency-csv2json.php
cp frequency.json ~/git/europeana-qa-r/json2
cd ~/git/europeana-qa-r
rm resultXX.csv
ln -s ~/git/europeana-qa-spark/resultXX.csv resultXX.csv
nohup php split.php resultXX.csv &
cd ~/git/europeana-qa-r
rm r-report.txt
rm launch-report.txt
cp master-setlist.txt setlist.txt
crontab -e
*/1 * * * * cd /path/to/europeana-qa-r && php launcher.php >> launch-report.log
cd ~/git/europeana-qa-spark
nohup ./run-all-language-detection resultXX-language.csv > nohup-resultXX-language.log &
cd ~/git/europeana-qa-spark
hdfs dfs -put resultXX-language.csv /join
cd ~/git/europeana-qa-spark/scala
nohup ./languages.sh resultXX-language.csv > languages.log &
cd ~/git/europeana-qa-spark/scala
nohup ./languages-per-collections.sh resultXX-language.csv > languages-per-collections.log &
cd ~/git/europeana-qa-spark/scripts
php languages-csv2json.php
cp languages.json ~/git/europeana-qa-r/json2/
cd ~/git/europeana-qa-spark/scripts
php lang-group-to-json.php
cd ~/git/europeana-qa-spark
nohup ./run-all-multilingual-saturation resultXX-multilingual-saturation.csv > multilingual-saturation.log &
cd ~/git/europeana-qa-spark
hdfs dfs -put resultXX-multilingual-saturation.csv /join
cd ~/git/europeana-qa-spark/scala
nohup ./saturation.sh resultXX-multilingual-saturation.csv > resultXX-multilingual-saturation.log &
cd ~/git/europeana-qa-r
rm resultXX-multilingual-saturation.csv
ln -s ~/git/europeana-qa-spark/resultXX-multilingual-saturation.csv resultXX-multilingual-saturation.csv
nohup php split-saturation.php resultXX-multilingual-saturation.csv &
cd ~/git/europeana-qa-r
./prepare.sh saturation
crontab -e
*/1 * * * * cd /path/to/europeana-qa-r && php saturation-launcher.php >> launch-report.log
cd ~/git/europeana-qa-spark/scripts
php languages-csv2json.php
cp languages.json ~/git/europeana-qa-r/json2/
cd ~/git/europeana-qa-spark/scripts
php lang-group-to-json.php
split -l 1000000 -d --verbose --additional-suffix .csv v2018-08-completeness.csv completeness-part
split -l 1000000 -d --verbose --additional-suffix .csv v2018-08-multilingual-saturation.csv saturation-part
Total time: 29 hours 3 minutes.