Script pour "rythme"
#!/bin/bash
#
# Builds the "rythme" report page: fetches every URL listed in a folder of
# link files and writes an HTML page containing one table per link file.
#
# Interactive inputs (read from stdin):
#   dossier  folder containing the link files (URLs separated by whitespace)
#   tablo    HTML file to create (overwritten if it already exists)
#   motif    extended regular expression, searched as a whole word
#
# Files written, relative to the current directory:
#   ./PAGES-ASPIREES/<i>-fr.html       page saved by wget
#   ./DUMP-TEXT/<i>-fr.txt             text dump produced by lynx
#   ./DUMP-TEXT/dumpAll-<y>-fr.txt     dumps of link file number <y>
#   ./DUMP-TEXT/dumpFinal.txt          dumps of every page
#   ./DUMP-TEXT/dumpFinal-N.txt        dumpFinal.txt minus lines containing "http"
#   ./CONTEXTES/<i>-fr.txt             matches of motif with one line of context
#   ./CONTEXTES/contextAll-<y>-fr.txt  contexts of link file number <y>
#   ./CONTEXTES/contextFinal.txt       contexts of every page
#
# The aggregate files are opened in append mode and are never truncated
# here, so whatever a previous run left in them is kept.
#
# `set -e` is deliberately not used: a URL that cannot be fetched must not
# prevent the remaining URLs from being processed.

printf '%s\n' 'Donnez le nom du dossier contenant les fichiers de liens http : '
read -r dossier
printf '%s\n' 'Donnez le nom du fichier html où stocker ces liens dans des tableaux : '
read -r tablo
printf '%s\n' 'Donne le motif recherché sur les pages originales : '
# -r keeps the backslashes of the regular expression intact.
read -r motif

# The output folders must exist before wget/lynx/grep write into them.
mkdir -p ./PAGES-ASPIREES ./DUMP-TEXT ./CONTEXTES

# Collect the link files with a glob instead of parsing `ls`, so that names
# containing spaces survive. An empty or missing folder yields no table.
listes=()
if [[ -d "$dossier" ]]; then
  listes=("$dossier"/*)
else
  printf 'Dossier introuvable : %s\n' "$dossier" >&2
fi

i=1 # running page number, shared by all link files
y=1 # running link-file number

# Everything printed on stdout inside this group goes to the HTML page.
# If the page cannot be opened for writing, the group is not run at all.
{
  # Static page header. The here-document is quoted: nothing is expanded and
  # no literal "\n" ends up in the page.
  cat <<'ENTETE_HTML'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!--Design by Free CSS Templates
http://www.freecsstemplates.org
Released for free under a Creative Commons Attribution 2.5 License
Name: Clear Breeze
Description: A two-column, fixed-width design for 1024x768 screen resolutions.
Version : 1.0
Released : 20091119 -->
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>La vie de Rythme</title>
<meta name="keywords" content="" />
<link href="style.css" rel="stylesheet" type="text/css" media="screen" /></head>
<body style="color: rgb(0, 0, 0);" alink="#ee0000" link="#0000ee" vlink="#551a8b">
<div id="logo">
<h1><a href="#">rythme <br />
</a></h1>
<h1><a href="http://martine.casterman.com/"><em><img onmouseout='src="images/martine-copie.jpg"' style="border: 0px solid ; width: 131px; height: 163px; position: absolute; margin-top: 10px; top: 5px; right: 10px;" onmouseover='src="images/martine2.jpeg"' alt="couverture" src="images/martine-copie.jpg" /></em></a></h1>
<h1><a href="#"> <em> </em></a></h1>
<em> </em>
<p><em><em>la vie du mot "rythme" sur le web</em></em></p>
</div>
<hr />
<!-- end #logo -->
<div id="page"><em> </em>
<div id="content"><em> </em>
<div id="menu"><em> </em>
<ul>
 <em> </em>
 <li style="color: white;"><em><a style="color: rgb(255, 255, 255);" href="index.html" class="first">Accueil</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);" class="current_page_item"><em><a style="color: rgb(255, 255, 255);" href="tablo-10-fr.htm">Tableaux "rythme"<br />
</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="tablo-10-en.htm">Tableaux "rhythme"<br />
</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="PageNuages.html">Nuages</a></em></li>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="PageScript.html">Scripts</a></em></li>
 <em> </em>
</ul>
<em> </em></div>
<em> </em>
<div id="splash"><em><img src="images/img03.jpg" alt="" height="313" width="730" /></em></div>
<em><!-- end #menu --> </em>
ENTETE_HTML

  for chemin in "${listes[@]}"; do
    # Skip sub-folders and the unexpanded pattern left by an empty folder.
    [[ -f "$chemin" ]] || continue
    fichier=${chemin##*/}

    printf '%s\n' \
      '<div class="post"><em> </em>' \
      '<div class="entry"><em> </em>' \
      '<table>' \
      '<tbody>'
    printf '%s\n' "<tr><td style=\"color: rgb(255, 102, 0);\"><h4>Fichier ${fichier}</h4></td><td>Pages Aspirées</td><td>Pages Dump</td><td>Contextes</td></tr>"

    # Read the whole link file and split it on whitespace, without the
    # pathname expansion an unquoted $(cat ...) would apply to URLs holding
    # "?" or "*". read returns non-zero at end of file; that is expected.
    urls=()
    read -r -d '' -a urls < "$chemin"

    for nom in "${urls[@]}"; do
      page="./PAGES-ASPIREES/${i}-fr.html"
      dump="./DUMP-TEXT/${i}-fr.txt"
      contexte="./CONTEXTES/${i}-fr.txt"

      wget -O "$page" -- "$nom"
      lynx -connect_timeout=2 -dump "$nom" > "$dump"

      # Append this page only. Appending the cumulative per-file aggregate
      # to the global file on every URL would store earlier pages again
      # and again.
      cat -- "$dump" >> "./DUMP-TEXT/dumpAll-${y}-fr.txt"
      cat -- "$dump" >> ./DUMP-TEXT/dumpFinal.txt

      grep -E -i -n -C 1 -- "\b${motif}\b" "$dump" > "$contexte"
      cat -- "$contexte" >> "./CONTEXTES/contextAll-${y}-fr.txt"
      cat -- "$contexte" >> ./CONTEXTES/contextFinal.txt

      printf '%s\n' "<tr><td><a href=\"${nom}\">Url ${i}</a></td><td><a href=\"../PAGES-ASPIREES/${i}-fr.html\">Page ${i}</a></td><td><a href=\"../DUMP-TEXT/${i}-fr.txt\">Texte ${i}</a></td><td><a href=\"../CONTEXTES/${i}-fr.txt\">Contexte ${i}</a></td></tr>"
      i=$((i + 1))
    done

    # Last row of the table: links to the per-file aggregates.
    printf '%s\n' "<tr><td></td><td></td><td><a href=\"../DUMP-TEXT/dumpAll-${y}-fr.txt\">Texte ${fichier}</a></td><td><a href=\"../CONTEXTES/contextAll-${y}-fr.txt\">Contexte ${fichier}</a></td></tr>"
    printf '%s\n' '</tbody></table>'
    printf '%s\n' '<em> </em></div><em> </em></div>'
    y=$((y + 1))
  done

  # Static page footer, with links to the global aggregates.
  cat <<'PIED_HTML'
<em> <em><br /></em></em><div class="post"><em><br /><br /></em>
<h4 style="margin-left: 40px;">
<em><em><a href="../DUMP-TEXT/dumpFinal-N.txt">Fichier Dump global<em><br /></em></a></em></em></h4>
<h4 style="margin-left: 40px;">
<em><em><a href="../CONTEXTES/contextFinal.txt">Fichier Context global<em><br /></em></a></em></em></h4>
</div>
</div>
</div>

<em><em><!-- end #content --><!-- end #sidebar --> </em>
</em>
<div style="clear: both;"><em><em> </em></em></div>

<!-- end #page -->
<div style="background-color: rgb(230, 0, 102);" id="footer"><p style="background-color: rgb(230, 0, 102); height: 114px;"><em><em><em><em><em>site créé par
Benoît legouy, pour un projet encadré par serge fleury, rachid
belmouhoub & jean-michel daube</em></em></em></em></em></p>
</div>

<em><br />
</em>
<div style="text-align: center; font-size: 0.75em; background-color: rgb(255, 128, 184); margin-top: 0px; height: 43px;"><em><em><em>Design
downloaded from <a href="http://www.freewebtemplates.com/">free
website templates</a>.</em></em></em></div>

</body></html>
PIED_HTML
} > "$tablo"

# Copy of the global dump without the lines that contain "http".
sed -e '/http/d' ./DUMP-TEXT/dumpFinal.txt > ./DUMP-TEXT/dumpFinal-N.txt
# Builds the "rythme" report page: fetches every URL listed in a folder of
# link files and writes an HTML page containing one table per link file.
#
# NOTE(review): this section repeats, statement for statement, the run that
# precedes it in this file (new prompts, counters reset to 1, same output
# file names). It looks like an accidental duplicate — confirm whether a
# second run is really intended before keeping it.
#
# Interactive inputs (read from stdin):
#   dossier  folder containing the link files (URLs separated by whitespace)
#   tablo    HTML file to create (overwritten if it already exists)
#   motif    extended regular expression, searched as a whole word
#
# Files written, relative to the current directory:
#   ./PAGES-ASPIREES/<i>-fr.html       page saved by wget
#   ./DUMP-TEXT/<i>-fr.txt             text dump produced by lynx
#   ./DUMP-TEXT/dumpAll-<y>-fr.txt     dumps of link file number <y>
#   ./DUMP-TEXT/dumpFinal.txt          dumps of every page
#   ./DUMP-TEXT/dumpFinal-N.txt        dumpFinal.txt minus lines containing "http"
#   ./CONTEXTES/<i>-fr.txt             matches of motif with one line of context
#   ./CONTEXTES/contextAll-<y>-fr.txt  contexts of link file number <y>
#   ./CONTEXTES/contextFinal.txt       contexts of every page
#
# The aggregate files are opened in append mode and are never truncated
# here, so whatever was already in them is kept.
#
# `set -e` is deliberately not used: a URL that cannot be fetched must not
# prevent the remaining URLs from being processed.

printf '%s\n' 'Donnez le nom du dossier contenant les fichiers de liens http : '
read -r dossier
printf '%s\n' 'Donnez le nom du fichier html où stocker ces liens dans des tableaux : '
read -r tablo
printf '%s\n' 'Donne le motif recherché sur les pages originales : '
# -r keeps the backslashes of the regular expression intact.
read -r motif

# The output folders must exist before wget/lynx/grep write into them.
mkdir -p ./PAGES-ASPIREES ./DUMP-TEXT ./CONTEXTES

# Collect the link files with a glob instead of parsing `ls`, so that names
# containing spaces survive. An empty or missing folder yields no table.
listes=()
if [[ -d "$dossier" ]]; then
  listes=("$dossier"/*)
else
  printf 'Dossier introuvable : %s\n' "$dossier" >&2
fi

i=1 # running page number, shared by all link files
y=1 # running link-file number

# Everything printed on stdout inside this group goes to the HTML page.
# If the page cannot be opened for writing, the group is not run at all.
{
  # Static page header. The here-document is quoted: nothing is expanded and
  # no literal "\n" ends up in the page.
  cat <<'ENTETE_HTML'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!--Design by Free CSS Templates
http://www.freecsstemplates.org
Released for free under a Creative Commons Attribution 2.5 License
Name: Clear Breeze
Description: A two-column, fixed-width design for 1024x768 screen resolutions.
Version : 1.0
Released : 20091119 -->
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>La vie de Rythme</title>
<meta name="keywords" content="" />
<link href="style.css" rel="stylesheet" type="text/css" media="screen" /></head>
<body style="color: rgb(0, 0, 0);" alink="#ee0000" link="#0000ee" vlink="#551a8b">
<div id="logo">
<h1><a href="#">rythme <br />
</a></h1>
<h1><a href="http://martine.casterman.com/"><em><img onmouseout='src="images/martine-copie.jpg"' style="border: 0px solid ; width: 131px; height: 163px; position: absolute; margin-top: 10px; top: 5px; right: 10px;" onmouseover='src="images/martine2.jpeg"' alt="couverture" src="images/martine-copie.jpg" /></em></a></h1>
<h1><a href="#"> <em> </em></a></h1>
<em> </em>
<p><em><em>la vie du mot "rythme" sur le web</em></em></p>
</div>
<hr />
<!-- end #logo -->
<div id="page"><em> </em>
<div id="content"><em> </em>
<div id="menu"><em> </em>
<ul>
 <em> </em>
 <li style="color: white;"><em><a style="color: rgb(255, 255, 255);" href="index.html" class="first">Accueil</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);" class="current_page_item"><em><a style="color: rgb(255, 255, 255);" href="tablo-10-fr.htm">Tableaux "rythme"<br />
</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="tablo-10-en.htm">Tableaux "rhythme"<br />
</a></em></li>
 <em style="color: rgb(255, 102, 0);"> </em>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="PageNuages.html">Nuages</a></em></li>
 <li style="color: rgb(255, 102, 0);"><em><a style="color: rgb(255, 255, 255);" href="PageScript.html">Scripts</a></em></li>
 <em> </em>
</ul>
<em> </em></div>
<em> </em>
<div id="splash"><em><img src="images/img03.jpg" alt="" height="313" width="730" /></em></div>
<em><!-- end #menu --> </em>
ENTETE_HTML

  for chemin in "${listes[@]}"; do
    # Skip sub-folders and the unexpanded pattern left by an empty folder.
    [[ -f "$chemin" ]] || continue
    fichier=${chemin##*/}

    printf '%s\n' \
      '<div class="post"><em> </em>' \
      '<div class="entry"><em> </em>' \
      '<table>' \
      '<tbody>'
    printf '%s\n' "<tr><td style=\"color: rgb(255, 102, 0);\"><h4>Fichier ${fichier}</h4></td><td>Pages Aspirées</td><td>Pages Dump</td><td>Contextes</td></tr>"

    # Read the whole link file and split it on whitespace, without the
    # pathname expansion an unquoted $(cat ...) would apply to URLs holding
    # "?" or "*". read returns non-zero at end of file; that is expected.
    urls=()
    read -r -d '' -a urls < "$chemin"

    for nom in "${urls[@]}"; do
      page="./PAGES-ASPIREES/${i}-fr.html"
      dump="./DUMP-TEXT/${i}-fr.txt"
      contexte="./CONTEXTES/${i}-fr.txt"

      wget -O "$page" -- "$nom"
      lynx -connect_timeout=2 -dump "$nom" > "$dump"

      # Append this page only. Appending the cumulative per-file aggregate
      # to the global file on every URL would store earlier pages again
      # and again.
      cat -- "$dump" >> "./DUMP-TEXT/dumpAll-${y}-fr.txt"
      cat -- "$dump" >> ./DUMP-TEXT/dumpFinal.txt

      grep -E -i -n -C 1 -- "\b${motif}\b" "$dump" > "$contexte"
      cat -- "$contexte" >> "./CONTEXTES/contextAll-${y}-fr.txt"
      cat -- "$contexte" >> ./CONTEXTES/contextFinal.txt

      printf '%s\n' "<tr><td><a href=\"${nom}\">Url ${i}</a></td><td><a href=\"../PAGES-ASPIREES/${i}-fr.html\">Page ${i}</a></td><td><a href=\"../DUMP-TEXT/${i}-fr.txt\">Texte ${i}</a></td><td><a href=\"../CONTEXTES/${i}-fr.txt\">Contexte ${i}</a></td></tr>"
      i=$((i + 1))
    done

    # Last row of the table: links to the per-file aggregates.
    printf '%s\n' "<tr><td></td><td></td><td><a href=\"../DUMP-TEXT/dumpAll-${y}-fr.txt\">Texte ${fichier}</a></td><td><a href=\"../CONTEXTES/contextAll-${y}-fr.txt\">Contexte ${fichier}</a></td></tr>"
    printf '%s\n' '</tbody></table>'
    printf '%s\n' '<em> </em></div><em> </em></div>'
    y=$((y + 1))
  done

  # Static page footer, with links to the global aggregates.
  cat <<'PIED_HTML'
<em> <em><br /></em></em><div class="post"><em><br /><br /></em>
<h4 style="margin-left: 40px;">
<em><em><a href="../DUMP-TEXT/dumpFinal-N.txt">Fichier Dump global<em><br /></em></a></em></em></h4>
<h4 style="margin-left: 40px;">
<em><em><a href="../CONTEXTES/contextFinal.txt">Fichier Context global<em><br /></em></a></em></em></h4>
</div>
</div>
</div>

<em><em><!-- end #content --><!-- end #sidebar --> </em>
</em>
<div style="clear: both;"><em><em> </em></em></div>

<!-- end #page -->
<div style="background-color: rgb(230, 0, 102);" id="footer"><p style="background-color: rgb(230, 0, 102); height: 114px;"><em><em><em><em><em>site créé par
Benoît legouy, pour un projet encadré par serge fleury, rachid
belmouhoub & jean-michel daube</em></em></em></em></em></p>
</div>

<em><br />
</em>
<div style="text-align: center; font-size: 0.75em; background-color: rgb(255, 128, 184); margin-top: 0px; height: 43px;"><em><em><em>Design
downloaded from <a href="http://www.freewebtemplates.com/">free
website templates</a>.</em></em></em></div>

</body></html>
PIED_HTML
} > "$tablo"

# Copy of the global dump without the lines that contain "http".
sed -e '/http/d' ./DUMP-TEXT/dumpFinal.txt > ./DUMP-TEXT/dumpFinal-N.txt