משתמש:Ori229/תוכנית פרל
מראה
(הופנה מהדף משתמש:Ori229\תוכנית פרל)
###############################################
# Upload of tanach.us in he.wikisource.org (by ori229)
###############################################
use strict;
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;
use Encode qw(encode);
use utf8;
######################### data ###################
my %books;
$books{'Amos'} = 'עמוס';$books{'Chronicles_1'} = 'דברי הימים א';$books{'Chronicles_2'} = 'דברי הימים ב';
$books{'Daniel'} = 'דניאל';$books{'Deuteronomy'} = 'דברים';$books{'Ecclesiastes'} = 'קהלת';
$books{'Esther'} = 'אסתר';$books{'Exodus'} = 'שמות';$books{'Ezekiel'} = 'יחזקאל';
$books{'Ezra'} = 'עזרא';$books{'Genesis'} = 'בראשית';$books{'Habakkuk'} = 'חבקוק';
$books{'Haggai'} = 'חגי';$books{'Hosea'} = 'הושע';$books{'Isaiah'} = 'ישעיה';
$books{'Jeremiah'} = 'ירמיהו';$books{'Job'} = 'איוב';$books{'Joel'} = 'יואל';
$books{'Jonah'} = 'יונה';$books{'Joshua'} = 'יהושע';$books{'Judges'} = 'שופטים';
$books{'Kings_1'} = 'מלכים א';$books{'Kings_2'} = 'מלכים ב';$books{'Lamentations'} = 'איכה';
$books{'Leviticus'} = 'ויקרא';$books{'Malachi'} = 'מלאכי';$books{'Micah'} = 'מיכה';
$books{'Nahum'} = 'נחום';$books{'Nehemiah'} = 'נחמיה';$books{'Numbers'} = 'במדבר';
$books{'Obadiah'} = 'עובדיה';$books{'Proverbs'} = 'משלי';$books{'Psalms'} = 'תהילים';
$books{'Ruth'} = 'רות';$books{'Samuel_1'} = 'שמואל א';$books{'Samuel_2'} = 'שמואל ב';
$books{'Song_of_Songs'} = 'שיר השירים';$books{'Zechariah'} = 'זכריה';$books{'Zephaniah'} = 'צפניה';
my $Etnahta=pack "C2",0xD6,0x91; my $Segol=pack "C2",0xD6,0x92;
my $Shalshelet=pack "C2",0xD6,0x93; my $Zaqef_Qatan=pack "C2",0xD6,0x94;
my $Zaqef_Gadol=pack "C2",0xD6,0x95; my $Tipeha=pack "C2",0xD6,0x96;
my $Revia=pack "C2",0xD6,0x97; my $Zarqa=pack "C2",0xD6,0x98;
my $Pashta=pack "C2",0xD6,0x99; my $Yetiv=pack "C2",0xD6,0x9A;
my $Tevir=pack "C2",0xD6,0x9B; my $Geresh=pack "C2",0xD6,0x9C;
my $Geresh_Muqdam=pack "C2",0xD6,0x9D; my $Gershayim=pack "C2",0xD6,0x9E;
my $Qarney_Para=pack "C2",0xD6,0x9F; my $Telisha_Gedola=pack "C2",0xD6,0xA0;
my $Paser=pack "C2",0xD6,0xA1; #my $reserved=pack "C2",0xD6,0xA2; ####### not relevant
my $Munah=pack "C2",0xD6,0xA3; my $Makapakh=pack "C2",0xD6,0xA4;
my $Merkha=pack "C2",0xD6,0xA5; my $Merkha_Kefula=pack "C2",0xD6,0xA6;
my $Darga=pack "C2",0xD6,0xA7; my $Qadma=pack "C2",0xD6,0xA8;
my $Telisha_Qetana=pack "C2",0xD6,0xA9; my $Yerah_ben_Yomo=pack "C2",0xD6,0xAA;
my $Ole=pack "C2",0xD6,0xAB; my $Iluy=pack "C2",0xD6,0xAC;
my $Dehi=pack "C2",0xD6,0xAD; my $Zinor=pack "C2",0xD6,0xAE;
#my $Masora_Circle=pack "C2",0xD6,0xAF; ####### not relevant
my $Meteg=pack "C2",0xD6,0xBD; my $Paseq=pack "C2",0xD7,0x80;
my $patach=pack "C2",0xd6,0xb7; my $kamatz=pack "C2",0xd6,0xb8;
my $chirik=pack "C2",0xd6,0xb4; my $holam =pack "C2",0xd6,0xb9;
my $dagesh =pack "C2",0xd6,0xbc; my $right_shin =pack "C2",0xd7,0x81;
my $left_shin =pack "C2",0xd7,0x82;
my $aleph=pack "C2",0xd7,0x90; my $bet=pack "C2",0xd7,0x91; my $gimel=pack "C2",0xd7,0x92;
my $dalet=pack "C2",0xd7,0x93; my $he=pack "C2",0xd7,0x94; my $vav=pack "C2",0xd7,0x95;
my $zain=pack "C2",0xd7,0x96; my $het=pack "C2",0xd7,0x97; my $tet=pack "C2",0xd7,0x98;
my $yud=pack "C2",0xd7,0x99; my $final_kaf=pack "C2",0xd7,0x9a; my $kaf=pack "C2",0xd7,0x9b;
my $lamed=pack "C2",0xd7,0x9c; my $final_mem=pack "C2",0xd7,0x9d; my $mem=pack "C2",0xd7,0x9e;
my $final_nun=pack "C2",0xd7,0x9f; my $nun=pack "C2",0xd7,0xa0; my $samech=pack "C2",0xd7,0xa1;
my $ayin=pack "C2",0xd7,0xa2; my $final_pe =pack "C2",0xd7,0xa3; my $pe=pack "C2",0xd7,0xa4;
my $final_tazdik=pack "C2",0xd7,0xa5; my $tzadik=pack "C2",0xd7,0xa6; my $kuf=pack "C2",0xd7,0xa7;
my $reysh=pack "C2",0xd7,0xa8; my $shin=pack "C2",0xd7,0xa9;
my $tav=pack "C2",0xd7,0xaa;
##################################################
my $book; my $perek=''; my $pasuk=''; my $pasuk_number=0; my $add_after_pasuk='nothing';
my @responses; my $page=''; my $in_file =''; my $mahadura; my $type;
#
my $out_file = "ready4upload.txt";
open( OUT_F, ">$out_file") or die "Cannot open $out_file as output\n" ;
#make_a_book('Amos');
#make_a_book('Chronicles_1');
#make_a_book('Chronicles_2');
#make_a_book('Daniel');
#make_a_book('Deuteronomy');
#make_a_book('Ecclesiastes');
#make_a_book('Esther');
#make_a_book('Exodus');
#make_a_book('Exodus.DH');
#make_a_book('Ezekiel');
#make_a_book('Ezra');
#make_a_book('Genesis');
#make_a_book('Habakkuk');
#make_a_book('Haggai');
#make_a_book('Hosea');
#make_a_book('Isaiah');
#make_a_book('Jeremiah');
#make_a_book('Job');
#make_a_book('Joel');
#make_a_book('Jonah');
make_a_book('Joshua');
#make_a_book('Judges');
#make_a_book('Kings_1');
#make_a_book('Kings_2');
#make_a_book('Lamentations');
#make_a_book('Leviticus');
#make_a_book('Malachi');
#make_a_book('Micah');
#make_a_book('Nahum');
#make_a_book('Nehemiah');
#make_a_book('Numbers');
#make_a_book('Obadiah');
#make_a_book('Proverbs');
#make_a_book('Psalms');
#make_a_book('Ruth');
#make_a_book('Samuel_1');
#make_a_book('Samuel_2');
#make_a_book('Song_of_Songs');
#make_a_book('Tanach');
#make_a_book('Zechariah');
#make_a_book('Zephaniah');
close (OUT_F);
exit;
##################################################################################
################################################################
sub make_a_book {
my $file_name = shift;
$book=$books{$file_name};
$in_file = 'us/books/'.$file_name.'.xml';
$mahadura='ניקוד'; $type='main'; make_a_page();
$mahadura='ניקוד'; $type='normal'; make_a_page();
$mahadura='ניקוד'; $type='lines'; make_a_page();
$mahadura='טעמים'; $type='main'; make_a_page();
$mahadura='טעמים'; $type='normal'; make_a_page();
$mahadura='טעמים'; $type='lines'; make_a_page();
#$mahadura='כתיב'; $type='normal'; make_a_page();
#$mahadura='כתיב'; $type='lines'; make_a_page();
}
################################################################
sub make_a_page {
$type = 'main' if $mahadura eq 'ביאור';
$page='';
print_func ( "#####"."$book $mahadura");
print_func ( " מסומן") if $type eq 'normal';
print_func ( " מסומן בשורות") if $type eq 'lines';
print_func ( "\n");
print_func ( "{{כותרת עליונה תנך עם טעמים|$book}}\n") if $mahadura eq 'טעמים';
print_func ( "{{כותרת עליונה תנך עם ניקוד|$book}}\n") if $mahadura eq 'ניקוד';
print_func ( "{{כותרת עליונה תנך ללא ניקוד|$book}}\n") if $mahadura eq 'כתיב';
open( IN_F, $in_file) or die "Cannot open $in_file as input\n" ;
while (<IN_F>) {
chomp;
my $line = $_;
process_line($line);
}
close (IN_F);
print_func ( "\n{{כותרת תחתונה תנך עם טעמים|$book}}\n") if $mahadura eq 'טעמים';
print_func ( "\n{{כותרת תחתונה תנך עם ניקוד|$book}}\n") if $mahadura eq 'ניקוד';
print_func ( "\n{{כותרת תחתונה תנך ללא ניקוד|$book}}\n") if $mahadura eq 'כתיב';
print_func ( "ENDOFFILE\n");
}
################################################################
sub print_func {
my $line = shift;
print OUT_F $line;
}
################################################################
sub process_line {
my $line = shift;
#################### פרק
if ($line =~ /<c n="(\d+)">/) {
$perek = number2hebrew($1);
if ($type eq 'main') {
print_func ( "{{אות-פרק-שקוף|" . $book .'|' . $perek . "}}");
} else {
print_func ( "{{אות-פרק|" . $book .'|' . $perek . "}}");
}
print_func ( "<BR/>") if $type eq 'lines';
print_func ( "<section begin=פרק $perek/>\n");
}
if ($line =~ /<\/c>/) {
print_func ( "<section end=פרק $perek/>");
#print_func ( "\n");
}
################################## פסוק
if ($line =~ /<v n="(\d+)">/) {
$pasuk_number = $1;
$pasuk = number2hebrew($pasuk_number);
if ($type eq 'main') {
print_func ( "{{אות-פסוק-שקוף|" . $book .'|' . $perek .'|'. $pasuk . "}}\n");
} else {
print_func ( "{{אות-פסוק|" . $book .'|' . $perek .'|'. $pasuk . "}}\n");
}
print_func ( "<section begin=$perek$pasuk_number/>") if $type eq 'main' && $mahadura eq 'ניקוד';
print_func ( "<section begin=$perek $pasuk/>") if $type eq 'main';
print_func ( "{{#section:$book $mahadura|$perek $pasuk}}\n") if $type ne 'main';
}
if ($line =~ /<\/v>/) {
print_func ( "<section end=$perek$pasuk_number/>") if $type eq 'main' && $mahadura eq 'ניקוד';
print_func ( "<section end=$perek $pasuk/>\n") if $type eq 'main';
if ($add_after_pasuk eq 'samekh') {
print_func ( "{{ס}} ") if $type eq 'main';
print_func ( "{{ס}} ") if $type eq 'normal';
print_func ( "{ס} ") if $type eq 'lines';
}
if ($add_after_pasuk eq 'pe') {
print_func ( "{{פ}} ") if $type eq 'main';
print_func ( "{{פ}} ") if $type eq 'normal';
print_func ( "{פ} ") if $type eq 'lines';
}
$add_after_pasuk = 'nothing';
print_func ( "<BR/>") if $type eq 'lines';
}
########################################### מילה
if ($line =~ /<([wkq])>(.*?)<\/[wkq]>/) { # w=word k=ktiv q=kri
my $kind = $1;
my $word = $2;
$word =~ s|<x>.</x>||g;
$word =~ s|<s t='small'>|<small>|g;$word =~ s|</s>|<end_small>|g; #<s t='small'>ן</s>
$word=~s|/||g; $word=~s|end_|/|g;
$word=~s| ||g;
if ($mahadura eq 'ביאור') {
$word = "$word," if $word=~/$Etnahta/;
$word = "<small>($word)</small> " if $kind eq 'k';
} else {
$word = "[$word] " if $kind eq 'q';
$word = "<small>$word</small> " if $kind eq 'k';
}
$word = "$word " if $kind eq 'w';
my $makaf=pack "C2",0xd6,0xbe; # instead of $word=~s|־| |g;
$word=~s|$makaf |$makaf| if $mahadura eq 'טעמים';
$word=~s|$makaf | | if $mahadura ne 'טעמים';$word=~s|$makaf|| if $mahadura ne 'טעמים';
my $sof_pasuk=pack "C2",0xd7,0x83; # instead of $word=~s|׃||g;
$word=~s|$sof_pasuk |$sof_pasuk| if $mahadura eq 'טעמים';
$word=~s|$sof_pasuk\]|]$sof_pasuk| if $mahadura eq 'טעמים'; #in case of ktiv in last word
$word=~s|$sof_pasuk ?|.| if $mahadura ne 'טעמים'; $word=~s|\.\]|].| if $mahadura ne 'טעמים';
$word = remove_teamim($word) if $mahadura ne 'טעמים';
#$word=~s|([$patach$kamatz])$chirik|$1\{\{ZWJ\}\}$chirik|; # Jerusalem
$word=~s|([$patach$kamatz])$chirik|$1\{\{HIRIK WITHOUT LETTER\}\}|; # Jerusalem
#$word=~s|([^$dagesh$right_shin$left_shin$aleph$bet$gimel$dalet$he$vav$zain$het$tet$yud$kaf$lamed$mem$nun$samech$ayin$pe$tzadik$kuf$reysh$shin$tav$Etnahta$Segol$Shalshelet$Zaqef_Qatan$Zaqef_Gadol$Tipeha$Revia$Zarqa$Pashta$Yetiv$Tevir$Geresh$Geresh_Muqdam$Gershayim$Qarney_Para$Telisha_Gedola$Paser$Munah$Makapakh$Merkha$Merkha_Kefula$Darga$Qadma$Telisha_Qetana$Yerah_ben_Yomo$Ole$Iluy$Dehi$Zinor$Meteg$Paseq])$vav$holam|$1$vav\{\{ZWJ\}\}$holam|g; # holam haser on vav
$word=~s|([^$dagesh$right_shin$left_shin$aleph$bet$gimel$dalet$he$vav$zain$het$tet$yud$kaf$lamed$mem$nun$samech$ayin$pe$tzadik$kuf$reysh$shin$tav$Etnahta$Segol$Shalshelet$Zaqef_Qatan$Zaqef_Gadol$Tipeha$Revia$Zarqa$Pashta$Yetiv$Tevir$Geresh$Geresh_Muqdam$Gershayim$Qarney_Para$Telisha_Gedola$Paser$Munah$Makapakh$Merkha$Merkha_Kefula$Darga$Qadma$Telisha_Qetana$Yerah_ben_Yomo$Ole$Iluy$Dehi$Zinor$Meteg$Paseq])$vav$holam|$1\{\{VAV WITH HOLAM HASER\}\}|g; # holam haser on vav
print_func ( $word) if $type eq 'main';
}
########################################### פרשה פתוחה וסגורה
if ($line =~ /<samekh\/>/) {
if ($mahadura eq 'ביאור' ) {
print_func ( "{{ביאור:פרשה סגורה}}");
} else {
$add_after_pasuk = 'samekh';
}
}
if ($line =~ /<pe\/>/) {
if ($mahadura eq 'ביאור' ) {
print_func ( "{{ביאור:פרשה פתוחה}}");
} else {
$add_after_pasuk = 'pe';
}
}
}
################################################################
################################################################
sub remove_teamim {
my $word = shift;
$word =~ s/$Etnahta//g;$word =~ s/$Segol//g;$word =~ s/$Shalshelet//g;$word =~ s/$Zaqef_Qatan//g;
$word =~ s/$Zaqef_Gadol//g;$word =~ s/$Tipeha//g;$word =~ s/$Revia//g;$word =~ s/$Zarqa//g;
$word =~ s/$Pashta//g;$word =~ s/$Yetiv//g;$word =~ s/$Tevir//g;$word =~ s/$Geresh//g;
$word =~ s/$Geresh_Muqdam//g;$word =~ s/$Gershayim//g;$word =~ s/$Qarney_Para//g;$word =~ s/$Telisha_Gedola//g;
$word =~ s/$Paser//g;$word =~ s/$Munah//g;$word =~ s/$Makapakh//g;$word =~ s/$Merkha//g;$word =~ s/$Merkha_Kefula//g;
$word =~ s/$Darga//g;$word =~ s/$Qadma//g; $word =~ s/$Telisha_Qetana//g;$word =~ s/$Yerah_ben_Yomo//g;
$word =~ s/$Ole//g;$word =~ s/$Iluy//g;$word =~ s/$Dehi//g;$word =~ s/$Zinor//g; $word =~ s/$Paseq//g;
$word =~ s/$Meteg//g; # in last word in pasuk
return $word;
}
################################################################
# גמטריה ממספרים לאותיות - נכתב על ידי אראל סגל
sub number2hebrew {
my $num = shift;
my $heb = "";
my @letters1 = split(//,'אבגדהוזחטי');
my @letters2 = split(//,'יכלמנסעפצק');
my @letters3 = split(//,'קרשת');
while ($num > 400) {
$heb .= "ת";
$num -= 400;
}
if ($num >= 100) {
$heb .= $letters3[ ($num / 100) - 1 ];
$num %= 100;
}
if ($num >= 10) {
if ($num == 15) {
$heb .= "טו";
$num = 0;
}
elsif ($num == 16) {
$heb .= "טז";
$num = 0;
}
else {
$heb .= $letters2[ ($num / 10) - 1 ];
$num %= 10;
}
}
if ($num >= 1) {
$heb .= $letters1[ $num - 1 ];
}
return $heb;
}