Perl: comment ignorer une boucle si la tâche prend trop de temps

-2

Ce code Perl convertit les fichiers txt d'un dossier et de ses sous-répertoires en un en-tête, un corps et un fichier XML. Perl : comment ignorer une boucle si la tâche prend trop de temps

#!perl -w 

use strict; 
use utf8; 
use File::Copy; 
use File::Basename; 

# Package-wide state shared between the subs below.

# Sub-directory paths collected by get_files() and iterated by main().
our @folders=(); 
# NOTE(review): not referenced anywhere else in the visible code.
our %errors=(); 
# Page-error report text; reset per folder in main(). The only code that
# appends to it is currently commented out in go_go_gadget().
our $page_errors=''; 
# Output directory; derived from $folder_in in main().
our $folder_out=''; 
# Input directory currently being processed (loop variable of main()).
our $folder_in=''; 
# Sub-folder name derived from the file name in go_go_gadget(); it is part
# of the output path when $dev is 0.
our $sub_folder=""; 
# Developer-mode flag; main() sets it to 1.
our $dev=0; 
# NOTE(review): not referenced anywhere else in the visible code.
our $anker='#a_'; 
# Set to 1 by check_linebreak() when the line it just handled contained a
# coded word break ("/" between two word parts), otherwise reset to 0.
our $coded_lb=0; 
# Running line counter; reset at every page break in go_go_gadget().
our $line_cnt=0; 

sub get_complete_filename 
{ 
    # Looks up the complete file name for a short name of the form
    # "<non-digits><digits>" in ..\..\complete_filenames.txt.
    #
    # Parameter: the short file name.
    # Returns:   the first line of the list file that contains the name
    #            part, one or more of [a-zA-Z_-], and the number part;
    #            the unchanged input when the name has no such shape, when
    #            nothing matches, or when the list file cannot be opened.
    my ($short_name) = @_;

    # Test the match itself rather than $1/$2: after a failed match the
    # capture variables still hold values from an earlier match, and a
    # number part of "0" is false although it is a valid number.
    return $short_name unless ($short_name =~ m/([^\d]+)(\d+)/);
    my ($name, $number) = ($1, $2);

    my $list_file = "..\\..\\complete_filenames.txt";
    my $fh;
    if (!open($fh, "<:encoding(utf-8)", $list_file))
    {
        # Best effort: without the list the short name is kept.
        warn "Unable to open $list_file: $!\n";
        return $short_name;
    }

    my $result = $short_name;
    while (my $line = <$fh>)
    {
        chomp($line);
        # Skip blank lines and lines containing "Datei".
        next if ($line eq '' || $line =~ m/Datei/);
        # \Q..\E: the name parts are literal text, not regex syntax.
        if ($line =~ m/\Q$name\E[a-zA-Z_-]+\Q$number\E/)
        {
            $result = $line;
            last;
        }
    }
    close($fh);
    return $result;
} 

sub get_files 
{ 
    # Walks the directory tree below the given directory and appends the
    # path of every sub-directory found (depth first) to the package-level
    # @folders list. Dies when a directory cannot be opened.
    my ($parent) = @_;

    opendir(my $handle, $parent) || die "Unable to open $parent: $!";
    my @entries = grep { !/^\.\.?$/ } readdir($handle);   # drop "." and ".."
    closedir($handle);

    for my $entry (@entries)
    {
        my $path = "$parent\\$entry";
        next unless (-d $path);
        push(@folders, $path);
        get_files($path);
    }
} 

sub header 
{ 
    # Writes the TEI header file "<fn>.teih" into the output folder and
    # returns the XML declaration line.
    #
    # Parameters: $_[0] base file name (without extension),
    #             $_[1] input folder, $_[2] output folder.
    # When "<input folder>\<fn>.teih" exists, the lines from <teiHeader>
    # up to and including </teiHeader> are copied; otherwise an empty
    # header template is written. The target is placed in the $sub_folder
    # sub-directory unless $dev is set.
    # Dies when the source cannot be read or the target cannot be written;
    # previously such failures were silently ignored and the header was lost.
    my $fn=$_[0]; 
    my $folder_in=$_[1]; 
    my $folder_out=$_[2]; 

    my $source_file=$folder_in."\\".$fn.".teih";
    my $target_file=$folder_out.($dev eq 0 ? "\\".$sub_folder : "")."\\".$fn.".teih";
    my $output="";

    if (-e $source_file)
    {
        open (my $in,"<:encoding(utf-8)", $source_file) or die "konnte die datei \"$source_file\" nicht oeffnen: $!\n";
        my $input = do { local $/; <$in> };   # slurp the whole file
        close $in;
        $input = "" if (!defined $input);

        my $read=0;   # becomes 1 once <teiHeader> has been seen
        foreach my $line (split(/[\n\r]/,$input))
        {
            if ($line=~m/<\/teiHeader>/)
            {
                # The closing line is copied too, then copying stops.
                $output.=$line."\n";
                last;
            }
            elsif ($read eq 1 || $line=~m/<teiHeader>/)
            {
                $read=1;
                $output.=$line."\n";
            }
        }
        #copy($folder_in."\\".$fn.".teih", $folder_out."\\".$fn."\\".$fn.".teih"); 
    }
    else
    {
        # No header next to the input file: emit a template to be filled in.
        $output="\n\t<!--\n\t copy of the main_header or empty header!!!\n\t please update its content\n\t-->\n". 
'<teiHeader> 
    <fileDesc> 
     <titleStmt> 
      <title/> 
      <respStmt> 
       <resp/> 
       <name/> 
      </respStmt> 
     </titleStmt> 
     <publicationStmt> 
      <distributor/> 
     </publicationStmt> 
     <sourceDesc> 
      <bibl/> 
     </sourceDesc> 
    </fileDesc> 
</teiHeader>'; 
    }

    open (my $out,">:encoding(utf-8)", $target_file) or die "konnte die ausgabedatei \"$target_file\" nicht oeffnen: $!\n";
    print $out $output;
    # Buffered write errors only surface at close.
    close ($out) or die "konnte die ausgabedatei \"$target_file\" nicht schreiben: $!\n";

    return '<?xml version="1.0" encoding="utf-8"?>'; 
} 

sub check_linebreak 
{ 
    # Prefixes a text line with its page marker and, unless the previous
    # line ended in a coded word break, with a numbered line-break
    # placeholder. A "/" between two Georgian word parts is a coded word
    # break: it is replaced by a placeholder carrying the next line number.
    #
    # Parameters: $_[0] the line text, $_[1] the pending page marker text.
    # Returns:    the assembled text.
    # Side effects: increments $line_cnt; sets $coded_lb to 1 when this
    #               line contained a coded word break, otherwise to 0.
    my ($text, $page_marker) = @_;

    $line_cnt++;

    my $has_coded_break = 0;
    if ($text=~m/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ\-]+)\/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ\-]+[[:punct:]]*)/)
    {
        my ($before, $after) = ($1, $2);
        my $break_marker = "TRENNENDERZBTRENNENDERZB".($line_cnt+1)."TRENNENDERZBTRENNENDERZB";
        $text=~s/\Q$before\E\/\Q$after\E/$before$break_marker$after/;
        $text.=" ";
        $has_coded_break = 1;
    }

    # The numbered placeholder is only emitted when the previous line did
    # not already end in a coded word break.
    my $result = $page_marker;
    if ($coded_lb eq 0)
    {
        $result .= "NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ";
    }
    $result .= $text;

    $coded_lb = $has_coded_break;
    return $result;
} 

sub anfangs_verarbeitung 
{ 
    # Pre-processing of the raw file content before it is split into lines.
    #
    # Parameter: $_[0] the complete input text.
    # Returns:   the text with the BOM removed, page/chapter/title markers
    #            normalised to a leading "#", quotation marks and inline
    #            tags repaired, unclear/corrected readings, note anchors
    #            and footnotes turned into XML, and "$"-coded letters
    #            replaced by their Georgian characters.
    # The substitutions are order dependent; do not reorder them.
    my $tmp=$_[0]; 

    $tmp =~ s/^\x{FEFF}//; # removes BOM 

    # "#" followed by line breaks and a dash rule is pulled onto one line.
    $tmp =~ s/#(?:(?:\r\n)|\n|\r)+(\-{2,})/#$1\n/mg; 
    # /m added: the anchors are meant per line; without it this only fired
    # when the whole file consisted of "p".
    $tmp =~ s/^p\s*$//mg; 

    # A page marker and a chapter marker on the same line are split up.
    # /m added for the same reason as above.
    $tmp =~ s/^\s*([pP]\d+)\s*([cC]\s*[0-9IVX]+)/$1\n$2/mg; 

    # Marker prefixes: "|" before a marker letter becomes "#", bare
    # markers get a "#", and "|<digits>" becomes "#p<digits>".
    # NOTE(review): the character classes contain duplicated letters
    # ([pppctPCT], [athzee]); possibly look-alike letters from another
    # script were lost somewhere - confirm against the original script.
    $tmp =~ s/(?<=#)\|(?=[pppctPCT])//g; 
    $tmp =~ s/\|(?=[pppctPCT])/#/g; 
    $tmp =~ s/(?<![\|#])([pppcPC]\s*[\dIVXMC]+)+/#$1/g; 
    $tmp =~ s/\|(\d+)/#p$1/g; 

    # Quotation marks: "»" opens (directly before a Georgian letter),
    # "«" closes.
    $tmp =~ s/«(?=[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/»/g; 
    $tmp =~ s/»(?![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/«/g; 
    $tmp =~ s/<<(?=[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/»/g; 
    $tmp =~ s/>>(?![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/«/g; 
    $tmp =~ s/(?:„|,,|")([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)«/»$1«/g; 
    $tmp =~ s/»([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)[“"']/»$1«/g; 



    # A line consisting only of "<year> წ." is marked as a dateline.
    # /m added: the pattern is anchored per line.
    $tmp =~ s/^(\d{4}\s*წ\.)\s*$/#d $1/mg; 

    # NOTE(review): as written this replaces "<pol>" by itself.
    $tmp =~ s/<pol>/<pol>/g;  

    # Repair of malformed single-letter inline tags.
    $tmp =~ s/<ა>/<a>/g;  
    $tmp =~ s/<\?([athzee])>/<\/$1>/g;     #<?a>   
    $tmp =~ s/[<>]\/([athzee])(?![<>])/<\/$1>/g;   #</a >/a    
    $tmp =~ s/<([athzee])\/>/<\/$1>/g;     #<a/> 
    $tmp =~ s/[<>]\/([athzee])[<>]/<\/$1>/g;    #>/a> etc.   
    $tmp =~ s/[<>]([athzee])[<>]/<$1>/g;     #>a< etc. 
    $tmp =~ s/<([athzee])(?![<>])/<$1>/g;     #<a >a   
    $tmp =~ s/(?<=[^><\/#])([athzee])[<>]/<$1>/g;   #a< a>   
    $tmp =~ s/(?<=[^><])\/([athzee])[<>]/<\/$1>/g;  #/a< /a>  

    #$tmp =~ s/<\/([athze])>([^<]+)<\/[^\1]>/<$1>$2<\/$1>/gm; 

    ##$tmp =~s/<([athz])>([^<]+)<\/[^(?:$1)]>/<$1>$2<\/$1>/g; 
    ##$tmp =~s/<([athz])>([^<]+)<[^(?:\1)]>/<$1>$2<\/$1>/g; 

    # Two equally tagged parts joined by "-" become one tagged element.
    $tmp =~ s/<([pol])>([^<]+)<\/\1>-<\1>([^<]+)<\/\1>/<$1>$2-$3<\/$1>/g; 
    #$tmp =~ s/<([athze])>([^<]+)<\/\1>[\-\-]<([athze])>([^<]+)<\/\3>/<$1>$2-$4<\/$1>/g; 

    ##$tmp =~ s/([^\s]+)\-<([athz])>([^<]+)<\/\2>/<$2>$1\-$3<\/$2>/g; 
    ##$tmp =~ s/<name([^>]+)>([^<]+)<\/name>//g; 

    $tmp =~ s/<\/</</g; 

    ####$tmp =~ s/<\/(?![athzee])//g; 

    $tmp =~ s/#{2,}/#/g; 

    # "(//?text)" -> <unclear>, "(//text)" -> <corr>.
    $tmp =~ s/\(\/\/\?([^\)]*)\)/<unclear>$1<\/unclear>/gm; 
    $tmp =~ s/<unclear><\/unclear>/<unclear\/>/g; 
    $tmp =~ s/\(\/\/([^\)]+)\)/<corr>$1<\/corr>/gm; 

    # "<sN>...</sN>" -> note anchor, "(sN text)" -> footnote.
    #$tmp =~ s/<s(\d+)>([^<]+)<\/s\1>/$2<ref target="#a$1" type="noteAnchor">$1<\/ref>/gm; 
    $tmp =~ s/<[sS](\d+)>/<ref target="a$1" type="noteAnchor">/g; 
    $tmp =~ s/<\/[sS]\d+>/<\/ref>/g; 
    $tmp =~ s/\([sS](\d+)=?\s*([^\)]+)\)/<note xml:id="a$1" type="footnote">$2<\/note>\n/gm; 

    #$tmp =~ s/#f(\d+)\s*(.*)([^#\|]+)/<note xml:id="a$1" type="footnote">$2<\/note>\n\n/gm; 
    #$tmp =~ s/\(s\s*(\d+)\s*([^\)]+)\)/<note xml:id="a$1" type="footnote">$2<\/note>\n\n/gm; 
    $tmp =~ s/\n{1,}<\/note>/<\/note>/gm; 

    #$tmp =~ s/\s*#\-{2,}//gm; 

    # "<letter(s)>$" codes are replaced by single Georgian letters.
    $tmp=~s/ვი\$/ჳ/g; 
    $tmp=~s/ხ\$/ჴ/g; 
    $tmp=~s/ე\$/ჱ/g; 
    $tmp=~s/ი\$/ჲ/g; 
    $tmp=~s/ფ\$/ჶ/g; 
    $tmp=~s/ვ\$/უ/g; 
    $tmp=~s/ო\$/ჵ/g; 

    # "#.." is treated like the dash rule that marks a paragraph break.
    $tmp=~s/#\.{2,}/#\-\-\-\-\-\-\-\-\-\-\-\-\-\-/g; 

    return $tmp; 
} 

sub end_verarbeitung 
{ 
    # Post-processing of the assembled XML body.
    #
    # Parameters: $_[0] the XML text,
    #             $_[1] the file name (selects whether letter-spaced text
    #                   is detected and labels the log entry),
    #             $_[2] the folder that receives 000_sperrschrift.txt.
    # Returns:    the cleaned-up text with the inline tags expanded, the
    #             punctuation normalised and the line/page placeholders
    #             replaced by <lb/> and <pb/> elements.
    # The substitutions are order dependent; do not reorder them.
    my $tmp=$_[0]; 
    $tmp =~ s/[\n\r]{2,}/\n/g; 
    $tmp =~ s/<p>\s+/<p>/g; 
    $tmp =~ s/<\/p>\s+/<\/p>/g; 
    $tmp =~ s/<p><\/p>//g; 
    $tmp =~ s/<div><p><div type="dateline">/<div type="dateline">/g; 
    $tmp =~ s/<p><div type="dateline">/<div type="dateline">/g; 

    # Expansion of the single-letter inline tags.
    $tmp =~ s/<pol>([^<]+)<\/pol>/<term type="political">$1<\/term>/g; 
    $tmp =~ s/<term type="political"> ([^<]+)<\/name>/ <term type="political">$1<\/term>/g; 
    $tmp =~ s/<a><name/<name/g; 

    $tmp =~ s/<t>([^<]+)<\/t>/<name type="toponym">$1<\/name>/g; 

    $tmp =~ s/<z>([^<]+)<\/z>/<name type="zoonym">$1<\/name>/g;   
    $tmp =~ s/<h>([^<]+)<\/h>/<name type="hydronym">$1<\/name>/g;   
    $tmp =~ s/<e>([^<]+)<\/e>/<name type="ethnonym">$1<\/name>/g;   

    #$tmp =~ s/<a>([^<]+)/<name type="anthroponym">$1<\/name>/g; 
    #$tmp =~ s/([^>]+)<\/a>/<name type="anthroponym">$1<\/name>/g; 

    $tmp =~ s/<u>([^<]+)<?\/u>/<name type="unknown">$1<\/name>/g; 

    # Spacing around punctuation and brackets.
    $tmp =~ s/\s+([\.:,!\?\)])/$1/g; 
    $tmp =~ s/(\()\s+/$1/g; 

    $tmp=~s/<p>#<\/p>//g; 
    $tmp=~s/<div><\/div>//g; 

    # Dot sequences are turned into an ellipsis character.
    $tmp=~s/\.\s+\./\.\./g; 
    $tmp=~s/\.\.(?!<\.)/\.\.\./g; 
    $tmp=~s/\.\.\./…/g; 
    $tmp=~s/…\s*\./…/g; 
    $tmp=~s/ +([,\.…;:!\?])/$1/g; 
    #$tmp=~s/([,\.…;:!\?])(?!<)/$1 /g; 
    # Every hyphen becomes an en dash from here on.
    $tmp=~s/-/–/g; 
    $tmp=~s/,–/, –/g; 
    $tmp=~s/([\.:,!\?\)])–/$1 -/g; 
    $tmp=~s/\. </\.</g; 

    $tmp=~s/xml: id/xml:id/g; 

    $tmp=~s/#-{2,}//g; 
    $tmp=~s/<p><\/p>//g; 
    $tmp=~s/\s*<\/p><p>/<\/p>\n\t\t\t\t<p>/g; 
    $tmp=~s/ +/ /g; 

    #$tmp =~ s/„([^„“]+)„/„$1“/g; 

    #$tmp=~s/<pb n="(\d+)"\/>(?:\r\n)*n*\s*<\/div>/<pb n="$1"\/>/gm; 
    #$tmp=~s/<div type="Section">(?:\r\n)*\n*\s*<head>([^<]+)<\/head>/<div type="Section">\n\t\t\t\t<head>$1<\/head>\n\t\t\t\t<\/div>/gm; 
    #$tmp=~s/\s*<pb n="(\d+)"\/>(?:\r\n)*\n*\s*<div type="Section">/<div type="Section">\n\t\t\t\t<pb n="$1"\/>/gm; 

    $tmp=~s/<\/p><lg>/<\/p>\n\t\t\t\t<lg>/g; 
    $tmp=~s/<\/p><\/div>/<\/p>\n\t\t\t\<\/div>/g; 

    # Blanks are moved out of <name> elements.
    $tmp=~s/(<name[^>]*>) +/ $1/g; 
    $tmp=~s/([^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]*) +<\/name>/<\/name>$1 /g; 
    $tmp=~s/…\s*<\/name>/<\/name>…/g; 
    $tmp=~s/,\s*\./\./g; 
    $tmp=~s/ +/ /g; 

    # Placeholders written by check_linebreak() / go_go_gadget() become
    # real <lb/> and <pb/> elements.
    $tmp=~s/NORMALERZBNORMALERZB(\d+)NORMALERZBNORMALERZB/\n\t\t\t\t\t<lb n="$1"\/> /g; 
    $tmp=~s/TRENNENDERZBTRENNENDERZB(\d+)TRENNENDERZBTRENNENDERZB/<lb n="$1"\/>/g; 
    $tmp=~s/PAGE PAGE PAGE PAGE PAGE(\d+)PAGE PAGE PAGE PAGE PAGE/<pb n="$1"\/>/g; #//<pb n=\"".$current_page."\"/>"; 
    $tmp=~s/<\/p>(<pb n="\d+"\/>)/<\/p>\n\t\t\t\t$1/g; 
    $tmp=~s/ (<pb n="\d+"\/>)/$1/g; 
    $tmp=~s/<\/p>[\r\n]+\s+<p>(<pb n="\d+"\/>)<\/p>/$1<\/p>/g; 

    $tmp=~s/<\/l>(<pb n="\d+"\/>)/$1<\/l>/g; 

    $tmp=~s/ +/ /g; 

    $tmp=~s/<a><name/<name/g; 

    $tmp=~s/<head><\/head>//; 

    # Letter-spaced text: three or more Georgian letters, each followed by
    # exactly one other character, are joined and wrapped in <hi>.
    # Skipped for the files whose name matches the patterns below.
    if ($_[1]!~m/(?:04|07|11).1857/ && $_[1]!~m/(?:04|08).1858/) 
    { 
        my @spaced_found;   # joined runs, in order of appearance, for the log

        # Builds the replacement for one matched run.
        my $wrap_spaced = sub {
            my ($spaced) = @_;
            (my $joined = $spaced) =~ s/ //g;
            push(@spaced_found, $joined);
            # Trailing non-letters are moved behind the <hi> element.
            my $rest = " ";
            if ($joined=~m/([^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)$/)
            {
                $rest=($1 ne "<"?" ":"").$1;
                $joined=~s/[^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+$//;
            }
            return '<hi rend="letter-spacing">'.$joined.'</hi>'.$rest;
        };

        # One left-to-right pass. The former "while (match) { replace the
        # first occurrence }" loop rescanned the text it had just wrapped;
        # a run separated by characters other than blanks (e.g. "ა.ბ.გ.")
        # matched again inside the new <hi> element and the loop never
        # ended. s///ge never revisits text it has already replaced.
        $tmp=~s/(?<![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])((?:[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ][^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ~\–\-]){3,})/$wrap_spaced->($1)/ge;

        if (@spaced_found)
        {
            my $sperr = join("", map { $_."\n" } @spaced_found);
            my $log_file = $_[2]."\\000_sperrschrift.txt";
            # The log is informational only: a failure is reported but
            # does not stop the conversion.
            if (open (my $log,">>:encoding(utf-8)", $log_file))
            {
                print $log $_[1]."\n\t".$sperr;
                close $log;
            }
            else
            {
                warn "konnte \"$log_file\" nicht oeffnen: $!\n";
            }
        }
    } 

    $tmp=~s/>([^<])<\/name>\./>$1\.<\/name>/g; 

    return $tmp; 
} 

sub go_go_gadget 
{ 
    # Converts one input .txt file into TEI output.
    #
    # Parameter: $_[0] path of the input file.
    # Reads the whole file, runs anfangs_verarbeitung() on it, walks over
    # the lines as a small state machine (chapter, page break, title,
    # verse, paragraph break, dateline, plain text), runs
    # end_verarbeitung() on the result and writes three files to the
    # output folder: "<fn>.teih" (via header()), "<fn>.xml" (a wrapper
    # that includes the other two as entities) and "<fn>.txml" (the text).
    # Files whose name contains "instruqcia" are skipped.
    # Uses the package variables $folder_in, $folder_out, $dev,
    # $sub_folder, $line_cnt and $coded_lb.
    #
    # State flags used below: $p = a <p> is open, $v = an <lg> is open,
    # $div / $chapter = a Section / Chapter <div> is open, $ut = a comment
    # <note> is open, $written = text was emitted since the last break,
    # $pg = page-break placeholder waiting to be emitted with the next text.
    my $file_xml=$_[0]; 
    my $file_html=''; my $output=''; my $output_filename=''; 
    my $chapter=0; my $div=0; my $p=0; my $last_p=0; my $v=0; 
    my $input_xml=''; my $chapter_type=''; my $written=0; 
    my $page=0; my $started=0; 

    (my $fn,my $pn)=fileparse $file_xml; 
    return if ($fn=~m/instruqcia/); 

    print "\tkonvertiere $fn\n"; 
    # Derive the output base name: drop ".txt", turn the first "<digits>_"
    # into "<digits>+", all other "_" into "-", and the "-" after a leading
    # lower-case prefix back into "_".
    $fn=~s/\.txt//g; 
    $fn=~s/(\d+)_/$1+/; 
    $fn=~s/_/-/g; $fn=~s/^([a-z]+)-/$1_/g; 
    # The sub-folder is the leading "<prefix>_<letters>" part of the name.
    $sub_folder=""; 
    if ($fn=~m/^([^_]+_[a-zA-Z]+)/) 
    { 
     $sub_folder=$1;  
    } 

    # Normalise path separators to single backslashes.
    $file_xml=~s/(?:\/|\\+)/\\/g; 

    open (IN,"<:encoding(utf-8)", $file_xml) || die "konnte die datei nicht oeffnen: $!\n"; 
    $input_xml = do { local $/; <IN> } ;      # slurp the whole input file into a string 
    close IN; 

    # ----------------------------------------- 
    $input_xml=anfangs_verarbeitung($input_xml); 
    # ----------------------------------------- 

    $div=0; 
    my $last_line=''; 
    my @lines=split(/\n/,$input_xml); 
    $line_cnt=0;  
    my $group_cnt=0; 
    my $verse_cnt=0; 
    my $pg=''; 
    my $first_page=0; 
    my $last_page=0; 
    my $has_chapters=0; 
    my $ut=0; 
    my $quote_open=0; 
    my $section_cnt=0; 
    my $chapter_cnt=0; 

    $coded_lb=0; 
    $chapter_type="Section"; 
    # The file is treated as chaptered when it contains a "#c<number>" marker.
    $has_chapters=1 if ($input_xml=~m/#\s*[cC]\s*[\dIVXMC]+[\–\-]?[\dIVXMC]*/); 
    if ($has_chapters eq 0) 
    { 
     # NOTE(review): this value is overwritten unconditionally by the
     # assignment to $output right after this block.
     $output='<div type="Section">' ; 
     #$chapter=1; 
    } 

    $output='<div type="Content" n="1">'."\n"; 
    foreach my $line (@lines) 
    { 
     $line=~s/^L\s*//; 

     # Strip line ends, collapse runs of whitespace, trim both ends.
     chomp($line); $line=~s/\n//g; $line=~s/\r//g; $line=~s/(\s){2,}/$1/g; $line =~ s/^\s+//g; $line =~ s/\s+$//g; 
     #$line=~s/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])([,;\.])([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/$1$2 $3/g; 

     if ($has_chapters eq 1 && $line =~ m/^\s*#?\s*\|?[cC]\s*(\d+)\s*(.*)/)  # chapter 
     { 
      # Close whatever is still open, then start a new Chapter <div>.
      $output.="</note>" if ($ut == 1); 
      if ($p eq 1) { $output.="</p>"; } 
      elsif ($v eq 1) { $output.="\n\t\t\t\t</lg>"; } 
      if ($started eq 1) 
      { 
       if ($div eq 1) { $output.="\n\t\t\t</div>"; } 
       elsif ($chapter eq 1) { $output.="\n\t\t\t</div>"; } 
      } 

      my $title=$2; 
      # NOTE(review): anfangs_verarbeitung() emits target="a<n>" without
      # "#", and the plain-text branch below matches that form; this
      # pattern expects target="#a<n>" - confirm which one is intended.
      if ($title) { $title=~s/<ref target="#a(\d+)" type="noteAnchor">/<ref target="#a_$page\_$1" type="noteAnchor">/g; } 
      $chapter_cnt=$1; 
      $output.="\n\t\t\t".'<div type="Chapter" n="'.$chapter_cnt.'">'."\n\t\t\t\t<head>".($title?check_linebreak($title,$pg):$pg)."</head>"; 

      $chapter=1; $chapter_type="Chapter"; #$div=0; 
      $p=0; $written=0; $v=0; $ut=0; 
      $last_line=""; $started=0; 
      $pg=''; 
     } 
     elsif ($line =~ m/^\s*#\s*[pP]\s*(\d+)/)    # page break 
     { 
      # Outside of a paragraph or verse group a new <p> (and, if needed,
      # a new <div>) is opened first, so the page break ends up inside one.
      if ($v ne 1 && $p eq 0) 
      { 
       if ($div == 0) 
       { 
        if ($chapter_type eq "Section" || $has_chapters == 0) { $section_cnt++; $output.='<div type="Section" n="'.$section_cnt.'">'; } 
        else { $output.='<div type="Chapter" n="'.$chapter_cnt.'">'; } 
        $div=1; 
       } 
       $output.="<p>"; $p=1; 
      } 
      #$output.="<pb n=\"".$1."\"/>"; 

      # --- detecting page errors 
      my $current_page=$1; #0;    
      #if ($first_page > 0) 
      #{ 
      # $current_page=$1; 
      # if ($current_page-$last_page<1) 
      # { 
      #  $current_page=$last_page+1; 
      #  $page_errors.=$fn."\t".$last_page."\n"; 
      # }    
      # elsif ($current_page-$last_page>1) 
      # { 
      #  $page_errors.=$fn."\t".$last_page."\n"; 
      # }    
      #} 
      #else 
      #{ 
      # $first_page=$1; 
      # $current_page=$1; 
      #} 
      #$last_page=$current_page; 
      # ---- 

      # The page break is only remembered here; it is emitted together
      # with the next text and turned into <pb/> by end_verarbeitung().
      $pg.="PAGE PAGE PAGE PAGE PAGE".$current_page."PAGE PAGE PAGE PAGE PAGE"; 
      #$p=0; 
      $page=$1; 
      #$written=0; 
      $last_line=""; 
      # Line numbering restarts on every page.
      $line_cnt=0; 
     } 
     elsif ($line =~ m/\s*#[tT]\s*(.+)/)    # title 
     { 
      $output.="</note>" if ($ut == 1); 
      if ($p eq 1) { $output.="</p>"; } 
      elsif ($v eq 1) { $output.="\n\t\t\t\t</lg>"; } 

      # NOTE(review): $chapter_type is only ever set to "Section" or
      # "Chapter" in this sub, so the comparison with 'chapter' is
      # always true.
      if (($chapter eq 1 || $div eq 1) && $chapter_type ne 'chapter') 
      { 
      # if($chapter_type eq 'chapter') 
      # { 
      #  if ($started eq 1) 
      #  { $output.="\n\t\t\t</div>\n\t\t\t".'<div type="Chapter" n="'.$1.'">';} 
      #  else { $output.='<div type="Chapter" n="'.$1.'">';} 
      # } 
      # else 
      # { 
        if ($started eq 1) { $section_cnt++; $output.="\n\t\t\t</div>\n\t\t\t".'<div type="Section" n="'.$section_cnt.'">'; }  
        else { $section_cnt++; $output.="\n\t\t\t".'<div type="Section" n="'.$section_cnt.'">'; } 
      # }    
      } 
      else 
      { 
       #$section_cnt++; 
       #$output.='<div type="Section" n="'.$section_cnt.'">'; 
       #$div=1; 
      } 
      #$line_cnt++; 
      $output.="\n\t\t\t\t<head>".$pg.$1."</head>"; 
      $pg=''; 
      $p=0; $written=0; $v=0; $ut=0; 
      $last_line=''; $started=1; 
     } 
     elsif ($line =~ m/#v\s*(.+)/)       # verse 
     { 
      $output.="</note>" if ($ut == 1); 
      if ($p eq 1) { $output.="</p>" ;} 
      # The first verse after other content opens a new line group.
      if ($v eq 0) { $group_cnt++; $verse_cnt=0; $output.="\n\t\t\t\t".'<lg n="'.$group_cnt.'">'; } 
      $verse_cnt++; 
      $last_line=$1; 
      $line_cnt++;    
      $output.="\n\t\t\t\t\t".'<l n="'.$verse_cnt.'">'.$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$1."</l>";   
      $p=0; 
      $written=1; $v=1; 
      $started=1; $pg=''; $ut=0; 
     } 
     elsif ($line=~m/#\s*\-{2,}/) #elsif ($line eq '' && $last_line ne '') # && $last_line!~m/[\.!\?]\s*$/)   # paragraph 
     {   
      if ($written eq 1) 
      { 
       $output.="</note>" if ($ut == 1); 
       if ($p eq 1) { $output.="</p>"; $p=0; } 
       # NOTE(review): this branch cannot run - it requires $written eq 0
       # inside a block that is only entered when $written eq 1.
       elsif ($v eq 1 && $written eq 0) { $output.="\n\t\t\t</lg>\n\t\t\t\t"; $v=0; } 
      }    
      #if ($p eq 0 && $v eq 0) { $output.="\n\t\t\t\t<p>"; $p=1; } 

      $written=0; $last_line=''; $ut=0; 
     } 
     elsif ($line =~ m/^(?:#d)?\s*(\d{4}\s*წ\.)$/ || $line=~m/^\s*(\d{4}(?: – \d+\s*წ*\.)?\s*)$/ || $line=~m/^\s*([0-9]+\s*[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+\s*[0-9]+\s*[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]*)$/)   # dateline 
     { 
      # A dateline closes the current <div> and gets a <div> of its own.
      if ($div eq 1 || $chapter eq 1) 
      { 
       $output.="</note>" if ($ut == 1); 
       if ($p eq 1) { $output.="</p>"; } 
       elsif ($v eq 1) { $output.="\n\t\t\t\t</lg>"; } 
       $output.="\n\t\t\t</div>"; 
       $chapter=0; $div=0; $ut=0; 
      } 
      $line_cnt++; 
      $output.="\n\t\t\t\t<div type=\"dateline\"><p>".$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$1."</p></div>"; 
      $p=0; $written=0; $v=0; $pg=''; 
      $last_line=""; 
     } 
     # NOTE(review): for flags that are only 0 or 1 this condition is
     # always true, i.e. this is the catch-all branch for plain text.
     elsif ($div eq 1 || $chapter eq 1 || ($div eq 0 && $chapter eq 0)) 
     { 
      if ($line!~m/^\s*$/) 
      { 
       $output.="\n\t\t\t\t</lg>" if ($v eq 1); 
       if ($div eq 0 && $chapter eq 0) { $div=1; $section_cnt++; $output.="\n\t\t\t".'<div type="Section" n="'.$section_cnt.'">';} 
       if ($p eq 0) { $output.="\n\t\t\t\t<p>"; } 
       $line=~s/\s*#\s*//g; 

       # --- quotes 
       $line=~s/([\.,;\?!:])„/$1“/g;  #„ “ 
       if ($line=~m/^\s*„/ && $line!~m/“/ && $line=~m/[\.\?!:]+\s*$/) 
       { 
        $line.="“"; 
       }    
       else 
       { 
        $line=~s/„//g; 
       } 
       $line=~s/“//g if ($line=~m/“/ && $line!~m/„/); 
       # --- 

       # --- ref 
       # Note ids are made unique by inserting the current page number.
       $line=~s/<ref target="a(\d+)" type="noteAnchor">/<ref target="#a_$page\_$1" type="noteAnchor">/g; 
       $line=~s/<note xml:id="a(\d+)" type="footnote">/<note xml:id="a_$page\_$1" type="footnote">/g;   
       # --- 

       # "|ut" opens a comment note; it is closed by the next structural
       # element (see the "</note>" lines in the branches above).
       if ($line=~m/\|ut/) 
       { 
        $line=~s/\|ut/<note type="comment">/; 
        $ut=1; 
       } 

       $output.=check_linebreak($line,$pg); 
       #$line_cnt++; 
       #if ($line=~m/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ\-]+)\/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ\-]+[[:punct:]]*)/) 
       #{     
       # my $tmp1=$1; my $tmp2=$2; 
       # my $z="TRENNENDERZBTRENNENDERZB".($line_cnt+1)."TRENNENDERZBTRENNENDERZB"; 
       # $line=~s/\Q$tmp1\E\/\Q$tmp2\E/$tmp1$z$tmp2/; $line.=" "; 
       # if ($coded_lb eq 0) 
       # { 
       #  $output.=$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ".$line; 
       # } 
       # else 
       # { 
       #  $output.=$pg.$line; 
       # } 
       # $coded_lb=1; 
       #} 
       #else 
       #{ 
       # if ($coded_lb eq 0) 
       # { 
       #  $output.=$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ".$line; 
       # } 
       # else 
       # { 
       #  $output.=$pg.$line;      
       # } 
       # $coded_lb=0; 
       #}    
       #$output.=$pg." "."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$line; 
       $last_line=$line; 
       $p=1; $written=1; $v=0; $started=1;$pg=''; 
      } 
     } 
    } 

    # Close whatever is still open at the end of the file.
    if ($p eq 1) { $output.="</p>";} 
    elsif ($v eq 1) { $output.="\n\t\t\t\t</lg>"; } 
    if ($div eq 1) { $output.="\n\t\t\t</div>"; } 
    elsif ($chapter eq 1) { $output.="\n\t\t\t</div>"; } 

    # ----------------------------------------- 
    $output=end_verarbeitung($output,$fn,$folder_out)."</div>"; 
    # ----------------------------------------- 

    #$fn=get_complete_filename($fn);  
    # The sub-folder is only created (and used) when developer mode is off.
    mkdir($folder_out."\\".$sub_folder,0777) if ($dev eq 0 && !(-d $folder_out."\\".$sub_folder)); 
    my $txt='<text rend="Section" xml:lang="kat">'; 
    $txt='<text rend="'.($section_cnt?"Section ":"").'Chapter" xml:lang="kat">' if ($has_chapters eq 1); 
    # header() writes the .teih file and returns the XML declaration line.
    $output=header($fn,$folder_in,$folder_out)."\n\t".$txt.'  
     <body> 
      '.$output.' 
     </body> 
    </text>'; 

    $output=~s/(<body>(?:\r\n)*\s*<pb n="\d+"\/>)(?:\r\n)*\s*<\/div>/$1/g; 

    $output_filename=$folder_out.($dev eq 0 ? "\\".$sub_folder : "")."\\".$fn.".xml";   
    # NOTE(review): unlike the .txml file below, this open is not checked.
    open (OUT, ">:encoding(utf-8)", $output_filename); 
    print OUT '<?xml version="1.0" encoding="utf-8"?>'."\n".'<!DOCTYPE TEI [ 
    <!ENTITY header SYSTEM "'.$fn.'.teih"> 
    <!ENTITY text SYSTEM "'.$fn.'.txml"> 
]> 
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:gnc="http://iness.uib.no/ns/1.0"> 
    &header; 
    &text; 
</TEI>'; 
    close (OUT); 
    # The converted text itself goes into the .txml file next to the wrapper.
    $output_filename=~s/\.xml/\.txml/gi; 
    open (OUT, ">:encoding(utf-8)",$output_filename) || die "konnte die ausgabedatei \"$output_filename\" nicht oeffnen: $!\n"; 
    print OUT $output; 
    close OUT;   
} 

sub main 
{ 
    # Entry point: collects every sub-directory below the start folder
    # (or the start folder itself when it has none) and converts all
    # *.txt files found directly in each of them with go_go_gadget().
    print "\nBeginne...\n"; 

    my $base_dir="D:\\bla"; 
    my @start_dirs=($base_dir."\\"); 
    $folder_out="D:\\bla"; 

    for my $start_dir (@start_dirs) 
    { 
        @folders=(); 
        get_files($start_dir); 
        push(@folders,$start_dir) unless (scalar(@folders)>=1); 
        $base_dir=~s/0_Eingabe/1_Ausgabe\\1/; 

        # The loop variable is the package variable $folder_in on purpose:
        # go_go_gadget() reads it.
        foreach $folder_in (@folders) 
        { 
            $page_errors=''; 

            # The output folder mirrors the input folder under 1_Ausgabe.
            ($folder_out=$folder_in)=~s/0_Eingabe/1_Ausgabe/; 
            # creating subfolders too 
            #my $tmp=$folder_out; 
            #$tmp=~s/\Q$root\E//;   
            #my @arr_tmp=split("\\\\",$tmp); 
            #$tmp=""; 
            #foreach my $dings (@arr_tmp) 
            #{    
            # next if ($dings eq ''); 
            # $tmp.="\\".$dings;        
            # mkdir($root.$tmp,0777) if (!(-d $root.$tmp)); 
            #}# 
            # ----- 
            $folder_out=~s/\\+/\\/g; 

            $dev=1; # developer mode is on when 1 
            # In developer mode everything is written directly to 1_Ausgabe.
            $folder_out=~s/1_Ausgabe.*/1_Ausgabe/ if ($dev eq 1); 

            print "Ordner ".$folder_in."\n"; 
            for my $txt_file (glob("${folder_in}/*.txt")) 
            { 
                go_go_gadget($txt_file); 
            } 

            # The page-error report below is switched off by this "next".
            next; 

            if ($page_errors ne '') 
            { 
                $folder_in=~m/0_Eingabe\\(.+)/; 
                my $report_name=$1; 
                $report_name=~s/\\+/__/g; 
                open (OUT, ">:encoding(utf-8)", $base_dir."\\".$report_name.".txt") || die "\n\tPage errors to file ".$report_name.": ".$!."\n"; 
                print OUT $page_errors; 
                close OUT; 
            } 
        } 
    } 

    print "Fertig!\n\n"; 
} 

main();

Toutefois, certains fichiers prennent trop de temps à traiter. Je voudrais abandonner une étape lorsqu'elle prend plus de 6 secondes. De cette façon, si la conversion d'un fichier prend trop de temps, le script passe au fichier suivant. Avez-vous des suggestions sur la façon de faire cela avec un délai d'expiration (timeout) ?

Source

2016-08-21 user80407

http://stackoverflow.com/questions/2423288/ways-to-do-timeouts-in-perl –

Merci, je l'avais déjà regardé mais mon problème est que je n'arrive pas à savoir où appliquer le patch de code. – user80407

Je n'ai pas parcouru suffisamment votre code pour vous dire exactement où le code de délai devrait aller, mais vous devriez être capable d'accomplir facilement ce que vous voulez en utilisant Time::Out. Il suffit d'utiliser

# NOTE(review): Time::Out has to be installed separately (e.g. from CPAN);
# confirm that it is available on the target system.
use Time::Out 'timeout'; 

# presumably the code block is abandoned once 6 seconds have elapsed -
# check the Time::Out documentation for how a timeout is reported.
timeout 6 => sub { 
    # code that you want to time out after 6 seconds goes here 
}

et cela devrait suffire.

Source

2016-08-21 21:59:58

Je vais prendre cela et expérimenter un peu. Merci – user80407

Aussi séduisante que soit votre réponse, il y a un énorme problème avec l'installation du module. J'utilise ActiveState Perl 5.24.0 sur Win10 64 bits. J'ai installé dmake via ppm, mais il n'est pas reconnu lorsque j'accède au shell cpan : « dmake et le compilateur gcc MinGW » ne seraient pas installés. cpan démarre quand même, mais si je tape "install Time::Out", l'installation échoue parce que dmake serait manquant. – user80407

@user80407 - Cela me semble être un problème de chemin (PATH), mais je crains de ne pouvoir rien faire de plus, puisque je n'ai jamais utilisé Perl sous Windows, seulement sous plusieurs systèmes *nix. Peut-être poser une autre question à ce sujet ? –

Perl: comment ignorer une boucle si la tâche prend trop de temps

Répondre

Questions connexes