Ticket #3111: imdb_fix.patch

File imdb_fix.patch, 4.3 KB (added by jcrews@…, 19 years ago)

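Trivial tag corrections to imdb.pl: the HTML marker strings the script searches for are updated (for example, labels closed with </b> are now matched with </h5>, and some <br> end markers become </div>).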

  • mythvideo/scripts/imdb.pl

    diff -ur mythvideo/mythvideo/scripts/imdb.pl my-mythvideo/mythvideo/scripts/imdb.pl
    old new  
    115115   my $year = $2;
    116116
    117117   # parse director
    118    my $director = parseBetween($response, ">Directed by</b>", "/a><br>");
     118   my $director = parseBetween($response, ">Directed by</h5>", "/a><br>");
    119119   $director = parseBetween($director, "/\">", "<");
    120120
    121121   # parse writer
    122122   # (Note: this takes the 'first' writer, may want to include others)
    123    my $writer = parseBetween($response, ">Writing credits</b>", "</table>");
     123   my $writer = parseBetween($response, ">Writing credits <a href=\"/wga\">(WGA)</a></h5>", "</table>");
    124124   $writer = parseBetween($writer, "/\">", "</");
    125125
    126126   # parse plot
    127    my $plot = parseBetween($response, ">Plot Outline:</b> ", "<br>");
     127   my $plot = parseBetween($response, ">Plot Outline:</h5> \n", "<br>");
    128128   if (!$plot) {
    129       $plot = parseBetween($response, ">Plot Summary:</b> ", "<br>");
     129      $plot = parseBetween($response, ">Plot Summary:</h5> \n", "<br>");
    130130   }
    131131
    132132   if ($plot) {
     
    139139      $plot =~ s/$title_link_pat/$1/g;
    140140
    141141      # plot ends at first remaining link
    142       my $plot_end = index($plot, "<a href=\"");
     142      my $plot_end = index($plot, "<a class=\"tn15more inline\" href");
    143143      if ($plot_end != -1) {
    144144         $plot = substr($plot, 0, $plot_end);
    145145      }
     
    151151
    152152   # parse MPAA rating
    153153   my $ratingcountry = "USA";
    154    my $movierating = parseBetween($response, ">MPAA</a>:</b> ", "<br>");
     154   my $movierating = parseBetween($response, ">MPAA</a>:</h5> \n", "</div" );
    155155   if (!$movierating) {
    156        $movierating = parseBetween($response, ">Certification:</b>", "<br>");
     156       $movierating = parseBetween($response, ">Certification:</h5>", "</div>");
    157157       $movierating = parseBetween($movierating, "certificates=$ratingcountry",
    158158                                   "/a>");
    159159       $movierating = parseBetween($movierating, ">", "<");
    160160   }
    161161
    162162   # parse movie length
    163    my $runtime = parseBetween($response, ">Runtime:</b>\n", " min");
     163   my $runtime = parseBetween($response, ">Runtime:</h5>\n", " min");
    164164
    165165   # parse cast
    166166   #  Note: full cast would be from url:
     
    168168   my @actors;
    169169   my $cast = "";
    170170   my $count = 0;
    171    my $data = parseBetween($response, "Cast overview, first billed only:",
     171   my $data = parseBetween($response, "\(Cast overview, first billed only\)",
    172172                               "/table>");
    173173   if ($data) {
    174174      my $beg = "/\">";
     
    178178      my $actor;
    179179      while ($start != -1) {
    180180         $start += length($beg);
    181          $actor = substr($data, $start, $finish - $start);
     181         if(substr($data, $start, 1) ne '<') {
     182               $actor = substr($data, $start, $finish - $start);
    182183         # add to array
    183184         $actors[$count++] = $actor;
    184 
     185        }
    185186         # advance data to next movie
    186187         $data = substr($data, - (length($data) - $finish));
    187188         $start = index($data, $beg);
     
    194195   # parse genres
    195196   my $lgenres = "";
    196197   $count = 0;
    197    $data = parseBetween($response, "<b class=\"ch\">Genre:</b>","<b class=\"ch\">User Comments:</b>");
     198   $data = parseBetween($response, "<h5>Genre:</h5>","<h5>User Comments:</h5>");
    198199   if ($data) {
    199200      my $genre_pat = qr'/Sections/Genres/(?:[a-z ]+/)*">([^<]+)<'im;
    200201      $lgenres = join(',', ($data =~ /$genre_pat/g));
     
    204205   my @countries;
    205206   my $lcountries = "";
    206207   $count = 0;
    207    $data = parseBetween($response, "<b class=\"ch\">Country:</b>","<br>");
     208   $data = parseBetween($response, "<h5>Country:</h5>","</div>");
    208209   if ($data) {
    209210      my $beg = "/\">";
    210211      my $end = "</a>";
     
    428429   # check to see if we got a results page or a movie page
    429430   #    looking for 'add=<movieid>" target=' which only exists
    430431   #    in a movie description page
    431    my $movienum = parseBetween($response, "add=", "\" target=");
     432   my $movienum = parseBetween($response, "add=", "\"");
    432433   if ($movienum) {
    433434       if (defined $opt_d) { printf("# redirected to movie page\n"); }
    434435       my $movietitle = parseBetween($response, "<title>", "</title>");