#!/usr/local/bin/perl

##
## written by Claus Reinke (http://www.cs.nott.ac.uk/~czr)
## modified by Dietmar Kreye (http://www.informatik.uni-kiel.de/~dkr)
##
##
## This script is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 2
## of the License, or (at your option) any later version.
## 
## This script is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##


# man - print the long description of the script to STDOUT.
#
# Takes no arguments; called for -m right after usage().
#
# The text is an interpolating here-document, so "\$" and "\\" in it are
# printed as "$" and "\".  It is closed by an explicit terminator: the old
# bare "print << ;" form (text ends at the first empty line) is refused by
# current perl releases.
sub man
{
  print <<"END_OF_MAN";
  # -------Description----------------------------------------------
  #
  # generate literature references list in HTML-format from
  # BibTeX-format. allows you to keep an organized virtual bookshelf
  # with only few additions to your BibTeX-references.
  #
  # + uses title, author and year field to display information from reference
  # + uses nice icons :-) 
  # + uses comma separated entries in field 'topics' to categorize references
  # + uses URL in field 'sourceURL' to hyperlink the book title to the source
  # + uses URL in field 'abstractURL' to hyperlink the book title to the
  #   abstract
  # + uses URL in field 'relatedURL' to hyperlink the book title to related
  #   stuff
  # + the resulting page provides a link to the BibTeX-file
  # + each entry may be linked to its BibTeX-reference
  # + the script also
  #   + converts special characters
  #   + converts occurrences of special characters (e.g. \$\\pi\$) in titles
  # + topics may be grouped into hierarchies based on comments in the
  #   BibTeX-source:
  #   % TOPICS
  #   % group1 : topic1, topic2, topic3, topic4,
  #   % group2 : group1, topic1, topic5,
  #   % ENDTOPICS
  # + topics may be described via notes based on comments in the BibTeX-source:
  #   % NOTES
  #   % topic1 : note1
  #   % topic2 : note2
  #   % ENDNOTES
  # + topics may be described via keywords based on comments in the
  #   BibTeX-source:
  #   % KEYS
  #   % topic1 : keyword11, keyword12
  #   % topic2 : keyword21, keyword22, keyword23
  #   % ENDKEYS
  # + uses flags in field 'FLAGS' to add some decorations:
  #   + own: books of which you own a copy
  #   + hot: highly recommended books
  #
  # - no real parser for BibTeX
  #   +  multi-line fields allowed for author and title
  #   -  important fields should be terminated by ","
  #      (year or topics as last field in entry will do ;-)
  #   -  entries have to be delimited by empty lines
  #   +- text following empty line and not starting with "@...{"  is ignored
  #      (if e.g. an abstract field contains empty lines, insert spaces there)
  #
END_OF_MAN
}

# NOTE(review): getopts.pl is one of the Perl 4 libraries that are no longer
# shipped with perl itself (CPAN: Perl4::CoreLibs); confirm whether it can go.
require 'getopts.pl';
# Getopt::Std provides getopts(), the module form of the same option parser.
use Getopt::Std;


# usage - print the option summary and the default file locations to STDOUT.
#
# Takes no arguments; called for -h and, followed by man(), for -m.
#
# The text is an interpolating here-document ("\$HOME" is printed as
# "$HOME").  It is closed by an explicit terminator because the old bare
# "print << ;" form is refused by current perl releases.  The synopsis line
# now lists -n and -u, which the option table below it already described.
sub usage
{
  print <<"END_OF_USAGE";
  #
  # bookshelf [-hmescynkfituboT]
  #
  # -------Options--------------------------------------------------
  #
  # -h     : help (print this usage info)
  # -m     : man (print longer usage info)
  # -e     : edit BibTeX source file
  # -s     : split HTML (generate directory with one file per topic)
  # -c     : classes (rudimentary support for supercategories)
  # -y     : document types (create sections for document types)
  # -n     : process notes
  # -k     : process keywords
  # -f     : process flags
  # -i     : info (link each entry to its BibTeX-reference)
  # -t     : text (no icons)
  # -u <user>: user id (use <user> instead of the current id)
  # -b <name>: input from <name>
  # -o <name>: output to <name>.html (name/ with -s)
  # -T <text>: title (use <text> instead of "Virtual Bookshelf")
  #
  # -------Defaults-------------------------------------------------
  #
  # BibTeX    : \$HOME/public_html/bib/bookshelf.bib
  # HTML      : \$HOME/public_html/bib/bookshelf.html
  # topics directory (for -s): \$HOME/public_html/bib/bookshelf/
  # reference-file   (for -i): \$HOME/public_html/bib/bibtex.html
  #
END_OF_USAGE
}

# -d reserved for debugging purposes
#
# Parse the command line.  getopts() comes from Getopt::Std and stores every
# switch in the package variable $opt_<letter>; a letter followed by ':'
# takes an argument.  It replaces "do Getopts(...)": the "do SUBROUTINE(LIST)"
# call form is no longer accepted by current perl releases.  As before, the
# result is not checked, so an unknown switch is only reported.
getopts('u:b:o:T:hmescynkfitd');

if ($opt_h) { &usage(); exit; }

if ($opt_m) { &usage(); &man(); exit; }

# Whose bookshelf: -u, else $USER, else the account the script runs as
# ($USER is not set in every environment).
$user = $opt_u || $ENV{'USER'} || getpwuid( $<);

@pwent = getpwnam( $user);
die "unknown user $user" unless @pwent;

# fields of the passwd entry: 0 = login name, 6 = gcos (full name), 7 = home
$userid = $pwent[0];
$username = $pwent[6];
$username =~ s/,*$//;       # get rid of these (where do they come from??)
                            # presumably empty trailing gcos sub-fields
$userdir = $pwent[7];

##--- user defined settings ---------------------

# Site specific values: the web host, the contact address used in the page
# footers, and the URL prefixes that are interpolated into the generated HTML.
$basehost    = 'www.computer-scientist.org';
$mail        = 'dietmar@computer-scientist.org';

#$baseurl = "http://$basehost/~$userid";
$baseurl     = 'http://' . $basehost . '/old';
$biburl      = $baseurl . '/bib';
$iconsurl    = '/images/icons';
$downloadurl = '/download/bookshelf';
$cssfile     = 'format.css';

##--- other settings ---------------------

# Timestamp for the page footers and the directory the script was started
# in.  chomp removes only a trailing newline; chop would eat a real
# character if a command ever printed its result without one.
chomp( $DATE = `date`);
chomp( $pwd = `pwd`);

# fixed HTML fragments shared by all generated pages
$html_doctype = "<!doctype html public \"-//W3C//DTD HTML 4.01 Transitional//EN\">";
$html_charset = "<meta http-equiv=\"content-type\" content=\"text/html; charset=us-ascii\">";
$html_css = "<link rel=\"stylesheet\" type=\"text/css\" href=\"/$cssfile\">";
$target_self = "target=\"_self\"";
$target_blank = "target=\"_top\"";     # note: the value is _top, not _blank

# input: -b <name> or bookshelf.bib, always in the start directory
$bibdir = $pwd;
$bibfile = $opt_b || "bookshelf.bib";

# -e only opens the BibTeX file in an editor.  exec returns only when it
# could not start the command, so stop here instead of silently going on to
# generate pages.  vi is used when $EDITOR is not set.
if ($opt_e) {
  $editor = $ENV{'EDITOR'} || "vi";
  exec( "$editor $bibdir/$bibfile")
    || die "cannot start editor \"$editor\" for $bibdir/$bibfile\n$!";
}

# output: -o <name> gives <name>.html (and directory <name> with -s),
# otherwise bookshelf.html / bookshelf
$shelfdir = $pwd;
if ($opt_o) {
  $shelffile = "$opt_o.html";
  $shelfsubdir = "$opt_o" if $opt_s;
}
else {
  $shelffile = "bookshelf.html";
  $shelfsubdir = "bookshelf" if $opt_s;
}

# file written by -i (the BibTeX source with anchors)
$INFO_FILE = "bibtex.html";

# page title: -T <text> or "<full name>'s Virtual Bookshelf"
$username_s = $username . "'s";
$shelftitle = $opt_T || "$username_s Virtual Bookshelf";

# Icon markup.  Every icon is an <img> below $iconsurl whose alt text is
# what a text-only browser shows instead.
$shelf_pic = qq{<img alt="Virtual Bookshelf" src="$iconsurl/tbook_shelf.gif">};
$topic_pic = qq{<img alt="Topic" src="$iconsurl/tbook_topic.gif">};
$read_pic  = qq{<img alt="[+]" border="0" src="$iconsurl/tbook_read.gif">};
$empty_pic = qq{<img alt="[-]" border="0" src="$iconsurl/tbook_empty.gif">};
$own_pic   = qq{<img alt="[own]" border="0" src="$iconsurl/tbook_own.gif">};
$hot_pic   = qq{<img alt="[hot]" border="0" src="$iconsurl/tbook_hot.gif">};

# Replacement texts that process_title() substitutes for TeX constructs.
($PLUS, $MINUS, $MULT)      = ('+', '-', '*');
($RIGHT, $LEFT, $LEFTRIGHT) = ('-->', '<--', '<-->');
($PI, $LAMBDA)              = ('&pi;', '&lambda;');
($LATEX, $TEX)              = ('LaTeX', 'TeX');

# Display name for each known BibTeX entry type (keys are upper case, as
# the types are compared literally).
%NAME = (
  'ARTICLE',       'Article',
  'BOOK',          'Book',
  'BOOKLET',       'Booklet',
  'CONFERENCE',    'Conference',
  'INBOOK',        'In Book',
  'INCOLLECTION',  'In Collection',
  'INPROCEEDINGS', 'In Proceedings',
  'MANUAL',        'Manual',
  'MASTERSTHESIS', q{Master's Thesis},
  'DIPLOMATHESIS', 'Diploma Thesis',
  'SEMINARPAPER',  'Seminar Paper',
  'PRACTREPORT',   'Practical Training Report',
  'ONLINE',        'Online Document',
  'MISC',          'Miscellaneous',
  'PHDTHESIS',     'PhD Thesis',
  'PROCEEDINGS',   'Proceedings',
  'TECHREPORT',    'Technical Report',
  'UNPUBLISHED',   'Unpublished',
);

##--- no need to customize below?? ----------

# With -i, write $INFO_FILE: the BibTeX source inside <pre>, with a line
# "% <a name="key">key</a>" in front of every paragraph that contains an
# entry, so that the bookshelf pages can link to "$INFO_FILE#key".
#
# The entry pattern uses the /m modifier instead of the old "$*=1" switch.
# "$*" is not supported by current perl releases, and it was never reset, so
# giving -i used to change the meaning of "^" and "$" in every pattern
# matched later in the run.
if ($opt_i) {
  chdir( $shelfdir)            || die "cannot chdir to $shelfdir\n$!";
  open( INFO, '>', $INFO_FILE) || die "cannot write $INFO_FILE\n$!";

  print INFO <<HEADER;
    $html_doctype
    <html>
    <head>
    <title>BibTeX</title>
    $html_charset
    $html_css
    </head>
    <body>
    <pre>
HEADER

  chdir( $bibdir)           || die "cannot chdir to $bibdir\n$!";
  open( BIB, '<', $bibfile) || die "cannot read $bibdir/$bibfile\n$!";
  {
    # paragraph mode: one read returns everything up to the next empty line
    local $/ = '';
    while (<BIB>) {
      # first line in the paragraph of the form "@type{key,"
      if (/^@[^{]*\{\s*([^,]*)\s*,/m) {
        print INFO "% <a name=\"$1\">$1</a> <br>\n";
      }
      print INFO;
    }
  }
  close( BIB);

  print INFO <<FOOTER;
    </pre>
    <hr>
    <address>
    <a href="mailto:$mail">$username</a><br>
    generated by <a href="$downloadurl">bookshelf</a>, $DATE
    </address>
    </body>
    </html>
FOOTER

  # buffered write errors only show up when the file is closed
  close( INFO) || die "cannot finish writing $INFO_FILE\n$!";
}

# -t (text, no icons): drop the decorative icons.  The own/hot markers carry
# information, so they are replaced by their alt texts; they used to remain
# <img> tags even in text mode.
if ($opt_t) {
  $shelf_pic = "";
  $topic_pic = "";
  $read_pic = "";
  $empty_pic = "";
  $own_pic = "[own]";
  $hot_pic = "[hot]";
}

# need to work with relative paths here
#
# Read the BibTeX file one paragraph at a time (so entries have to be
# separated by empty lines) and collect, for each accepted entry number $i,
# its values in @TYPE, @KEY, @FLAGS, @TOPICS, @EDITOR, @AUTHOR, @TITLE,
# @RELATED, @SOURCE, @ABSTRACT and @YEAR.  $urls counts the entries that
# have a sourceURL.  TOPICS/NOTES/KEYS/DESCRIPTION comment blocks found in a
# paragraph are handed to the process_* subs.
chdir( $bibdir)           || die "cannot chdir to $bibdir\n$!";
open( BIB, '<', $bibfile) || die "cannot read $bibdir/$bibfile\n$!";
$/='';

# start at 0 so that the summary line of the page shows "0", not a blank
$i = 0;
$urls = 0;

# fields that are stored exactly as captured
%plain_field = (
  'title'       => \@TITLE,
  'sourceURL'   => \@SOURCE,
  'abstractURL' => \@ABSTRACT,
  'year'        => \@YEAR,
);

while (<BIB>) {
  # entry type, when the paragraph starts with "@type{"
  my $type = /^@([^{]*)\{/ ? $1 : undef;

  # @string definitions and a bare "@{" are not entries
  next if defined( $type) && (($type =~ /string/i) || ($type eq ''));

  if ($opt_c && /%\s*TOPICS\s*%\s*((.|\n)*)\s*%\s*ENDTOPICS/) {
    &process_topics( $1);
  }

  if ($opt_n && /%\s*NOTES\s*%\s*((.|\n)*)\s*%\s*ENDNOTES/) {
    &process_notes( $1);
  }

  if ($opt_k && /%\s*KEYS\s*%\s*((.|\n)*)\s*%\s*ENDKEYS/) {
    &process_keys( $1);
  }

  if (/%\s*DESCRIPTION\s*%\s*((.|\n)*)\s*%\s*ENDDESCRIPTION/) {
    &process_description( $1);
  }

  # paragraph that starts with a comment line
  next if /^%[^\n]*\n/;

  # Any other text that does not start with "@type{" is ignored, as the
  # description printed by -m states.  It used to become an entry without
  # type and key.
  next unless defined( $type);

  $i++;
  $TYPE[$i] = $type;

  if (/^@[^{]*\{\s*([^,]*)\s*,/) {
    $KEY[$i] = $1;
    print STDERR "$i: $1\n" if $opt_d
  }

  s/\\-//g;

  # flags and topics are taken while the braces are still there
  if (/\bflags\s*=\s*\{([^}]+)\}/i) {
    $FLAGS[$i] = $1;
  }

  if (/\btopics\s*=\s*\{([^}]+)\}/i) {
    $TOPICS[$i] = $1;
    $TOPICS[$i] =~ s/,\s*/,/g;      # remove whitespaces after ,
  }

  # \v{c} -> c.  The pattern needs the capture group; without it the
  # letter itself was deleted.
  s/\\v\{([A-Za-z]*)\}/$1/g;
  s/[{}]//g;

  # TeX accents -> HTML entities
  s/\\"a/&auml;/g;
  s/\\"o/&ouml;/g;
  s/\\"u/&uuml;/g;
  s/\\"A/&Auml;/g;
  s/\\"O/&Ouml;/g;
  s/\\"U/&Uuml;/g;
  s/\\ss/&szlig;/g;
  s/\\o/&oslash;/g;
  s/\\O/&Oslash;/g;
  s/\\`\\i/&igrave;/g;
  s/\\`\\I/&Igrave;/g;
  s/\\`([A-Za-z])/&$1grave;/g;
  s/\\'\\i/&iacute;/g;
  s/\\'\\I/&Iacute;/g;
  s/\\'([A-Za-z])/&$1acute;/g;

  # A field value ends at a "," that is followed by the next "name =" on a
  # new line, so a field is only found when another field follows it.  The
  # \b keeps a name from matching inside a longer one ("title" inside
  # "booktitle").

  # editor and author: "A and B and others" ends up as "A; B; ..."
  foreach my $spec ([ 'editor', \@EDITOR ], [ 'author', \@AUTHOR ]) {
    my ($field, $list) = @$spec;
    if (/\b$field\s*=\s*(.*?)\s*,\s*\n\s*[a-z]+\s*=/is && $1 ne "") {
      ($$list[$i] = $1) =~ s/\s+and\s+others/ and .../g;
      $$list[$i] =~ s/\s+and\s+/; /g;
    }
  }

  foreach my $field (keys %plain_field) {
    if (/\b$field\s*=\s*(.*?)\s*,\s*\n\s*[a-z]+\s*=/is && $1 ne "") {
      $plain_field{$field}[$i] = $1;
    }
  }

  if (/\brelatedURL\s*=\s*(.*?)\s*,\s*\n\s*[a-z]+\s*=/is) {
    # support for multiple 'relatedURL' entries
    $RELATED[$i] = [ /\brelatedURL\s*=\s*(.*?\S.*?)\s*,\s*\n/ig ];
  }

  $urls++ if ($SOURCE[$i]);
}

close( BIB);

# Build the HTML for every collected entry and append it to each of its
# topics in %TOPIC (and, with -y, to its document type in %DOCTYPE).
foreach $entry (1..$i) {
  &process_title( $TITLE[$entry]);

  # Flags and topics are comma separated lists.  Blanks around the names
  # are dropped: "own, hot" used to yield " hot", which never matched.
  @flags = ();
  if (defined( $FLAGS[$entry])) {
    (my $list = $FLAGS[$entry]) =~ s/^\s+|\s+$//g;
    @flags = split( /\s*,\s*/, $list);
  }

  @topics = ();
  if (defined( $TOPICS[$entry])) {
    (my $list = $TOPICS[$entry]) =~ s/^\s+|\s+$//g;
    @topics = grep { $_ ne '' } split( /\s*,\s*/, $list);
  }
  # entries without a (usable) topic go to "Miscellaneous"
  @topics = ("Miscellaneous") unless @topics;

  $REF = $empty_pic;

  # title, linked to the source when there is one; online documents use
  # $target_blank, everything else $target_self
  $TITLE = "$TITLE[$entry]";
  if (defined( $SOURCE[$entry])) {
    if ($TYPE[$entry] eq "ONLINE") {
      $TITLE = "<a href=\"$SOURCE[$entry]\" $target_blank>$TITLE</a>";
    }
    else {
      $TITLE = "<a href=\"$SOURCE[$entry]\" $target_self>$TITLE</a>";
    }
    $REF = $read_pic;
  }
  if (defined( $ABSTRACT[$entry])) {
    $TITLE .= " (<a href=\"$ABSTRACT[$entry]\" $target_self>Abstract</a>)";
  }
  if (defined( $RELATED[$entry])) {
    # one "(Related)" link per relatedURL
    foreach my $related (@{$RELATED[$entry]}) {
      $TITLE .= " (<a href=\"$related\" $target_blank>Related</a>)";
    }
  }

  # authors, then editors in brackets; a ";" in the editor list means
  # that there is more than one
  $AUTHOR = "";
  if (defined( $AUTHOR[$entry])) {
    $AUTHOR .= "<em>$AUTHOR[$entry]</em> "
  }
  if (defined( $EDITOR[$entry])) {
    if ($EDITOR[$entry] =~ /;/) {
      $AUTHOR .= "<em>[$EDITOR[$entry] (Eds.)]</em> "
    }
    else {
      $AUTHOR .= "<em>[$EDITOR[$entry] (Ed.)]</em> "
    }
  }

  $YEAR = "";
  if (defined( $YEAR[$entry])) {
    $YEAR = ", " . $YEAR[$entry];
  }

  # with -i the icon links to the entry's anchor in the BibTeX listing
  if ($opt_i) {
    $REF = "<a href=\"$biburl/$INFO_FILE#$KEY[$entry]\">$REF</a>";
  }

  # with -f the known flags are shown as decorations; others are ignored
  $DECO = "";
  if ($opt_f) {
    foreach $flag (@flags) {
      if ($flag eq 'own') {
        $DECO .= " $own_pic";
      }
      elsif ($flag eq 'hot') {
        $DECO .= " $hot_pic";
      }
    }
  }

  $HTML = <<ENTRY;
    <table border=\"0\" cellspacing=\"10\" cellpadding=\"0\">
    <tr>
     <td valign="top">$REF</td>
     <td valign="top">
      <b>$TITLE</b><br>
      $AUTHOR($NAME{$TYPE[$entry]}$YEAR)
     </td> 
     <td valign="top">$DECO</td>
    </tr>
    </table>
ENTRY

  if (! exists $NAME{$TYPE[$entry]}) {
    print <<WARNING;
WARNING: unknown doctype found
   entry:   $KEY[$entry]
   doctype: $TYPE[$entry]
WARNING
  }

  foreach $topic (@topics) {
    # a name defined as a class (see process_topics) should only hold
    # topics; the entry is still filed under it
    if (defined( $sub{$topic})) { 
      print <<WARNING;
WARNING: entry in class not allowed
   entry: $KEY[$entry]
   class: $topic
WARNING
    }
    $TOPIC{$topic} .= $HTML;
  }

  if ($opt_y) {
    $DOCTYPE{$TYPE[$entry]} .= $HTML;
  }
}

# need to work with relative paths here
#
# Create the main page (and, with -s, the directory for the per-topic
# pages) and write the page head: title linked to the BibTeX file, the
# entry statistics and the optional description.
chdir( $shelfdir) || die "cannot chdir to $shelfdir\n$!"; 

if ($opt_s) {
  (mkdir( $shelfsubdir, 0755) || die "cannot create $shelfdir/$shelfsubdir\n$!") unless -e $shelfsubdir;
}

# three-argument open: the name given with -o is used literally and is
# never parsed for mode characters or surrounding blanks
open( SHELF, '>', $shelffile) || die "cannot create $shelfdir/$shelffile\n$!";

print SHELF <<HEADER;
  $html_doctype
  <html>
  <head>
  <title>$shelftitle</title>
  $html_charset
  $html_css
  </head>
  <body>
  $shelf_pic
  <h2><a href=\"$bibfile\">$shelftitle</a></h2>
  <p>$i entries, $urls available online ($DATE)</p>
  <p>
  $DESCRIPTION
HEADER

# Index of the main page: the list of topics (classes with their nested
# topics first level only) and, with -y, the list of document types.
print SHELF "<h3>Topics</h3>\n";
print SHELF "<ul>\n";

# Link target of every topic and, with -y, of every document type: a file
# below $shelfsubdir with -s (%FILE remembers its name), otherwise an
# anchor inside the main page.
my @linked = keys %TOPIC;
push( @linked, keys %DOCTYPE) if $opt_y;

foreach my $name (@linked) {
  if ($opt_s) {
    $tmp = $name;
    # remove dangerous characters from filename
    $tmp =~ s|[^\w-]|_|g;
    $FILE{$name} = "$shelfsubdir/$tmp.html";
    $HREF{$name} = $FILE{$name};
  }
  else {
    $HREF{$name} = "#$name";
  }
}

foreach $topic (sort( keys%TOPIC,@classes)) {
  if (defined($super{$topic})) {
    # member of a class: printed below that class
    next;
  }
  elsif (defined( $sub{$topic})) {
    &print_subtopics($topic);
  }
  else {
    print SHELF "<li><a href=\"$HREF{$topic}\">$topic</a>\n";
  }
}

print SHELF "</ul>\n";

# The document type section belongs to -y as a whole.  Its heading and the
# opening <ul> used to be written without -y as well, leaving an empty
# section with a list that was never closed.
if ($opt_y) {
  print SHELF "<hr>\n";
  print SHELF "<h3>Document Types</h3>\n";
  print SHELF "<ul>\n";
  foreach $doctype (sort keys%DOCTYPE) {
    print SHELF "<li><a href=\"$HREF{$doctype}\">$NAME{$doctype}</a>\n";
  }
  print SHELF "</ul>\n";
}

# Write the collected entries: with -s one page per topic (and, with -y,
# per document type) in the files recorded in %FILE; otherwise as sections
# of the main page.  Then finish the main page.
#
# Files are opened with the three-argument form, and every written file is
# closed with a check, because buffered write errors (e.g. a full disk)
# are only reported by close.
if ($opt_s) {
  foreach $topic (sort keys%TOPIC) {
    open( TMP, '>', $FILE{$topic}) || die "cannot create $FILE{$topic}\n$!";

    print TMP <<HERE;
      $html_doctype
      <html>
      <head>
      <title>Virtual Bookshelf: $topic</title>
      $html_charset
      $html_css
      </head>

      <body>
      <h2><a href="../$shelffile">$topic</a></h2>
      <p>
      $topic_notes{$topic}
      $topic_keys{$topic}

      <hr>
      $TOPIC{$topic}

      <hr>
      <address>
      <a href="mailto:$mail">$username</a><br>
      generated by <a href="$downloadurl">bookshelf</a>, $DATE
      </address>
      </body>
      </html>
HERE

    close( TMP) || die "cannot finish writing $FILE{$topic}\n$!";
  }

  if ($opt_y) {
    foreach $doctype (sort keys%DOCTYPE) {
      open( TMP, '>', $FILE{$doctype}) || die "cannot create $FILE{$doctype}\n$!";

      print TMP <<HERE;
        $html_doctype
        <html>
        <head>
        <title>Virtual Bookshelf: $NAME{$doctype}</title>
        $html_charset
        $html_css
        </head>

        <body>
        <h2><a href="../$shelffile">$NAME{$doctype}</a></h2>

        <hr>
        $DOCTYPE{$doctype}

        <hr>
        <address>
        <a href="mailto:$mail">$username</a><br>
        generated by <a href="$downloadurl">bookshelf</a>, $DATE
        </address>
        </body>
        </html>
HERE

      close( TMP) || die "cannot finish writing $FILE{$doctype}\n$!";
    }
  }
}
else {
  # single page: each topic / document type is a section whose anchor name
  # matches the "#name" links of the index
  foreach $topic (sort keys%TOPIC) {
    print SHELF <<HERE;
    <hr>
    <h2>$topic_pic <a name=\"$topic\">$topic</a></h2>\n
    $TOPIC{$topic}\n
HERE
  }

  if ($opt_y) {
    foreach $doctype (sort keys%DOCTYPE) {
      print SHELF <<HERE;
      <hr>
      <h2>$topic_pic <a name=\"$doctype\">$NAME{$doctype}</a></h2>\n
      $DOCTYPE{$doctype}\n
HERE
    }
  }
}

print SHELF <<FOOTER;
  <hr>
  <address>
  <a href="mailto:$mail">$username</a><br>
  generated by <a href="$downloadurl">bookshelf</a>, $DATE
  </address>
  </body>
  </html>
FOOTER

close( SHELF) || die "cannot finish writing $shelfdir/$shelffile\n$!";


# process_title( $title) - turn the TeX constructs of a title into HTML.
#
# The argument is changed in place (through the alias in @_), so it has to
# be a variable of the caller; an undefined title is left alone.  The
# replacement texts are the globals $PI, $PLUS, $MINUS, $MULT, $LEFT,
# $RIGHT, $LEFTRIGHT, $LAMBDA, $TEX and $LATEX.
#
# The order of the substitutions matters: "\ " is turned into a blank
# before the \TeX and \LaTeX rules run, and all "$" that are left over
# are removed last.
sub process_title
{
  return unless defined( $_[0]);

  # $_ is an alias of $_[0] inside the loop, i.e. of the caller's variable
  foreach ($_[0]) {
    s/\$\\pi\$/$PI/g;
    s/\$\^\+\$/$PLUS/g;
    s/\$\^\-\$/$MINUS/g;
    s/\$\^\*\$/$MULT/g;
    s/\$\\leftarrow\$/$LEFT/g;
    s/\$\\rightarrow\$/$RIGHT/g;
    s/\$\\leftrightarrow\$/$LEFTRIGHT/g;
    s/\$\\lambda\$/$LAMBDA/g;
    s/\\,/ /g;
    s/\\ / /g;
    s/\\\#/\#/g;
    s/\\&/&/g;
    s/\\TeX /$TEX/g;
    s/\\TeX\\/$TEX/g;
    s/\\TeX/$TEX/g;
    s/\\LaTeX /$LATEX/g;
    s/\\LaTeX\\/$LATEX/g;
    s/\\LaTeX/$LATEX/g;
    # An opening quote is written `` (or a single `) and a closing quote ''.
    # Each becomes one &quot;; a `` used to produce two of them.
    s/`{1,2}/&quot;/g;
    s/''/&quot;/g;
    s/\$//g;
  }
}


# process_description( $text) - store the text of a DESCRIPTION comment
# block in the global $DESCRIPTION, wrapped in <em> and followed by <hr>.
# "$MAIL" in the text stands for the address in $mail; afterwards every "%"
# (the comment markers of the BibTeX source) is removed.
sub process_description
{
  my $html = join( "\n", "<em>", $_[0], "</em>", "<hr>", "");

  $html =~ s/\$MAIL/$mail/g;
  ($DESCRIPTION = $html) =~ s/%//g;
}


# process_notes( $text) - read the lines of a NOTES comment block,
#
#   % topic1 : note1
#
# and store "note<p>" in the global %topic_notes under the topic name.  A
# later note for the same topic replaces the earlier one; this is reported
# on STDOUT.  Lines without a ":" are skipped.
#
# The topic name ends at the FIRST ":" of the line.  The pattern used to
# split at the last one, which cut notes containing a ":" (e.g. a URL) in
# the wrong place, and it required topic names of at least two characters.
sub process_notes
{
  foreach my $line (split( '%', $_[0])) {
    next unless $line =~ /\s*(\S.*?)\s*:\s*(.*)$/;
    my ($topic, $note) = ($1, $2);

    if (defined( $topic_notes{$topic})) {
      print <<WARNING;
WARNING: note redefined
   topic: $topic
   note:  $note
WARNING
    }
    $topic_notes{$topic} = $note . "<p>";
  }
}


# process_keys( $text) - read the lines of a KEYS comment block,
#
#   % topic1 : keyword11, keyword12
#
# and collect the keywords in the global %topic_keys under the topic name,
# as "<b>Keywords:</b> k1, k2"; further lines for the same topic are
# appended with ", ".  Lines without a ":" are skipped.
#
# The topic name ends at the first ":" of the line (the pattern used to
# split at the last one and required names of at least two characters).
sub process_keys
{
  foreach my $line (split( '%', $_[0])) {
    next unless $line =~ /\s*(\S.*?)\s*:\s*(.*)$/;
    my ($topic, $keywords) = ($1, $2);

    if (defined( $topic_keys{$topic})) {
      $topic_keys{$topic} .= ", " . $keywords;
    }
    else {
      $topic_keys{$topic} = "<b>Keywords:</b> $keywords";
    }
  }
}


# process_topics( $text) - read the lines of a TOPICS comment block,
#
#   % group1 : topic1, topic2, topic3, topic4,
#   % group2 : group1, topic1, topic5,
#
# and record the class hierarchy in three globals: @classes (class names in
# order of first appearance), %sub (class -> "member, member, ...") and
# %super (member -> class it was listed under last).
#
# Differences to the former version, which the same input still satisfies:
# - the class name ends at the first ":" and may be a single character;
# - member lists are stored without surrounding blanks and empty items, so
#   a second line for the same class is appended correctly even if the
#   first one did not end in ",".
sub process_topics
{
  foreach my $line (split( '%', $_[0])) {
    next unless $line =~ /\s*(\S.*?)\s*:\s*(.*)$/;
    my $class = $1;
    (my $list = $2) =~ s/^\s+|\s+$//g;
    my @members = grep { $_ ne '' } split( /\s*,\s*/, $list);

    if (defined( $sub{$class})) {
      $sub{$class} = join( ", ", grep { $_ ne '' } $sub{$class}, @members);
    }
    else {
      push( @classes, $class);
      $sub{$class} = join( ", ", @members);
    }

    foreach my $member (@members) {
      $super{$member} = $class;
    }
  }
}


# print_subtopics( $topic) - write the index item of class $topic to SHELF:
# "<li>topic:" followed by a nested <ul> of its members (from %sub) in
# sorted order.  A member that is a class itself is printed recursively, a
# member with an entry in %HREF as a link, any other member as plain text.
#
# Classes that contain each other (a : b and b : a) would recurse without
# end.  %subtopics_in_progress marks the classes currently being printed;
# when one is reached again, a warning goes to STDOUT and the class is
# printed as a plain item.
sub print_subtopics
{
  my ($topic) = shift;

  if ($subtopics_in_progress{$topic}) {
    print <<WARNING;
WARNING: cyclic class definition
   class: $topic
WARNING
    print SHELF "<li>$topic\n";
    return;
  }
  # the mark is removed again when this call returns
  local $subtopics_in_progress{$topic} = 1;

  print SHELF "<li>$topic:\n<ul>\n";
  foreach my $subtopic (sort split( /,\s*/, $sub{$topic})) {
    if (defined( $sub{$subtopic})) {
      &print_subtopics( $subtopic);
    }
    elsif (defined( $HREF{$subtopic})) {
      print SHELF "<li><a href=\"$HREF{$subtopic}\">$subtopic</a>\n";
    }
    else {
      print SHELF "<li>$subtopic\n";
    }
  }
  print SHELF "</ul>\n";
}
