bkmrkconv.pl
#!/usr/local/bin/perl -w
#
# bkmrkconv - Convert Netscape bookmarks.html into a tree of HTML
# ~~~~~~~~~ (c) Andrew Flegg 1999, 2004.
# Released under the Artistic License.
#
# Syntax:
# bkmrkconv [OPTIONS] ...
#
# (see bkmrkconv --help for full list)
use strict;
my $VERSION = '1.12 (12-Nov-2006)';
my ($ME) = ($0 =~ m!/([^/]+)$!);
my $inputFile = '-'; # File to use as input
my $outputDir = '.'; # Path to output files to and read templates from
my $doContents = 1; # Produce the contents page?
my $justSections = 0; # Only put sections in the contents page?
my $sortOutput = 0; # Sort output
my %outputFiles = (); # Keep track of files we're going to produce
my ($i, $var);
for ($i = 0; $i < scalar(@ARGV); $i++) {
$var = $ARGV[$i];
if (($var eq '-f') || ($var eq '--file')) {
$inputFile = $ARGV[++$i];
} elsif (($var eq '-d') || ($var eq '--dir')){
$outputDir = $ARGV[++$i];
} elsif (($var eq '-n') || ($var eq '--nocontents')) {
$doContents = 0;
} elsif (($var eq '-s') || ($var eq '--onlysections')) {
$justSections = 1;
} elsif (($var eq '-t') || ($var eq '--sort')) {
$sortOutput = 1;
} elsif (($var eq '-h') || ($var eq '--help')) {
print <<EOM;
$ME - Convert Mozilla bookmarks.html into a tree of HTML
(c) Andrew Flegg 1999, 2006. $VERSION.
http://www.bleb.org/software/bkmrkconv/
Syntax:
EOM
print " ".&getSyntax. <<EOM;
Options:
-h, --help Show this online help
-n, --nocontents Don't create the contents.html and so don't require
the existence of OUTPUTDIR/src/contentspage.html
-s, --onlysections Only put sections in the contents page, not all the
links in the source file
-f, --file <FILE> File to read in (default: standard input)
-d, --dir <DIR> Directory to use as OUTPUTDIR (default: current)
-t, --sort Sort output
bkmrkconv is released under the Artistic Licence without any form of
warranty, either express or implied. Please report bugs to
<andrew\@bleb.org>.
EOM
exit;
}
}
if (! (defined($inputFile) && defined($outputDir))) {
print "$ME: syntax error:\n ".&getSyntax;
exit;
}
$outputDir =~ s!/$!!;
chomp(my $oldcwd = `pwd`);
chdir $outputDir;
my $rootpage = &readFile("src/rootpage.html");
my $linkspage = &readFile("src/linkspage.html");
my $contentspage = &readFile("src/contentspage.html") if ($doContents);
chdir $oldcwd;
my %root;
# Read in the file a section at a time
#
open(INFILE, "<$inputFile") or die "Unable to open $inputFile: $!\n";
&readSection(\*INFILE, \%root);
close(INFILE) or die "Unable to close $inputFile: $!\n";
# Sort tree and all sections
&sortTree(\%root, 0);
&sortSections(\%root, 0);
#&printTree(\%root, 0);
&outputTree(\%root, 1, "");
if ($doContents) {
my @links = ();
&sortContents(\%root, \@links, $justSections);
&outputContents(\@links);
}
exit;
# ========================================================================
# -- Get the syntax ------------------------------------------------------
sub getSyntax {
return "$ME [OPTIONS] ... (--help for full list)\n";
}
# -- Read a section recursively ------------------------------------------
#
sub readSection {
my ($fh, $ref) = @_;
my $line;
my $lastRef;
$ref->{sections} = [];
$ref->{links} = [];
$ref->{sublinks} = 0;
while ( defined($fh) && ($line = <$fh>) && ($line !~ m!</DL>!i) ) {
$line =~ s/(\r|\n)//g;
if ($line =~ m!<H1[^>]*>(.*?)</H1>$!i) {
$ref->{name} = 'Root';
$ref->{url} = 'index.html';
} elsif ($line =~ m!<H3 .*?>(.*)</H3>$!i) {
my $skipping = 0;
$skipping = 1 if (index($1, '(P)') >= 0); # Private link
my %sub;
$sub{url} = &hashcode($ref->{name}, $1).".html";
$sub{name} = $1;
&readSection($fh, \%sub);
if (! $skipping) {
push @{ $ref->{sections} }, \%sub;
$ref->{sublinks} += scalar( $sub{sublinks} );
}
} elsif ($line =~ m!<DT><A HREF="(.*?)"[^>]*>(.*?)</A>$!i) {
next if (index($2, '(P)') >= 0); # Private link
my %url;
$url{name} = $2;
$url{url} = $1;
$lastRef = \%url;
push @{ $ref->{links} }, $lastRef;
$ref->{sublinks}++;
} elsif ($line =~ m!<DD>(.*?)$!i) {
if (defined($lastRef)) { $lastRef->{desc} = $1; }
}
}
return;
}
# -- Prints a tree -------------------------------------------------------
#
sub printTree {
my ($ref, $indent) = @_;
my %tree = %{ $ref };
my $i;
print ' ' x $indent.$tree{name}."/\n";
foreach $i (@{ $tree{sections} }) {
&printTree($i, $indent + 2);
}
foreach $i (@{ $tree{links} }) {
print ' '.' ' x $indent.$i->{name}."\n";
}
return;
}
# -- Output the files ----------------------------------------------------
#
sub outputTree {
my ($ref, $root, $path) = @_;
my %tree = %{ $ref };
my $page = ($root ? $rootpage : $linkspage);
my ($i, $wrap);
study($page);
my ($sBlock) = ($page =~ m%<!--SECTION-->(.*)<!--/SECTION-->%si);
my ($lBlock) = ($page =~ m%<!--LINK-->(.*)<!--/LINK-->%si);
my $dBlock;
if ($lBlock) {
($dBlock) = ($lBlock =~ m%<!--DESC-->(.*)<!--/DESC-->%si);
$lBlock =~ s%(<!--DESC-->).*?<!--/DESC-->%$1%sig;
}
$page =~ s%(<!--(SECTION|LINK)-->).*?<!--/(SECTION|LINK)-->%$1%sig;
$path =~ s/^ : //;
my $sectionTable = ($page =~ m%<table[^>]*?>.{0,16}<!--SECTION-->%si);
my $linkTable = ($page =~ m%<table[^>]*?>.{0,16}<!--LINK-->%si);
my $numSections = scalar(@{ $tree{sections} });
my $numLinks = scalar(@{ $tree{links} });
if (($sBlock) && ($numSections > 0)) {
my $section;
$wrap = 0;
foreach $i (@{ $tree{sections} }) {
my ($u, $n, $l, $s) = ($i->{url},
$i->{name},
$i->{sublinks},
&sublinks($i));
$section = $sBlock;
$section =~ s/\$URL/$u/g;
$section =~ s/\$NAME/$n/g;
$section =~ s/\$NEXT/$s/g;
$section =~ s/\$NUMLINKS/$l/g;
if ($sectionTable) {
$section = ($wrap ? $section.'</tr>' : '<tr class="section">'.$section);
$wrap = 1 - $wrap;
}
$page =~ s%(<!--SECTION-->)%$section$1%;
&outputTree($i, 0, "$path : <a href=\"$tree{url}\">$tree{name}</a>");
}
$page =~ s%(<!--SECTION-->)%</tr>$1% if $wrap; # Finish off row
}
if (($lBlock) && ($numLinks > 0)) {
my ($link, $desc);
$wrap = 0;
foreach $i (@{ $tree{links} }) {
my ($u, $n, $d) = ($i->{url}, $i->{name}, $i->{desc});
if (! $d) { $d = ""; }
$link = $lBlock;
$link =~ s/\$URL/$u/g;
$link =~ s/\$NAME/$n/g;
if ($dBlock && $d) {
$desc = $dBlock;
$desc =~ s/\$DESC/$d/g;
} else {
$desc = "";
}
$link =~ s%<!--DESC-->%$desc%;
if ($linkTable) {
$link = ($wrap ? $link.'</tr>' : '<tr class="link">'.$link);
$wrap = 1 - $wrap;
}
$page =~ s%(<!--LINK-->)%$link$1%;
}
$page =~ s%(<!--LINK-->)%</tr>$1% if $wrap; # Finish row if necessary
}
# -- Post processing of the page
#
my $pathNoURL = $path;
my $date = scalar(localtime());
$pathNoURL =~ s/<.*?>//g;
$page =~ s/\$TITLE/$tree{name}/g;
$page =~ s/\$PATHURL/$path/g;
$page =~ s/\$PATH/$pathNoURL/g;
$page =~ s/\$DATE/$date/g;
$page =~ s%<!--\s*EXEC\s+"([^>]*?)"\s*-->%my $p = quotemeta($1); `$p 2>/dev/null`%eg;
# If-Then-Else and If-Then segments
#
$page =~ s/<!--\s*IF\s+(.*?)\s+THEN\s*-->(.*?)<!--\s*ELSE\s*-->(.*?)<!--\s*ENDIF\s*-->/
(eval($1) ? $2 : $3)
/seg;
$page =~ s/<!--\s*IF\s+(.*?)\s+THEN\s*-->(.*?)<!--\s*ENDIF\s*-->/
(eval($1) ? $2 : "")
/seg;
$page =~ s/^\s+//gm; # Trim leading whitespace
$page =~ s/\s+$//gm; # Trim trailing whitespace
$page =~ s/<!--(SECTION|LINK)-->//g; # Tidy up the source
open(OUTFILE,">$outputDir/$tree{url}") or die "Unable to write file: $!\n";
print OUTFILE $page;
close(OUTFILE);
return;
}
# -- Prints a tree -------------------------------------------------------
#
sub sortContents {
my ($ref, $arr, $sections) = @_;
my %tree = %{ $ref };
my $i;
foreach $i (@{ $tree{sections} }) {
if ($sections) {
$i->{sectionurl} = "<a href=\"$tree{url}\">$tree{name}</a>";
push @{ $arr }, $i;
}
&sortContents($i, $arr, $sections);
}
if (! $sections) {
foreach $i (@{ $tree{links} }) {
# Add a URL for the section this link comes from and add to list...
$i->{sectionurl} = "<a href=\"$tree{url}\">$tree{name}</a>";
push @{ $arr }, $i;
}
}
return;
}
# -- Output the sorted list ----------------------------------------------
#
sub outputContents {
my ($arr) = @_;
my @sorted = sort { lc($a->{name}) cmp lc($b ->{name}) } @{ $arr };
my $letter = ord('!');
my $page = $contentspage;
study($page);
my ($sBlock) = ($page =~ m%<!--SECTION-->(.*)<!--/SECTION-->%si);
my ($lBlock) = ($page =~ m%<!--LINK-->(.*)<!--/LINK-->%si);
$sBlock =~ s%(<!--LINK-->).*?<!--/LINK-->%$1%sig;
$page =~ s%(<!--SECTION-->).*?<!--/SECTION-->%$1%sig;
my ($s, $section, $link) = (undef, $sBlock, undef);
foreach $i (@sorted) {
if ((($s = ord( lc($i->{name}) )) > $letter) &&
($s >= ord('a'))) {
$section =~ s/\$PREFIX/uc(chr($letter))/eg;
$page =~ s%(<!--SECTION-->)%$section$1%;
$section = $sBlock;
$letter = $s;
}
my ($u, $n, $p) = ($i->{url}, $i->{name}, $i->{sectionurl});
$link = $lBlock;
$link =~ s/\$URL/$u/g;
$link =~ s/\$NAME/$n/g;
$link =~ s/\$SECTION/$p/g;
$section =~ s%(<!--LINK-->)%$link$1%;
}
$section =~ s/\$PREFIX/uc(chr($letter))/eg; # Update whatever was left
$page =~ s%<!--SECTION-->%$section%; # in $section...
my $date = scalar(localtime());
$page =~ s/\$DATE/$date/g;
$page =~ s%<!--\s*EXEC\s+"([^>]*?)"\s*-->%my $p = quotemeta($1);`$p 2>/dev/null`%eg;
$page =~ s/^\s+//gm; # Trim leading whitespace
$page =~ s/\s+$//gm; # Trim trailing whitespace
$page =~ s/<!--LINK-->//g; # Tidy up the source
open(OUTFILE,">$outputDir/contents.html") or die "Unable to write file: $!\n";
print OUTFILE $page;
close(OUTFILE);
return;
}
# -- Produces a URL for a new section ------------------------------------
#
sub hashcode {
my ($from, $to) = @_;
my $prefix = lc($from."_".$to);
$prefix =~ s/\W//g;
my $tail = "";
if ($outputFiles{$prefix}) { $tail = "001"; }
while ($outputFiles{$prefix.'_'.$tail}) { $tail++; }
if ($tail) { $prefix .= '_'.$tail; }
$outputFiles{$prefix} = 1; # Mark this as used
return $prefix;
}
# -- Gets a string for the first three URLs of a section -----------------
#
sub sublinks {
my ($ref) = @_;
my %tree = %{ $ref };
my @sections = @{ $tree{sections} };
my @links = @{ $tree{links} };
my $data = "";
my $offset = scalar(@sections);
my @sublink;
my $i;
for ($i = 0; $i < 2; $i++) {
if ($i < $offset) {
$sublink[$i] = $sections[$i];
} elsif ($i < (scalar(@links) + $offset)) {
$sublink[$i] = $links[$i - scalar(@sections)];
}
}
foreach $i (@sublink) {
$data .= '<a href="'.$i->{url}.'">'.$i->{name}.'</a>, ';
}
if ((scalar(@links) + $offset) < 3) {
chop($data); chop($data);
} else {
$data .= '...';
}
return $data;
}
# -- Reads in a file and returns its content -----------------------------
#
sub readFile {
my ($file) = @_;
my $data = '';
open(INFILE, "<$file") or die "Unable to open $file: $!\n";
while(<INFILE>) { $data .= $_; }
close(INFILE) or die "Unable to close $file: $!\n";
while ($data =~ m/<!--\s*INCLUDE\s*"(.*)"\s*-->/i) {
my $file = $1;
my $otherFile = readFile($file);
$data =~ s/<!--\s*INCLUDE\s*"$file"\s*-->/$otherFile/ig;
}
$data =~ s#</head>#<meta name=\"generator\" content=\"bkmrkconv-$VERSION\" />\n</head>#i;
return $data;
}
# -- Sorts a tree (Most of this code is from printTree)------------
sub sortTree {
my ($ref, $indent) = @_;
my %tree = %{ $ref };
my $i;
foreach $i (@{ $tree{sections} }) {
&sortTree($i, $indent + 2);
}
my @t = sort { $indent.$a->{name} cmp $indent.$b->{name} } @{$tree{links} };
@{ $tree{links} } = @t;
return;
}
# -- Sorts the Sections (Most of this code is from printTree)------------
sub sortSections {
my ($ref, $indent) = @_;
my %tree = %{ $ref };
my $i;
my @t = sort { $indent.$a->{name} cmp $indent.$b->{name} } @{$tree{sections}
};
@{ $tree{sections} } = @t;
foreach $i (@{ $tree{sections} }) {
&sortSections($i, $indent + 2);
}
return;
}
Generated by GNU Enscript 1.6.5.90.