#!/usr/bin/perl
#
# script to convert omniweb bookmarks.html file to a xbel
# compliant xml file.
#
# usage:
#   omniweb2xbel [-a] bookmarks.html b.xml
#     -a : use url to retrieve description, following link
#
# author: js@simulakron.de
# 2003/11/18 0.0.1 start of project
# 2003/11/26 0.0.2 working version
# 2003/11/27 0.0.3 adding automatic description recovery from href
# free for fair use. GPL applies.
#
# NOTE(review): the copy of this script that was reviewed had every
# HTML/XML tag stripped out of it (the <FILE> reads, the tag literals in
# the regexes and the XBEL header/footer strings).  Those parts were
# reconstructed from the Netscape-style bookmark format and from XBEL;
# each reconstructed spot is marked NOTE(review) and must be checked
# against a pristine copy or a real OmniWeb export before use.

use strict;
use warnings;

use Getopt::Std;
use LWP::UserAgent;
use File::Temp qw/ tempfile tempdir /;
use HTML::SummaryBasic;

my $usage = "usage: omniweb2xbel [-a] bookmarks.html b.xml\n";

# -a : fetch every bookmarked page and use a summary of it as description.
my %opt;
getopts('a', \%opt) or die $usage;
@ARGV == 2 or die $usage;

# Shared by hole_datei(); created once for the whole run.
my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1 ");

# Timestamp that goes into the title of the generated bookmark file.
my (undef, $min, $hour, $mday, $mon, $year) = localtime(time);
my $wann = sprintf("%02d:%02d - %02d.%02d.%4d",
                   $hour, $min, $mday, $mon + 1, $year + 1900);

# Output is collected here so that a later <DD> line can still patch the
# description of the bookmark entry that was pushed just before it.
# NOTE(review): header reconstructed.  The damaged source showed four
# pushes (one single-quoted string, the "JS Bookmarks $wann " title, one
# more single-quoted string, and " "); their exact markup is unknown.
my @out = (
    '<?xml version="1.0"?>' . "\n",
    '<!DOCTYPE xbel PUBLIC "+//IDN python.org//DTD XML Bookmark Exchange Language 1.0//EN//XML" "http://www.python.org/topics/xml/dtds/xbel-1.0.dtd">' . "\n",
    '<xbel version="1.0">' . "\n",
    "<title>JS Bookmarks $wann </title>",
    " ",
);

# -------------------- W O R K ----------------

my ($filename, $outfilename) = @ARGV;

open my $in, '<', $filename
    or die "Can't open input `$filename': $!";
open my $out_fh, '>', $outfilename
    or die "Can't open output `$outfilename': $!";

# Skip the file header: everything up to and including the first <DL>.
# NOTE(review): tag reconstructed; the original test was case-sensitive.
while (my $header_line = <$in>) {
    last if $header_line =~ m{<DL>}i;
}

LINE:
while (my $f = <$in>) {
    # chomp, not chop: a final line without newline must keep its last
    # character.
    chomp $f;

    # Ignore everything after the closing tag of the document.
    # NOTE(review): the tag name was lost; </BODY> and </HTML> are guesses.
    last LINE if $f =~ m{</(?:BODY|HTML)>}i;

    # $ok records which rewrite rule fired last (0 = none); it is only
    # used to decide whether the line is emitted and in the diagnostics.
    my $ok = 0;
    my $url;

    # NOTE(review): all tag literals in the four rules below are
    # reconstructed; the newlines and the title captures are original.

    # Start of a folder's content list: carries no information for XBEL.
    $f =~ s{<DL><p>}{}i and $ok = 1;

    # Folder heading -> <folder> with its title.
    $f =~ s{<DT><H3[^>]*>(.*)</H3>}{<folder>\n <title>$1</title>\n}i
        and $ok = 2;

    # End of a folder's content list -> end of the folder.
    $f =~ s{</DL>(?:<p>)?}{</folder>\n}i and $ok = 3;

    # Link -> <bookmark> with title and a still empty description.
    if ($f =~ s{<DT><A\s[^>]*?HREF="([^"]*)"[^>]*>(.*)</A>}
               {<bookmark href="$1">\n<title>$2</title>\n<desc></desc>\n</bookmark>\n}ix) {
        $ok  = 4;
        $url = $1;
    }

    # With -a, follow the link and use a summary of the page as description.
    if ($ok == 4 && $opt{a}) {
        my $desc = hole_sum_fuer_url($url);
        $f =~ s{(.*<desc>)(</desc>.*)}{$1$desc$2} and $ok = 44;
    }

    # A description line belongs to the bookmark emitted just before it:
    # store it there, but only if that description is still empty.
    # Nothing new is emitted for this line.
    if ($f =~ m{^\s*<DD>(.*)}i) {
        my $besch = $1;
        $out[-1] =~ s{(.*<desc>)(</desc>.*)}{$1$besch$2};
        next LINE;
    }

    if ($ok != 0) {
        push @out, $f;
        next LINE;
    }

    # Lines that are silently dropped.
    # NOTE(review): the tag tested here was lost; <HR> / <p> are guesses.
    next LINE if $f =~ m{<(?:HR|P)>}i;
    # NOTE(review): \w* skips empty and single-word lines; \s* (blank
    # lines) may have been intended.  Left as found.
    next LINE if $f =~ m{^\w*$};

    # Anything else is unknown input: report it instead of guessing.
    print "\n++++++++++++ $ok ++++++++++++++++++\n";
    print "+++ was tuen mit\n";
    print "+++ $f +++ \n";
    print "\n++++++++++++++++++++++++++++++\n";
}

# NOTE(review): footer reconstructed (the source showed an empty string).
push @out, "</xbel>\n";

print {$out_fh} @out
    or die "Can't write to `$outfilename': $!";
# Buffered write errors only surface on close.
close $out_fh or die "Can't close `$outfilename': $!";
close $in;

#--------------------- E N D -------------------
exit;

# --------------------- helpers -----------------

# datei_summary($path)
#   Builds a one-line summary of the HTML file at $path with
#   HTML::SummaryBasic.  The AUTHOR, TITLE, HEADLINE, FIRST_PARA and
#   DESCRIPTION fields are filtered for placeholder values and joined
#   with ':'; duplicates are dropped.  Returns '' if nothing usable is
#   found.
sub datei_summary {
    my ($document) = @_;

    my $p = HTML::SummaryBasic->new({
        PATH          => $document,
        NOT_AVAILABLE => "NA",
        # FIELDS => ['H1'],
    });

    my %seen;
    my @parts;
    for my $field ('AUTHOR', 'TITLE', 'HEADLINE', 'FIRST_PARA', 'DESCRIPTION') {
        my $v = $p->{SUMMARY}->{$field};
        next if !defined $v;
        next if $v =~ m/^NA$/;
        # NOTE(review): also drops every single-word value; \s* may have
        # been intended.  Left as found.
        next if $v =~ m/^\w*$/;
        next if $v =~ m/^\@/;
        next if $v =~ m/^\[IMG\]/i;
        next if $v =~ m/^untitled$/i;
        # Keep the field order above; hash key order would be random.
        push @parts, $v unless $seen{$v}++;
    }

    return join(':', @parts);
}

# hole_datei($url, $fh)
#   Fetches $url with the shared user agent and, on success, writes the
#   response body to the open handle $fh.  Progress goes to STDOUT.
#   Returns the true/false success flag of the response.
sub hole_datei {
    my ($urle, $fh) = @_;

    print "=$urle= :";
    my $req = HTTP::Request->new(GET => $urle);
    $req->header(Accept => "text/html, */*;q=0.1");

    my $res = $ua->request($req);

    if ($res->is_success) {
        print " ok\n";
        print {$fh} $res->content;
    }
    else {
        print "Bad luck this time!\n";
    }

    return $res->is_success;
}

# hole_sum_fuer_url($url)
#   Downloads $url into a temporary file and returns the summary of that
#   page, made safe for insertion into the XML output.  Returns a single
#   space when the page could not be fetched.
sub hole_sum_fuer_url {
    my ($url) = @_;

    my $sum = " ";
    my ($fh, $filename) = tempfile("url-summary.tmp.XXXXXXXX",
                                   SUFFIX => ".html",
                                   DIR    => "/tmp",
                                   UNLINK => 1);

    my $geholt = hole_datei($url, $fh);

    # The summary module reads the file by name, so the data must be on
    # disk first; without this the handle's buffer may still hold it.
    close $fh or warn "Can't close `$filename': $!";

    if ($geholt) {
        $sum = datei_summary($filename);
        $sum =~ s/[<>!#\@\[\]\{\}]+//g;
        # A bare '&' would make the output ill-formed XML.
        # NOTE(review): assumes the summary is decoded text, not HTML
        # with entities -- confirm against HTML::SummaryBasic.
        $sum =~ s/&/&amp;/g;
    }

    # One temp file per bookmark adds up; do not wait for program exit.
    unlink $filename;

    return $sum;
}

#### ---------- this is what the input looks like: ----------
# NOTE(review): the markup of both examples was lost together with the
# other tags; only the text content survived.
#
#   Suchen
#
#   Sofort buchbar: Salome Tickets - Oper
#   Der Opernführer für Eilige !
#   AlleAuktionen.de - Die Suche nach Auktionen und Versteigerungen
#
#### ---------- and this is what it should become: ----------
#
#   linux
#   dies und das
#
#   der kernel
#   eben da
#
#   script to convert mozilla/netscape bookmarks.html file to a xbel
#   script to convert mozilla/netscape bookmarks.html file to a xbel
#   compliant xml file.
#