I don't know about the rest of you, but when the site went down for the move (parts of November and December 2003) it really made me wish I had captured a node backup long ago. Unfortunately, due to various problems, like a) not having a working perl development environment and b) my awe-inspiring laziness, I hadn't. So, with my free time (ha!) over the past month or so, I rewrote the script to provide a CGI interface: it now runs from a central server, and non-coders can use it with a minimum of effort.
As of earlier today, it is finished and working perfectly, so I am providing a server running it at http://www.postreal.org/nodebackup . Check it out.
If you want to mirror the script on your own server, message me once your copy is running, and I will post a link to it in this writeup. The interface and requirements of the script should be fairly obvious from its source.
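If you would rather drive it from a script than a browser, the CGI just takes an ordinary GET query: a username parameter and an optional singleFileMode flag. Here is a minimal sketch of a command-line client; the script path on my server and the username are assumptions, so adjust both for whichever mirror you point it at.

#!/usr/bin/perl -w
# e2backup_client.pl - drive a running copy of the backup CGI.
# The URL path and username below are placeholders, not gospel.
use strict;
use LWP::UserAgent;

my $ua = LWP::UserAgent->new(agent => 'e2backup_client');
my $url = 'http://www.postreal.org/nodebackup/e2backup_cgi.pl'
        . '?username=some_noder&singleFileMode=1';
my $response = $ua->get($url);
die "request failed: ", $response->status_line, "\n" unless $response->is_success;
print $response->content; # the progress page, ending in a link to the zip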
#!/usr/bin/perl -w
# e2backup_cgi.pl - gathers a user's nodes from everything2.com and
# lets users download them from a central server.
# Portions Copyright (C) 2000,2001 Will Woods
# Portions Copyright (C) 2001,2002,2003 Arthur Shipkowski
#
# Portions Copyright (C) 2003 J. Chatterton
# Distributed under the terms of the GNU General Public License,
# included here by reference.
#
# This program is being maintained by J. Chatterton; please contact
# him with any questions, patches, etcetera.
use Archive::Zip;         # This will itself require Compress::Zlib.
use LWP::UserAgent;       # This and HTTP::Request are both part of
use HTTP::Request;        # libwww-perl, available from CPAN.
use CGI qw/unescapeHTML/; # The CGI package should be available at your
                          # friendly local CPAN mirror, too.
my $query = CGI->new;
my $username = lc($query->param('username') || ''); # lowercase; avoid undef warnings
my $singleFileMode = $query->param('singleFileMode');
my $sysdate = localtime;
my $baseurl = "http://www.everything2.com/index.pl";
my $ua = LWP::UserAgent->new(agent => "e2backup_cgi");
$ua->env_proxy(); # honor http_proxy and friends, if set
# Get the User Search XML ticker page, and array-ify it.
my @data = split(/\n/, getusernameXMLTicker()) or die "failed to fetch the user search ticker";
my $outputfilename = "../e2generated/".$username."_index.html";
my $zipfilename = "../e2generated/".$username."_index.zip";
if ($singleFileMode) {
    $outputfilename = "../e2generated/".$username.".html";
    $zipfilename = "../e2generated/".$username.".zip";
}
sleep(3); # be polite: pause before hitting the E2 server again
## Begin CGI output.
$| = 1;   # unbuffer STDOUT so progress output reaches the browser immediately
print "Content-type: text/html\n\n";
print htmlheader(); # open <html><body> to match the </body></html> closers below
if (-e $zipfilename) {
    ## An archive from an earlier run still exists, so just link to it.
    ## (This assumes something outside the script purges old archives daily.)
    print "Content has already been generated in the past 24 hours. Right click and\n";
    print "save it <a href=\"$zipfilename\">here</a>.\n";
    print "</body></html>\n";
    exit 0;
} else {
    ## New search. Create the main file.
    open(NODEFILE, ">$outputfilename") or die "cannot open $outputfilename: $!";
    print NODEFILE htmlheader();
    print NODEFILE "<center><big>Writeups by $username</big><br>Snapshot taken: $sysdate</center><br><br>\n";
    close(NODEFILE);
}
my $writeupcount = scalar(@data);
if ($writeupcount <= 1) {
    print "<p><b>E2 server error</b>, unable to get content for $username!</p>\n";
    print "<p>Check that the username is correct and try again in ten minutes.</p>\n";
    print "</body></html>\n";
    exit;
}
print "<p>Checking $writeupcount lines, please stand by until complete:</p>\n<p>\n";
$writeupcount = 0;
my %nodelist;
# Read the info out of the User Search page.
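# For reference, each writeup line in the ticker looks roughly like
# this (reconstructed from the regexes below, not an official spec):
#   <writeup node_id="123456" createtime="2003-11-30 12:34:56">Some node title (idea)</writeup>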
foreach (@data) { # loop over each line in the page
    $writeupcount++;
    print " $writeupcount "; # running progress counter in the browser
    if (/^<writeup/g) { # if this line is about a writeup..
        ## Put the line's attributes into a hash, fresh for each writeup.
        my %n;
        while (/ (\w+)=\"(.*?)\"/gc) {
            $n{$1} = $2;
        }
        # Get the node title and type. Declaring them with my() also
        # silences the "used only once" warning the old code fought with.
        my ($name, $type) = />(.*) \(([a-z]+)\)<\/writeup>/gc;
        my $createtime = $n{createtime};
        my $nodeid = $n{node_id};
        my $nodecontent = getXMLwu($nodeid);
        if (!($nodecontent =~ m{<doctext>(.*)</doctext>}is)) {
            print "<b>E2 server error</b>, unable to get content for $name<br>\n";
            next; # skip this writeup rather than saving a stale capture
        }
        $nodecontent = unescapeHTML($1);
        ## Create a friendly html-ish formatted writeup.
        my $htmlformat = "";
        if (!$singleFileMode) {
            $htmlformat .= htmlheader();
        }
        $htmlformat .= "<!-- Below is e2 node #$nodeid -->\n";
        $htmlformat .= '<table border="3" bordercolor="#000000"><tr><td>'."\n";
        $htmlformat .= "<b>Node title:</b> <a href=\"http://www.everything2.com/index.pl?node_id=$nodeid\">$name</a> <br> \n";
        $htmlformat .= "<b>Submit date:</b> $createtime\n";
        $htmlformat .= "</td></tr></table>\n";
        $htmlformat .= $nodecontent . "\n";
        $htmlformat .= "<br><br><br>\n";
        ## Open whatever should be open.
        if ($singleFileMode) {
            open(NODEFILE, ">>$outputfilename") or die "cannot append to $outputfilename: $!";
        } else {
            ## Relies on nodeid being unique.
            open(IDXFILE, ">>$outputfilename") or die "cannot append to $outputfilename: $!";
            print IDXFILE "<a href=\"$nodeid.html\">$name</a><br>\n";
            $nodelist{$nodeid} = 1;
            close(IDXFILE);
            my $singlefilename = "../e2generated/".$nodeid.".html";
            open(NODEFILE, ">$singlefilename") or die "cannot open $singlefilename: $!";
        }
        ## Add the content.
        print NODEFILE $htmlformat;
        close(NODEFILE);
    }
    sleep(3); # pause between lines so we don't hammer the E2 server
}
print "<b>Done!</b></p>\n";
## Archive and delete downloaded data.
my $zip = Archive::Zip->new();
my $zname = "$username.zip";
if ($singleFileMode) {
    $zip->addFile($outputfilename, "$username.html");
    $zip->writeToFileNamed("../e2generated/$username.zip");
} else {
    my @keys = keys %nodelist;
    $zip->addFile($outputfilename, "${username}_index.html");
    foreach my $nodeid (@keys) {
        $zip->addFile("../e2generated/$nodeid.html", "$nodeid.html");
    }
    $zip->writeToFileNamed("../e2generated/${username}_index.zip");
    foreach my $nodeid (@keys) {
        unlink("../e2generated/$nodeid.html");
    }
    $zname = "${username}_index.zip";
}
unlink($outputfilename);
print "<p>Right click and save this zip file: <a href=\"$zipfilename\">$zname</a></p>\n";
print "</body></html>\n";
########## Subs are delicious. ##########
## Gets the node list for a given username. Seems to be
## case-insensitive, at the discretion of everything2.
sub getusernameXMLTicker {
    # 762826 = User Search XML Ticker
    my $req = HTTP::Request->new('GET', "$baseurl?node_id=762826&usersearch=$username");
    return ($ua->request($req)->content());
}
# Takes one argument: $node_id.
# Assumes that $ua is a valid LWP::UserAgent object.
# Returns the contents of the XML writeup page in a scalar.
sub getXMLwu {
    my $req = HTTP::Request->new('GET', "$baseurl?node_id=$_[0]&displaytype=xmltrue");
    return ($ua->request($req)->content());
}
## Returns a valid html header. Prettier code than having it above.
sub htmlheader {
    my $header = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'."\n";
    $header .= "<html><head></head><body>\n";
    return $header;
}
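One last caveat: nothing in the script itself ever deletes old zip files, so the "generated in the past 24 hours" message is only honest if something sweeps ../e2generated periodically. A minimal sketch of such a sweeper, assuming the same relative directory and a daily cron run, might look like this (hypothetical; not part of the CGI above):

#!/usr/bin/perl -w
# sweep_e2generated.pl - hypothetical companion script that deletes
# generated files more than a day old, keeping the "past 24 hours"
# message honest. Run it daily from cron.
use strict;

my $dir = "../e2generated"; # same directory the CGI writes into (assumed)
opendir(my $dh, $dir) or die "cannot open $dir: $!";
foreach my $file (readdir $dh) {
    next unless $file =~ /\.(?:zip|html)$/;
    my $path = "$dir/$file";
    unlink $path if -M $path > 1; # -M gives the file's age in days
}
closedir $dh;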