#!/usr/bin/perl -w use strict; use Net::IRC; use LWP::Simple qw( get ); use LWP::UserAgent qw(); use HTTP::Request qw(); use Time::HiRes qw( gettimeofday tv_interval ); use POSIX qw( strftime ); my $test = @ARGV && $ARGV[0] eq 'test'; my $SERVER = 'test.net'; my $PORT = 6667; my @CHANNELS = $test ? ( '#testing' ) : ( '#real', '#testing' ); my $NICK = 'tub'; # build up the url regex my $URL; { my $g = sub { return "(?:$_[0])"; }; my $schemes = $g->( join '|', map { $_, $_."s" } qw( http ftp nntp news irc ) ); # note that, for spotting urls, we want to be fairly permissive... my $mark = quotemeta '-_.!~*\'()'; my $alnum = 'a-zA-Z0-9'; my $reserved = quotemeta ';/?:@&=+$,'; my $unwise = quotemeta '{}|\\^[]`'; my $uri_cset = '[' . $mark . $alnum . $reserved . $unwise . '#' . ']'; my $hex = '[0-9a-fA-F]'; my $esc = $g->( '%' . $hex . $hex ); $URL = qr/$schemes : (?: $uri_cset+ | $esc+ )+ /x; print "\$URL=\"$URL\"\n" if $test; }; my $SEC_PAUSE = 5; my $ALREADY_SHORT_LEN = 30; my $ONE_LINE = 25; # ---------------------------------------------------------------------- my $OFS = $, || ''; sub xlog { my ( @message ) = @_; my ( $s, $us ) = gettimeofday(); my $ts = ( strftime( '%Y-%m-%dT%H:%M:%S', gmtime $s ) . sprintf( '.%06dZ', $us ) ); my $message = join $OFS, @message; $message =~ s/\w*\z/\n/; my $caller = (caller 1)[3]; if ( $caller ) { $caller =~ s!^.*::!!; } else { $caller = '(main)'; } print STDERR "$ts: $caller: $message"; } sub extract_urls ( $ ) { my ( $text ) = @_; xlog "text='$text'"; my @urls = ( $text =~ m/($URL)/g ); foreach my $url ( @urls ) { # remove any trailing comma. not sure what needed this, but # since it's in here... $url =~ s!\.,$!!; # get rid of trailing parens (but only if url doesn't have a # starting paren in it) if ( $url =~ m!\)$! && $url !~ m!\(! ) { $url =~ s!\)$!!; } xlog "url='$url'"; } return @urls; } sub just_nick ( $ ) { my ( $nick_id_host ) = @_; return $nick_id_host =~ /^([^!]+).*/ ? $1 : $nick_id_host; } # ---------------------------------------------------------------------- { my $last_req_time_t = 0; my %cache; sub get_tiny_url ( $ $ ) { my ( $conn, $url ) = @_; xlog "url='$url'\n"; if ( length($url) <= $ALREADY_SHORT_LEN ) { xlog "no-op: already short"; return; } elsif ( my $tu = $cache{$url} ) { xlog "success: tu='$tu' (cached)"; return $tu; } # be nice. my $sec_since_last = time() - $last_req_time_t; sleep $SEC_PAUSE-$sec_since_last if $sec_since_last < $SEC_PAUSE; $last_req_time_t = time(); my $req_url = "http://tinyurl.com/create.php?url=$url"; my $t0 = [ gettimeofday() ]; my $resp; eval { local $SIG{ALRM} = sub { die "timeout\n" }; alarm 30; $resp = get $req_url; alarm 0; }; my $elapsed = tv_interval $t0; if ( $@ ) { $conn->privmsg( '#tub', "error on \"$url\" after $elapsed sec: $@" ); xlog "error: $@ ($elapsed sec)"; return "ERROR: $@"; } if ( $resp =~ m!
(?:)(http://tinyurl.com/\w+)!i ) { my $rv = $1; $cache{$url} = $rv; $conn->privmsg( '#tub', "got \"$url\" => \"$rv\" in $elapsed sec" ); xlog "success: tu='$rv' ($elapsed sec)"; return $rv; } else { $conn->privmsg( '#tub', "no tinyurl in response for \"$url\"" . " after $elapsed sec" ); xlog "error: timeout ($elapsed sec)"; return; } } } # ---------------------------------------------------------------------- sub on_endofmotd { my ( $self, $event ) = @_; foreach my $channel ( @CHANNELS ) { $self->join( $channel ); } } my $last_try = 0; sub on_nicknameinuse { my ( $self, $event ) = @_; my $nick_try = $NICK . $last_try++; $self->nick( $nick_try ); } sub send_tiny_urls ( $ $ $ $ ) { my ( $conn, $from, $to, $text ) = @_; my $is_pub = ( $from ne '' ); return if $is_pub && length( $text ) < $ONE_LINE; my @urls = extract_urls $text or return; my %already_sent; foreach my $url ( @urls ) { # send each url no more than once an hour my $t0 = time(); next if $already_sent{$url} && ( $t0 - $already_sent{$url} < 3600 ); $already_sent{$url} = $t0; my $tu = get_tiny_url $conn, $url or next; if ( $tu =~ /^ERROR: (.*)/ ) { # $conn->privmsg( $from, "error getting tiny URL for '$url': $1" ); next; } # remove scheme $url =~ s!^\w+:/*!!; # elide center bits if necessary if ( length($url) > 25 ) { $url = substr( $url, 0, 10 ) . "..." . substr( $url, -10 ); } my $out = ( $is_pub ? "$from: " : "" ) . "$url => $tu"; $conn->privmsg( $to, $out ); } } { my $ua; sub redirect_one_level ( $ ) { my ( $url ) = @_; xlog "url='$url'"; if ( ! $ua ) { $ua = LWP::UserAgent->new(); } my $req = HTTP::Request->new( GET => $url ); my $resp = $ua->simple_request( $req ); my $code = $resp->code(); unless ( $code == 302 ) { die "couldn't resolve '$url': $code" . "(" . $resp->message() . ")"; } my $loc = $resp->headers()->header( 'Location' ); unless ( $loc ) { die "couldn't get location for '$url': $code"; } xlog "loc='$loc'"; return $loc; } sub send_reverse_lookup ( $ $ $ $ ) { my ( $conn, $from, $to, $tu ) = @_; xlog "tu='$tu'"; my $dest = $tu; eval { while ( $dest && ( $dest =~ m!^http://(?:www\.)?tinyurl\.com/! || $dest =~ m!^http://unicyclist\.com/! ) ) { my $old = $dest; $dest = undef; $dest = redirect_one_level $old; } }; if ( $@ ) { $conn->privmsg( $to, $@ ); return; } if ( $dest ) { xlog "dest='$dest'"; $conn->privmsg( $to, "$tu => $dest" ); } else { $conn->privmsg( $to, "couldn't get location for '$tu'" ); } } } sub handle_message ( $ $ $ $ ) { my ( $conn, $from, $to, $text ) = @_; print STDERR "\$text='$text'\n" if $test; if ( $text =~ m!^r \s+ ( http://(?:www\.)?tinyurl\.com/[a-z0-9]+$ )!x ) { send_reverse_lookup $conn, $from, $to, $1; } else { send_tiny_urls $conn, $from, $to, $text; } } sub on_msg { my ( $self, $event ) = @_; my $from_nick = just_nick($event->from()); my $text = join('', $event->args()); handle_message $self, '', $from_nick, $text; } sub on_public { my ( $self, $event ) = @_; my $from_nick = just_nick($event->from()); my $to_channel = $event->to(); my $text = join('', $event->args()); handle_message $self, $from_nick, $to_channel, $text; } # ---------------------------------------- sub on_join { my ( $self, $event ) = @_; $event->dump(); my $nick = $event->nick(); my $chan = $event->to(); $chan = $chan->[0] if ref $chan; return unless $chan eq '#tub'; $self->mode( $chan, "+o", $nick ); } # ---------------------------------------------------------------------- my $irc = Net::IRC->new(); $irc->debug($test); my $conn = $irc->newconn( Server => $SERVER, Port => $PORT, Nick => $NICK, Ircname => 'TinyURL Bot' ) or die "couldn't connect to $SERVER:$PORT: $!"; # $conn->add_global_handler( connect => \&on_connect ); $conn->add_global_handler( endofmotd => \&on_endofmotd ); $conn->add_global_handler( nomotd => \&on_endofmotd ); $conn->add_global_handler( nicknameinuse => \&on_nicknameinuse ); $conn->add_global_handler( msg => \&on_msg ); $conn->add_global_handler( public => \&on_public ); $conn->add_global_handler( join => \&on_join ); $irc->start(); exit 0;