package WWW::Scrape::BillionGraves;

use warnings;
use strict;
use Firefox::Marionette;
use HTML::SimpleLinkExtor;
use Carp;

# TODO: new interface
#
# Request:
# https://billiongraves.com/search/results?cemetery_country=United%20Kingdom&given_names=isaac&family_names=horne&death_year=1964&size=15
#
# Results
# https://billiongraves.com/grave/Isaac-Horne/6745939

=head1 NAME

WWW::Scrape::BillionGraves - Scrape the BillionGraves website

=head1 VERSION

Version 0.01

=cut

our $VERSION = '0.01';

=head1 SYNOPSIS

    use WWW::Scrape::BillionGraves;

    my $bg = WWW::Scrape::BillionGraves->new({
	firstname => 'John',
	lastname => 'Smith',
	country => 'England',
	date_of_death => 1862
    });

    while(my $url = $bg->get_next_entry()) {
	print "$url\n";
    }
}

=head1 SUBROUTINES/METHODS

=head2 new

Creates a WWW::Scrape::BillionGraves object.

It takes two mandatory arguments firstname and lastname.

Also one of either date_of_birth and date_of_death must be given.

There are two optional arguments: middlename and host.

host is the domain of the site to search, the default is billiongraves.com.
=cut

sub new {
	my $proto = shift;
	my $class = ref($proto) || $proto;

	return unless(defined($class));

	my %args;
	if(ref($_[0]) eq 'HASH') {
		%args = %{$_[0]};
	} elsif(ref($_[0])) {
		Carp::croak("Usage: __PACKAGE__->new(%args)");
	} elsif(@_ % 2 == 0) {
		%args = @_;
	}


	die "First name is not optional" unless($args{'firstname'});
	die "Last name is not optional" unless($args{'lastname'});
	die "You must give one of the date of birth or death"
		unless($args{'date_of_death'} || $args{'date_of_birth'});

	my $firefox = Firefox::Marionette->new();

	# if(!defined($param{'host'})) {
		# $ua->ssl_opts(verify_hostname => 0);	# Yuck
	# }

	my $rc = {
		firefox => $firefox,
		date_of_birth => $args{'date_of_birth'},
		date_of_death => $args{'date_of_death'},
		country => $args{'country'},
		firstname => $args{'firstname'},
		middlename => $args{'middlename'},
		lastname => $args{'lastname'},
		year_range => 0,
		matches => 0,
		index => 0,
	};
	$rc->{'host'} = $args{'host'} || 'billiongraves.com';

	my %query_parameters;
	if($args{'firstname'}) {
		$query_parameters{'given_names'} = $args{'firstname'};
	}
	if($args{'middlename'}) {
		$query_parameters{'middlename'} = $args{'middlename'};
	}
	if($args{'lastname'}) {
		$query_parameters{'family_names'} = $args{'lastname'};
	}
	if($args{'date_of_birth'}) {
		$query_parameters{'birth_year'} = $args{'date_of_birth'};
	}
	if($args{'date_of_death'}) {
		$query_parameters{'death_year'} = $args{'date_of_death'};
	}
	if($args{'country'}) {
		if($args{'country'} eq 'England') {
			$query_parameters{'cemetery_country'} = 'United Kingdom';
			$query_parameters{'cemetery_state'} = 'England';
		} else {
			$query_parameters{'cemetery_country'} = $args{'country'};
		}
	}
	$query_parameters{'size'} = 15;
	my $uri = URI->new("https://$rc->{host}/search/results");
	$uri->query_form(%query_parameters);
	my $url = $uri->as_string();
	# ::diag($url);

	my $resp = Firefox::Marionette->new()->go($url);

	$rc->{'resp'} = $resp;
	if($resp->html() =~ /\d+ \- \d+ of (\d+)/m) {
		$rc->{'matches'} = $1;
		return bless $rc, $class if($rc->{'matches'} == 0);
		$rc->{'page'} = 1;
		$rc->{'query_parameters'} = \%query_parameters;
	} else {
		$rc->{'matches'} = 0;
	}
	return bless $rc, $class;
}

=head2 get_next_entry

Returns the next match as a URL to the BillionGraves page.

=cut

sub get_next_entry
{
	my $self = shift;

	return if(!defined($self->{'matches'}));
	return if($self->{'matches'} == 0);

	my $rc = pop @{$self->{'results'}};
	return $rc if $rc;

	return if($self->{'index'} >= $self->{'matches'});

	my $firstname = $self->{'firstname'};
	my $lastname = $self->{'lastname'};
	# my $date_of_death = $self->{'date_of_death'};	# FIXME: check results against this
	# my $date_of_birth = $self->{'date_of_birth'};	# FIXME: check results against this

	my $html = $self->{'resp'}->html();
	my $e = HTML::SimpleLinkExtor->new($html);

	$e->remove_tags('img', 'script');
	$e->parse($self->{'resp'}->html());	# FIXME: having to parse every time

	foreach my $link ($e->links()) {
		my $match = 0;
		if($link =~ /\/grave\/\Q$firstname\E.+\Q$lastname\E.*\/\d+/i) {
			# ::diag($link);
			$match = 1;
		}
		if($match) {
			push @{$self->{'results'}}, "https://$self->{host}/$link";
		}
	}
	$self->{'index'}++;
	if($self->{'index'} <= $self->{'matches'}) {
		$self->{'page'}++;
		$self->{'query_parameters'}->{'page'} = $self->{'page'};

		my $uri = URI->new("https://$self->{host}/search/results");
		$uri->query_form(%{$self->{'query_parameters'}});
		my $url = $uri->as_string();

		my $resp = $self->{'firefox'}->go($url);
		$self->{'resp'} = $resp;
	}

	return pop @{$self->{'results'}};
}

=head1 AUTHOR

Nigel Horne, C<< <njh at bandsman.co.uk> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-www-scrape-billiongraves at rt.cpan.org>,
or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Scrape-BillionGraves>.
I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SEE ALSO

L<https://github.com/nigelhorne/gedcom>
L<https://billiongraves.com>

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc WWW::Scrape::BillionGraves

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Scrape-BillionGraves>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/WWW-Scrape-BillionGraves>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/WWW-Scrape-BillionGraves>

=item * Search CPAN

L<https://metacpan.org/release/WWW-Scrape-BillionGraves>

=back

=head1 LICENSE AND COPYRIGHT

Copyright 2018 Nigel Horne.

This program is released under the following licence: GPL2

=cut

1; # End of WWW::Scrape::BillionGraves
