File Coverage

blib/lib/GitHub/RSS.pm

Criterion	Covered	Total	%
statement	32	193	16.5
branch	0	28	0.0
condition	0	11	0.0
subroutine	11	28	39.2
pod	3	13	23.0
total	46	273	16.8

line	stmt	bran	cond	sub	pod	time	code
1							use strict;
2	1			1		550	use 5.010;
	1					1
	1					32
3	1			1		16	use Moo 2;
	1					4
4	1			1		467	use feature 'signatures';
	1					9316
	1					5
5	1			1		1207	no warnings 'experimental::signatures';
	1					2
	1					75
6	1			1		4	use PerlX::Maybe;
	1					2
	1					24
7	1			1		392
	1					2009
	1					4
8							use IO::Socket::SSL;
9	1			1		702	use Net::GitHub;
	1					62350
	1					6
10	1			1		506	use DBI;
	1					255217
	1					34
11	1			1		2706	use JSON;
	1					15647
	1					74
12	1			1		662
	1					8232
	1					6
13							use Data::Dumper;
14	1			1		626
	1					4830
	1					1860
15							our $VERSION = '0.02';
16
17							=head1 NAME
18
19							GitHub::RSS - collect data from Github.com for feeding into RSS
20
21							=head1 SYNOPSIS
22
23							my $gh = GitHub::RSS->new(
24							dbh => {
25							dsn => "dbi:SQLite:dbname=$store",
26							},
27							);
28
29							my $last_updated = $gh->last_check;
30							$gh->fetch_and_store( $github_user => $github_repo, $last_updated );
31							if( $verbose ) {
32							print "Updated from $last_updated to " . $gh->last_check, "\n";
33							};
34
35							=head1 DESCRIPTION
36
37							This module provides a cache database for GitHub issues and scripts to
38							periodically update the database from GitHub.
39
40							This is mainly used for creating an RSS feed from the database, hence the
41							name.
42
43							=head1 METHODS
44
45							=head2 C<< ->new >>
46
47							my $gh = GitHub::RSS->new(
48							dbh => {
49							dsn => 'dbi:SQLite:dbname=db/issues.sqlite',
50							},
51							);
52
53							Constructs a new GitHub::RSS instance
54
55							=over 4
56
57							=item *
58
59							B<gh> - instance of L<Net::GitHub>
60
61							=cut
62
63							has 'gh' => (
64							is => 'ro',
65							default => sub( $self ) {
66							Net::GitHub->new(
67							maybe access_token => $self->token
68							),
69							},
70							);
71
72							=item *
73
74							B<token_file> - name and path of the JSON-format token file containing the
75							GitHub API token By default, that file is searched for under the name
76							C<github.credentials> in C<.>, C<$ENV{XDG_DATA_HOME}>, C<$ENV{USERPROFILE}>
77							and C<$ENV{HOME}>.
78
79							=cut
80
81							has 'token_file' => (
82							is => 'lazy',
83							default => \&_find_gh_token_file,
84							);
85
86							=item *
87
88							B<token> - GitHub API token. If this is missing, it will be attempted to read
89							it from the C<token_file>.
90
91							=cut
92
93							has 'token' => (
94							is => 'lazy',
95							default => \&_read_gh_token,
96							);
97
98							=item *
99
100							B<default_user> - name of the GitHub user whose repos will be read
101
102							=cut
103
104							has default_user => (
105							is => 'ro',
106							);
107
108							=item *
109
110							B<default_repo> - name of the GitHub repo whose issues will be read
111
112							=cut
113
114							has default_repo => (
115							is => 'ro',
116							);
117
118							=item *
119
120							B<dbh> - premade database handle or alternatively a hashref containing
121							the L<DBI> arguments
122
123							dbh => $dbh,
124
125							or alternatively
126
127							dbh => {
128							user => 'scott',
129							password => 'tiger',
130							dsn => 'dbi:SQLite:dbname=db/issues.sqlite',
131							}
132
133							=cut
134
135							has dbh => (
136							is => 'ro',
137							required => 1,
138							coerce => \&_build_dbh,
139							);
140
141							return $args if ref($args) eq 'DBI::db';
142	0			0			ref($args) eq 'HASH' or die 'Not a DB handle nor a hashref';
	0
	0
143	0	0					return DBI->connect( @{$args}{qw/dsn user password options/} );
144	0	0					}
145	0
	0
146							=item *
147
148							B<fetch_additional_pages> - number of additional pages to fetch from GitHub.
149							This is relevant when catching up a database for a repository with many issues.
150
151							=back
152
153							=cut
154
155							has fetch_additional_pages => (
156							is => 'ro',
157							default => '1',
158							);
159
160							$env //= \%ENV;
161
162	0			0			my $token_file;
	0
	0
	0
163	0		0
164							# This should use File::User
165	0						for my $candidate_dir ('.',
166							$ENV{XDG_DATA_HOME},
167							$ENV{USERPROFILE},
168	0						$ENV{HOME}
169							) {
170							next unless defined $candidate_dir;
171							if( -f "$candidate_dir/github.credentials" ) {
172							$token_file = "$candidate_dir/github.credentials";
173	0	0					last
174	0	0					};
175	0						};
176
177	0						return $token_file
178							}
179
180	0						my $file = $token_file // $self->token_file;
181
182							if( $file ) {
183	0			0			open my $fh, '<', $file
	0
	0
	0
184	0		0				or die "Couldn't open file '$file': $!";
185							binmode $fh;
186	0	0					local $/;
187	0	0					my $json = <$fh>;
188							my $token_json = decode_json( $json );
189	0						return $token_json->{token};
190	0						} else {
191	0						# We'll run without a known account
192	0						return
193	0						}
194							}
195
196							$user = $self->default_user,
197	0						$repo = $self->default_repo,
198							$since=undef ) {
199							my @issues = $self->fetch_issues( $user, $repo, $since );
200	0						my $gh = $self->gh;
201	0						while ($gh->issue->has_next_page) {
202	0						push @issues, $gh->issue->next_page;
203	0			0	0		}
	0
	0
204	0						@issues
205	0						}
206	0
207	0						$user = $self->default_user,
208							$repo = $self->default_repo,
209							$since=undef ) {
210	0						my $gh = $self->gh;
211							my @issues = $gh->issue->repos_issues($user => $repo,
212	0						{ sort => 'updated',
213	0						direction => 'asc', # so we can interrupt any time
214	0						state => 'all', # so we find issues that got closed
215	0			0	0		maybe since => $since,
	0
	0
216	0						}
217	0						);
218							};
219
220							=head2 C<< ->fetch_issue_comments >>
221
222							=cut
223
224							$user=$self->default_user,
225							$repo=$self->default_repo
226							) {
227							# Shouldn't this loop as well, just like with the issues?!
228							return $self->gh->issue->comments($user, $repo, $issue_number );
229							}
230	0
	0
231	0						my @columns = sort keys %{ $rows[0] };
232	0						my $statement = sprintf q{replace into "%s" (%s) values (%s)},
233	0			0	1		$table,
	0
234							join( ",", map qq{"$_"}, @columns ),
235	0						join( ",", ('?') x (0+@columns))
236							;
237							my $sth = $self->dbh->prepare( $statement );
238	0			0	0		eval {
	0
	0
	0
	0
239	0						$sth->execute_for_fetch(sub { @rows ? [ @{ shift @rows }{@columns} ] : () }, \my @errors);
	0
240	0						} or die Dumper \@rows;
241							#if( @errors ) {
242							# warn Dumper \@errors if (0+@errors);
243							#};
244							}
245	0
246	0	0					# Munge some columns:
247	0	0		0			for (@$issues) {
	0
	0
248							my $u = $_->{user};
249							@{ $_ }{qw( user_id user_login user_gravatar_id )}
250							= @{ $u }{qw( id login gravatar_id )};
251
252							# Squish all structure into JSON, for later
253							for (values %$_) {
254	0			0	0		if( ref($_) ) { $_ = encode_json($_) };
	0
	0
	0
	0
	0
255							};
256	0						};
257	0
258	0						for my $issue (@$issues) {
259	0						#$\|=1;
	0
260							#print sprintf "% 6d %s\r", $issue->{number}, $issue->{updated_at};
261							my @comments = $self->fetch_issue_comments( $issue->{number}, $user => $repo );
262	0
263	0	0					# Squish all structure into JSON, for later
	0
264							for (@comments) {
265							for (values %$_) {
266							if( ref($_) ) { $_ = encode_json($_) };
267	0						};
268							};
269							$self->write_data( 'comment' => @comments )
270	0						if @comments;
271							};
272
273	0						# We wrote the comments first so we will refetch them if there is a problem
274	0						# when writing the issue
275	0	0					$self->write_data( 'issue' => @$issues );
	0
276							};
277
278	0	0					=head2 C<< ->fetch_and_store($user, $repo, $since) >>
279
280							my $since = $gh->last_check;
281							$gh->fetch_and_store($user, $repo, $since)
282
283							Fetches all issues and comments modified after the C<$since> timestamp.
284	0						If C<$since> is missing or C<undef>, all issues will be retrieved.
285
286							=cut
287
288							$user = $self->default_user,
289							$repo = $self->default_repo,
290							$since = undef) {
291							my $dbh = $self->dbh;
292							my $gh = $self->gh;
293
294							my $can_fetch_more = $self->fetch_additional_pages;
295
296							FETCH:
297	0						my @issues = $self->fetch_issues( $user => $repo, $since );
298	0						my $has_more = $gh->issue->has_next_page;
299	0						$self->store_issues_comments( $user => $repo, \@issues );
300	0			0	1
	0
	0
301	0						if( $has_more and (!defined($can_fetch_more) or $can_fetch_more-- > 0)) {
302	0						$since = $issues[-1]->{updated_at};
303							goto FETCH;
304	0						}
305							}
306	0
307							$user = $self->default_user,
308	0						$repo = $self->default_repo,
309	0						@issue_numbers) {
310							my $dbh = $self->dbh;
311	0	0	0				my $gh = $self->gh;
			0
312	0
313	0						my @issues = map { scalar $gh->issue->issue($user => $repo, $_) } @issue_numbers;
314							$self->store_issues_comments( $user => $repo, \@issues );
315							}
316
317	0						for (@fields) {
318	0						$item->{$_} = $item->{$_} ? decode_json( $item->{$_} ) : $item->{$_};
319	0						}
320	0			0	0		}
	0
	0
321	0
322	0						map {
323							$self->inflate_fields( $_, qw(user closed_by));
324	0						$_
	0
325	0						}
326							@{ $self->dbh->selectall_arrayref(<<'SQL', { Slice => {}}, $since, $since) }
327							select
328	0			0	0		i.id
	0
	0
	0
	0
329	0						, i.user
330	0	0					, i.html_url
331							, i.body
332							, i.created_at
333							, i.updated_at
334	0			0	0		, i.title as issue_title
	0
	0
	0
335							, i.number as issue_number
336	0						from issue i
337	0						where i.updated_at >= ?
338							union all
339	0						select
	0
340							c.id
341							, c.user
342							, c.html_url
343							, c.body
344							, c.created_at
345							, c.updated_at
346							, i.title as issue_title
347							, i.number as issue_number
348							from comment c
349							join issue i on c.issue_url=i.url
350							where i.updated_at >= ?
351							order by i.updated_at, html_url
352							SQL
353							}
354
355							map {
356							$self->inflate_fields( $_, qw(user closed_by));
357							$_
358							}
359							@{ $self->dbh->selectall_arrayref(<<'SQL', { Slice => {}}) }
360							select distinct
361							i.* -- later, expand to explicit list
362							from issue i
363							join comment c on c.issue_url=i.url
364							where c.body like '%```diff%'
365							and i.state = 'open'
366							order by i.url
367							SQL
368	0			0	0		}
	0
	0
369
370	0						$self->dbh->selectall_arrayref(<<'SQL', { Slice => {}}, $issue)->[0]
371	0						select
372							* -- later, expand to explicit list
373	0						from issue i
	0
374							where i.number = ?
375							order by i.url
376							SQL
377							}
378
379							@{ $self->dbh->selectall_arrayref(<<'SQL', { Slice => {}}, $issue) }
380							select
381							c.* -- later, expand to explicit list
382							from comment c
383							join issue i on c.issue_url=i.url
384	0			0	0		where i.number = ?
	0
	0
	0
385	0						order by c.url
386							SQL
387							}
388
389							=head2 C<< ->last_check >>
390
391							my $since = $gh->last_check;
392
393							Returns the timestamp of the last stored modification or C<undef>
394	0			0	0		if no issue or comment is stored.
	0
	0
	0
395	0
	0
396							=cut
397
398
399							$user = $self->default_user,
400							$repo = $self->default_repo ) {
401							my $last = $self->dbh->selectall_arrayref(<<'SQL', { Slice => {} });
402							select
403							max(updated_at) as updated_at
404							from issue
405							SQL
406							if( @$last ) {
407							return $last->[0]->{updated_at}
408							} else {
409							return undef # empty DB
410							}
411							}
412
413							1;