-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTabulist.php
212 lines (176 loc) · 5.95 KB
/
Tabulist.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
<?php
use GetOpt\GetOpt;
require_once __DIR__ . '/vendor/autoload.php';
// Namespace number used to select the tracked pages; their titles are stored with the "Data_talk:" prefix.
define( 'DATA_TALK_NS', 487 );
// Title of the tracked template in database form, i.e. with an underscore instead of a space.
define( 'TEMPLATE', 'Wikidata_tabular' );
/**
 * Keeps track of the wiki pages that use the TEMPLATE template and refreshes them.
 *
 * The state of every tracked page lives in the `pagestatus` table of the tool
 * database (columns used here: id, wiki, page, status, message, timestamp).
 * The page update itself is delegated to PageHandler.
 */
class Tabulist
{
    const DB_NAME = "tabulist";
    const SPARQL_ENDPOINT = 'https://query.wikidata.org/sparql';
    /**
     * Database name of the wiki this instance works on (a key of self::$servers).
     * @var string
     */
    private $wiki;
    /**
     * When true, progress messages are printed to standard output.
     * @var bool
     */
    private $verbose = false;
    /**
     * Map of supported wiki database names to their host names.
     * @var string[]
     */
    private static $servers = [
        'commonswiki' => 'commons.wikimedia.org'
    ];
    /**
     * @var PageHandler
     */
    private $handler;
    /**
     * @var ToolsDb
     */
    private $tool_db;

    /**
     * @param string $wiki Wiki database name; must be a key of self::$servers.
     * @throws InvalidArgumentException When the wiki is not known.
     */
    public function __construct( $wiki ) {
        $this->wiki = $wiki;
        if ( !isset( self::$servers[$wiki] ) ) {
            throw new InvalidArgumentException( "Unknown wiki $wiki" );
        }
        $this->handler = new PageHandler( self::$servers[$wiki], self::SPARQL_ENDPOINT );
        $this->tool_db = ToolsDb::getLocal( self::DB_NAME );
    }

    /**
     * Synchronise the `pagestatus` table with the pages that currently use the template.
     *
     * Works in three steps: flag the known Data_talk rows as CHECKING, set every
     * page found on the replica to WAITING (inserting it if needed), then delete
     * whatever is still CHECKING, i.e. pages that no longer use the template.
     */
    public function updateCommonsPagesList() {
        $ts = date( 'YmdHis' );
        $sql = "UPDATE pagestatus SET `status`='CHECKING' WHERE wiki=:wiki AND page LIKE 'Data_talk:%'";
        $this->tool_db->query( $sql, ['wiki' => $this->wiki] );

        $replica = ToolsDb::getReplica( $this->wiki );
        $sql = "select page.* from page,templatelinks t1
where page_id=t1.tl_from and t1.tl_title=:title AND t1.tl_namespace=10
AND page.page_namespace = :ns";
        $result = $replica->query( $sql, ['title' => TEMPLATE, 'ns' => DATA_TALK_NS] );
        if ( $this->verbose ) {
            print "{$result->rowCount()} pages found.\n";
        }

        foreach ( $result as $row ) {
            // The query already filters on the namespace; this is only a safety net.
            if ( $row->page_namespace != DATA_TALK_NS ) {
                continue;
            }
            $page = 'Data_talk:' . $row->page_title;
            $sql = "INSERT INTO pagestatus (wiki,page,status,message,timestamp)
VALUES (:wiki,:page,'WAITING','',:ts)
ON DUPLICATE KEY UPDATE status='WAITING',message='',timestamp=:ts";
            $this->tool_db->query( $sql, ['wiki' => $this->wiki, 'page' => $page, 'ts' => $ts] );
        }

        // Rows that were not reset to WAITING above are no longer found on the wiki.
        $this->tool_db->query( "DELETE FROM pagestatus WHERE `status`='CHECKING' AND wiki=:wiki",
            ['wiki' => $this->wiki] );
    }

    /**
     * Fetch the status row of a page by its numeric id.
     *
     * @param int|string $pageId Value of pagestatus.id.
     * @return object|null The first matching row, or null when there is none.
     */
    public function getPageById( $pageId ) {
        $sql = "SELECT * FROM pagestatus WHERE wiki=:wiki AND id=:id ORDER BY id ASC";
        $result = $this->tool_db->query( $sql, ['id' => $pageId, 'wiki' => $this->wiki] );
        foreach ( $result as $row ) {
            return $row;
        }
        return null;
    }

    /**
     * Fetch the status row of a page by its stored title.
     *
     * @param string $title Value of pagestatus.page (e.g. prefixed with "Data_talk:").
     * @return object|null The first matching row, or null when there is none.
     */
    public function getPageByTitle( $title ) {
        $sql = "SELECT * FROM pagestatus WHERE wiki=:wiki AND page=:page ORDER BY id ASC";
        $result = $this->tool_db->query( $sql, ['page' => $title, 'wiki' => $this->wiki] );
        foreach ( $result as $row ) {
            return $row;
        }
        return null;
    }

    /**
     * Update one page and record the outcome in `pagestatus`.
     *
     * The row is set to RUNNING before the update starts and to OK or FAILED
     * (with a message) once it has finished.
     *
     * @param int|string|stdClass $pageId Either a pagestatus id or an already loaded
     *  row object exposing at least `id` and `page`.
     * @return bool True when the page ended in status OK, false otherwise
     *  (missing page, handler failure or exception).
     */
    public function updatePage( $pageId ) {
        if ( !$pageId ) {
            print "No such page";
            return false;
        }

        if ( $pageId instanceof stdClass ) {
            $pageData = $pageId;
            $pageId = $pageData->id;
        } else {
            $pageData = $this->getPageById( $pageId );
            if ( !$pageData ) {
                if ( $this->verbose ) {
                    print "Page $pageId not found";
                }
                return false;
            }
        }

        $this->tool_db->query( "UPDATE pagestatus SET `status`='RUNNING',`message`='',timestamp=:ts WHERE wiki=:wiki and id=:id",
            ['ts' => date( 'YmdHis' ), 'wiki' => $this->wiki, 'id' => $pageId] );

        try {
            $this->handler->login( __DIR__ . "/tabulist.ini" );
            if ( $this->handler->updateTemplateData( $pageData->page, TEMPLATE ) ) {
                $message = $this->handler->getStatus();
                $status = "OK";
            } else {
                $message = implode( "\n", $this->handler->getErrors() );
                $status = "FAILED";
            }
        } catch ( Exception $e ) {
            // Record the failure instead of leaving the row stuck in RUNNING.
            $status = "FAILED";
            $message = "Exception: " . $e->getMessage();
        }

        if ( $this->verbose ) {
            print "$status: $message\n";
        }

        // Fresh timestamp: the update above may have taken a while.
        $this->tool_db->query( "UPDATE pagestatus SET `status`=:status,`message`=:msg,timestamp=:ts WHERE wiki=:wiki and id=:id",
            ['ts' => date( 'YmdHis' ), 'wiki' => $this->wiki, 'id' => $pageId, 'msg' => $message, "status" => $status] );

        // Failure paths above return false, so report success explicitly as well.
        return $status === "OK";
    }

    /**
     * Print id, status and title of every tracked page, one tab-separated line each.
     */
    public function listPages() {
        $sql = "SELECT id,status,page FROM pagestatus WHERE wiki=:wiki ORDER BY id ASC";
        $result = $this->tool_db->query( $sql, ['wiki' => $this->wiki] );
        foreach ( $result as $row ) {
            print "{$row->id}\t{$row->status}\t{$row->page}\n";
        }
    }

    /**
     * Dump the status row of one page (or NULL when it does not exist).
     *
     * @param int|string $pageId Value of pagestatus.id.
     */
    public function showPage( $pageId ) {
        $pageData = $this->getPageById( $pageId );
        var_dump( $pageData );
    }

    /**
     * @param bool $verbose
     */
    public function setVerbose( $verbose ) {
        $this->verbose = $verbose;
    }

    /**
     * Update up to $count randomly chosen pages.
     *
     * Pages that are RUNNING, FAILED or CHECKING are skipped, and so are pages
     * whose timestamp is less than an hour old.
     *
     * @param int|string $count Maximum number of pages to update; numeric strings
     *  (as delivered by the command line) are accepted.
     */
    public function updateRandomPages( $count ) {
        // The limit is cast to int and inlined: a string bound to a LIMIT placeholder
        // is rejected by MySQL when the driver quotes it (depends on how ToolsDb binds).
        $limit = max( 0, (int)$count );
        // Ignore pages that we've touched in the last hour. The cutoff uses the same
        // 'YmdHis' format as every value this class writes to the timestamp column.
        $cutoff = date( 'YmdHis', time() - 3600 );
        $sql = "SELECT id FROM pagestatus WHERE wiki=:wiki
AND status != 'RUNNING' AND status != 'FAILED' AND status != 'CHECKING'
AND timestamp < :ts ORDER BY rand() LIMIT $limit";
        $result = $this->tool_db->query( $sql, ['wiki' => $this->wiki, 'ts' => $cutoff] );

        // Count the rows as they are processed rather than relying on rowCount() of a SELECT.
        $updated = 0;
        foreach ( $result as $row ) {
            $this->updatePage( $row->id );
            $updated++;
        }
        if ( $this->verbose ) {
            print "$updated pages updated.\n";
        }
    }
}
// Command-line entry point: parse the options, then run every requested action
// in a fixed order (update list, list, show, update page, update random pages).
$getopt = new GetOpt( [
    ['h', 'help', GetOpt::NO_ARGUMENT, "Usage instructions"],
    ['u', 'update', GetOpt::NO_ARGUMENT, 'Update pages list'],
    ['l', 'list', GetOpt::NO_ARGUMENT, 'Show pages list'],
    ['s', 'show', GetOpt::REQUIRED_ARGUMENT, 'Show page data'],
    ['p', 'page', GetOpt::REQUIRED_ARGUMENT, 'Update specific page'],
    ['P', 'random', GetOpt::OPTIONAL_ARGUMENT, 'Update N (default:10) random pages', 10],
    ['v', 'verbose', GetOpt::NO_ARGUMENT, "More verbose output"],
] );
$getopt->process();

// No options at all, or help requested explicitly: show the usage text and stop.
$nothingRequested = $getopt->count() == 0;
if ( $nothingRequested || $getopt->getOption( 'h' ) ) {
    echo $getopt->getHelpText();
    exit( 0 );
}

$tabulist = new Tabulist( 'commonswiki' );
$tabulist->setVerbose( $getopt->getOption( 'v' ) );

if ( $getopt->getOption( 'u' ) ) {
    $tabulist->updateCommonsPagesList();
}

if ( $getopt->getOption( 'l' ) ) {
    $tabulist->listPages();
}

$showId = $getopt->getOption( 's' );
if ( $showId ) {
    $tabulist->showPage( $showId );
}

$updateId = $getopt->getOption( 'p' );
if ( $updateId ) {
    $tabulist->updatePage( $updateId );
}

// NOTE(review): 'P' is declared with a default of 10 - confirm that getOption( 'P' )
// is falsy when the option is absent, otherwise random pages are updated on every run.
$randomCount = $getopt->getOption( 'P' );
if ( $randomCount ) {
    $tabulist->updateRandomPages( $randomCount );
}