-
Notifications
You must be signed in to change notification settings - Fork 0
/
deliciousweb2wp
executable file
·125 lines (112 loc) · 3.81 KB
/
deliciousweb2wp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl -W
# Read web-export from delicious.com and save it as WordPress posts-file
# Elmar Klausmeier, 03-Jan-2015
#
# Usage: deliciousweb2wp [-d] [-n count] [-o offset] [file ...]
#   -n count   stop after <count> links have been written (0 = no limit)
#   -o offset  value added to the running link number to form <wp:post_id>
#   -d         accepted on the command line but not used by the script
# Input is read from the named files (or STDIN); the WXR document is
# written to STDOUT.
use strict;
use warnings;
use POSIX qw(strftime);
use Getopt::Std;

my %opts = ('d' => 0, 'n' => 0, 'o' => 0);
getopts('dn:o:', \%opts);
my $n      = ($opts{'n'} > 0) ? $opts{'n'} : 0;    # maximum number of links, 0 = all
my $offset = ($opts{'o'} > 0) ? $opts{'o'} : 0;    # base for wp:post_id

# Parser state ($flag):
#   0  waiting for the next <div class="title-wrapper">
#   1  title-wrapper seen, waiting for the <a href=...> line
#   2  <item> opened, waiting for the <div class="note"> line
#   3  note closed, collecting tags and the <time> element
my ($flag, $links, $post_id) = (0, 0, 0);
my $title     = "";
my $item_open = 0;    # true while an <item> has been printed but not yet closed

# Repair mis-encoded characters in one input line and return the result.
# The substitutions are applied in this fixed order; single Latin-1 bytes are
# rewritten first, then a few multi-byte sequences are normalised.
sub repair_text {
    my ($text) = @_;

    $text =~ s/\xE4/ä/g;            # lowercase a-umlaut
    $text =~ s/\xFC/ü/g;            # lowercase u-umlaut
    $text =~ s/\xF6/ö/g;            # lowercase o-umlaut
    $text =~ s/\xDF/ß/g;            # sharp s
    $text =~ s/\xC4/Ä/g;            # uppercase A-umlaut
    $text =~ s/\xDC/Ü/g;            # uppercase U-umlaut
    $text =~ s/\xD6/Ö/g;            # uppercase O-umlaut
    $text =~ s/\xC9/é/g;            # e acute (byte C9 is rewritten to lowercase e acute)
    $text =~ s/\xBB/»/g;            # right-pointing double angle quotation mark
    $text =~ s/\xC2\xAE/©/g;        # bytes C2 AE are rewritten to the copyright sign
    $text =~ s/\xE2\x84\xA2/™/g;    # trademark
    $text =~ s/\xC3\xBC/ü/g;        # lowercase u-umlaut
    $text =~ s/\xE2\x80\x99/'/g;    # typographic apostrophe -> ASCII apostrophe
    $text =~ s/\xC3\xA4/ä/g;        # lowercase a-umlaut
    # NOTE(review): the three \xE4\x.. rules below look unreachable when this
    # file is stored as UTF-8, because every \xE4 byte was already rewritten by
    # the first rule above -- confirm intent before reordering.
    $text =~ s/\xE4\xA4/ä/g;        # lowercase a-umlaut
    $text =~ s/\xE4\xB6/ö/g;        # lowercase o-umlaut
    $text =~ s/\xC3\xB6/ö/g;        # lowercase o-umlaut
    $text =~ s/\xC3\x9C/Ü/g;        # uppercase U-umlaut
    $text =~ s/\xC3\x9F/ß/g;        # sharp s
    $text =~ s/\xC2\xA0//g;         # bytes C2 A0 (UTF-8 no-break space) are dropped
    $text =~ s/\xC3\xA9/é/g;        # e acute
    $text =~ s/\xE4\xA9/é/g;        # e acute
    $text =~ s/\xC3\xA8/è/g;        # e grave
    # A bare ampersand is not well-formed inside <title>/<wp:post_name>;
    # the previous rule replaced " & " with itself and therefore did nothing.
    $text =~ s/ & / &amp; /g;       # ampersand

    return $text;
}

print <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Collected Links</title>
<link>http://klmlinks.wordpress.com</link>
<language>de</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:author>
<wp:author_login>eklausmeier</wp:author_login>
<wp:author_email>Elmar.Klausmeier\@gmail.com</wp:author_email>
<wp:author_display_name><![CDATA[eklausmeier]]></wp:author_display_name>
<wp:author_first_name><![CDATA[]]></wp:author_first_name>
<wp:author_last_name><![CDATA[]]></wp:author_last_name>
</wp:author>
EOF

LINE:
while (my $line = <>) {
    chomp $line;
    $line =~ s/\s+$//;    # rtrim

    # Lines are only repaired once the first title-wrapper has been seen.
    $line = repair_text($line) if $flag > 0;

    if (($flag == 0 || $flag == 3) && $line =~ /\s+<div class="title-wrapper">/) {
        # Start of a new bookmark: close the previous item, if one is open.
        $flag  = 1;
        $title = "";
        if ($item_open) {
            print "</item>\n";
            $item_open = 0;
        }
    } elsif ($flag == 1 && $line =~ /<a href=/) {
        # The anchor line becomes both the title and the start of the content.
        $flag = 2;
        ++$links;    # count number of a_href's
        $post_id = $offset + $links;
        # The previous item was already closed at the title-wrapper line,
        # so leaving here does not leave an <item> dangling.
        last LINE if ($n > 0 && $links > $n);
        $line =~ s/^\s+//;    # strip space before a_href
        $title = $1 if ($line =~ /<a href=.+">(.*)<\/a>/);
        $line =~ s/ class="title"//;
        print "<item>\n"
            . "<title>$title</title>\n"
            # . "<pubDate>$add_date_rfc</pubDate>\n"
            . "<dc:creator>eklausmeier</dc:creator>\n"
            . "<description/>\n"
            . "<content:encoded><![CDATA[$line\n";
        $item_open = 1;
    } elsif ($flag == 2 && $line =~ /<div class="note">/) {
        $line =~ s/\s+<div class="note">//g;
        # NOTE(review): when the closing </div> is not on this line, $flag
        # stays 2 and no later title-wrapper is accepted -- presumably the
        # export always keeps the note on a single line; confirm.
        if ($line =~ /<\/div>/) {
            $flag = 3;
            $line =~ s/<\/div>//g;
        }
        print "$line]]></content:encoded>\n"
            . "<wp:post_id>$post_id</wp:post_id>\n"
            . "<wp:post_name>$title</wp:post_name>\n"
            . "<wp:status>publish</wp:status>\n"
            . "<wp:post_type>post</wp:post_type>\n"
            . "<category domain=\"post_format\" nicename=\"post-format-link\"><![CDATA[Link]]></category>\n"
            . "<category domain=\"category\" nicename=\"uncategorized\"><![CDATA[Uncategorized]]></category>\n";
    } elsif ($flag == 3
        && $line =~ /<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel" data-tag="(.+)"/) {
        # One category element per tag; the nicename is the lower-cased tag
        # with whitespace runs turned into dashes.
        my $tag  = $1;
        my $mtag = $tag;         # modified tag
        $mtag =~ s/\s+/\-/g;     # replace space with dash
        printf("<category domain=\"post_tag\" nicename=\"%s\"><![CDATA[%s]]></category>\n",
            lc($mtag), $tag);
    } elsif ($flag == 3
        && $line =~ /<time datetime="(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d)/) {
        # The timestamp is the last piece collected for a bookmark.
        my $add_date_ymd = $1;
        $flag = 0;
        $add_date_ymd =~ s/T/ /g;    # replace the letter T with a space
        print "<wp:post_date>$add_date_ymd</wp:post_date>\n";
    }
}

# Close the last item only if one is actually open. This covers both
# "-n N with N or fewer links in the input" (previously left unclosed) and
# "no links at all" (previously produced a stray </item>).
print "</item>\n" if $item_open;
print <<"EOF";
</channel>
</rss>
EOF