Skip to content

Commit

Permalink
Merge pull request #83 from ipfs/feat/my
Browse files Browse the repository at this point in the history
Add Myanmar Wikipedia
  • Loading branch information
lidel authored Feb 25, 2021
2 parents 196a8d4 + 2e43fac commit 8f26c93
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 36 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Putting Wikipedia Snapshots on IPFS and working towards making it fully read-write.
<br />
<br />
Existing Mirrors: https://en.wikipedia-on-ipfs.org, https://tr.wikipedia-on-ipfs.org
Existing Mirrors: https://en.wikipedia-on-ipfs.org, https://tr.wikipedia-on-ipfs.org, https://my.wikipedia-on-ipfs.org
</p>

- [Purpose](#purpose)
Expand Down
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
"date-fns": "^2.10.0",
"handlebars": "^4.7.3",
"node-fetch": "^2.6.0",
"tslib": "^1"
"tslib": "^2"
},
"devDependencies": {
"@oclif/dev-cli": "^1",
"@oclif/test": "^1",
"@types/chai": "^4",
"@types/cheerio": "^0.22.16",
"@types/mocha": "^5",
"@types/node": "^10",
"@types/node-fetch": "^2.5.5",
"@types/node": "^14",
"@types/node-fetch": "^2.5.8",
"chai": "^4",
"eslint": "^5.13",
"eslint-config-oclif": "^3.1",
Expand All @@ -36,8 +36,8 @@
"mocha": "^5",
"nyc": "^14",
"prettier": "^1.19.1",
"ts-node": "^8",
"typescript": "^3.3"
"ts-node": "^9",
"typescript": "^4.1"
},
"engines": {
"node": ">=14.0.0"
Expand Down
6 changes: 6 additions & 0 deletions snapshot-hashes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ tr:
date: 2021-02-19
ipns:
ipfs: https://dweb.link/ipfs/bafybeieuutdavvf55sh3jktq2dpi2hkle6dtmebe7uklod3ramihyf3xa4
my:
name: Myanmar
original: my.wikipedia.org
date: 2021-02-22
ipns:
ipfs: https://dweb.link/ipfs/bafybeib66xujztkiq7lqbupfz6arzhlncwagva35dx54nj7ipyoqpyozhy
57 changes: 54 additions & 3 deletions src/site-transforms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import {
existsSync,
lstatSync,
mkdirSync,
rmdirSync,
readdirSync,
readFileSync,
renameSync,
closeSync,
openSync,
opendirSync,
unlinkSync,
writeFileSync
} from 'fs'
Expand Down Expand Up @@ -114,6 +116,42 @@ export const fixRedirects = async ({
if (stderr) console.error('redirect fix stderr:', stderr)
}

// https://github.com/ipfs/distributed-wikipedia-mirror/issues/80
/**
 * Placeholder for relocating articles stored in the `_exceptions` directory.
 *
 * Currently a no-op: the whole implementation below is commented out, so the
 * function destructures its arguments, does nothing with them, and the
 * returned promise resolves to `undefined`.
 *
 * @param unpackedZimDir - Only referenced by the disabled draft, which looks
 *   for `_exceptions` inside this directory.
 * @param wikiFolder - Destructured but not read, not even by the disabled draft.
 */
export const fixExceptions = async ({
unpackedZimDir,
wikiFolder
}: Directories) => {

/* TODO this needs more work
// Articles with "/" in their name (like "foo/bar") produce conflicts, and those are saved
// as URL-escaped flat files in the _exceptions directory.
// What we do here is take every "foo" exception and rename it to foo/index.html,
// so it loads fine under its own name.
const exceptionsDir = join(unpackedZimDir, '_exceptions')
if (!existsSync(exceptionsDir)) {
return
}
const dir = opendirSync(exceptionsDir)
for await (let file of dir) {
const articleName = decodeURIComponent(file.name)
console.log(articleName)
const segments = articleName.split('/')
if (segments[0] !== 'A') continue
segments[0] = 'wiki'
const articleDir = join(unpackedZimDir, ...segments)
if (!existsSync(articleDir)) {
// problem: articleDir may not exist and neither may its parent,
// and the root one is a file and not a dir (e.g. A/Australia/Foo/index.html blocked by A/Australia flat article)
mkdirSync(articleDir, { recursive: true })
}
const articleSrc = join(exceptionsDir, file.name)
const articleDest = join(articleDir, 'index.html')
renameSync(articleSrc, articleDest)
}
*/
// TODO: remove _exceptions?
}

export const includeSourceZim = ({
zimFile,
Expand Down Expand Up @@ -182,9 +220,7 @@ export const generateMainPage = async (
options: Options,
{ wikiFolder, imagesFolder }: Directories
) => {
const kiwixMainpage = readFileSync(
join(wikiFolder, `${options.kiwixMainPage}`)
)


// We copy "kiwix main page" to /wiki/index.html
// This way original one can still be loaded if needed
Expand All @@ -195,6 +231,21 @@ export const generateMainPage = async (

cli.action.start(` Generating main page into ${mainPagePath} `)

const kiwixMainPageSrc = join(wikiFolder, `${options.kiwixMainPage}`)

// This is a crude fix that replaces the exploded directory with a single HTML file,
// just to repair main pages that happen to end up in _exceptions.
// A proper fix is still needed for regular articles: https://github.com/ipfs/distributed-wikipedia-mirror/issues/80
if (lstatSync(kiwixMainPageSrc).isDirectory()) {
const exceptionsPage = join(options.unpackedZimDir, '_exceptions', `A%2f${options.kiwixMainPage}`)
if (existsSync(exceptionsPage)) {
rmdirSync(kiwixMainPageSrc, { recursive: true })
renameSync(exceptionsPage, kiwixMainPageSrc)
}
}

const kiwixMainpage = readFileSync(kiwixMainPageSrc)

const $kiwixMainPageHtml = cheerio.load(kiwixMainpage.toString())

const canonicalUrlString = $kiwixMainPageHtml('link[rel="canonical"]').attr(
Expand Down
11 changes: 6 additions & 5 deletions src/utils/download-file.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import { exec } from 'child_process'
import { Url } from 'url'
import { URL } from 'url'

/**
 * Downloads `url` to `dest` by shelling out to `wget --continue`.
 *
 * @param url - Resource to fetch; interpolated into the command line via its
 *   string form.
 * @param dest - Local path handed to wget's `-O` option.
 * @returns A promise that resolves with wget's stdout once the command exits
 *   successfully, and rejects with the error reported by `exec` otherwise
 *   (after logging any stderr output).
 */
export const downloadFile = (url: URL, dest: string): Promise<string> => {
  return new Promise<string>((resolve, reject) => {
    // NOTE(review): the command is built by string interpolation and executed
    // through a shell, so `dest` and `url` must not contain double quotes or
    // other shell metacharacters — confirm callers only pass trusted values.
    const wget = `wget --continue -O "${dest}" "${url}"`

    exec(wget, (err, stdout, stderr) => {
      if (err) {
        if (stderr) console.error(stderr)
        reject(err)
        // Stop here so a failed download does not fall through to resolve().
        return
      }

      resolve(stdout)
    })
  })
}
2 changes: 2 additions & 0 deletions src/zim-to-website.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
copyImageAssetsIntoWiki,
fixFavicon,
fixRedirects,
fixExceptions,
generateMainPage,
insertIndexRedirect,
moveArticleFolderToWiki,
Expand Down Expand Up @@ -48,6 +49,7 @@ export const zimToWebsite = async (options: Options) => {
copyImageAssetsIntoWiki('./assets', directories)
fixFavicon(directories)
moveArticleFolderToWiki(directories)
await fixExceptions(directories)
await fixRedirects(directories)
insertIndexRedirect(options)
appendJavascript(options, directories)
Expand Down
55 changes: 33 additions & 22 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,10 @@
resolved "https://registry.yarnpkg.com/@types/mocha/-/mocha-5.2.7.tgz#315d570ccb56c53452ff8638738df60726d5b6ea"
integrity sha512-NYrtPht0wGzhwe9+/idPaBB+TqkY9AhTvOLMkThm0IoEfLaiVQZwBwyJ5puCkO3AUCWrmcoePjp2mbFocKy4SQ==

"@types/node-fetch@^2.5.5":
version "2.5.5"
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.5.5.tgz#cd264e20a81f4600a6c52864d38e7fef72485e92"
integrity sha512-IWwjsyYjGw+em3xTvWVQi5MgYKbRs0du57klfTaZkv/B24AEQ/p/IopNeqIYNy3EsfHOpg8ieQSDomPcsYMHpA==
"@types/node-fetch@^2.5.8":
version "2.5.8"
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.5.8.tgz#e199c835d234c7eb0846f6618012e558544ee2fb"
integrity sha512-fbjI6ja0N5ZA8TV53RUqzsKNkl9fv8Oj3T7zxW7FGv1GSH7gwJaNF8dzCjrqKaxKeUpTz4yT1DaJFq/omNpGfw==
dependencies:
"@types/node" "*"
form-data "^3.0.0"
Expand All @@ -270,10 +270,10 @@
resolved "https://registry.yarnpkg.com/@types/node/-/node-13.7.4.tgz#76c3cb3a12909510f52e5dc04a6298cdf9504ffd"
integrity sha512-oVeL12C6gQS/GAExndigSaLxTrKpQPxewx9bOcwfvJiJge4rr7wNaph4J+ns5hrmIV2as5qxqN8YKthn9qh0jw==

"@types/node@^10":
version "10.17.16"
resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.16.tgz#ee96ddac1a38d98d2c8a71c7df0cdad5758e8993"
integrity sha512-A4283YSA1OmnIivcpy/4nN86YlnKRiQp8PYwI2KdPCONEBN093QTb0gCtERtkLyVNGKKIGazTZ2nAmVzQU51zA==
"@types/node@^14":
version "14.14.31"
resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.31.tgz#72286bd33d137aa0d152d47ec7c1762563d34055"
integrity sha512-vFHy/ezP5qI0rFgJ7aQnjDXwAMrG0KqqIH7tQG5PPv3BWBayOPIQNBjVc/P6hhdZfMx51REc6tfDNXHUio893g==

"@types/sinon@*":
version "7.5.1"
Expand Down Expand Up @@ -682,6 +682,11 @@ cp-file@^6.2.0:
pify "^4.0.1"
safe-buffer "^5.0.1"

create-require@^1.1.0:
version "1.1.1"
resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==

cross-spawn@^4:
version "4.0.2"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-4.0.2.tgz#7b9247621c23adfdd3856004a823cbe397424d41"
Expand Down Expand Up @@ -2470,10 +2475,10 @@ sort-keys@^4.0.0:
dependencies:
is-plain-obj "^2.0.0"

source-map-support@^0.5.6:
version "0.5.16"
resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.16.tgz#0ae069e7fe3ba7538c64c98515e35339eac5a042"
integrity sha512-efyLRJDr68D9hBBNIPWFjhpFzURh+KJykQwvMyW5UiZzYwoF6l4YMMDIJJEyFWxWCqfyxLzz6tSfUFR+kXXsVQ==
source-map-support@^0.5.17:
version "0.5.19"
resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
dependencies:
buffer-from "^1.0.0"
source-map "^0.6.0"
Expand Down Expand Up @@ -2715,22 +2720,28 @@ treeify@^1.1.0:
resolved "https://registry.yarnpkg.com/treeify/-/treeify-1.1.0.tgz#4e31c6a463accd0943879f30667c4fdaff411bb8"
integrity sha512-1m4RA7xVAJrSGrrXGs0L3YTwyvBs2S8PbRHaLZAkFw7JR8oIFwYtysxlBZhYIa7xSyiYJKZ3iGrrk55cGA3i9A==

ts-node@^8:
version "8.6.2"
resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-8.6.2.tgz#7419a01391a818fbafa6f826a33c1a13e9464e35"
integrity sha512-4mZEbofxGqLL2RImpe3zMJukvEvcO1XP8bj8ozBPySdCUXEcU5cIRwR0aM3R+VoZq7iXc8N86NC0FspGRqP4gg==
ts-node@^9:
version "9.1.1"
resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-9.1.1.tgz#51a9a450a3e959401bda5f004a72d54b936d376d"
integrity sha512-hPlt7ZACERQGf03M253ytLY3dHbGNGrAq9qIHWUY9XHYl1z7wYngSr3OQ5xmui8o2AaxsONxIzjafLUiWBo1Fg==
dependencies:
arg "^4.1.0"
create-require "^1.1.0"
diff "^4.0.1"
make-error "^1.1.1"
source-map-support "^0.5.6"
source-map-support "^0.5.17"
yn "3.1.1"

tslib@^1, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
version "1.11.0"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.11.0.tgz#f1f3528301621a53220d58373ae510ff747a66bc"
integrity sha512-BmndXUtiTn/VDDrJzQE7Mm22Ix3PxgLltW9bSNLoeCY31gnG2OPx0QqJnuc9oMIKioYrz487i6K9o4Pdn0j+Kg==

tslib@^2:
version "2.1.0"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==

tsutils@^3.17.1:
version "3.17.1"
resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.17.1.tgz#ed719917f11ca0dee586272b2ac49e015a2dd759"
Expand Down Expand Up @@ -2769,10 +2780,10 @@ typedarray-to-buffer@^3.1.5:
dependencies:
is-typedarray "^1.0.0"

typescript@^3.3:
version "3.8.2"
resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.8.2.tgz#91d6868aaead7da74f493c553aeff76c0c0b1d5a"
integrity sha512-EgOVgL/4xfVrCMbhYKUQTdF37SQn4Iw73H5BgCrF1Abdun7Kwy/QZsE/ssAy0y4LxBbvua3PIbFsbRczWWnDdQ==
typescript@^4.1:
version "4.1.5"
resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.1.5.tgz#123a3b214aaff3be32926f0d8f1f6e704eb89a72"
integrity sha512-6OSu9PTIzmn9TCDiovULTnET6BgXtDYL4Gg4szY+cGsc3JP1dQL8qvE8kShTRx1NIw4Q9IBHlwODjkjWEtMUyA==

uglify-js@^3.1.4:
version "3.8.0"
Expand Down

0 comments on commit 8f26c93

Please sign in to comment.