Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update mirrorbrain #250

Merged
merged 7 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mirrorbrain_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
uses: openzim/docker-publish-action@v10
with:
image-name: kiwix/mirrorbrain
on-master: latest
on-master: bookworm
restrict-to: kiwix/container-images
context: mirrorbrain
registries: ghcr.io
Expand Down
113 changes: 53 additions & 60 deletions mirrorbrain/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,76 +1,69 @@
FROM httpd:2.4.43
FROM httpd:2.4.62
# OCI source label pointing at the repository this image is built from
LABEL org.opencontainers.image.source="https://github.com/kiwix/container-images"
#
# Author : Florent Kaisser <[email protected]>
#
# Based on instructions from https://mirrorbrain.org/docs/installation/source/
#
LABEL maintainer="kiwix"

#Set software versions
ENV MB_VERSION 2.18.1
ENV GEOIP_VERSION 1.6.12
ENV MOD_GEOPIP_VERSION 1.2.10
# MirrorBrain release tag and the GitHub user/organisation whose repository it is downloaded from
ENV MB_VERSION=3.0.0
ENV MB_USER_OR_ORG=kiwix

# MaxMind tooling versions: libmaxminddb (C library), mod_maxminddb (Apache module)
# and geoipupdate (database updater, downloaded as a .deb for the given architecture)
ENV LIBMAXMINDDB_VERSION=1.11.0
ENV MOD_MAXMINDDB_VERSION=1.2.0
ENV GEOIPUPDATE_VERSION=7.0.1
ENV GEOIPUPDATE_ARCH=amd64

#Install needed packages
RUN mkdir -p /usr/share/man/man1/ /usr/share/man/man7/ && apt-get update && apt-get install -y --no-install-recommends wget cron automake libtool unzip libaprutil1-dbd-pgsql postgresql-client build-essential libz-dev python python-dev python-pip python-setuptools python-sqlobject python-formencode python-psycopg2 libconfig-inifiles-perl libwww-perl libdbd-pg-perl libtimedate-perl libdigest-md4-perl
RUN pip install cmdln
# Install build tools, Apache/PostgreSQL client libraries, Perl modules and Python 3 packages.
# One package per line, sorted alphabetically, so additions and removals are easy to review.
# NOTE(review): the man1/man7 directories are created up front, presumably because a package
# post-install step expects them to exist on this base image — confirm before removing.
RUN mkdir -p /usr/share/man/man1/ /usr/share/man/man7/ && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        automake \
        build-essential \
        cron \
        libaprutil1-dbd-pgsql \
        libaprutil1-dev \
        libconfig-inifiles-perl \
        libdbd-pg-perl \
        libdigest-md4-perl \
        libtimedate-perl \
        libtool \
        libwww-perl \
        libz-dev \
        postgresql-client \
        python3-dev \
        python3-geoip2 \
        python3-pip \
        python3-psycopg2 \
        python3-setuptools \
        python3-sqlobject \
        rsync \
        unzip \
        wget
#Install cmdln manually since it is not packaged
# (--break-system-packages lets pip install into the system Python environment)
RUN pip install --no-cache-dir --break-system-packages cmdln

#Copy owned base config file for apache
COPY config/apache/httpd.conf conf/httpd.conf

#Install Geolocalisation
RUN { \
cd /tmp ; \
wget -q -O GeoIP-$GEOIP_VERSION.tar.gz https://github.com/maxmind/geoip-api-c/releases/download/v$GEOIP_VERSION/GeoIP-$GEOIP_VERSION.tar.gz && \
tar xzf GeoIP-$GEOIP_VERSION.tar.gz -C /usr/local/src && \
cd /usr/local/src/GeoIP-$GEOIP_VERSION/ && \
aclocal && autoconf && automake --add-missing && ./configure --prefix=/usr/local/geoip && make && make install ; \
cd /tmp ; \
mkdir -p /usr/local/geoip/share/GeoIP/ ; \
# wget -q https://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip && \
# unzip GeoLite2-City-CSV.zip && cp GeoLite2-City-*/*.csv /usr/local/geoip/share/GeoIP/ && rm -rf GeoLite2-City-* ; \
# wget -q https://geolite.maxmind.com/download/geoip/database/GeoLite2-Country-CSV.zip && \
# unzip GeoLite2-Country-CSV.zip && cp GeoLite2-Country-*/*.csv /usr/local/geoip/share/GeoIP/ && rm -rf GeoLite2-Country-* ; \
}

#Install Geolocalisation for Apache
RUN { \
cd /tmp ; \
wget -q -O mod_geoip2.tar.gz https://github.com/maxmind/geoip-api-mod_geoip2/archive/$MOD_GEOPIP_VERSION.tar.gz && \
tar xzf mod_geoip2.tar.gz -C /usr/local/src && \
cd /usr/local/src/geoip-api-mod_geoip2-$MOD_GEOPIP_VERSION/ && \
sed s/remote_ip/client_ip/g -i mod_geoip.c && \
apxs -i -a -L/usr/local/geoip/lib -I/usr/local/geoip/include -lGeoIP -c mod_geoip.c ; \
}
# Install the MaxMind GeoIP2 tooling in a single layer, in three steps:
#   1. libmaxminddb: download the release tarball, build it, run its test suite
#      (make check), install it and refresh the shared library cache (ldconfig)
#   2. geoipupdate: download the upstream .deb for $GEOIPUPDATE_ARCH and install it with apt-get
#   3. mod_maxminddb: download the release tarball, then configure and install the Apache module
# Downloaded archives and the .deb are removed in the same layer; the extracted
# sources are kept under /usr/local/src.
RUN \
cd /tmp && \
wget -q -O libmaxminddb-${LIBMAXMINDDB_VERSION}.tar.gz https://github.com/maxmind/libmaxminddb/releases/download/${LIBMAXMINDDB_VERSION}/libmaxminddb-${LIBMAXMINDDB_VERSION}.tar.gz && \
tar xzf libmaxminddb-${LIBMAXMINDDB_VERSION}.tar.gz -C /usr/local/src && \
rm libmaxminddb-${LIBMAXMINDDB_VERSION}.tar.gz && \
cd /usr/local/src/libmaxminddb-${LIBMAXMINDDB_VERSION} && \
./configure && \
make && \
make check && \
make install && \
ldconfig && \
cd /tmp && \
wget -q -O geoipupdate_${GEOIPUPDATE_VERSION}_linux_${GEOIPUPDATE_ARCH}.deb https://github.com/maxmind/geoipupdate/releases/download/v${GEOIPUPDATE_VERSION}/geoipupdate_${GEOIPUPDATE_VERSION}_linux_${GEOIPUPDATE_ARCH}.deb && \
apt-get install -y ./geoipupdate_${GEOIPUPDATE_VERSION}_linux_${GEOIPUPDATE_ARCH}.deb && \
rm /tmp/geoipupdate_${GEOIPUPDATE_VERSION}_linux_${GEOIPUPDATE_ARCH}.deb && \
cd /tmp && \
wget -q -O mod_maxminddb-${MOD_MAXMINDDB_VERSION}.tar.gz https://github.com/maxmind/mod_maxminddb/releases/download/${MOD_MAXMINDDB_VERSION}/mod_maxminddb-${MOD_MAXMINDDB_VERSION}.tar.gz && \
tar xzf mod_maxminddb-${MOD_MAXMINDDB_VERSION}.tar.gz -C /usr/local/src && \
rm mod_maxminddb-${MOD_MAXMINDDB_VERSION}.tar.gz && \
cd /usr/local/src/mod_maxminddb-${MOD_MAXMINDDB_VERSION} && \
./configure && \
make install

#Install MirrorBrain from sources
COPY patch /tmp
RUN { \
wget --no-check-certificate -qO - https://github.com/poeml/mirrorbrain/archive/refs/tags/$MB_VERSION.tar.gz | tar -xz ; \
cd mirrorbrain-$MB_VERSION/mod_mirrorbrain ; \
wget -q http://apache.webthing.com/svn/apache/forms/mod_form.h ; \
wget -q http://apache.webthing.com/svn/apache/forms/mod_form.c ; \
mv /tmp/mod_form.c.patch ./ ; \
apxs -cia -lm mod_form.c ; \
apxs -e -n dbd -a modules/mod_dbd.so ; \
apxs -e -n rewrite -a modules/mod_rewrite.so ; \
apxs -cia -lm mod_mirrorbrain.c ; \
cd ../mod_autoindex_mb ; \
apxs -cia mod_autoindex_mb.c ; \
cd ../tools ; \
gcc -Wall -o geoiplookup_continent geoiplookup_continent.c -L/usr/local/geoip/lib -I/usr/local/geoip/include -lGeoIP ; \
gcc -Wall -o geoiplookup_city geoiplookup_city.c -L/usr/local/geoip/lib -I/usr/local/geoip/include -lGeoIP ; \
install -m 755 geoiplookup_continent /usr/bin/geoiplookup_continent ; \
install -m 755 geoiplookup_city /usr/bin/geoiplookup_city ; \
install -m 755 geoip-lite-update /usr/bin/geoip-lite-update ; \
install -m 755 tnull-rsync /usr/bin/null-rsync ; \
install -m 755 scanner.pl /usr/bin/scanner ; \
cd ../mirrorprobe/ && install -m 755 mirrorprobe.py /usr/bin/mirrorprobe ; \
cd ../mb && python setup.py install ; \
patch /usr/bin/scanner /tmp/scanner.patch ; \
rm /tmp/scanner.patch ; \
}
# Download the MirrorBrain sources for $MB_VERSION from the $MB_USER_OR_ORG GitHub account, then:
#   - build and install mod_form, enable mod_dbd and mod_rewrite, build and install
#     mod_mirrorbrain and mod_autoindex_mb with apxs
#   - install the null-rsync, scanner and mirrorprobe tools into /usr/bin
#   - install the `mb` Python package with pip
#   - copy the flag icons and stylesheet to /var/www/static
# TLS certificate verification is kept enabled for the GitHub download (no
# --no-check-certificate), like the other github.com downloads in this file.
# NOTE(review): mod_form.h / mod_form.c are fetched over plain HTTP without any
# checksum — consider vendoring them or verifying a known hash.
RUN \
    wget -qO - https://github.com/$MB_USER_OR_ORG/mirrorbrain/archive/refs/tags/$MB_VERSION.tar.gz | tar -xz && \
    cd mirrorbrain-$MB_VERSION/mod_mirrorbrain && \
    wget -q http://apache.webthing.com/svn/apache/forms/mod_form.h && \
    wget -q http://apache.webthing.com/svn/apache/forms/mod_form.c && \
    apxs -cia -lm mod_form.c && \
    apxs -e -n dbd -a modules/mod_dbd.so && \
    apxs -e -n rewrite -a modules/mod_rewrite.so && \
    apxs -cia -lm mod_mirrorbrain.c && \
    cd ../mod_autoindex_mb && \
    apxs -cia mod_autoindex_mb.c && \
    cd ../tools && \
    install -m 755 null-rsync /usr/bin/null-rsync && \
    install -m 755 scanner.pl /usr/bin/scanner && \
    cd ../mirrorprobe/ && install -m 755 mirrorprobe.py /usr/bin/mirrorprobe && \
    cd ../mb && pip install --no-cache-dir --break-system-packages . && \
    cd ../assets && \
    mkdir -p /var/www/static/flags && \
    cp famfamfam_flag_icons/png/*.png /var/www/static/flags && \
    cp mirrorbrain.css /var/www/static

#Copy files configuration
RUN groupadd -r mirrorbrain && useradd -r -g mirrorbrain -s /bin/bash -c "MirrorBrain user" -d /home/mirrorbrain mirrorbrain
Expand Down
10 changes: 10 additions & 0 deletions mirrorbrain/bin/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,13 @@ else
cron -f
fi
fi

# Optionally refresh the GeoIP databases at startup.
# Enabled when GEOIPUPDATE is set to any non-empty value; the expansion is quoted
# so that empty values or values containing spaces are tested correctly.
if [ -n "${GEOIPUPDATE:-}" ]
then
  geoipupdate -v
fi

# Optionally run Apache in the foreground.
# Enabled when HTTPD_ONLY is set to any non-empty value.
if [ -n "${HTTPD_ONLY:-}" ]
then
  httpd-foreground
fi
2 changes: 1 addition & 1 deletion mirrorbrain/bin/update_mirrorbrain_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,4 @@ scanMirror mirror-sites-ca.mblibrary.info ALLDIRS
scanMirror mirror-sites-in.mblibrary.info ALLDIRS

# Generate HTML mirrors list
mb mirrorlist -f xhtml | grep -v @ > /var/www/download.kiwix.org/mirrors.html
mb mirrorlist -f xhtml --html-header /etc/mirrorlist_header.txt | grep -v @ > /var/www/download.kiwix.org/mirrors.html
2 changes: 2 additions & 0 deletions mirrorbrain/dev/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
download.kiwix.org
GeoIP.conf
56 changes: 56 additions & 0 deletions mirrorbrain/dev/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Development environment

This folder is not meant to reach production. It is a small stack based on docker compose to help developers
test changes locally before pushing them forward.

The stack deploys:
- our custom Mirrorbrain Docker image (web proxy)
- a PostgreSQL database

The stack contains configuration files which have been extracted from production and adapted to work locally.

This is hence not a 100% realistic setup, for instance redirect maps in Apache have been commented out for now.

## How to start the stack

The first thing you will need is a MaxMind configuration file for GeoIP v2 databases. This configuration file
should be retrieved from your MaxMind account, must be named `GeoIP.conf`, and must be placed in the `dev` folder.

Once this configuration file is in place, you can start the docker compose:
```bash
cd mirrorbrain/dev
docker compose -p mirrorbrain up -d
```

If this is the first time you start the stack, you must initialize the DB schema and data. The password of the
`mirrorbrain` DB user is `mirrorbrain`.

```
docker exec -it mb_web ./init_mirrorbrain_db_dev.sh
```

You must also update GeoIP database once in a while.

```
docker exec -it mb_web geoipupdate -v
```

## How to test mirror scanning and stuff like that

Mirrorbrain provides a helpful utility named null-rsync, which mirrors files locally in the Docker container
as sparse files (no content, no disk usage, only filenames and attributes).

```bash
docker exec mb_web null-rsync master.download.kiwix.org::download.kiwix.org/ /var/www/download.kiwix.org/
```

Once this is done, you can run regular `mb` operations. For instance

```bash
docker exec mb_web mb scan -d nightly dotsrc.org
```

You can then check that the mirror is operating properly (note the `X-Forwarded-For` header, which is mandatory in our setup to pass the end-user IP):
```
curl -H "X-Forwarded-For: 45.82.174.12" "http://localhost:8100/nightly/2023-10-26/kiwix-js-electron_i386_2023-10-26.deb?mirrorlist"
```
38 changes: 38 additions & 0 deletions mirrorbrain/dev/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Local development stack: a PostgreSQL database plus the MirrorBrain web
# container built from the parent directory.
services:
postgresdb:
image: postgres:11
container_name: mb_postgresdb
# Published on the host loopback interface only (host port 5433)
ports:
- 127.0.0.1:5433:5432
volumes:
- pg_data_mirrorbrain:/var/lib/postgresql/data
# Development-only credentials
environment:
- POSTGRES_DB=mirrorbrain
- POSTGRES_USER=mirrorbrain
- POSTGRES_PASSWORD=mirrorbrain
web:
build: ../
command: ["start.sh"]
container_name: mb_web
# Published on the host loopback interface only (host port 8100)
ports:
- 127.0.0.1:8100:80
# NOTE(review): presumably read by mirrorbrain/bin/start.sh, which runs
# `geoipupdate -v` when GEOIPUPDATE is non-empty and `httpd-foreground`
# when HTTPD_ONLY is non-empty — confirm start.sh is that script.
environment:
- GEOIPUPDATE=1
- HTTPD_ONLY=1
volumes:
- ./mirrorbrain.conf:/etc/mirrorbrain.conf
- ./httpd.conf:/usr/local/apache2/conf/httpd.conf
- ./httpd-vhosts.conf:/usr/local/apache2/conf/extra/httpd-vhosts.conf
- ./download.kiwix.org:/var/www/download.kiwix.org
- ./GeoIP.conf:/etc/GeoIP.conf
- ./init_mirrorbrain_db_dev.sh:/usr/local/apache2/init_mirrorbrain_db_dev.sh
- ../sql/mirrors-postgresql.sql:/usr/local/apache2/mirrors-postgresql.sql
# Optional bind mounts of a local MirrorBrain checkout (developer-specific paths, disabled by default)
# - /home/benoit/Repos/poeml/mirrorbrain/mb/mb:/usr/local/lib/python3.11/dist-packages/mb/
# - /home/benoit/Repos/poeml/mirrorbrain/tools/null-rsync:/usr/bin/null-rsync
# Named volume mounted at /usr/share/GeoIP
- geoip:/usr/share/GeoIP
depends_on:
- postgresdb

# Named volumes used by the services above
volumes:
pg_data_mirrorbrain:
geoip:
96 changes: 96 additions & 0 deletions mirrorbrain/dev/httpd-vhosts.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
<VirtualHost *:80>
ServerName download.kiwix.org
ServerAdmin [email protected]
DocumentRoot /var/www/download.kiwix.org

Alias /flags "/var/www/flags"

MirrorBrainMetalinkPublisher "Kiwix project" https://kiwix.org
MirrorBrainTorrentTrackerURL "http://tracker.openzim.org:6969/announce"
MirrorBrainTorrentTrackerURL "udp://tracker.openzim.org:6969/announce"
# MirrorBrainTorrentTrackerURL "https://opentracker.xyz:443/announce"
# MirrorBrainTorrentTrackerURL "http://torrent.nwps.ws:80/announce"
# MirrorBrainTorrentTrackerURL "udp://tracker.open-internet.nl:6969/announce"
# MirrorBrainTorrentTrackerURL "udp://tracker.coppersurfer.tk:6969/announce"
# MirrorBrainTorrentTrackerURL "udp://tracker.openbittorrent.com:80/announce"
MirrorBrainDHTNode router.bittorrent.com 6881
MirrorBrainDHTNode router.utorrent.com 6881

<IfModule mod_expires.c>
ExpiresActive On
ExpiresDefault "access plus 1 seconds"
ExpiresByType text/html "access plus 1 seconds"
ExpiresByType image/gif "access plus 120 minutes"
ExpiresByType image/jpeg "access plus 120 minutes"
ExpiresByType image/png "access plus 120 minutes"
ExpiresByType text/css "access plus 60 minutes"
ExpiresByType text/javascript "access plus 60 minutes"
ExpiresByType application/x-javascript "access plus 60 minutes"
ExpiresByType text/xml "access plus 60 minutes"
</IfModule>

<IfModule maxminddb_module>
MaxMindDBEnable On
#MaxMindDBFile COUNTRY_DB /usr/share/GeoIP/GeoLite2-Country.mmdb
MaxMindDBFile CITY_DB /usr/share/GeoIP/GeoLite2-City.mmdb
# we use old DBEnv names to not modify mirrorbrain code
MaxMindDBEnv GEOIP_COUNTRY_CODE CITY_DB/country/iso_code
MaxMindDBEnv GEOIP_COUNTRY_NAME CITY_DB/country/names/en
MaxMindDBEnv GEOIP_CONTINENT_CODE CITY_DB/continent/code
MaxMindDBEnv GEOIP_LONGITUDE CITY_DB/location/longitude
MaxMindDBEnv GEOIP_LATITUDE CITY_DB/location/latitude
MaxMindDBEnv GEOIP_REGION CITY_DB/subdivisions/0/iso_code
MaxMindDBEnv GEOIP_REGION_NAME CITY_DB/subdivisions/0/names/en
</IfModule>

# This is a hack to avoid counting the same download
# twice. Therefore, we need to differentiate - in the Matomo
# log ingester - Mirrorbrain generated redirects and our custom
# permanent links. Therefore Matomo links use HTTP 302 and ours
# HTTP 301 without a cache.
<IfModule mod_rewrite.c>
RewriteEngine on

# RewriteMap redirects-map "txt:/var/www/library.kiwix.org/download.kiwix.org.permalinks"
# RewriteCond ${redirects-map:$1} !=""
# RewriteRule ^(.*)$ ${redirects-map:$1} [last,redirect=301,E=NOCACHE:1]

# RewriteMap releases-map "txt:/data/maps/kiwix-releases.map"
# RewriteCond ${releases-map:$1} !=""
# RewriteRule ^(.*)$ ${releases-map:$1} [last,redirect=301,E=NOCACHE:1]

# RewriteMap zim-map "txt:/data/maps/zim.map"
# RewriteCond ${zim-map:$1} !=""
# RewriteRule ^(.*)$ ${zim-map:$1} [last,redirect=301,E=NOCACHE:1]

# RewriteMap nightly-map "txt:/data/maps/kiwix-nightly.map"
# RewriteCond ${nightly-map:$1} !=""
# RewriteRule ^(.*)$ ${nightly-map:$1} [last,redirect=301,E=NOCACHE:1]

Header always set Cache-Control "no-store, no-cache, must-revalidate" env=NOCACHE
</IfModule>

# Settings for the download tree: MirrorBrain is enabled here
<Directory /var/www/download.kiwix.org>
MirrorBrainEngine On
MirrorBrainDebug Off
FormGET On
MirrorBrainHandleHEADRequestLocally Off
MirrorBrainMinSize 2048
# User agents and MIME types excluded from MirrorBrain handling
MirrorBrainExcludeUserAgent rpm/4.4.2*
MirrorBrainExcludeUserAgent *APT-HTTP*
MirrorBrainExcludeMimeType application/pgp-keys
Options FollowSymLinks Indexes
IndexOptions FancyIndexing
AllowOverride All
# NOTE(review): Order/Allow are Apache 2.2-style access directives; on httpd 2.4
# they require mod_access_compat to be loaded — confirm, or consider `Require all granted`.
Order allow,deny
Allow from all
# Allow cross-origin requests from any origin
Header set Access-Control-Allow-Origin "*"
</Directory>

<Directory /var/www/download.kiwix.org/nightly>
AllowOverride none
Options +Indexes
IndexOptions FancyIndexing
IndexOrderDefault Descending Date
</Directory>
</VirtualHost>
Loading