From 46167af32d515975d3bfed71694ad4a3c667c24a Mon Sep 17 00:00:00 2001 From: Reuben Thomas Date: Sun, 19 Feb 2023 13:38:44 +0000 Subject: [PATCH] Use GNU libiconv instead of system iconv --- .github/workflows/c-cpp.yml | 13 +++++---- .gitmodules | 3 ++ Makefile.am | 16 ++++++++-- bootstrap.conf | 29 +++++++++++++----- build-aux/libiconv-configure.gnu | 2 ++ configure.ac | 3 +- doc/recode.texi | 10 +++---- libiconv | 1 + src/Makefile.am | 6 ++-- src/iconv.c | 2 +- src/iconv.h | 1 + tables.py | 50 ++++++++++++-------------------- tests/setup.py.in | 2 -- 13 files changed, 79 insertions(+), 59 deletions(-) create mode 100644 build-aux/libiconv-configure.gnu create mode 160000 libiconv create mode 120000 src/iconv.h diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index f8a44c1..75ec9b9 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -31,7 +31,7 @@ jobs: release: false msystem: ${{matrix.sys}} install: >- - patch git groff help2man rsync texinfo flex + patch git groff help2man rsync texinfo flex gperf mingw-w64-${{matrix.arch}}-autotools mingw-w64-${{matrix.arch}}-gcc mingw-w64-${{matrix.arch}}-python3 @@ -42,17 +42,18 @@ jobs: submodules: true - name: Install dependencies (Ubuntu) if: ${{ matrix.os == 'ubuntu-latest' }} - run: sudo apt-get -y install python3 cython3 flex help2man autopoint texinfo gettext + run: sudo apt-get -y install python3 cython3 flex help2man autopoint texinfo gettext gperf groff - name: Install dependencies (macOS) if: ${{ matrix.os == 'macos-latest' }} run: | - brew install autoconf automake help2man python cython flex texinfo + brew install autoconf automake help2man python cython flex texinfo gperf groff gnu-tar # Prepend optional brew binary directories to PATH - echo "/usr/local/opt/flex/bin:/usr/local/opt/m4/bin:/usr/local/opt/texinfo/bin:/usr/local/opt/gettext/bin:/usr/local/opt/cython/bin" >> $GITHUB_PATH + echo 
"/usr/local/opt/gnu-tar/libexec/gnubin:/usr/local/opt/flex/bin:/usr/local/opt/m4/bin:/usr/local/opt/texinfo/bin:/usr/local/opt/gettext/bin:/usr/local/opt/cython/bin" >> $GITHUB_PATH - name: Set up environment (Ubuntu) if: ${{ matrix.os == 'ubuntu-latest' }} + # FIXME: ASAN run: | - echo "ASAN=yes" >> $GITHUB_ENV + echo "# ASAN=yes" >> $GITHUB_ENV echo "LSAN_OPTIONS=verbosity=1:log_threads=1" >> $GITHUB_ENV - name: Set up environment (macOS) if: ${{ matrix.os == 'macos-latest' }} @@ -68,6 +69,6 @@ jobs: - name: Build run: | ./bootstrap - if [[ "$ASAN" == "yes" ]]; then ./configure --enable-silent-rules CFLAGS="-g3 -fsanitize=address -fsanitize=undefined" LDFLAGS="-fsanitize=address -fsanitize=undefined" TESTS_ENVIRONMENT_EXTRA="LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/11/libasan.so PYTHONMALLOC=malloc"; else ./configure --enable-silent-rules; fi + if [[ "$ASAN" == "yes" ]]; then ./configure --enable-silent-rules CFLAGS="-g3 -fPIC -fsanitize=address -fsanitize=undefined" LDFLAGS="-fsanitize=address -fsanitize=undefined" TESTS_ENVIRONMENT_EXTRA="LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/11/libasan.so PYTHONMALLOC=malloc"; else ./configure --enable-silent-rules; fi make check make distcheck diff --git a/.gitmodules b/.gitmodules index 4868669..0774da7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "gl-mod/bootstrap"] path = gl-mod/bootstrap url = https://github.com/gnulib-modules/bootstrap.git +[submodule "libiconv"] + path = libiconv + url = https://git.savannah.gnu.org/git/libiconv.git diff --git a/Makefile.am b/Makefile.am index 2bc0210..0d8461f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,13 +19,23 @@ AUTOMAKE_OPTIONS = gnits ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = doc lib src po tests contrib +SUBDIRS = libiconv doc lib src po tests contrib EXTRA_DIST = COPYING-LIB \ m4/gnulib-cache.m4 tables.py \ keld/NomsSeulsfinal.lst keld/charsets.def keld/control.def \ keld/iso10646.def keld/other.def keld/rfc1345.txt +dist-hook: + @if test "$(VPATH)" = 
""; then \ + cd libiconv && \ + make dist && \ + cd ../$(PACKAGE)-$(VERSION) && \ + tar -z -x -f ../libiconv/libiconv-*.tar.gz --transform="s|^libiconv-[^/]*|libiconv|"; \ + else \ + echo "I can't make a proper dist tarball in a VPATH build!"; \ + fi + release: distcheck git diff --exit-code && \ git tag -a -m "Release tag" "v$(VERSION)" && \ @@ -37,7 +47,7 @@ release: distcheck dist_type=tar.gz # Ignore built files that are part of the distribution (specifically, -# src/recode.1). +# src/recode.1), and the libiconv subdirectory. distcleancheck_listfiles = \ - find . -type f -exec sh -c 'test -f $(srcdir)/$$1 || echo $$1' \ + find . -prune libiconv -type f -exec sh -c 'test -f $(srcdir)/$$1 || echo $$1' \ sh '{}' ';' diff --git a/bootstrap.conf b/bootstrap.conf index bf7a9bf..e152c04 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -1,4 +1,4 @@ -# bootstrap.conf (Recode) version 2023-02-11 +# bootstrap.conf (Recode) version 2023-02-25 # This file is part of Recode. # @@ -42,7 +42,10 @@ buildreq=' # Non-default gnulib directory options. local_gl_path=gl-mod/bootstrap:gl -gnulib_git_submodules=gl-mod/bootstrap +gnulib_git_submodules=' + gl-mod/bootstrap + libiconv +' # Additional gnulib-tool options to use. gnulib_tool_options=' @@ -58,7 +61,6 @@ gnulib_modules=' getopt-posix gettext-h hash - iconv isatty localcharset manywarnings @@ -87,18 +89,21 @@ gnulib_non_module_files="$gnulib_non_module_files"' # Copyright holder copyright_holder="Free Software Foundation, Inc." +# Don't run autoreconf recursively +AUTORECONF="autoreconf --no-recursive" + ## --------------- ## ## Hook functions. ## ## --------------- ## -# enchant_ignore_gnulib_ignore -# ------------------------- +# recode_ignore_gnulib_ignore +# --------------------------- # gnulib-tool updates m4/.gitignore and lib/.gitignore, and keeping # generated files under version control does not make sense. 
Since # lib is entirely ignored, we only need to prepopulate the m4 ignore # files with generated files not tracked by gnulib-tool. -enchant_ignore_gnulib_ignore () +recode_ignore_gnulib_ignore () { $debug_cmd @@ -116,7 +121,17 @@ gnulib-comp.m4 EOF fi } -func_add_hook func_prep enchant_ignore_gnulib_ignore +func_add_hook func_prep recode_ignore_gnulib_ignore + +# recode_autogen_libiconv +# ----------------------- +recode_autogen_libiconv () +{ + $debug_cmd + + ( unset MAKEFLAGS; export GNULIB_SRCDIR=$(pwd)/gnulib; cd ./libiconv; ln -sf ../build-aux/libiconv-configure.gnu configure.gnu; ./autogen.sh ) +} +func_add_hook func_reconfigure recode_autogen_libiconv # Local variables: diff --git a/build-aux/libiconv-configure.gnu b/build-aux/libiconv-configure.gnu new file mode 100644 index 0000000..8e2bd49 --- /dev/null +++ b/build-aux/libiconv-configure.gnu @@ -0,0 +1,2 @@ +#!/bin/sh +./configure --disable-shared --enable-extra-encodings "$@" diff --git a/configure.ac b/configure.ac index 4221afc..a32aec9 100644 --- a/configure.ac +++ b/configure.ac @@ -5,6 +5,7 @@ AC_PREREQ([2.71]) AC_INIT([recode],[3.7.14],[rrt@sc3d.org]) AC_CONFIG_SRCDIR(src/recode.c) +AC_CONFIG_SUBDIRS([libiconv]) AC_CONFIG_AUX_DIR([build-aux]) AM_INIT_AUTOMAKE AC_CONFIG_MACRO_DIR(m4) @@ -110,8 +111,8 @@ AM_GNU_GETTEXT_VERSION(0.19) AM_GNU_GETTEXT(external) AM_XGETTEXT_OPTION([--from-code=UTF-8]) +# Output AC_CONFIG_HEADERS([config.h]) - AC_CONFIG_FILES([Makefile contrib/Makefile doc/Makefile lib/Makefile po/Makefile.in src/Makefile src/recode.x tests/Makefile tests/setup.py]) diff --git a/doc/recode.texi b/doc/recode.texi index 5a0c2f6..659c8fe 100644 --- a/doc/recode.texi +++ b/doc/recode.texi @@ -81,7 +81,7 @@ sets and surface encodings. When this cannot be achieved exactly, it may get rid of the offending characters or fall back on approximations. The library recognises or produces more than 300 different character sets and is able to convert files between almost any pair. 
Most @w{RFC 1345} -character sets, and all character sets from a pre-installed @code{iconv} +character sets, and all character sets from the included GNU @code{libiconv} library, are supported. The @code{recode} program is a handy front-end to the library. @@ -404,11 +404,9 @@ those of Keld Simonsen and Bruno Haible. @cindex charsets, overview Recoding is currently possible between many charsets, the bulk of which -is described by @w{RFC 1345} tables or available in a pre-installed -external @code{iconv} library. @xref{Tabular}, and -@pxref{iconv}@footnote{Because @code{iconv} can vary from system to -system, and is itself a complex tool, it can cause recode to behave in -unexpected ways. Therefore, by default it is only used when a conversion +is described by @w{RFC 1345} tables or available in the included +GNU @code{libiconv} library. @xref{Tabular}, and +@pxref{iconv}@footnote{By default, @code{iconv} is only used when a conversion would not be possible without it. To request that @code{iconv} be used, use @code{--prefer-iconv}; see @xref{prefer-iconv}. Conversely, you can disable it with the @code{-x:} option; see @xref{disable-iconv}.}. 
The diff --git a/libiconv b/libiconv new file mode 160000 index 0000000..4b9c27b --- /dev/null +++ b/libiconv @@ -0,0 +1 @@ +Subproject commit 4b9c27bec873392533dd3ef309cd62962a644e18 diff --git a/src/Makefile.am b/src/Makefile.am index 26d7f37..10d1497 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -52,9 +52,9 @@ recode_LDADD = librecode.la librecode_la_SOURCES = charname.c combine.c fr-charname.c iconv.c \ names.c outer.c recode.c request.c strip-pool.c task.c $(ALL_STEPS) \ $(include_HEADERS) $(noinst_HEADERS) $(H_STEPS) -librecode_la_LDFLAGS = -no-undefined -version-info $(VERSION_INFO) $(LTLIBICONV) $(LTLIBINTL) \ +librecode_la_LDFLAGS = -no-undefined -version-info $(VERSION_INFO) $(LTLIBINTL) \ $(LIB_CLOCK_GETTIME) $(LIB_GETRANDOM) $(LIB_HARD_LOCALE) $(LIB_MBRTOWC) $(LIB_SETLOCALE_NULL) -librecode_la_LIBADD = ../lib/libgnu.la libmerged.la +librecode_la_LIBADD = ../lib/libgnu.la libmerged.la ../libiconv/lib/libiconv.la libmerged_la_SOURCES = merged.c libmerged_la_CPPFLAGS = $(NON_WARN_CFLAGS) @@ -165,3 +165,5 @@ strip-pool.c strip-data.c: stamp-strip stamp-strip: ../tables.py $(MNEMONICS_DS) $(CHARSETS_DEF) $(TABLES_PY) -C $(srcdir) -p $(MNEMONICS_DS) $(CHARSETS_DEF) @echo timestamp > $(srcdir)/$@ + +DISTCLEANFILES = iconv.h libiconv.a libiconv.la diff --git a/src/iconv.c b/src/iconv.c index a44fd8c..8e29a4c 100644 --- a/src/iconv.c +++ b/src/iconv.c @@ -21,7 +21,7 @@ #include "config.h" #include "common.h" #include "decsteps.h" -#include +#include "iconv.h" #include "iconvdecl.h" /*--------------------------------------. diff --git a/src/iconv.h b/src/iconv.h new file mode 120000 index 0000000..22fd79e --- /dev/null +++ b/src/iconv.h @@ -0,0 +1 @@ +../libiconv/include/iconv.h.inst \ No newline at end of file diff --git a/tables.py b/tables.py index 1d7bc9a..1e5a18d 100755 --- a/tables.py +++ b/tables.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # -*- coding: utf-8 -*- # Automatically derive Recode table files from various sources. 
-# Copyright © 1993-2000 Free Software Foundation, Inc. +# Copyright © 1993-2023 Free Software Foundation, Inc. # François Pinard , 1993. # This program is free software; you can redistribute it and/or modify @@ -40,7 +40,7 @@ When `-F' and `-n' are used, process Alain's tables. """ -import re, sys, io +import re, sys, os, io # Character constants. REPLACEMENT_CHARACTER = 0xFFFD @@ -98,7 +98,6 @@ def main(self, *arguments): # Read all data tables. if self.directory: - import os os.chdir(self.directory) if self.iconv: self.iconv.digest() @@ -464,32 +463,22 @@ def digest(self): canonical[charset.upper()] = charset # Read in the encodings.def file. - sys.stdout.write("Reading from `iconv -l'\n") - libc = None - import os + sys.stdout.write("Reading from `iconv_no_18n -l'\n") names = [] - for line in os.popen('iconv -l'): - if libc is None: - libc = len(line.split('/')) == 3 - if libc: - first, second, empty = line.split('/') - assert empty == '\n', repr(line) - name = second or first - if name not in names: - names.append(name) - self.data.append((name, ())) - else: - aliases = [] - for alias in line.split(): - if alias in canonical: - alias = canonical[alias] - aliases.append(alias) - self.data.append((aliases[0], aliases[1:])) + for line in os.popen('../libiconv/src/iconv_no_i18n -l'): + aliases = [] + for alias in line.split(): + if alias in canonical: + alias = canonical[alias] + aliases.append(alias) + self.data.append((aliases[0], aliases[1:])) def complete(self, french): - def write_charset(format, charset): - write(format % charset) - write(format % (charset + "-translit")) + def write_charset_with_aliases(charset, suffix=''): + write(f' "{charset + suffix}",\n') + for alias in aliases[:-1]: + write(f'\t"{alias + suffix}",\n') + write(f'\t"{aliases[-1] + suffix}", NULL,\n') if not self.do_sources: return write = Output(self.SOURCES).write @@ -505,12 +494,11 @@ def write_charset(format, charset): % count) for charset, aliases in self.data: if aliases: - 
write_charset(' "%s",\n', charset) - for alias in aliases[:-1]: - write_charset('\t"%s",\n', alias) - write_charset('\t"%s", NULL,\n', aliases[-1]) + write_charset_with_aliases(charset) + write_charset_with_aliases(charset, '-translit') else: - write_charset(' "%s", NULL,\n', charset) + write(f' "{charset}", NULL,\n') + write(f' "{charset}-translit", NULL,\n') write(' NULL\n' ' };\n') diff --git a/tests/setup.py.in b/tests/setup.py.in index 352b05b..1e6d6e1 100644 --- a/tests/setup.py.in +++ b/tests/setup.py.in @@ -9,10 +9,8 @@ srcdir = '@srcdir@' top_builddir = '@top_builddir@' top_srcdir = '@top_srcdir@' libintl = '@LIBINTL@' -libiconv = '@LIBICONV@' extra_libs = [] extra_libs.extend(libintl.split()) -extra_libs.extend(libiconv.split()) # FIXME: On Mingw, Cython seems to call gcc in such a way that it doesn't understand UNIX paths if os.name == 'nt':