Skip to content

Commit

Permalink
WIP --enable-u8
Browse files Browse the repository at this point in the history
There's not much practical need for wchar strings (esp. 4-byte strings),
but a lot for a proper and safe utf-8 string library.

Recommended: ./configure --disable-wchar --enable-u8
See GH #61

Start with u8cpy_s, u8icpy_s, u8cat_s, u8icat_s, u8nlen_s,
u8norm_s and the safe_u8_lib.h header.
  • Loading branch information
rurban committed Mar 8, 2024
1 parent 81429ac commit 4019ed7
Show file tree
Hide file tree
Showing 18 changed files with 13,188 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ dist_pkginclude_HEADERS = \
include/safe_types.h \
include/safe_lib_errno.h

# Ship the utf-8 API header only when the ENABLE_U8 conditional is true
# (i.e. the package was configured with --enable-u8).
if ENABLE_U8
pkginclude_HEADERS += \
include/safe_u8_lib.h
endif

# Support files
SAFEC_INFRA = \
$(top_srcdir)/README.md \
Expand Down
20 changes: 20 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,26 @@ fi
AC_SUBST(INSERT_EXTS)
AM_CONDITIONAL(ENABLE_EXTS, test "x$enable_extensions" = "xtrue")

# --enable-u8: opt-in switch for the additional utf-8 string library.
# Accepts only yes or no; any other value aborts configure. Default is no.
# Result: enable_u8 is true or false, the ENABLE_U8 automake conditional
# follows it, and INSERT_U8 is substituted with either a define or an undef
# of SAFECLIB_ENABLE_U8.
AC_ARG_ENABLE(u8,
AS_HELP_STRING([--enable-u8],
[Add an additional utf-8 string library.
@<:@default=no@:>@]),
[case "${enableval}" in
yes) enable_u8=true ;;
no) enable_u8=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-u8]) ;;
esac], [enable_u8=false])
AC_MSG_CHECKING([for --enable-u8])
if test "x$enable_u8" = "xtrue" ; then
AC_MSG_RESULT([yes])
INSERT_U8="#define SAFECLIB_ENABLE_U8 1"
else
AC_MSG_RESULT([no (default)])
INSERT_U8="#undef SAFECLIB_ENABLE_U8"
fi
AM_CONDITIONAL(ENABLE_U8, test "x$enable_u8" = "xtrue")
AC_SUBST(INSERT_U8)

AC_ARG_ENABLE(memmax,
AS_HELP_STRING([--enable-memmax],
[specify the largest object size allowed for the
Expand Down
4 changes: 4 additions & 0 deletions include/safe_str_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,10 @@ EXTERN errno_t _wcsnorm_s_chk(wchar_t *restrict dest, rsize_t dmax,

#endif /* SAFECLIB_DISABLE_WCHAR */

/* Expose the utf-8 string API through this header when the library was
 * built with SAFECLIB_ENABLE_U8 defined.
 * NOTE(review): this include sits before the closing brace guarded by
 * __cplusplus below (presumably the extern "C" block), so the header is
 * pulled in inside that block for C++ builds - confirm that is intended. */
#ifdef SAFECLIB_ENABLE_U8
#include "safe_u8_lib.h"
#endif

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 3 additions & 0 deletions include/safe_types.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,16 @@ extern "C" {
/* errno_t isn't defined in the kernel */
typedef int errno_t;

@INSERT_U8@

#else

#include <stdio.h>
@INSERT_SYS_TYPES_H@
@INSERT_INTTYPES_H@
@INSERT_STDINT_H@
@INSERT_ERRNO_H@
@INSERT_U8@

@FALLBACK_ERRNO_T@

Expand Down
456 changes: 456 additions & 0 deletions include/safe_u8_lib.h

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,17 @@ STD_WCHAR_FILES += \
extwchar/wcsnorm_s.c
endif

# Sources of the optional utf-8 string functions. The list is only defined
# when the ENABLE_U8 conditional is true.
if ENABLE_U8
EXT_U8_FILES = \
extu8/u8cat_s.c \
extu8/u8icat_s.c \
extu8/u8norm_s.c \
extu8/u8cpy_s.c \
extu8/u8icpy_s.c \
extu8/u8ncpy_s.c \
extu8/u8nlen_s.c
endif

STD_IO_FILES = \
io/sscanf_s.c \
io/fscanf_s.c \
Expand Down Expand Up @@ -227,6 +238,7 @@ ALL_SRC_FILES =
$(STD_STR_FILES) \
$(EXT_MEM_FILES) \
$(EXT_STR_FILES) \
$(EXT_U8_FILES) \
$(STD_WCHAR_FILES) \
$(STD_IO_FILES) \
$(STD_OS_FILES) \
Expand Down Expand Up @@ -283,6 +295,12 @@ libsafec_la_SOURCES += \
$(EXT_MEM_FILES) \
$(EXT_STR_FILES)
endif
# With ENABLE_U8 the utf-8 sources are appended to libsafec and are also
# built as a separate libsafecu8 libtool library.
# NOTE(review): EXT_U8_FILES is listed in ALL_SRC_FILES as well; confirm the
# utf-8 sources cannot end up in libsafec_la_SOURCES twice.
if ENABLE_U8
libsafec_la_SOURCES += \
$(EXT_U8_FILES)
lib_LTLIBRARIES += libsafecu8.la
libsafecu8_la_SOURCES = $(EXT_U8_FILES)
endif

libsafec_la_LIBADD = \
libmemprims.la \
Expand Down
231 changes: 231 additions & 0 deletions src/extu8/u8cat_s.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*------------------------------------------------------------------
* u8cat_s.c
*
* September 2020, Reini Urban
*
* Copyright (c) 2020 by Reini Urban
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*------------------------------------------------------------------
*/

#ifdef FOR_DOXYGEN
#include "safe_u8_lib.h"
#else
#include "safeclib_private.h"
#endif

/**
 * @def u8cat_s(dest,dmax,src)
 * @brief
 *    The u8cat_s function appends a copy of the utf-8 string pointed
 *    to by src (including the terminating null byte) to the end of
 *    the utf-8 string pointed to by dest. The first byte of src
 *    overwrites the null byte at the end of dest.
 * @details
 *    The copy is done byte by byte; dmax and all internal bookkeeping
 *    count char units (bytes), not code points.
 *    All elements following the terminating null byte (if any)
 *    written by u8cat_s in the array of dmax bytes pointed to by dest
 *    take unspecified values when u8cat_s returns.
 *    With SAFECLIB_STR_NULL_SLACK defined the rest is cleared with 0.
 *
 * @param[out] dest  pointer to the null-terminated utf-8 string that
 *                   will be extended by src if dmax allows.
 *                   With SAFECLIB_STR_NULL_SLACK defined, the unused
 *                   space after the result is nulled.
 * @param[in]  dmax  restricted maximum length of the resulting dest in
 *                   bytes, including the null
 * @param[in]  src   pointer to the utf-8 string that will be
 *                   concatenated to string dest
 *
 * @pre  Neither dest nor src shall be a null pointer
 * @pre  dmax shall not equal zero
 * @pre  dmax shall not be greater than RSIZE_MAX_STR and size of dest
 * @pre  dest shall be null terminated within its first dmax bytes and
 *       shall have room left for all of src plus the terminating null
 * @pre  Copying shall not take place between objects that overlap
 *
 * @note C11 uses RSIZE_MAX, not RSIZE_MAX_STR.
 *
 * @return  If there is a runtime-constraint violation, then if dest is
 *          not a null pointer and dmax is greater than zero and not
 *          greater than RSIZE_MAX_STR, then u8cat_s nulls dest.
 * @retval  EOK        when successful operation, all the bytes from
 *                     src were appended to dest and the result in dest
 *                     is null terminated.
 * @retval  ESNULLP    when dest or src is a NULL pointer
 * @retval  ESZEROL    when dmax = 0
 * @retval  ESLEMAX    when dmax > RSIZE_MAX_STR
 * @retval  EOVERFLOW  when dmax > size of dest (optionally, when the
 *                     compiler knows the object_size statically)
 * @retval  ESLEWRNG   when dmax != size of dest and --enable-error-dmax
 * @retval  ESUNTERM   when dest is not terminated in the first dmax
 *                     bytes
 * @retval  ESOVRLP    when src overlaps with dest
 * @retval  ESNOSPC    when dest has not enough space left to hold all
 *                     of src including the terminating null
 *
 * @see
 *    u8icat_s(), wcscat_s(), strcpy_s(), strncpy_s()
 */
#ifdef FOR_DOXYGEN
errno_t u8cat_s(char *restrict dest, rsize_t dmax, const char *restrict src)
#else
EXPORT errno_t _u8cat_s_chk(char *restrict dest, rsize_t dmax, const char *restrict src,
                            const size_t destbos)
#endif
{
    rsize_t orig_dmax;
    char *orig_dest;
    const char *overlap_bumper;

    CHK_DEST_NULL("u8cat_s")
    CHK_DMAX_ZERO("u8cat_s")
    if (destbos == BOS_UNKNOWN) {
        CHK_DMAX_MAX("u8cat_s", RSIZE_MAX_STR)
        BND_CHK_PTR_BOUNDS(dest, dmax);
    } else {
        /* NOTE(review): by its name this is the wide-string variant of the
         * check; dest is a char buffer here - confirm the macro is the
         * intended one for a byte-sized dmax. */
        CHK_DESTW_OVR("u8cat_s", dmax, destbos)
    }
    CHK_SRC_NULL_CLEAR("u8cat_s", src)

    /* hold base of dest in case src was not copied */
    orig_dmax = dmax;
    orig_dest = dest;

    if (dest < src) {
        /* dest comes first in memory: walking dest must never reach src */
        overlap_bumper = src;

        /* Find the end of dest */
        while (*dest != '\0') {

            if (unlikely(dest == overlap_bumper)) {
                handle_error(orig_dest, orig_dmax,
                             "u8cat_s: "
                             "overlapping objects",
                             ESOVRLP);
                return RCNEGATE(ESOVRLP);
            }

            dest++;
            dmax--;
            if (unlikely(dmax == 0)) {
                handle_error(orig_dest, orig_dmax,
                             "u8cat_s: "
                             "dest unterminated",
                             ESUNTERM);
                return RCNEGATE(ESUNTERM);
            }
        }

        /* Append src byte by byte while space remains */
        while (dmax > 0) {
            if (unlikely(dest == overlap_bumper)) {
                handle_error(orig_dest, orig_dmax,
                             "u8cat_s: "
                             "overlapping objects",
                             ESOVRLP);
                return RCNEGATE(ESOVRLP);
            }

            *dest = *src;
            if (unlikely(*dest == '\0')) {
#ifdef SAFECLIB_STR_NULL_SLACK
                /* null slack to clear any data */
                if (dmax > 0x20)
                    memset(dest, 0, dmax);
                else {
                    while (dmax) {
                        *dest = '\0';
                        dmax--;
                        dest++;
                    }
                }
#endif
                return RCNEGATE(EOK);
            }

            dmax--;
            dest++;
            src++;
        }
    } else {
        /* src comes first in memory: walking src must never reach dest */
        overlap_bumper = dest;

        /* Find the end of dest */
        while (*dest != '\0') {

            /*
             * NOTE: no need to check for overlap here since src comes first
             * in memory and we're not incrementing src here.
             */
            dest++;
            dmax--;
            if (unlikely(dmax == 0)) {
                handle_error(orig_dest, orig_dmax,
                             "u8cat_s: "
                             "dest unterminated",
                             ESUNTERM);
                return RCNEGATE(ESUNTERM);
            }
        }

        /* Append src byte by byte while space remains */
        while (dmax > 0) {
            if (unlikely(src == overlap_bumper)) {
                handle_error(orig_dest, orig_dmax,
                             "u8cat_s: "
                             "overlapping objects",
                             ESOVRLP);
                return RCNEGATE(ESOVRLP);
            }

            *dest = *src;
            if (unlikely(*dest == '\0')) {
#ifdef SAFECLIB_STR_NULL_SLACK
                /* null slack to clear any data */
                if (dmax > 0x20)
                    memset(dest, 0, dmax);
                else {
                    while (dmax) {
                        *dest = '\0';
                        dmax--;
                        dest++;
                    }
                }
#endif
                return RCNEGATE(EOK);
            }

            dmax--;
            dest++;
            src++;
        }
    }

    /*
     * the entire src was not copied, so null the string
     */
    handle_error(orig_dest, orig_dmax,
                 "u8cat_s: not enough "
                 "space for src",
                 ESNOSPC);

    return RCNEGATE(ESNOSPC);
}
Loading

0 comments on commit 4019ed7

Please sign in to comment.