This file contains definitions for ARI text CODEC functions. More...

#include "cace/cace_data.h"
#include <m-string.h>
#include <stdint.h>
#include <time.h>

Include dependency graph for text_util.h:

This graph shows which files directly or indirectly include this file:

Functions
bool	cace_ari_text_is_identity (const cace_data_t *text)
	Determine if a text string conforms to the "id-text" rule from Section 4.1 of [ietf-dtn-ari-00].

int	cace_uri_percent_encode (m_string_t out, const cace_data_t *in, const char *safe)
	Encode to URI percent-encoding text form.

int	cace_uri_percent_decode (m_string_t out, const cace_data_t *in)
	Decode from URI percent-encoding text form.

int	cace_ari_uint64_encode (m_string_t out, uint64_t value, int base)
	Encode an unsigned integer to text form.

int	cace_ari_uint64_decode (uint64_t *out, const m_string_t in)

int	cace_ari_float64_encode (m_string_t out, double value, char form)
	Encode a floating point number to text form.

int	cace_decfrac_encode (m_string_t out, const struct timespec *in)
	Encode a decimal fraction to text form.

int	cace_decfrac_decode (struct timespec *out, const cace_data_t *in)
	Decode a decimal fraction from text.

bool	cace_data_is_utf8 (const cace_data_t *in)
	Determine if data contains only valid UTF-8 code points.

int	cace_slash_escape (m_string_t out, const cace_data_t *in, const char quote)
	Escape backslashes in tstr or bstr text form.

int	cace_slash_unescape (m_string_t out, const cace_data_t *in)
	Unescape backslashes in tstr/bstr text form.

void	cace_strip_space (m_string_t out, const char *in, size_t in_len)
	Remove whitespace characters from a text string.

void	cace_string_tolower (m_string_t out)
	Convert a text string to lowercase.

void	cace_string_toupper (m_string_t out)
	Convert a text string to uppercase.

int	cace_base16_encode (m_string_t out, const cace_data_t *in, bool uppercase)
	Encode to base16 text form.

int	cace_base16_decode (cace_data_t *out, const m_string_t in)
	Decode base16 text form.

int	cace_base64_encode (m_string_t out, const cace_data_t *in, bool useurl, bool usepad)
	Encode base64 and base64url text forms.

int	cace_base64_decode (cace_data_t *out, const m_string_t in)
	Decode base64 and base64url text forms.

int	cace_date_encode (m_string_t out, const struct tm *in, bool usesep)
	Encode to a Gregorian date in text form.

int	cace_date_decode (struct tm *out, const cace_data_t *in)
	Decode a Gregorian date from text form.

int	cace_utctime_encode (m_string_t out, const struct timespec *in, bool usesep)
	Encode to a UTC time in text form.

int	cace_utctime_decode (struct timespec *out, const cace_data_t *in)
	Decode a UTC time from text form.

int	cace_timeperiod_encode (m_string_t out, const struct timespec *in)
	Encode a signed time period in text form.

int	cace_timeperiod_decode (struct timespec *out, const cace_data_t *in)
	Decode a signed time period from text form.

Detailed Description

This file contains definitions for ARI text CODEC functions.

Function Documentation

◆ cace_ari_float64_encode()

int cace_ari_float64_encode	(	m_string_t	out,
		double	value,
		char	form
	)

Encode a floating point number to text form.

Parameters

[out]	out	The output buffer, which will be appended to.
	value	The value to encode.
	form	The encoding form as one of: 'f' 'g' 'x'

Returns: Zero upon success.

◆ cace_ari_text_is_identity()

bool cace_ari_text_is_identity ( const cace_data_t * text )

Determine if a text string conforms to the "id-text" rule from Section 4.1 of [ietf-dtn-ari-00].

Parameters

[in] text The null-terminated text string.

Returns: True if the text matches the identifier rule.

References CHKFALSE, and cace_data_t::ptr.

◆ cace_ari_uint64_encode()

int cace_ari_uint64_encode	(	m_string_t	out,
		uint64_t	value,
		int	base
	)

Encode an unsigned integer to text form.

The sign will be prepended as necessary.

Parameters

[out]	out	The output buffer, which will be appended to.
	value	The unsigned integer value to encode.
	base	The base to encode to, which must be one of: 2, 10, 16.

Returns: Zero upon success.

References CHKERR1.

◆ cace_base16_decode()

int cace_base16_decode	(	cace_data_t *	out,
		const m_string_t	in
	)

Decode base16 text form.

This is defined in Section 8 of RFC 4648 [11].

Parameters

[out]	out	The output buffer, which will be sized to its data.
[in]	in	The input buffer to read, which must be null terminated. Whitespace in the input must have already been removed with cace_strip_space().

Returns: Zero upon success.

References cace_data_resize(), CHKERR1, and cace_data_t::ptr.

◆ cace_base16_encode()

int cace_base16_encode	(	m_string_t	out,
		const cace_data_t *	in,
		bool	uppercase
	)

Encode to base16 text form.

This is defined in Section 8 of RFC 4648 [11].

Parameters

[out]	out	The output buffer, which will be appended to.
[in]	in	The input buffer to read.
	uppercase	True to use upper-case letters, false to use lower-case.

Returns: Zero upon success.

References cace_data_t::len, and cace_data_t::ptr.

◆ cace_base64_decode()

int cace_base64_decode	(	cace_data_t *	out,
		const m_string_t	in
	)

Decode base64 and base64url text forms.

These are defined in Sections 4 and 5 of RFC 4648 [11].

Parameters

[out]	out	The output buffer, which will be sized to its data.
[in]	in	The input buffer to read, which must be null terminated. Whitespace in the input must have already been removed with cace_strip_space().

Returns: Zero upon success.

References cace_data_extend_back(), cace_data_resize(), CHKERR1, and cace_data_t::ptr.

◆ cace_base64_encode()

int cace_base64_encode	(	m_string_t	out,
		const cace_data_t *	in,
		bool	useurl,
		bool	usepad
	)

Encode base64 and base64url text forms.

These are defined in Sections 4 and 5 of RFC 4648 [11].

Parameters

[out]	out	The output buffer, which will be appended to.
[in]	in	The input buffer to read.
	useurl	True to use the base64url alphabet, false to use the base64 alphabet.
	usepad	True to include padding characters (=), false to not use padding.

Returns: Zero upon success.

References cace_data_t::len, and cace_data_t::ptr.

◆ cace_data_is_utf8()

bool cace_data_is_utf8 ( const cace_data_t * in )

Determine if data contains only valid UTF-8 code points.

Parameters

[in] in The data to check.

Returns: True if the data contains no null codepoints and only valid UTF-8 codepoints.

References CHKFALSE, cace_data_t::len, and cace_data_t::ptr.

◆ cace_date_decode()

int cace_date_decode	(	struct tm *	out,
		const cace_data_t *	in
	)

Decode a Gregorian date from text form.

This is defined in Appendix A of RFC 3339 [12].

Parameters

[out]	out	The decoded time from DTN epoch.
	in	The input buffer to read, which may be null terminated.

Returns: Zero upon success.

References CHKERR1, and cace_data_t::ptr.

◆ cace_date_encode()

int cace_date_encode	(	m_string_t	out,
		const struct tm *	in,
		bool	usesep
	)

Encode to a Gregorian date in text form.

This is defined as "full-date" in Appendix A of RFC 3339 [12].

Note: The CBOR tag 100 epoch of 1970-01-01 is used from [10].

Parameters

[out]	out	The output buffer, which will be sized to its text and null terminated.
[in]	in	The time in separated fields.
	usesep	If true component separators will be added, if false the concise form is used.

Returns: Zero upon success.

References CHKERR1.

◆ cace_decfrac_decode()

int cace_decfrac_decode	(	struct timespec *	out,
		const cace_data_t *	in
	)

Decode a decimal fraction from text.

Parameters

[out]	out	The fractional value with least digit 1e-9
[in]	in	The input buffer to read, which may be null terminated.

Returns: Zero upon success. Or 1 to indicate an argument error, 2 to indicate a numeric error, 3 to indicate extra text at the end, and 4 to indicate value underflow.

References CHKERR1, and cace_data_t::ptr.

◆ cace_decfrac_encode()

int cace_decfrac_encode	(	m_string_t	out,
		const struct timespec *	in
	)

Encode a decimal fraction to text form.

Parameters

[out]	out	The output buffer, which will be appended to.
	in	The input fractional value with least digit 1e-9

Returns: Zero upon success.

References CHKERR1.

◆ cace_slash_escape()

int cace_slash_escape	(	m_string_t	out,
		const cace_data_t *	in,
		const char	quote
	)

Escape backslashes in tstr or bstr text form.

This is defined in Section G.2 of RFC 8610 [2] and Section 7 of RFC 8259 [8].

Parameters

[out]	out	The output buffer, which will be appended to.
	in	The input buffer to read, which must be null terminated.
	quote	The character used to quote the string.

Returns: Zero upon success.

References CHKERR1, cace_data_t::len, and cace_data_t::ptr.

◆ cace_slash_unescape()

int cace_slash_unescape	(	m_string_t	out,
		const cace_data_t *	in
	)

Unescape backslashes in tstr/bstr text form.

This is defined in Section G.2 of RFC 8610 [2].

Parameters

[out]	out	The output buffer, which will be appended to.
	in	The input buffer to read, which may be null terminated.

Returns: Zero upon success.

References cace_data_append_byte(), cace_data_copy(), cace_data_deinit(), cace_data_init(), cace_data_init_view(), CHKERR1, cace_data_t::len, and cace_data_t::ptr.

◆ cace_string_tolower()

void cace_string_tolower ( m_string_t out )

Convert a text string to lowercase.

This is written to work on byte strings, not unicode.

Parameters

[out] out The output buffer, which will be replaced.

References CHKVOID.

◆ cace_string_toupper()

void cace_string_toupper ( m_string_t out )

Convert a text string to uppercase.

This is written to work on byte strings, not unicode.

Parameters

[out] out The output buffer, which will be replaced.

References CHKVOID.

◆ cace_strip_space()

void cace_strip_space	(	m_string_t	out,
		const char *	in,
		size_t	in_len
	)

Remove whitespace characters from a text string.

This is based on isspace() inspection.

Parameters

[out]	out	The output buffer, which will be replaced.
[in]	in	The input text to read.
	in_len	The length of text not including null terminator.

◆ cace_timeperiod_decode()

int cace_timeperiod_decode	(	struct timespec *	out,
		const cace_data_t *	in
	)

Decode a signed time period from text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters

[out]	out	The decoded relative time.
	in	The input buffer to read, which may be null terminated.

Returns: Zero upon success.

References CHKERR1, and cace_data_t::ptr.

◆ cace_timeperiod_encode()

int cace_timeperiod_encode	(	m_string_t	out,
		const struct timespec *	in
	)

Encode a signed time period in text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters

[out]	out	The output buffer, which will be sized to its text and null terminated.
	in	The relative time.

Returns: Zero upon success.

References CHKERR1.

◆ cace_uri_percent_decode()

int cace_uri_percent_decode	(	m_string_t	out,
		const cace_data_t *	in
	)

Decode from URI percent-encoding text form.

This is defined in Section 2.1 of RFC 3986 [1].

Parameters

[out]	out	The output buffer, which will be appended to.
[in]	in	The input encoded text which may be null-terminated.

Returns: Zero upon success.

References CHKERR1, and cace_data_t::ptr.

◆ cace_uri_percent_encode()

int cace_uri_percent_encode	(	m_string_t	out,
		const cace_data_t *	in,
		const char *	safe
	)

Encode to URI percent-encoding text form.

This is defined in Section 2.1 of RFC 3986 [1]. The set of unreserved characters is alpha, digits, and the _.-~ characters, in accordance with Section 2.3 of RFC 3986 [1].

Parameters

[out]	out	The output buffer, which will be appended to.
	in	The input encoded text which is null-terminated.
	safe	A set of additional safe characters to not be encoded, which is null-terminated.

Returns: Zero upon success.

References CHKERR1, and cace_data_t::ptr.

◆ cace_utctime_decode()

int cace_utctime_decode	(	struct timespec *	out,
		const cace_data_t *	in
	)

Decode a UTC time from text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters

[out]	out	The decoded time from DTN epoch.
[in]	in	The input buffer to read, which may be null terminated.

Returns: Zero upon success.

References cace_ari_dtn_epoch, CHKERR1, and cace_data_t::ptr.

◆ cace_utctime_encode()

int cace_utctime_encode	(	m_string_t	out,
		const struct timespec *	in,
		bool	usesep
	)

Encode to a UTC time in text form.

This is defined in Appendix A of RFC 3339 [12].

Note: The DTN epoch is 2000-01-01T00:00:00Z from [9].

Parameters

[out]	out	The output buffer, which will be sized to its text and null terminated.
[in]	in	The time from DTN epoch.
	usesep	If true component separators will be added, if false the concise form is used.

Returns: Zero upon success.

References cace_ari_dtn_epoch, and CHKERR1.

Functions

Detailed Description

Function Documentation

◆ cace_ari_float64_encode()

◆ cace_ari_text_is_identity()

◆ cace_ari_uint64_encode()

◆ cace_base16_decode()

◆ cace_base16_encode()

◆ cace_base64_decode()

◆ cace_base64_encode()

◆ cace_data_is_utf8()

◆ cace_date_decode()

◆ cace_date_encode()

◆ cace_decfrac_decode()

◆ cace_decfrac_encode()

◆ cace_slash_escape()

◆ cace_slash_unescape()

◆ cace_string_tolower()

◆ cace_string_toupper()

◆ cace_strip_space()

◆ cace_timeperiod_decode()

◆ cace_timeperiod_encode()

◆ cace_uri_percent_decode()

◆ cace_uri_percent_encode()

◆ cace_utctime_decode()

◆ cace_utctime_encode()