DTNMA Reference Tools v2.1.0 - 13.gc5c0bac
Delay-Tolerant Networking Management Architecture (DTNMA) Tool Suite
Loading...
Searching...
No Matches
text_util.c File Reference
#include "text_util.h"
#include "lit.h"
#include "cace/util/defs.h"
#include <string.h>
#include <strings.h>
#include <inttypes.h>
#include <stdlib.h>
#include <math.h>
+ Include dependency graph for text_util.c:

Macros

#define TIMEPERIOD_DAY   (24 * 3600)
 
#define TIMEPERIOD_HOUR   3600
 
#define TIMEPERIOD_MINUTE   60
 

Typedefs

typedef unsigned long subsec_t
 Type for timespec::tv_nsec.
 

Functions

static size_t text_real_len (const cace_data_t *data)
 Get the size of text ignoring a terminating null.
 
static int take_hex_1byte (uint8_t *out, const char **curs, const char *end)
 
static int take_hex_2byte (uint16_t *out, const char **curs, const char *end)
 
bool cace_ari_text_is_identity (const cace_data_t *text)
 Determine if a text string conforms to the "id-text" rule from Section 4.1 of [ietf-dtn-ari-00].
 
int cace_uri_percent_encode (m_string_t out, const cace_data_t *in, const char *safe)
 Encode to URI percent-encoding text form.
 
int cace_uri_percent_decode (m_string_t out, const cace_data_t *in)
 Decode from URI percent-encoding text form.
 
int cace_ari_uint64_encode (m_string_t out, uint64_t value, int base)
 Encode an unsigned integer to text form.
 
int cace_ari_uint64_decode (uint64_t *out, const m_string_t in)
 
int cace_ari_float64_encode (m_string_t out, double value, char form)
 Encode a floating point number to text form.
 
static int subsec_decode (subsec_t *subsec, const char **curs, const char *const end)
 
int cace_subsec_encode (m_string_t out, subsec_t subsec)
 
int cace_decfrac_encode (m_string_t out, const struct timespec *in)
 Encode a decimal fraction to text form.
 
int cace_decfrac_decode (struct timespec *out, const cace_data_t *in)
 Decode a decimal fraction from text.
 
bool cace_data_is_utf8 (const cace_data_t *in)
 Determine if data contains only valid UTF-8 code points.
 
int cace_slash_escape (m_string_t out, const cace_data_t *in, const char quote)
 Escape backslashes in tstr or bstr text form.
 
int cace_slash_unescape (m_string_t out, const cace_data_t *in)
 Unescape backslashes in tstr/bstr text form.
 
static void strip_chars (m_string_t out, const char *in, size_t in_len, const char *chars)
 
void cace_strip_space (m_string_t out, const char *in, size_t in_len)
 Remove whitespace characters from a text string.
 
void cace_string_tolower (m_string_t out)
 Convert a text string to lowercase.
 
void cace_string_toupper (m_string_t out)
 Convert a text string to uppercase.
 
int cace_base16_encode (m_string_t out, const cace_data_t *in, bool uppercase)
 Encode to base16 text form.
 
static int base16_decode_char (uint8_t chr)
 Decode a single character.
 
int cace_base16_decode (cace_data_t *out, const m_string_t in)
 Decode base16 text form.
 
int cace_base64_encode (m_string_t out, const cace_data_t *in, bool useurl, bool usepad)
 Encode base64 and base64url text forms.
 
static int base64_decode_char (uint8_t chr)
 Decode a single character.
 
int cace_base64_decode (cace_data_t *out, const m_string_t in)
 Decode base64 and base64url text forms.
 
int cace_date_encode (m_string_t out, const struct tm *in, bool usesep)
 Encode to a Gregorian date in text form.
 
int cace_date_decode (struct tm *out, const cace_data_t *in)
 Decode a Gregorian date from text form.
 
int cace_utctime_encode (m_string_t out, const struct timespec *in, bool usesep)
 Encode to a UTC time in text form.
 
int cace_utctime_decode (struct timespec *out, const cace_data_t *in)
 Decode a UTC time from text form.
 
int cace_timeperiod_encode (m_string_t out, const struct timespec *in)
 Encode a signed time period in text form.
 
int cace_timeperiod_decode (struct timespec *out, const cace_data_t *in)
 Decode a signed time period from text form.
 

Variables

static const char id_text_first [] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
 
static const char id_text_rest [] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-"
 
static const char * unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~"
 Set of unreserved characters from Section 2.3 of RFC 3986 [1].
 
static const size_t base16_decode_lim = 0x80
 Size of the base16_decode_table.
 
static const int base16_decode_table [0x80]
 Decode table for base16.
 
static const char * base64_alphabet
 
static const char * base64url_alphabet
 
static const size_t base64_decode_lim = 0x80
 Size of the base64_decode_table.
 
static const int base64_decode_table [0x80]
 Decode table for base64 and base64url.
 

Macro Definition Documentation

◆ TIMEPERIOD_DAY

#define TIMEPERIOD_DAY   (24 * 3600)

◆ TIMEPERIOD_HOUR

#define TIMEPERIOD_HOUR   3600

◆ TIMEPERIOD_MINUTE

#define TIMEPERIOD_MINUTE   60

Typedef Documentation

◆ subsec_t

typedef unsigned long subsec_t

Type for timespec::tv_nsec.

Function Documentation

◆ base16_decode_char()

static int base16_decode_char ( uint8_t  chr)
static

Decode a single character.

Parameters
chrThe character to decode.
Returns
If non-negative, the decoded value. -1 to indicate error. -2 to indicate whitespace.

References base16_decode_lim, and base16_decode_table.

Referenced by cace_base16_decode().

◆ base64_decode_char()

static int base64_decode_char ( uint8_t  chr)
static

Decode a single character.

Parameters
chrThe character to decode.
Returns
If non-negative, the decoded value. -1 to indicate error. -2 to indicate whitespace.

References base64_decode_lim, and base64_decode_table.

Referenced by cace_base64_decode().

◆ cace_ari_float64_encode()

int cace_ari_float64_encode ( m_string_t  out,
double  value,
char  form 
)

Encode a floating point number to text form.

Parameters
[out]outThe output buffer, which will be appended to.
valueThe value to encode.
formThe encoding form as one of: 'f' 'g' 'x'
Returns
Zero upon success.

Referenced by cace_ari_text_encode_lit(), and test_cace_ari_float64_encode().

◆ cace_ari_text_is_identity()

bool cace_ari_text_is_identity ( const cace_data_t *  text)

Determine if a text string conforms to the "id-text" rule from Section 4.1 of [ietf-dtn-ari-00].

Parameters
[in]textThe null-terminated text string.
Returns
True if the text matches the identifier rule.

References CHKFALSE, id_text_first, id_text_rest, cace_data_t::ptr, and text_real_len().

Referenced by cace_ari_text_encode_lit(), and test_cace_ari_text_is_identity().

◆ cace_ari_uint64_decode()

int cace_ari_uint64_decode ( uint64_t *  out,
const m_string_t  in 
)

References CHKERR1.

◆ cace_ari_uint64_encode()

int cace_ari_uint64_encode ( m_string_t  out,
uint64_t  value,
int  base 
)

Encode an unsigned integer to text form.

The sign will be prepended as necessary.

Parameters
[out]outThe output buffer, which will be appended to.
valueThe unsigned integer value to encode.
baseThe base to encode to, which must be one of: 2, 10, 16.
Returns
Zero upon success.

References CHKERR1.

Referenced by cace_ari_text_encode_lit().

◆ cace_base16_decode()

int cace_base16_decode ( cace_data_t *  out,
const m_string_t  in 
)

Decode base16 text form.

This is defined in Section 8 of RFC 4648 [11].

Parameters
[out]outThe output buffer, which will be sized to its data.
[in]inThe input buffer to read, which must be null terminated. Whitespace in the input must have already been removed with cace_strip_space().
Returns
Zero upon success.

References base16_decode_char(), cace_data_resize(), CHKERR1, and cace_data_t::ptr.

Referenced by agentParseHex(), check_cnst(), check_convert(), check_decoding(), check_lookup(), check_match(), check_normalize(), read_cborhex(), stdin_recv(), suiteSetUp(), test_ari_cmp(), test_ari_equal(), test_ari_hash(), test_cace_ari_cbor_decode_invalid(), test_cace_ari_cbor_decode_partial(), test_cace_base16_decode_invalid(), test_cace_base16_decode_valid(), TEST_CASE(), TEST_CASE(), and test_util_ari_decode().

◆ cace_base16_encode()

int cace_base16_encode ( m_string_t  out,
const cace_data_t *  in,
bool  uppercase 
)

Encode to base16 text form.

This is defined in Section 8 of RFC 4648 [11].

Parameters
[out]outThe output buffer, which will be appended to.
[in]inThe input buffer to read.
uppercaseTrue to use upper-case letters, false to use lower-case.
Returns
Zero upon success.

References cace_data_t::len, and cace_data_t::ptr.

Referenced by agentShowHexReports(), cace_ari_text_encode_lit(), check_convert(), check_encoding(), stdout_send(), test_cace_base16_encode(), TEST_CASE(), test_util_ari_encode(), and write_cborhex().

◆ cace_base64_decode()

int cace_base64_decode ( cace_data_t *  out,
const m_string_t  in 
)

Decode base64 and base64url text forms.

These are defined in Sections 4 and 5 of RFC 4648 [11].

Parameters
[out]outThe output buffer, which will be sized to its data.
[in]inThe input buffer to read, which must be null terminated. Whitespace in the input must have already been removed with cace_strip_space().
Returns
Zero upon success.

References base64_decode_char(), cace_data_extend_back(), cace_data_resize(), CHKERR1, and cace_data_t::ptr.

Referenced by test_cace_base64_decode_invalid(), and test_cace_base64_decode_valid().

◆ cace_base64_encode()

int cace_base64_encode ( m_string_t  out,
const cace_data_t *  in,
bool  useurl,
bool  usepad 
)

Encode base64 and base64url text forms.

These are defined in Sections 4 and 5 of RFC 4648 [11].

Parameters
[out]outThe output buffer, which will be appended to.
[in]inThe input buffer to read.
useurlTrue to use the base64url alphabet, false to use the base64 alphabet.
usepadTrue to include padding characters (=), false to not use padding.
Returns
Zero upon success.

References base64_alphabet, base64url_alphabet, cace_data_t::len, and cace_data_t::ptr.

Referenced by cace_ari_text_encode_lit(), and TEST_CASE().

◆ cace_data_is_utf8()

bool cace_data_is_utf8 ( const cace_data_t *  in)

Determine if data contains only valid UTF-8 code points.

Parameters
[in]inThe data to check.
Returns
True if the data contains no null codepoints and only valid UTF-8 codepoints.

References CHKFALSE, cace_data_t::len, and cace_data_t::ptr.

Referenced by cace_ari_text_encode_lit().

◆ cace_date_decode()

int cace_date_decode ( struct tm *  out,
const cace_data_t *  in 
)

Decode a Gregorian date from text form.

This is defined in Appendix A of RFC 3339 [12].

Parameters
[out]outThe decoded time from DTN epoch.
inThe input buffer to read, which may be null terminated.
Returns
Zero upon success.

References CHKERR1, cace_data_t::ptr, strip_chars(), and text_real_len().

Referenced by cace_ari_cbor_decode_optdate(), cace_ari_date_from_text(), test_cace_date_decode_invalid(), and test_cace_date_decode_valid().

◆ cace_date_encode()

int cace_date_encode ( m_string_t  out,
const struct tm *  in,
bool  usesep 
)

Encode to a Gregorian date in text form.

This is defined as "full-date" in Appendix A of RFC 3339 [12].

Note
The CBOR tag 100 epoch of 1970-01-01 is used from [10].
Parameters
[out]outThe output buffer, which will be sized to its text and null terminated.
[in]inThe time in separated fields.
usesepIf true component separators will be added, if false the concise form is used.
Returns
Zero upon success.

References CHKERR1.

Referenced by cace_ari_cbor_encode_optdate(), cace_ari_text_encode_objpath(), refda_adm_ietf_dtnma_agent_ctrl_ensure_odm(), refda_adm_ietf_dtnma_agent_edd_capability(), refda_adm_ietf_dtnma_agent_edd_odm_list(), and test_cace_date_encode_valid().

◆ cace_decfrac_decode()

int cace_decfrac_decode ( struct timespec *  out,
const cace_data_t *  in 
)

Decode a decimal fraction from text.

Parameters
[out]outThe fractional value with least digit 1e-9
[in]inThe input buffer to read, which may be null terminated.
Returns
Zero upon success. Or 1 to indicate an argument error, 2 to indicate a numeric error, 3 to indicate extra text at the end, and 4 to indicate value underflow.

References CHKERR1, cace_data_t::ptr, subsec_decode(), and text_real_len().

Referenced by test_cace_decfrac_decode_invalid(), and test_cace_decfrac_decode_valid().

◆ cace_decfrac_encode()

int cace_decfrac_encode ( m_string_t  out,
const struct timespec *  in 
)

Encode a decimal fraction to text form.

Parameters
[out]outThe output buffer, which will be appended to.
inThe input fractional value with least digit 1e-9
Returns
Zero upon success.

References cace_subsec_encode(), and CHKERR1.

Referenced by cace_ari_text_encode_lit(), and test_cace_decfrac_encode_valid().

◆ cace_slash_escape()

int cace_slash_escape ( m_string_t  out,
const cace_data_t *  in,
const char  quote 
)

Escape backslashes in tstr or bstr text form.

This is defined in Section G.2 of RFC 8610 [2] and Section 7 of RFC 8259 [8].

Parameters
[out]outThe output buffer, which will be appended to.
inThe input buffer to read, which must be null terminated.
quoteThe character used to quote the string.
Returns
Zero upon success.

References CHKERR1, cace_data_t::len, and cace_data_t::ptr.

Referenced by cace_ari_text_encode_lit(), and test_cace_slash_escape_valid().

◆ cace_slash_unescape()

int cace_slash_unescape ( m_string_t  out,
const cace_data_t *  in 
)

Unescape backslashes in tstr/bstr text form.

This is defined in Section G.2 of RFC 8610 [2].

Parameters
[out]outThe output buffer, which will be appended to.
inThe input buffer to read, which may be null terminated.
Returns
Zero upon success.

References cace_data_append_byte(), cace_data_copy(), cace_data_deinit(), cace_data_init(), cace_data_init_view(), CHKERR1, cace_data_t::len, cace_data_t::ptr, take_hex_2byte(), and text_real_len().

Referenced by test_cace_slash_unescape_invalid(), and test_cace_slash_unescape_valid().

◆ cace_string_tolower()

void cace_string_tolower ( m_string_t  out)

Convert a text string to lowercase.

This is written to work on byte strings, not unicode.

Parameters
[out]outThe output buffer, which will be replaced.

References CHKVOID.

◆ cace_string_toupper()

void cace_string_toupper ( m_string_t  out)

Convert a text string to uppercase.

This is written to work on byte strings, not unicode.

Parameters
[out]outThe output buffer, which will be replaced.

References CHKVOID.

◆ cace_strip_space()

void cace_strip_space ( m_string_t  out,
const char *  in,
size_t  in_len 
)

Remove whitespace characters from a text string.

This is based on isspace() inspection.

Parameters
[out]outThe output buffer, which will be replaced.
[in]inThe input text to read.
in_lenThe length of text not including null terminator.

References strip_chars().

◆ cace_subsec_encode()

int cace_subsec_encode ( m_string_t  out,
subsec_t  subsec 
)

◆ cace_timeperiod_decode()

int cace_timeperiod_decode ( struct timespec *  out,
const cace_data_t *  in 
)

Decode a signed time period from text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters
[out]outThe decoded relative time.
inThe input buffer to read, which may be null terminated.
Returns
Zero upon success.

References CHKERR1, cace_data_t::ptr, subsec_decode(), text_real_len(), TIMEPERIOD_DAY, TIMEPERIOD_HOUR, and TIMEPERIOD_MINUTE.

Referenced by test_cace_timeperiod_decode_invalid(), and test_cace_timeperiod_decode_valid().

◆ cace_timeperiod_encode()

int cace_timeperiod_encode ( m_string_t  out,
const struct timespec *  in 
)

Encode a signed time period in text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters
[out]outThe output buffer, which will be sized to its text and null terminated.
inThe relative time.
Returns
Zero upon success.

References cace_subsec_encode(), CHKERR1, TIMEPERIOD_DAY, TIMEPERIOD_HOUR, and TIMEPERIOD_MINUTE.

Referenced by cace_ari_text_encode_lit(), check_execute(), refda_exec_worker_iteration(), and test_cace_timeperiod_encode_valid().

◆ cace_uri_percent_decode()

int cace_uri_percent_decode ( m_string_t  out,
const cace_data_t *  in 
)

Decode from URI percent-encoding text form.

This is defined in Section 2.1 of RFC 3986 [1].

Parameters
[out]outThe output buffer, which will be appended to.
[in]inThe input encoded text which may be null-terminated.
Returns
Zero upon success.

References CHKERR1, cace_data_t::ptr, take_hex_1byte(), and text_real_len().

Referenced by test_cace_uri_percent_decode_invalid(), and test_cace_uri_percent_decode_valid().

◆ cace_uri_percent_encode()

int cace_uri_percent_encode ( m_string_t  out,
const cace_data_t *  in,
const char *  safe 
)

Encode to URI percent-encoding text form.

This is defined in Section 2.1 of RFC 3986 [1]. The set of unreserved characters is alpha, digits, and the _.-~ characters, in accordance with Section 2.3 of RFC 3986 [1].

Parameters
[out]outThe output buffer, which will be appended to.
inThe input encoded text which is null-terminated.
safeA set of additional safe characters to not be encoded, which is null-terminated.
Returns
Zero upon success.

References CHKERR1, cace_data_t::ptr, text_real_len(), and unreserved.

Referenced by cace_ari_text_percent_helper(), refdm_agent_rotate_log(), and test_cace_uri_percent_encode_valid().

◆ cace_utctime_decode()

int cace_utctime_decode ( struct timespec *  out,
const cace_data_t *  in 
)

Decode a UTC time from text form.

This is defined in Appendix A of RFC 3339 [12] with the addition of an optional leading sign character.

Parameters
[out]outThe decoded time from DTN epoch.
[in]inThe input buffer to read, which may be null terminated.
Returns
Zero upon success.

References cace_ari_dtn_epoch, CHKERR1, cace_data_t::ptr, strip_chars(), subsec_decode(), and text_real_len().

Referenced by test_cace_utctime_decode_invalid(), and test_cace_utctime_decode_valid().

◆ cace_utctime_encode()

int cace_utctime_encode ( m_string_t  out,
const struct timespec *  in,
bool  usesep 
)

Encode to a UTC time in text form.

This is defined in Appendix A of RFC 3339 [12].

Note
The DTN epoch is 2000-01-01T00:00:00Z from [9].
Parameters
[out]outThe output buffer, which will be sized to its text and null terminated.
[in]inThe time from DTN epoch.
usesepIf true component separators will be added, if false the concise form is used.
Returns
Zero upon success.

References cace_ari_dtn_epoch, cace_subsec_encode(), and CHKERR1.

Referenced by cace_ari_text_encode_lit(), refdm_db_insert_rptset(), and test_cace_utctime_encode_valid().

◆ strip_chars()

static void strip_chars ( m_string_t  out,
const char *  in,
size_t  in_len,
const char *  chars 
)
static

◆ subsec_decode()

static int subsec_decode ( subsec_t *  subsec,
const char **  curs,
const char *const  end 
)
static

◆ take_hex_1byte()

static int take_hex_1byte ( uint8_t *  out,
const char **  curs,
const char *  end 
)
static

Referenced by cace_uri_percent_decode().

◆ take_hex_2byte()

static int take_hex_2byte ( uint16_t *  out,
const char **  curs,
const char *  end 
)
static

Referenced by cace_slash_unescape().

◆ text_real_len()

static size_t text_real_len ( const cace_data_t *  data)
static

Variable Documentation

◆ base16_decode_lim

const size_t base16_decode_lim = 0x80
static

Size of the base16_decode_table.

Referenced by base16_decode_char().

◆ base16_decode_table

const int base16_decode_table[0x80]
static
Initial value:
=
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
}

Decode table for base16.

Referenced by base16_decode_char().

◆ base64_alphabet

const char* base64_alphabet
static
Initial value:
=
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/"

Referenced by cace_base64_encode().

◆ base64_decode_lim

const size_t base64_decode_lim = 0x80
static

Size of the base64_decode_table.

Referenced by base64_decode_char().

◆ base64_decode_table

const int base64_decode_table[0x80]
static
Initial value:
= {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
}

Decode table for base64 and base64url.

Referenced by base64_decode_char().

◆ base64url_alphabet

const char* base64url_alphabet
static
Initial value:
=
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789-_"

Referenced by cace_base64_encode().

◆ id_text_first

const char id_text_first[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
static

◆ id_text_rest

const char id_text_rest[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-"
static

◆ unreserved

const char* unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~"
static

Set of unreserved characters from Section 2.3 of RFC 3986 [1].

Referenced by cace_uri_percent_encode().