// Copyright (c) 2019, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.
#ifndef MAMBA_CORE_UTIL_STRING_HPP
#define MAMBA_CORE_UTIL_STRING_HPP
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iterator>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
#include "mamba/util/compare.hpp"
namespace mamba
{
/**
 * Return @p ptr unchanged when it is not null, otherwise a pointer to an empty string.
 *
 * The returned pointer is therefore never null.
 *
 * @param ptr A possibly-null pointer to a C string.
 * @return @p ptr itself, or ``""`` when @p ptr is null.
 */
const char* raw_str_or_empty(const char* ptr);
/**
 * Character classification predicates, each declared for ``char`` and ``wchar_t``.
 *
 * These are safe, non UTF-8 aware wrappers around the ``<cctype>`` classification
 * functions (see the ``<cctype>`` documentation for the meaning of each category).
 * They are only declared in this header; their definitions are not part of it.
 */
bool is_control(char c);
bool is_control(wchar_t c);
bool is_print(char c);
bool is_print(wchar_t c);
bool is_space(char c);
bool is_space(wchar_t c);
bool is_blank(char c);
bool is_blank(wchar_t c);
bool is_graphic(char c);
bool is_graphic(wchar_t c);
bool is_digit(char c);
bool is_digit(wchar_t c);
bool is_alpha(char c);
bool is_alpha(wchar_t c);
bool is_alphanum(char c);
bool is_alphanum(wchar_t c);
bool is_lower(char c);
bool is_lower(wchar_t c);
bool is_upper(char c);
bool is_upper(wchar_t c);
// Lower-casing of a single character or of a whole string (narrow and wide variants).
// Only declared in this header; the definitions are not part of it.
// NOTE(review): presumably as non UTF-8 aware as the character predicates -- confirm
// against the definitions.
char to_lower(char c);
wchar_t to_lower(wchar_t c);
std::string to_lower(std::string_view str);
std::wstring to_lower(std::wstring_view str);
// The use of a template here serves to exclude the overload for const Char*
// (a ``const Char*`` argument cannot be deduced as a ``std::basic_string<Char>&&``).
template <typename Char>
std::basic_string<Char> to_lower(std::basic_string<Char>&& str);
// Explicit instantiation declarations: the template above has no definition in this
// header, so these two specializations must be instantiated in some other translation unit.
extern template std::string to_lower(std::string&& str);
extern template std::wstring to_lower(std::wstring&& str);
// Upper-casing of a single character or of a whole string (narrow and wide variants).
// Only declared in this header; the definitions are not part of it.
// NOTE(review): presumably as non UTF-8 aware as the character predicates -- confirm
// against the definitions.
char to_upper(char c);
wchar_t to_upper(wchar_t c);
std::string to_upper(std::string_view str);
std::wstring to_upper(std::wstring_view str);
// The use of a template here serves to exclude the overload for const Char*
// (a ``const Char*`` argument cannot be deduced as a ``std::basic_string<Char>&&``).
template <typename Char>
std::basic_string<Char> to_upper(std::basic_string<Char>&& str);
// Explicit instantiation declarations: the template above has no definition in this
// header, so these two specializations must be instantiated in some other translation unit.
extern template std::string to_upper(std::string&& str);
extern template std::wstring to_upper(std::wstring&& str);
// Substring predicates on narrow strings. Only declared in this header; judging by their
// names they test whether @p str begins with @p prefix, ends with @p suffix, or contains
// @p sub_str (confirm the handling of empty arguments against the definitions).
// Note that no ``std::wstring_view`` overloads are declared here.
bool starts_with(std::string_view str, std::string_view prefix);
bool ends_with(std::string_view str, std::string_view suffix);
bool contains(std::string_view str, std::string_view sub_str);
/**
 * Check if any of the strings starts with the prefix.
 *
 * @param strs A range of strings; it must provide ``cbegin()`` and ``cend()``.
 * @param prefix The prefix looked for at the start of each element of @p strs.
 * @return ``true`` as soon as one element starts with @p prefix, ``false`` otherwise
 *         (in particular for an empty range).
 */
template <typename StrRange>
bool any_starts_with(const StrRange& strs, std::string_view prefix);
template <typename StrRange>
bool any_starts_with(const StrRange& strs, std::wstring_view prefix);
/**
 * Check if the string starts with any of the prefixes.
 *
 * @param str The string whose beginning is inspected.
 * @param prefix A range of candidate prefixes; it must provide ``cbegin()`` and ``cend()``.
 * @return ``true`` as soon as @p str starts with one of the candidates, ``false`` otherwise
 *         (in particular for an empty range).
 */
template <typename StrRange>
bool starts_with_any(std::string_view str, const StrRange& prefix);
template <typename StrRange>
bool starts_with_any(std::wstring_view str, const StrRange& prefix);
/**
 * Left-strip family: drop leading characters of @p input.
 *
 * The non-template overloads are only declared in this header. Judging by their
 * signatures they strip a single character @p c, any character found in @p chars, or,
 * for the single-argument form, a default set of characters (presumably whitespace --
 * confirm against the definitions).
 *
 * The predicate-based templates strip leading characters for as long as
 * @p should_strip returns true.
 * ``lstrip_if_parts`` returns both pieces as sub-views of @p input: element 0 is the
 * stripped prefix and element 1 is the remainder. ``lstrip_if`` returns element 1 only.
 */
std::string_view lstrip(std::string_view input, char c);
std::wstring_view lstrip(std::wstring_view input, wchar_t c);
std::string_view lstrip(std::string_view input, std::string_view chars);
std::wstring_view lstrip(std::wstring_view input, std::wstring_view chars);
std::string_view lstrip(std::string_view input);
std::wstring_view lstrip(std::wstring_view input);
std::array<std::string_view, 2> lstrip_parts(std::string_view input, char c);
std::array<std::wstring_view, 2> lstrip_parts(std::wstring_view input, wchar_t c);
std::array<std::string_view, 2> lstrip_parts(std::string_view input, std::string_view chars);
std::array<std::wstring_view, 2> lstrip_parts(std::wstring_view input, std::wstring_view chars);
template <typename UnaryFunc>
std::string_view lstrip_if(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::wstring_view lstrip_if(std::wstring_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::string_view, 2> lstrip_if_parts(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::wstring_view, 2> lstrip_if_parts(std::wstring_view input, UnaryFunc should_strip);
/**
 * Right-strip family: drop trailing characters of @p input.
 *
 * The non-template overloads are only declared in this header. Judging by their
 * signatures they strip a single character @p c, any character found in @p chars, or,
 * for the single-argument form, a default set of characters (presumably whitespace --
 * confirm against the definitions).
 *
 * The predicate-based templates strip trailing characters for as long as
 * @p should_strip returns true.
 * ``rstrip_if_parts`` returns both pieces as sub-views of @p input: element 0 is what
 * remains and element 1 is the stripped suffix. ``rstrip_if`` returns element 0 only.
 */
std::string_view rstrip(std::string_view input, char c);
std::wstring_view rstrip(std::wstring_view input, wchar_t c);
std::string_view rstrip(std::string_view input, std::string_view chars);
std::wstring_view rstrip(std::wstring_view input, std::wstring_view chars);
std::string_view rstrip(std::string_view input);
std::wstring_view rstrip(std::wstring_view input);
std::array<std::string_view, 2> rstrip_parts(std::string_view input, char c);
std::array<std::wstring_view, 2> rstrip_parts(std::wstring_view input, wchar_t c);
std::array<std::string_view, 2> rstrip_parts(std::string_view input, std::string_view chars);
std::array<std::wstring_view, 2> rstrip_parts(std::wstring_view input, std::wstring_view chars);
template <typename UnaryFunc>
std::string_view rstrip_if(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::wstring_view rstrip_if(std::wstring_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::string_view, 2> rstrip_if_parts(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::wstring_view, 2> rstrip_if_parts(std::wstring_view input, UnaryFunc should_strip);
/**
 * Strip family: drop both leading and trailing characters of @p input.
 *
 * The non-template overloads are only declared in this header. Judging by their
 * signatures they strip a single character @p c, any character found in @p chars, or,
 * for the single-argument form, a default set of characters (presumably whitespace --
 * confirm against the definitions).
 *
 * The predicate-based templates strip characters on both sides for as long as
 * @p should_strip returns true.
 * ``strip_if_parts`` returns three sub-views of @p input: the stripped head (element 0),
 * the body (element 1) and the stripped tail (element 2). ``strip_if`` returns the body.
 */
std::string_view strip(std::string_view input, char c);
std::wstring_view strip(std::wstring_view input, wchar_t c);
std::string_view strip(std::string_view input, std::string_view chars);
std::wstring_view strip(std::wstring_view input, std::wstring_view chars);
std::string_view strip(std::string_view input);
std::wstring_view strip(std::wstring_view input);
std::array<std::string_view, 3> strip_parts(std::string_view input, char c);
std::array<std::wstring_view, 3> strip_parts(std::wstring_view input, wchar_t c);
std::array<std::string_view, 3> strip_parts(std::string_view input, std::string_view chars);
std::array<std::wstring_view, 3> strip_parts(std::wstring_view input, std::wstring_view chars);
template <typename UnaryFunc>
std::string_view strip_if(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::wstring_view strip_if(std::wstring_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::string_view, 3> strip_if_parts(std::string_view input, UnaryFunc should_strip);
template <typename UnaryFunc>
std::array<std::wstring_view, 3> strip_if_parts(std::wstring_view input, UnaryFunc should_strip);
// Split @p input around the separator @p sep and return the pieces as owning strings.
// Only declared in this header; the definitions are not part of it.
// ``max_split`` defaults to ``SIZE_MAX``; presumably it caps the number of splits performed
// and ``rsplit`` starts splitting from the end of @p input -- confirm against the definitions.
std::vector<std::string>
split(std::string_view input, std::string_view sep, std::size_t max_split = SIZE_MAX);
std::vector<std::wstring>
split(std::wstring_view input, std::wstring_view sep, std::size_t max_split = SIZE_MAX);
std::vector<std::string>
rsplit(std::string_view input, std::string_view sep, std::size_t max_split = SIZE_MAX);
std::vector<std::wstring>
rsplit(std::wstring_view input, std::wstring_view sep, std::size_t max_split = SIZE_MAX);
// In-place replacement inside @p data, which is taken by non-const reference.
// Only declared in this header; judging by the name every occurrence of @p search is
// replaced by @p replace (confirm the handling of an empty @p search against the definition).
void replace_all(std::string& data, std::string_view search, std::string_view replace);
void replace_all(std::wstring& data, std::wstring_view search, std::wstring_view replace);
namespace detail
{
/**
 * Default joiner of ``join`` and ``join_trunc``.
 *
 * Calling it with ``(left, right)`` appends @p right to @p left in place through
 * ``operator+=``.
 */
struct PlusEqual
{
template <typename T, typename U>
auto operator()(T& left, const U& right);
};
}
/**
 * Execute the function @p func on each element of a join iteration.
 *
 * The join iteration of an iterator pair (@p first, @p last) with a separator @p sep is
 * defined by iterating through the ``n`` elements of the iterator pair, interleaving the
 * separator in between the elements (thus appearing ``n-1`` times).
 *
 * @param first The iterator pointing to the beginning of the range of elements.
 * @param last The iterator pointing to past the end of the range of elements.
 * @param func The unary function called on every element and on every separator, so it
 *             must accept both the element type and @p Value.
 * @param sep The separator passed to @p func in between two elements.
 * @return The function object @p func, after all the calls have been made.
 */
template <typename InputIt, typename UnaryFunction, typename Value>
UnaryFunction join_for_each(InputIt first, InputIt last, UnaryFunction func, const Value& sep);
/**
 * Concatenate the elements of the container @p container by interleaving a separator.
 *
 * Joining is done by successively joining (using the provided @p joiner) the aggregate with
 * element of the container and the separator, such that the separator only appears
 * in-between two elements of the range.
 *
 * @param sep The separator inserted in between two elements.
 * @param container The elements to join; the result has type ``Range::value_type``.
 * @param joiner Called as ``joiner(aggregate, item)`` for every element and separator;
 *               defaults to appending with ``+=``.
 * @return The aggregate, which starts as a value-initialized ``Range::value_type``.
 *
 * @see join_for_each
 */
template <class Range, class Value, class Joiner = detail::PlusEqual>
auto join(const Value& sep, const Range& container, Joiner joiner = detail::PlusEqual{}) ->
typename Range::value_type;
/**
 * Execute the function @p func on each element of a truncated join iteration.
 *
 * The truncated join iteration of an iterator pair (@p first, @p last) with a separator
 * @p sep and a truncation symbol @p etc is defined as follows.
 * When the range holds at most @p threshold elements, it is the plain join iteration of
 * all the elements.
 * Otherwise only a limited number of elements at the beginning and at the end of the
 * range are visited, the middle elements being represented by a single @p etc.
 * In both cases the separator is interleaved in between the visited items.
 *
 * The iterators must support ``last - first`` as well as adding or subtracting an offset.
 *
 * @param first The iterator pointing to the beginning of the range of elements to join.
 * @param last The iterator pointing to past the end of the range of elements to join.
 * @param func The unary function to apply to all elements (separation and truncation included).
 * @param sep The separator used in between elements.
 * @param etc The value used to represent the truncation of the elements.
 * @param threshold Distance between the iterator pair beyond which truncation is performed.
 * @param show Number of elements to keep at the beginning/end when truncation is performed.
 * @return The function object @p func, after all the calls have been made.
 *
 * @see join_for_each
 */
template <typename InputIt, typename UnaryFunction, typename Value>
UnaryFunction join_trunc_for_each(
InputIt first,
InputIt last,
UnaryFunction func,
const Value& sep,
const Value& etc,
std::size_t threshold = 5,
std::pair<std::size_t, std::size_t> show = { 2, 1 }
);
/**
 * Join elements of a range, with possible truncation.
 *
 * @param range Elements to join; the result has type ``Range::value_type``.
 * @param sep The separator used in between elements.
 * @param etc The value used to represent the truncation of the elements.
 * @param threshold Distance between the iterator pair beyond which truncation is performed.
 * @param show Number of elements to keep at the beginning/end when truncation is performed.
 * @param joiner Called as ``joiner(aggregate, item)`` for every visited item; defaults to
 *               appending with ``+=``.
 * @return The aggregate, which starts as a value-initialized ``Range::value_type``.
 *
 * @see join_trunc_for_each
 * @see join
 */
template <typename Range, typename Joiner = detail::PlusEqual>
auto join_trunc(
const Range& range,
std::string_view sep = ", ",
std::string_view etc = "...",
std::size_t threshold = 5,
std::pair<std::size_t, std::size_t> show = { 2, 1 },
Joiner joiner = detail::PlusEqual{}
) -> typename Range::value_type;
// NOTE(review): the lone semicolon below is an empty declaration; it has no effect.
;
/************************
* Implementation misc *
************************/
/**
 * Map a null C string pointer to an empty string, leaving any other pointer untouched.
 *
 * @param ptr A possibly-null pointer to a C string.
 * @return @p ptr when it is not null, otherwise a pointer to the literal ``""``.
 */
inline const char* raw_str_or_empty(const char* ptr)
{
    if (ptr == nullptr)
    {
        return "";
    }
    return ptr;
}
/********************************************
* Implementation of start_with functions *
********************************************/
/**
 * Character-type generic implementation shared by the ``any_starts_with`` overloads.
 *
 * The prefix test is done directly on ``std::basic_string_view<Char>`` rather than through
 * ``starts_with``: the latter is only declared for narrow strings, which made the
 * ``std::wstring_view`` overload impossible to instantiate.
 *
 * @param strs A range (providing ``cbegin()``/``cend()``) whose elements are implicitly
 *             convertible to ``std::basic_string_view<Char>``.
 * @param prefix The prefix looked for at the start of each element.
 * @return Whether at least one element of @p strs starts with @p prefix. An empty prefix
 *         matches every element, and an empty range never matches.
 */
template <typename StrRange, typename Char>
bool any_starts_with(const StrRange& strs, std::basic_string_view<Char> prefix)
{
    return std::any_of(
        strs.cbegin(),
        strs.cend(),
        // ``substr`` clamps the requested length, so candidates shorter than the prefix
        // simply compare unequal.
        [&prefix](const std::basic_string_view<Char> candidate)
        { return candidate.substr(0, prefix.size()) == prefix; }
    );
}

/** Narrow string overload, see the generic implementation. */
template <typename StrRange>
bool any_starts_with(const StrRange& strs, std::string_view prefix)
{
    return any_starts_with<StrRange, char>(strs, prefix);
}

/** Wide string overload, see the generic implementation. */
template <typename StrRange>
bool any_starts_with(const StrRange& strs, std::wstring_view prefix)
{
    return any_starts_with<StrRange, wchar_t>(strs, prefix);
}
// Explicit instantiation declarations for the most common narrow string ranges: these
// specializations are not implicitly instantiated by including this header, so they must
// be explicitly instantiated in some other translation unit.
extern template bool any_starts_with(const std::vector<std::string>&, std::string_view);
extern template bool any_starts_with(const std::vector<std::string_view>&, std::string_view);
/**
 * Character-type generic implementation shared by the ``starts_with_any`` overloads.
 *
 * The prefix test is done directly on ``std::basic_string_view<Char>`` rather than through
 * ``starts_with``: the latter is only declared for narrow strings, which made the
 * ``std::wstring_view`` overload impossible to instantiate.
 *
 * @param str The string whose beginning is inspected.
 * @param prefix A range (providing ``cbegin()``/``cend()``) of candidate prefixes, whose
 *               elements are implicitly convertible to ``std::basic_string_view<Char>``.
 * @return Whether @p str starts with at least one of the candidates. An empty candidate
 *         always matches, and an empty range never matches.
 */
template <typename StrRange, typename Char>
bool starts_with_any(std::basic_string_view<Char> str, const StrRange& prefix)
{
    return std::any_of(
        prefix.cbegin(),
        prefix.cend(),
        // ``substr`` clamps the requested length, so candidates longer than ``str``
        // simply compare unequal.
        [&str](const std::basic_string_view<Char> candidate)
        { return str.substr(0, candidate.size()) == candidate; }
    );
}

/** Narrow string overload, see the generic implementation. */
template <typename StrRange>
bool starts_with_any(std::string_view str, const StrRange& prefix)
{
    return starts_with_any<StrRange, char>(str, prefix);
}

/** Wide string overload, see the generic implementation. */
template <typename StrRange>
bool starts_with_any(std::wstring_view str, const StrRange& prefix)
{
    return starts_with_any<StrRange, wchar_t>(str, prefix);
}
// Explicit instantiation declarations for the most common narrow string ranges: these
// specializations are not implicitly instantiated by including this header, so they must
// be explicitly instantiated in some other translation unit.
extern template bool starts_with_any(std::string_view, const std::vector<std::string>&);
extern template bool starts_with_any(std::string_view, const std::vector<std::string_view>&);
/***************************************
* Implementation of strip functions *
***************************************/
namespace detail
{
    /**
     * Split @p input in two at the first character that must not be stripped.
     *
     * @param input The string to split.
     * @param should_strip Predicate returning true for characters to strip.
     * @return The stripped prefix (element 0) and the remainder (element 1), both being
     *         sub-views of @p input.
     */
    template <typename Char, typename UnaryFunc>
    std::array<std::basic_string_view<Char>, 2>
    lstrip_if_parts_impl(std::basic_string_view<Char> input, UnaryFunc should_strip)
    {
        // Index of the first character to keep, or ``input.size()`` if there is none.
        std::size_t kept_from = 0;
        for (; kept_from < input.size(); ++kept_from)
        {
            // The predicate is handed a mutable copy of the character.
            Char current = input[kept_from];
            if (!should_strip(current))
            {
                break;
            }
        }
        return { input.substr(0, kept_from), input.substr(kept_from) };
    }
}

/** Narrow string version: the stripped prefix followed by the remainder of @p input. */
template <typename UnaryFunc>
std::array<std::string_view, 2> lstrip_if_parts(std::string_view input, UnaryFunc should_strip)
{
    return detail::lstrip_if_parts_impl(input, std::move(should_strip));
}

/** Wide string version: the stripped prefix followed by the remainder of @p input. */
template <typename UnaryFunc>
std::array<std::wstring_view, 2> lstrip_if_parts(std::wstring_view input, UnaryFunc should_strip)
{
    return detail::lstrip_if_parts_impl(input, std::move(should_strip));
}

/** Narrow string version: @p input without the leading characters matching the predicate. */
template <typename UnaryFunc>
std::string_view lstrip_if(std::string_view input, UnaryFunc should_strip)
{
    const auto pieces = lstrip_if_parts(input, std::move(should_strip));
    return pieces[1];
}

/** Wide string version: @p input without the leading characters matching the predicate. */
template <typename UnaryFunc>
std::wstring_view lstrip_if(std::wstring_view input, UnaryFunc should_strip)
{
    const auto pieces = lstrip_if_parts(input, std::move(should_strip));
    return pieces[1];
}
namespace detail
{
    /**
     * Split @p input in two right after the last character that must not be stripped.
     *
     * @param input The string to split.
     * @param should_strip Predicate returning true for characters to strip.
     * @return What remains of @p input (element 0) and the stripped suffix (element 1),
     *         both being sub-views of @p input.
     */
    template <typename Char, typename UnaryFunc>
    std::array<std::basic_string_view<Char>, 2>
    rstrip_if_parts_impl(std::basic_string_view<Char> input, UnaryFunc should_strip)
    {
        // Number of leading characters to keep, found by scanning backwards from the end.
        std::size_t kept_len = input.size();
        while (kept_len > 0)
        {
            // The predicate is handed a mutable copy of the character.
            Char current = input[kept_len - 1];
            if (!should_strip(current))
            {
                break;
            }
            --kept_len;
        }
        return { input.substr(0, kept_len), input.substr(kept_len) };
    }
}

/** Narrow string version: the kept part of @p input followed by the stripped suffix. */
template <typename UnaryFunc>
std::array<std::string_view, 2> rstrip_if_parts(std::string_view input, UnaryFunc should_strip)
{
    return detail::rstrip_if_parts_impl(input, std::move(should_strip));
}

/** Wide string version: the kept part of @p input followed by the stripped suffix. */
template <typename UnaryFunc>
std::array<std::wstring_view, 2> rstrip_if_parts(std::wstring_view input, UnaryFunc should_strip)
{
    return detail::rstrip_if_parts_impl(input, std::move(should_strip));
}

/** Narrow string version: @p input without the trailing characters matching the predicate. */
template <typename UnaryFunc>
std::string_view rstrip_if(std::string_view input, UnaryFunc should_strip)
{
    const auto pieces = rstrip_if_parts(input, std::move(should_strip));
    return pieces[0];
}

/** Wide string version: @p input without the trailing characters matching the predicate. */
template <typename UnaryFunc>
std::wstring_view rstrip_if(std::wstring_view input, UnaryFunc should_strip)
{
    const auto pieces = rstrip_if_parts(input, std::move(should_strip));
    return pieces[0];
}
namespace detail
{
    /**
     * Split @p input in three: stripped head, body, and stripped tail.
     *
     * The head is removed first, then the tail is removed from what is left, so a string
     * made only of characters to strip ends up entirely in the head.
     *
     * @param input The string to split.
     * @param should_strip Predicate returning true for characters to strip.
     * @return The head (element 0), the body (element 1) and the tail (element 2).
     */
    template <typename Char, typename UnaryFunc>
    std::array<std::basic_string_view<Char>, 3>
    strip_if_parts_impl(std::basic_string_view<Char> input, UnaryFunc should_strip)
    {
        // The predicate is copied for the first pass since it is needed again afterwards.
        const auto leading = lstrip_if_parts(input, should_strip);
        const auto trailing = rstrip_if_parts(leading[1], std::move(should_strip));
        return { leading[0], trailing[0], trailing[1] };
    }
}

/** Narrow string version: head, body, and tail of @p input. */
template <typename UnaryFunc>
std::array<std::string_view, 3> strip_if_parts(std::string_view input, UnaryFunc should_strip)
{
    return detail::strip_if_parts_impl(input, std::move(should_strip));
}

/** Wide string version: head, body, and tail of @p input. */
template <typename UnaryFunc>
std::array<std::wstring_view, 3> strip_if_parts(std::wstring_view input, UnaryFunc should_strip)
{
    return detail::strip_if_parts_impl(input, std::move(should_strip));
}

/** Narrow string version: @p input without leading and trailing matching characters. */
template <typename UnaryFunc>
std::string_view strip_if(std::string_view input, UnaryFunc should_strip)
{
    const auto pieces = strip_if_parts(input, std::move(should_strip));
    return pieces[1];
}

/** Wide string version: @p input without leading and trailing matching characters. */
template <typename UnaryFunc>
std::wstring_view strip_if(std::wstring_view input, UnaryFunc should_strip)
{
    const auto pieces = strip_if_parts(input, std::move(should_strip));
    return pieces[1];
}
/**************************************
* Implementation of join functions *
**************************************/
namespace detail
{
// Append @p right to @p left in place through ``operator+=``.
// The return type is deduced as ``void`` since the body has no return statement.
template <typename T, typename U>
auto PlusEqual::operator()(T& left, const U& right)
{
left += right;
}
/**
 * Type trait detecting whether ``reserve(std::size_t)`` can be called on a ``Container``.
 *
 * The primary template is the fallback used when the call expression is ill-formed.
 */
template <class Container, class Enable = void>
struct has_reserve : std::false_type
{
};

/** Specialization selected when ``std::declval<Container>().reserve(n)`` is well-formed. */
template <class Container>
struct has_reserve<Container, decltype(void(std::declval<Container>().reserve(std::size_t{})))>
    : std::true_type
{
};

/** Convenience variable template for ``has_reserve<Container>::value``. */
template <typename Container>
inline constexpr bool has_reserve_v = has_reserve<Container>::value;
// Length of the different kinds of items that can take part in a join or concatenation.
// The overloads for C strings and single characters are only declared here.
std::size_t length(const char* s);
std::size_t length(const wchar_t* s);
std::size_t length(const char c);
std::size_t length(const wchar_t c);

/** Fallback for any type exposing a ``length()`` member function, such as strings. */
template <class Sized>
std::size_t length(const Sized& s)
{
    const std::size_t count = s.length();
    return count;
}
}
// TODO(C++20) Use ``std::ranges::join_view`` (or ``std::ranges::join``)
/**
 * Call @p func on every element of the range, and on @p sep in between two elements.
 *
 * Iterators are only compared for (in)equality, so any input iterator is accepted
 * (e.g. iterators of ``std::list`` or ``std::set``), not only random access ones.
 * As usual, @p last must be reachable from @p first.
 *
 * @param first The iterator pointing to the beginning of the range of elements.
 * @param last The iterator pointing to past the end of the range of elements.
 * @param func The unary function called on every element and on every separator.
 * @param sep The separator passed to @p func in between two elements.
 * @return The function object @p func, after all the calls have been made.
 */
template <typename InputIt, typename UnaryFunction, typename Value>
UnaryFunction join_for_each(InputIt first, InputIt last, UnaryFunction func, const Value& sep)
{
    if (first == last)
    {
        // Empty range: nothing to visit, not even a separator.
        return func;
    }
    // The first element is not preceded by a separator...
    func(*first);
    // ...while every following one is.
    for (++first; first != last; ++first)
    {
        func(sep);
        func(*first);
    }
    return func;
}
template <class Range, class Value, class Joiner>
auto join(const Value& sep, const Range& container, Joiner joiner) -> typename Range::value_type
{
using Result = typename Range::value_type;
Result out{};
if constexpr (detail::has_reserve_v<Result>)
{
std::size_t final_size = 0;
auto inc_size = [&final_size](const auto& val) { final_size += detail::length(val); };
join_for_each(container.begin(), container.end(), inc_size, sep);
out.reserve(final_size);
}
auto out_joiner = [&](auto&& val) { joiner(out, std::forward<decltype(val)>(val)); };
join_for_each(container.begin(), container.end(), out_joiner, sep);
return out;
}
/********************************************
* Implementation of join_trunc functions *
********************************************/
// TODO(C++20) Take an input range and return a range
/**
 * Call @p func on the items of a join iteration, eliding the middle of long ranges.
 *
 * Ranges of at most @p threshold elements are visited entirely, as ``join_for_each`` does.
 * For longer ranges only the first ``show.first`` and the last ``show.second`` elements are
 * visited, with a single @p etc (surrounded by separators) standing for the elided ones.
 * When ``show`` asks for at least as many elements as the range holds, there is nothing to
 * elide and the range is visited entirely: this keeps the iterator arithmetic inside the
 * range and avoids visiting the same element twice.
 *
 * The iterators must support ``last - first`` as well as adding or subtracting an offset.
 *
 * @param first The iterator pointing to the beginning of the range of elements to join.
 * @param last The iterator pointing to past the end of the range of elements to join.
 * @param func The unary function to apply to all items (separator and @p etc included).
 * @param sep The separator used in between items.
 * @param etc The value used to represent the elided elements.
 * @param threshold Number of elements beyond which truncation is performed.
 * @param show Number of elements to keep at the beginning/end when truncation is performed.
 * @return The function object @p func, after all the calls have been made.
 */
template <typename InputIt, typename UnaryFunction, typename Value>
UnaryFunction join_trunc_for_each(
    InputIt first,
    InputIt last,
    UnaryFunction func,
    const Value& sep,
    const Value& etc,
    std::size_t threshold,
    std::pair<std::size_t, std::size_t> show
)
{
    if (util::cmp_less_equal(last - first, threshold))
    {
        return join_for_each(first, last, std::move(func), sep);
    }

    // Assuming ``util::cmp_less_equal`` is a sign-safe ``<=`` (as ``std::cmp_less_equal``),
    // the distance is now strictly greater than ``threshold``, hence positive.
    const auto count = static_cast<std::size_t>(last - first);
    const auto [show_head, show_tail] = show;

    // Overflow-free form of ``show_head + show_tail >= count``: nothing would be elided.
    if ((show_head >= count) || (show_tail >= count - show_head))
    {
        return join_for_each(first, last, std::move(func), sep);
    }

    // Working around non-assignable function types, such as lambda with references.
    auto join_for_each_func = [&func](auto f, auto l, const auto& val)
    {
        if constexpr (std::is_assignable_v<UnaryFunction, UnaryFunction>)
        {
            func = join_for_each(f, l, std::move(func), val);
        }
        else
        {
            join_for_each(f, l, func, val);
        }
    };

    if (show_head > 0)
    {
        join_for_each_func(first, first + static_cast<std::ptrdiff_t>(show_head), sep);
        func(sep);
    }
    func(etc);
    if (show_tail > 0)
    {
        func(sep);
        join_for_each_func(last - static_cast<std::ptrdiff_t>(show_tail), last, sep);
    }
    return func;
}
template <typename Range, typename Joiner>
auto join_trunc(
const Range& range,
std::string_view sep,
std::string_view etc,
std::size_t threshold,
std::pair<std::size_t, std::size_t> show,
Joiner joiner
) -> typename Range::value_type
{
using Result = typename Range::value_type;
Result out{};
if constexpr (detail::has_reserve_v<Result>)
{
std::size_t final_size = 0;
auto inc_size = [&final_size](const auto& val) { final_size += detail::length(val); };
join_trunc_for_each(range.begin(), range.end(), inc_size, sep, etc, threshold, show);
out.reserve(final_size);
}
auto out_joiner = [&](auto&& val) { joiner(out, std::forward<decltype(val)>(val)); };
join_trunc_for_each(range.begin(), range.end(), out_joiner, sep, etc, threshold, show);
return out;
}
/**
 * Concatenate all the arguments into a single ``std::string``.
 *
 * The total length is computed first so that the result is allocated only once.
 * The length sum is a binary fold seeded with zero: a unary ``+`` fold is ill-formed for
 * an empty pack, which would prevent ``concat()`` from compiling.
 *
 * @param args Items appended in order with ``+=``; ``detail::length`` must accept each.
 * @return The concatenation of all @p args, empty when there are none.
 */
template <typename... Args>
std::string concat(const Args&... args)
{
    std::string result;
    result.reserve((std::size_t{ 0 } + ... + detail::length(args)));
    ((result += args), ...);
    return result;
}
/**
 * Render the first @p size elements of @p buffer as lowercase hexadecimal.
 *
 * Every element is written with at least two digits, zero-padded, without separator.
 * One-byte integral elements are read as ``unsigned char`` first: where ``char`` is signed,
 * converting a byte such as ``0x80`` straight to ``int`` yields a negative value that is
 * printed sign-extended (``ffffff80``) instead of ``80``.
 * Wider elements are converted to ``int`` and may therefore produce more than two digits.
 *
 * @param buffer Any object indexable with ``operator[]``.
 * @param size Number of leading elements to render; it must not exceed the number of
 *             elements in @p buffer.
 * @return The hexadecimal string, empty when @p size is zero.
 */
template <class B>
std::string hex_string(const B& buffer, std::size_t size)
{
    using Element = std::remove_cv_t<std::remove_reference_t<decltype(buffer[std::size_t{}])>>;

    std::ostringstream oss;
    // Base and fill character are sticky, unlike the width which is reset by each output.
    oss << std::hex << std::setfill('0');
    for (std::size_t i = 0; i < size; ++i)
    {
        oss << std::setw(2);
        if constexpr (std::is_integral_v<Element> && (sizeof(Element) == 1))
        {
            oss << static_cast<int>(static_cast<unsigned char>(buffer[i]));
        }
        else
        {
            oss << static_cast<int>(buffer[i]);
        }
    }
    return oss.str();
}

/**
 * Render all the elements of @p buffer as lowercase hexadecimal.
 *
 * @param buffer Any object indexable with ``operator[]`` and providing ``size()``.
 * @return Same as the two-argument overload called with ``buffer.size()``.
 */
template <class B>
std::string hex_string(const B& buffer)
{
    return hex_string(buffer, buffer.size());
}
}
#endif