Process a string using a regular expression in C++14

I need to extract 3 vars from a string in C++14. The string format is:

a single uppercase char + `->` + a single uppercase char + `,` + a number

For example: A->B,100, C->D,20000, E->F,22. I want to extract both uppercase characters and the number from each entry, e.g. A, B, and 100. So far I have written a tokenize function and separate the parts by calling tokenize() multiple times, like this:

// Splits `s` into the pieces that lie between occurrences of the delimiter `c`.
//
// Behaviour follows std::getline: consecutive delimiters produce empty pieces,
// a trailing delimiter does not produce a trailing empty piece, and an empty
// input produces an empty vector.
//
// Names are qualified with std:: so the function does not rely on a
// file-scope `using namespace std;`.
std::vector<std::string> tokenize(const std::string& s, const char c)
{
    std::vector<std::string> pieces;

    // The stream is only read from, so an input-only stream is enough;
    // direct-initialization avoids requiring a movable stream type.
    std::istringstream input(s);

    std::string piece;
    while (std::getline(input, piece, c))
    {
        pieces.push_back(piece);
    }
    return pieces;
}
// ...
t = tokenized(s, '')

But I was wondering if there is a simpler way to extract these 3 variables using regex? I tried the (A-Z.*?->A-Z), but clearly I wrote the wrong regex.

Answer

With regexes in C++ there is one thing you need to know: you have to iterate over multiple matches yourself. Here’s an example:

#include <iostream>
#include <regex>
#include <string>
#include <vector>

//-----------------------------------------------------------------------------
// if you're going to tokenize you might as well return tokens

// One parsed entry: two labels and an unsigned number.
struct token_t
{
    // Copies the two labels and the number into the corresponding members.
    token_t(const std::string& source, const std::string& target, const unsigned long amount)
        : from(source)
        , to(target)
        , value(amount)
    {
    }

    std::string from;     // first label
    std::string to;       // second label
    unsigned long value;  // numeric part
};

// Writes one token to `os` as a single line of the form
// "token : from = <from>, to = <to>, value = <value>" and flushes the stream.
// Returns `os` so insertions can be chained.
std::ostream& operator<<(std::ostream& os, const token_t& token)
{
    os << "token : from = " << token.from;
    os << ", to = " << token.to;
    os << ", value = " << token.value;
    os << std::endl;
    return os;
}

// Writes a framed listing of `tokens` to `os`: a header line, one line per
// token (via the token_t stream inserter), and a footer line.
// Returns `os` so insertions can be chained.
//
// The header and footer are written to `os` rather than to std::cout, so the
// whole listing ends up in the stream the caller passed in (a file, a
// std::ostringstream, ...). When `os` is std::cout the output is unchanged.
std::ostream& operator<<(std::ostream& os, const std::vector<token_t>& tokens)
{
    os << std::endl << "------------------ tokens ------------------" << std::endl;
    for (const auto& token : tokens)
    {
        os << token;
    }
    os << "--------------------------------------------" << std::endl;
    return os;
}

//-----------------------------------------------------------------------------

auto tokenize(const std::string& s)
{
    static const std::regex rx{ "([A-Z])->([A-Z]),([0-9]+)" };
    std::smatch match;

    std::vector<token_t> tokens;
    auto from = s.cbegin();
    while (std::regex_search(from, s.cend(), match, rx))
    {
        tokens.push_back({ match[1], match[2], std::stoul(match[3]) });
        from = match.suffix().first;
    }

    return tokens;
}

//-----------------------------------------------------------------------------

// Demo: tokenizes a single-entry string and a multi-entry string, then prints
// both token lists to standard output.
int main()
{
    const auto single_entry = tokenize("A->B,100");
    const auto several_entries = tokenize("A->B,100, C->D,2000, E->F,22");

    std::cout << single_entry << several_entries;

    return 0;
}