#include #include #include #include #include #include #include #define DEBUGGING 0 #if DEBUGGING # include # define SHOW( var ) std::cout << #var ": '" << var << "'" << std::endl; #else # define SHOW( var ) #endif namespace { typedef boost::unordered_map< std::string, std::string > ssmap; typedef const boost::unordered_map< std::string, std::string > cssmap; const boost::regex make_regex( cssmap & dict ) { // create our regular expression. (there is almost certainly a // cleverer / more-functional way to do this with lambdas etc, but // i couldn't make it work -- and this version has the advantage // of working on older boosts.) std::string re_str; BOOST_FOREACH( const cssmap::value_type & v, dict ) { if ( ! re_str.empty() ) re_str.append( "|" ); re_str.append( "\\Q" + v.first + "\\E" ); } SHOW( re_str ); // now compile the regex return boost::regex( re_str ); } const std::string replace( const std::string & input, const ssmap & dict, const boost::regex & re ) { // note that this entire subroutine can be replaced with a single // call to boost::regex_replace, but only in very recent versions // of boost, as it requires the interface that allows the // replacement formatter to be any functor, not just a string. // we'll store the processed text here std::string output; // and we want to save the end of last match to make sure we catch // the tail. this also covers the case where there's no matches // at all. std::string::const_iterator prev_match_end( input.begin() ); // this is too long to type too many times... typedef boost::regex_iterator< std::string::const_iterator > const_str_regex_iter; // now loop over the input data for ( const_str_regex_iter end, i( input.begin(), input.end(), re ); i != end; ++i ) { // for each match, append... output .append( (*i).prefix().str() ) // everything since previous match .append( dict.at( (*i)[0].str() ) ); // and the replacement for current match // and save the end of this match, to catch the tail. prev_match_end = (*i)[0].second; } // append the tail. output.append( prev_match_end, input.end() ); return output; } void test( const ssmap & dict, const boost::regex & re, const std::string & input, const std::string & expected ) { SHOW( input ); const std::string output( replace( input, dict, re ) ); SHOW( output ); BOOST_CHECK( output == expected ); } } int test_main( int argc, char * argv [] ) { // build up mapping ssmap dict; dict.insert( ssmap::value_type( "\\alpha", "a" ) ); dict.insert( ssmap::value_type( "\\beta", "b" ) ); dict.insert( ssmap::value_type( "\\gamma", "g" ) ); // make a regex from the keys boost::regex re( make_regex( dict ) ); // now try some corner cases test( dict, re, "", "" ); test( dict, re, "x", "x" ); test( dict, re, "\\alpha", "a" ); test( dict, re, "alpha", "alpha" ); test( dict, re, " \\beta", " b" ); test( dict, re, "\\beta ", "b " ); // and a more complex example. test( dict, re, "alpha \\alpha beta \\beta gamma \\gamma tail", "alpha a " "beta b " "gamma g " "tail" ); return 0; }