Claw  1.7.3
string_algorithm.tpp
1 /*
2  CLAW - a C++ Library Absolutely Wonderful
3 
4  CLAW is a free library without any particular aim but being useful to
5  anyone.
6 
7  Copyright (C) 2005-2011 Julien Jorge
8 
9  This library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU Lesser General Public
11  License as published by the Free Software Foundation; either
12  version 2.1 of the License, or (at your option) any later version.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public
20  License along with this library; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 
23  contact: julien.jorge@gamned.org
24 */
25 /**
26  * \file string_algorithm.tpp
27  * \brief Implementation of the algorithms on strings.
28  * \author Julien Jorge
29  */
30 
31 #include <claw/algorithm.hpp>
32 #include <claw/glob.hpp>
33 
34 #include <sstream>
35 #include <string>
36 #include <iterator>
37 
38 /*----------------------------------------------------------------------------*/
/**
 * \brief Read one line from a stream and drop a trailing carriage return.
 *
 * The line is read with std::getline( is, str ), then the last character of
 * \a str is removed if it is '\\r'. Lines terminated by "\\r\\n" and lines
 * terminated by "\\n" thus produce the same string.
 *
 * \param is The stream from which the line is read.
 * \param str (out) The line read from the stream.
 * \return \a is.
 */
template<typename StreamType, typename StringType>
StreamType& claw::text::getline( StreamType& is, StringType& str )
{
  typedef typename StringType::value_type char_type;
  typedef typename StringType::size_type size_type;

  std::getline( is, str );

  const size_type length( str.size() );

  if ( (length != 0) && (str[length - 1] == char_type('\r')) )
    str.erase( length - 1 );

  return is;
} // getline()
56 
57 /*----------------------------------------------------------------------------*/
/**
 * \brief Remove characters at the beginning of a string.
 * \param str The string to modify.
 * \param s The null-terminated set of characters to remove.
 *
 * Every leading character of \a str that belongs to \a s is removed. If
 * \a str contains only characters of \a s, it becomes empty.
 */
template<typename StringType>
void claw::text::trim_left( StringType& str,
                            const typename StringType::value_type* const s )
{
  // find_first_not_of() returns npos when there is nothing to keep, and
  // erase( 0, npos ) then removes the whole content.
  str.erase( 0, str.find_first_not_of(s) );
} // trim_left()
72 
73 /*----------------------------------------------------------------------------*/
/**
 * \brief Remove characters at the end of a string.
 * \param str The string to modify.
 * \param s The null-terminated set of characters to remove.
 *
 * Every trailing character of \a str that belongs to \a s is removed. If
 * \a str contains only characters of \a s, it becomes empty.
 */
template<typename StringType>
void claw::text::trim_right( StringType& str,
                             const typename StringType::value_type* const s )
{
  const typename StringType::size_type p = str.find_last_not_of(s);

  if ( p == StringType::npos )
    str.erase(); // nothing to keep: the string is empty or made only of s
  else
    str.erase( p + 1 );
} // trim_right()
88 
89 /*----------------------------------------------------------------------------*/
/**
 * \brief Remove characters at the beginning and at the end of a string.
 * \param str The string to modify.
 * \param s The null-terminated set of characters to remove.
 *
 * Every leading and trailing character of \a str that belongs to \a s is
 * removed. If \a str contains only characters of \a s, it becomes empty.
 */
template<typename StringType>
void claw::text::trim( StringType& str,
                       const typename StringType::value_type* const s )
{
  typedef typename StringType::size_type size_type;

  const size_type first = str.find_first_not_of(s);

  if ( first == StringType::npos )
    {
      // nothing to keep: the string is empty or made only of characters of s
      str.erase();
      return;
    }

  // A character to keep exists, so this search cannot return npos.
  const size_type last = str.find_last_not_of(s);

  // Cut the tail first so that the index of the head stays valid.
  str.erase( last + 1 );
  str.erase( 0, first );
} // trim()
105 
106 /*----------------------------------------------------------------------------*/
/**
 * \brief Squeeze successive occurrences of a character into a single one.
 * \param str The string to modify.
 * \param s The null-terminated set of characters to squeeze.
 *
 * Each run of a same character taken from \a s is reduced to one occurrence of
 * this character. Characters that are not in \a s are left untouched.
 *
 * \b Example :
 * <tt>
 * std::string s("word aaa bbb abab");
 * claw::text::squeeze( s, "ab" );
 * std::cout << s << std::endl; // result is "word a b abab"
 * </tt>
 */
template<typename StringType>
void claw::text::squeeze( StringType& str,
                          const typename StringType::value_type* const s )
{
  typedef typename StringType::size_type size_type;
  typedef typename StringType::value_type char_type;

  size_type pos = str.find_first_of(s, 0);

  while ( pos != StringType::npos )
    {
      const char_type repeated = str[pos];
      const size_type run_end = str.find_first_not_of(repeated, pos + 1);

      if ( run_end == StringType::npos )
        {
          // The run reaches the end of the string: keep its first character
          // only; there is nothing left to scan.
          str.erase( pos + 1 );
          break;
        }

      // Remove everything of the run but its first character.
      str.erase( pos + 1, run_end - pos - 1 );

      pos = str.find_first_of(s, pos + 1);
    }
} // squeeze()
145 
146 /*----------------------------------------------------------------------------*/
147 /**
148  * \brief Replace a set of characters by other characters.
149  * \param str The string to modify.
150  * \param e1 The characters to remove.
151  * \param e2 The characters replacing the ones in \a e1.
152  *
153  * \return The number of replaced characters.
154  *
155  * Each character e1[i] will be replaced with e2[i]. If \a e1 is smaller than
156  * \a e2, the latter will be completed by repeating its last character.
157  *
158  * \b Example :
159  * <tt>
160  * std::string s("word aaa bbb abab");
161  * claw::replace( s, "ab", "ba" );
162  * std::cout << s << std::end; // result is "word bbb aaa baba"
163  * </tt>
164  */
165 template<typename StringType>
166 std::size_t claw::text::replace
167 ( StringType& str, const StringType& e1, const StringType& e2 )
168 {
169  return
170  claw::replace
171  ( str.begin(), str.end(), e1.begin(), e1.end(), e2.begin(), e2.end() );
172 } // replace()
173 
174 /*----------------------------------------------------------------------------*/
/**
 * \brief Test if the content of a string is immediately convertible to a type.
 * \param str The string to test.
 * \return true if a value of type \a T can be extracted from \a str with
 *         operator>> and no character remains in \a str after the extracted
 *         value.
 *
 * The extraction uses the default formatting flags of the stream, thus leading
 * white spaces are skipped by the standard extractors.
 */
template<typename T, typename StringType>
bool claw::text::is_of_type( const StringType& str )
{
  typedef typename StringType::traits_type traits_type;

  std::basic_istringstream< typename StringType::value_type,
                            traits_type,
                            typename StringType::allocator_type > iss(str);

  T val;

  if ( !(iss >> val) )
    return false;

  // The eofbit is only a side effect of the extractors that look ahead (the
  // numeric ones, for example). An extractor reading a fixed count of
  // characters, like the one of a single character, leaves it unset even when
  // the whole string has been consumed. So we ask explicitly if something
  // remains. peek() also returns eof() when the eofbit is already set.
  return traits_type::eq_int_type( iss.peek(), traits_type::eof() );
} // is_of_type()
194 
195 /*----------------------------------------------------------------------------*/
196 /**
197  * \brief Split a string into several substrings, according to a given
198  * separator.
199  * \param sequence A sequence in which the substrings are added.
200  * \param str The string to split.
201  * \param sep The separator on which the string is splitted.
202  */
203 template<typename Sequence>
204 void claw::text::split
205 ( Sequence& sequence, const typename Sequence::value_type& str,
206  const typename Sequence::value_type::value_type sep )
207 {
208  split(sequence, str.begin(), str.end(), sep);
209 } // split()
210 
211 /*----------------------------------------------------------------------------*/
/**
 * \brief Split a string into several substrings, according to a given
 *        separator.
 * \param sequence A sequence in which the substrings are added.
 * \param first Iterator on the beginning of the string to split.
 * \param last Iterator just past the end of the string to split.
 * \param sep The separator on which the string is split.
 *
 * The substrings are read with std::getline( stream, substring, sep ) and each
 * one is inserted at the end of \a sequence. Consequently, two successive
 * separators produce an empty substring, while a separator ending the string
 * does not add a final empty substring.
 */
template<typename Sequence>
void claw::text::split
( Sequence& sequence, typename Sequence::value_type::const_iterator first,
  typename Sequence::value_type::const_iterator last,
  const typename Sequence::value_type::value_type sep )
{
  typedef typename Sequence::value_type string_type;
  typedef
    std::basic_istringstream< typename string_type::value_type,
                              typename string_type::traits_type,
                              typename string_type::allocator_type >
    stream_type;

  const string_type source(first, last);
  stream_type iss(source);

  for ( string_type piece; std::getline(iss, piece, sep); )
    sequence.insert( sequence.end(), piece );
} // split()
236 
237 /*----------------------------------------------------------------------------*/
238 /**
239  * \brief Find escaped symbols in a sequence of characters and replace them by
240  * their c-equivalent.
241  *
242  * \param first Iterator on the beginning of the string to escape.
243  * \param last Iterator just past the end of the string to escape.
244  * \param out Iterator on the beginning of the output string.
245  * \pre \a out points on a range long enough to store the resulting string.
246  *
247  * \code
248  * std::string s("\\a\\t\\n\\r");
249  * std::string r;
250  *
251  * claw::text::c_escape( s.begin(), s.end(), std::insert_iterator(r, r.end()) );
252  *
253  * if ( r == "\a\t\n\r" )
254  * std::cout << "It works!" << std::endl;
255  * \endcode
256  *
257  * \remark This method has not been tested with wide chars yet.
258  */
259 template<typename InputIterator, typename OutputIterator>
260 void claw::text::c_escape
261 ( InputIterator first, InputIterator last, OutputIterator out )
262 {
263  typedef typename std::iterator_traits<InputIterator>::value_type char_type;
264  typedef std::basic_string<char_type> string_type;
265 
266  const string_type oct("01234567");
267  const string_type hex("0123456789ABCDEFabcdef");
268 
269  bool escape(false);
270 
271  for ( ; first!=last; ++out )
272  if ( escape )
273  {
274  switch( *first )
275  {
276  case 'a': *out = '\a'; ++first; break;
277  case 'b': *out = '\b'; ++first; break;
278  case 'f': *out = '\f'; ++first; break;
279  case 'n': *out = '\n'; ++first; break;
280  case 'r': *out = '\r'; ++first; break;
281  case 't': *out = '\t'; ++first; break;
282  case 'v': *out = '\v'; ++first; break;
283  case 'o':
284  {
285  ++first;
286  int v(0);
287  const InputIterator e
288  ( find_first_not_of(first, last, oct.begin(), oct.end()) );
289 
290  std::basic_istringstream<char_type> iss( string_type(first, e) );
291  iss >> std::oct >> v;
292  *out = (char_type)v;
293  first = e;
294  break;
295  }
296  case 'x':
297  {
298  ++first;
299  int v(0);
300  const InputIterator e
301  ( find_first_not_of(first, last, hex.begin(), hex.end()) );
302 
303  std::basic_istringstream<char_type> iss( string_type(first, e) );
304  iss >> std::hex >> v;
305  *out = (char_type)v;
306  first = e;
307  break;
308  }
309  default: *out = *first; ++first;
310  }
311 
312  escape = false;
313  }
314  else if ( *first == '\\' )
315  {
316  escape = true;
317  ++first;
318  }
319  else
320  {
321  *out = *first;
322  ++first;
323  }
324 } // c_escape()
325 
326 /*----------------------------------------------------------------------------*/
327 /**
328  * \brief Check if a string matches a given pattern.
329  * \param pattern The pattern.
330  * \param text The text to check.
331  * \param any_sequence A value representing any sequence of values, empty or
332  * not.
333  * \param zero_or_one A value representing any value or no value.
334  * \param any A value representing any value.
335  */
336 template<typename StringType>
337 bool claw::text::glob_match
338 ( const StringType& pattern, const StringType& text,
339  const typename StringType::value_type any_sequence,
340  const typename StringType::value_type zero_or_one,
341  const typename StringType::value_type any )
342 {
343  return claw::glob_match
344  ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
345  zero_or_one, any );
346 } // glob_match()
347 
348 /*----------------------------------------------------------------------------*/
349 /**
350  * \brief Check if a string may match a given pattern.
351  * \param pattern The pattern.
352  * \param text The text to check.
353  * \param any_sequence A value representing any sequence of values, empty or
354  * not.
355  * \param zero_or_one A value representing any value or no value.
356  * \param any A value representing any value.
357  */
358 template<typename StringType>
359 bool claw::text::glob_potential_match
360 ( const StringType& pattern, const StringType& text,
361  const typename StringType::value_type any_sequence,
362  const typename StringType::value_type zero_or_one,
363  const typename StringType::value_type any )
364 {
365  return claw::glob_potential_match
366  ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
367  zero_or_one, any );
368 } // glob_potential_match()