sficl  Current version
SfiDelimitedRecordSTD.h
Go to the documentation of this file.
1 // Copyright (c) 2007-2011 Jabiru Ventures LLC
2 // Licensing questions should be addressed to jvlicense@jabiruventures.com
3 //
4 // Permission is hereby granted, free of charge, to any person
5 // obtaining a copy of this software and associated documentation
6 // files (the "Software"), to deal in the Software without
7 // restriction, including without limitation the rights to use,
8 // copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the
10 // Software is furnished to do so, subject to the following
11 // conditions:
12 //
13 // The above copyright notice and this permission notice shall be
14 // included in all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 // OTHER DEALINGS IN THE SOFTWARE.
24 //-----------------------------------------------------------------------------
25 //
26 // SfiDelimitedRecordSTD.h - A record consisting of string fields separated by a delimiter.
27 //
28 //-----------------------------------------------------------------------------
29 #ifndef SfiDelimitedRecordSTD_INCLUDED
30 #define SfiDelimitedRecordSTD_INCLUDED
31 
32 #include "SfiVectorLite.h"
33 #include <string>
34 
35 
36 //------------------------------------------------------------------------------
37 //
38 // SfiDelimitedRecordSTD
39 //
40 //------------------------------------------------------------------------------
41 /// A record consisting of string fields separated by a delimiter. The functionality
42 /// is similar to the split() function in Perl or the way awk treats lines.
43 
44 /// There are two ways of using this class. The first makes a copy of the string.
45 /// In this case, the class should be initialized with a string and the
46 /// resulting fields should be accessed by the \c operator[]. The second way is to
47 /// pass the line to the class as a writable buffer in the \c split function and
48 /// access the resulting fields via the \c get() method. In this case, the split
49 /// occurs in-place, i.e., without a copy or memory allocation, but the delimiters
50 /// in the buffer are overwritten with null characters. The \c get() method
51 /// should not be called after the buffer was subsequently changed by the caller.
53 {
54 protected:
55  /// Buffer with a modified string for fast retrieval.
56  string m_buffer;
60  char* m_sptr;
61  const char m_nullChar;
62 
63 public:
64  explicit SfiDelimitedRecordSTD(const char* str = 0, char delimiter = ',') : m_delimiter(delimiter), m_sptr(0), m_nullChar(0)
65  {
66  m_offsets.reserve(6);
67  m_lengths.reserve(6);
68  *this = str;
69  }
71  {
72  *this = rec;
73  }
75 
76  /// Copies all data from rec.
78  {
79  m_buffer = rec.m_buffer;
81  m_offsets = rec.m_offsets;
82  m_lengths = rec.m_lengths;
83  m_sptr = rec.m_sptr;
84  return *this;
85  }
86 
87  /// Sets the record to a new string (makes a copy).
88  /// Access to the resulting split string is via the \c operator[].
89  SfiDelimitedRecordSTD& operator=(const char* str)
90  {
91  if (str)
92  {
93  m_buffer = str;
94  split();
95  }
96  else
97  {
98  clear();
99  }
100  return *this;
101  }
102 
103  /// Returns the number of fields in the record.
104  int size() const
105  {
106  return m_offsets.size();
107  }
108 
109  /// Returns a pointer to the i-th field or an empty string if there are fewer than i fields.
110  const char* operator[](int i) const
111  {
112  int size = m_offsets.size();
113  if (size == 0 || i < 0 || i >= size)
114  {
115  return &m_nullChar; //m_buffer.c_str() + m_offsets[size - 1] + m_lengths[size - 1];
116  }
117  return m_buffer.c_str() + m_offsets[i];
118  }
119 
120  /// Returns the length of the n-th field (zero-based) or -1 if there is no such field.
121  int length(int n) const
122  {
123  return n < m_offsets.size() ? m_lengths[n] : -1;
124  }
125 
126  /// Sets the delimiter character used by later splits. NOTE: this only stores the new delimiter; fields that were already split are NOT re-split.
127  void setDelimiter(char delim)
128  {
129  m_delimiter = delim; // stored only; split() and split(char*, int) compare against it on their next run
130  }
131 
132  /// Returns the offset of the n-th field (zero-based) in the original string or -1 if there is no such field.
133  int offset(int n) const
134  {
135  return n < m_offsets.size() ? m_offsets[n] : -1;
136  }
137 
138  /// Splits the \c buf in-place, overwriting delimiters with null characters.
139  /// Returns the number of fields in the \c buf. Delimiters inside double quotes are ignored.
140  /// \c n is the size of string in \c buf, excluding the terminating null.
141  /// Access to fields is provided by \c get(int).
142  int split(char* buf, int n)
143  {
144  if (!buf)
145  {
146  clear();
147  return 0;
148  }
149  // The code here is identical with that in split() except here we
150  // operate on a char* buffer, and split() operates on a std::string.
151  m_sptr = buf;
152  int start = 0;
153  int i;
154  m_offsets.clear();
155  m_lengths.clear();
156  bool insideQuotes = false;
157  for (i = 0; i < n; i++)
158  {
159  if (buf[i] == '"')
160  {
161  insideQuotes = !insideQuotes;
162  }
163  if (!insideQuotes && buf[i] == m_delimiter)
164  {
165  buf[i] = '\0';
166  m_offsets.push_back(start);
167  m_lengths.push_back(i - start);
168  start = i + 1;
169  }
170  }
171  m_offsets.push_back(start);
172  m_lengths.push_back(i - start);
173  return i ? m_offsets.size() : 0;
174  }
175 
176  /// Returns a pointer to the i-th field of a split string - for use with split(char*, int) only!!!
177  /// If the index i is outside the range of valid fields, a pointer to an empty string is returned.
178  const char* get(int i) const
179  {
180  int size = m_offsets.size();
181  if (size == 0 || i < 0 || i >= size)
182  {
183  return &m_nullChar; //m_sptr + m_offsets[size - 1] + m_lengths[size - 1];
184  }
185  return m_sptr + m_offsets[i];
186  }
187 
188 protected:
189 
190  /// Returns the number of fields in the record. Delimiters inside double quotes are ignored.
191  int split()
192  {
193  int start = 0;
194  int i, n;
195  m_offsets.clear();
196  m_lengths.clear();
197  bool insideQuotes = false;
198  for (i = 0, n = m_buffer.length(); i < n; i++)
199  {
200  if (m_buffer[i] == '"')
201  {
202  insideQuotes = !insideQuotes;
203  }
204  if (!insideQuotes && m_buffer[i] == m_delimiter)
205  {
206  m_buffer[i] = '\0';
207  m_offsets.push_back(start);
208  m_lengths.push_back(i - start);
209  start = i + 1;
210  }
211  }
212  m_offsets.push_back(start);
213  m_lengths.push_back(i - start);
214  return i ? m_offsets.size() : 0;
215  }
216 
217  /// Clears the record.
218  void clear()
219  {
220  m_buffer.clear();
221  m_offsets.clear();
222  m_lengths.clear();
223  m_sptr = 0;
224  }
225 };
226 
227 #endif
int offset(int n) const
Returns the offset of the n-th field (zero-based) in the original string or -1 if there is no such field.
void clear()
Clears the record.
SfiVectorLite< int > m_offsets
A record consisting of string fields separated by a delimiter.
int split()
Returns the number of fields in the record. Delimiters inside double quotes are ignored.
void reserve(size_t size)
Resizes the underlying vector, but does not change the perceived size.
void setDelimiter(char delim)
Sets the delimiter character used by later splits (fields that were already split are not re-split).
string m_buffer
Buffer with a modified string for fast retrieval.
SfiDelimitedRecordSTD & operator=(const char *str)
Sets the record to a new string (makes a copy).
SfiDelimitedRecordSTD(const SfiDelimitedRecordSTD &rec)
const char * operator[](int i) const
Returns a pointer to the i-th field or an empty string if there are fewer than i fields.
int size() const
Returns the number of fields in the record.
SfiDelimitedRecordSTD & operator=(const SfiDelimitedRecordSTD &rec)
Copies all data from rec.
int length(int n) const
Returns the length of the n-th field (zero-based) or -1 if there is no such field.
SfiVectorLite< int > m_lengths
int size() const
Returns the size of the vector.
Definition: SfiVectorLite.h:89
void clear()
Sets the number of elements to zero without releasing the memory. Use pack() to also release the memo...
void push_back(const T &t)
Appends t to the end of the vector.
SfiDelimitedRecordSTD(const char *str=0, char delimiter= ',')
int split(char *buf, int n)
Splits the buf in-place, overwriting delimiters with null characters.