00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "precomp.h"
00030 #include "mime_part.h"
00031 #include "regexp.h"
00032 #include "file.h"
00033 #include "exception.h"
00034 #include "string_help.h"
00035
00037
00038
00039 CL_MimePart::CL_MimePart()
00040 : header_pos(0), header_length(0), body_pos(0), body_length(0)
00041 {
00042 }
00043
00044 CL_MimePart::CL_MimePart(const CL_MimePart ©)
00045 : headers(copy.headers), header_pos(copy.header_pos), header_length(copy.header_length),
00046 body_pos(copy.body_pos), body_length(copy.body_length), parts(copy.parts)
00047 {
00048 }
00049
00051
00052
00054
00055
00056 CL_MimePart &CL_MimePart::operator =(const CL_MimePart ©)
00057 {
00058 headers = copy.headers;
00059 header_pos = copy.header_pos;
00060 header_length = copy.header_length;
00061 body_pos = copy.body_pos;
00062 body_length = copy.body_length;
00063 parts = copy.parts;
00064 return *this;
00065 }
00066
00067 void CL_MimePart::load(CL_File &file, const CL_StringA &next_mime_boundary)
00068 {
00069
00070 headers.clear();
00071 header_pos = file.get_position();
00072 header_length = 0;
00073 body_pos = 0;
00074 body_length = 0;
00075 parts.clear();
00076
00077
00078 char buffer[16*1024];
00079 int bytes_read = file.read(buffer, 16*1024);
00080 if (bytes_read <= 0)
00081 throw CL_Exception(TEXT("Premature end of MIME document"));
00082
00083 CL_RegExp regexp_header_end("\\r\\n(\\r\\n)");
00084 CL_RegExpMatch match_header_end = regexp_header_end.search(buffer, bytes_read);
00085 if (!match_header_end.is_match())
00086 throw CL_Exception(TEXT("Premature end of MIME entity header"));
00087
00088 header_length = match_header_end.get_capture_pos(1);
00089 headers = CL_StringA(
00090 buffer,
00091 header_length);
00092
00093 body_pos = header_pos + match_header_end.get_capture_pos(0)+match_header_end.get_capture_length(0);
00094 body_length = 0;
00095 file.seek(body_pos);
00096
00097
00098 CL_StringA content_type = get_content_type();
00099 CL_RegExp regexp_multipart("multipart\\/|message\\/", CL_RegExp::compile_caseless);
00100 bool is_multipart = regexp_multipart.search(content_type).is_match();
00101
00102
00103 CL_RegExp boundary_begin_exp("\\r\\n--");
00104 CL_RegExp boundary_end_exp(".*?\\r\\n");
00105 CL_RegExpMatch match_result;
00106 if (is_multipart)
00107 {
00108 CL_StringA boundary = get_content_type_parameter_value("boundary");
00109 if (boundary.empty())
00110 throw CL_Exception(CL_StringHelp::local8_to_text(content_type) + TEXT(" missing expected boundary parameter"));
00111
00112
00113 int offset = 0;
00114 file.seek(body_pos-2);
00115 bytes_read = file.read(buffer, 16*1024);
00116 if (bytes_read <= 0)
00117 throw CL_Exception(TEXT("Premature end of MIME entity"));
00118
00119
00120 while (true)
00121 {
00122 boundary_begin_exp.search(buffer, bytes_read, offset, 0, match_result);
00123 if (!match_result.is_match())
00124 throw CL_Exception(TEXT("MIME boundary not found"));
00125 offset = match_result.get_capture_pos(0) + match_result.get_capture_length(0);
00126 if (offset + boundary.length() > 16*1024)
00127 throw CL_Exception(TEXT("MIME boundary not found"));
00128 if (memcmp(boundary.data(), buffer+offset, boundary.length()) == 0)
00129 break;
00130 }
00131
00132
00133 body_length = offset-4;
00134 if (body_length < 0)
00135 body_length = 0;
00136
00137
00138 boundary_end_exp.search(buffer, bytes_read, offset, 0, match_result);
00139 if (!match_result.is_match())
00140 throw CL_Exception(TEXT("MIME boundary marker line never ended!"));
00141 int multipart_start = body_pos-2+match_result.get_capture_pos(0)+match_result.get_capture_length(0);
00142 file.seek(multipart_start);
00143
00144
00145 while (true)
00146 {
00147 CL_MimePart child_part;
00148 child_part.load(file, boundary);
00149 parts.push_back(child_part);
00150
00151
00152 bytes_read = file.read(buffer, 4);
00153 if (bytes_read < 4)
00154 throw CL_Exception(TEXT("Premature end of MIME entity"));
00155 if (memcmp(buffer, "--\r\n", 4) == 0)
00156 break;
00157 else
00158 file.seek(-4, CL_File::seek_cur);
00159 }
00160
00161
00162 if (!next_mime_boundary.empty())
00163 {
00164 int pos = file.get_position();
00165 int offset = 0;
00166 bytes_read = file.read(buffer, 16*1024);
00167 if (bytes_read <= 0)
00168 throw CL_Exception(TEXT("Premature end of MIME entity"));
00169
00170
00171 while (true)
00172 {
00173 boundary_begin_exp.search(buffer, bytes_read, offset, 0, match_result);
00174 if (!match_result.is_match())
00175 throw CL_Exception(TEXT("MIME boundary not found"));
00176 offset = match_result.get_capture_pos(0) + match_result.get_capture_length(0);
00177 if (offset + boundary.length() > 16*1024)
00178 throw CL_Exception(TEXT("MIME boundary not found"));
00179 if (memcmp(boundary.data(), buffer+offset, boundary.length()) == 0)
00180 break;
00181 }
00182
00183
00184 boundary_end_exp.search(buffer, bytes_read, offset, 0, match_result);
00185 if (!match_result.is_match())
00186 throw CL_Exception(TEXT("MIME boundary marker line never ended!"));
00187 int multipart_end = pos + match_result.get_capture_pos(0)+match_result.get_capture_length(0);
00188 file.seek(multipart_end);
00189 }
00190 }
00191 else if (!next_mime_boundary.empty())
00192 {
00193 int offset = 0;
00194 bytes_read = file.read(buffer, 16*1024);
00195 if (bytes_read <= 0)
00196 throw CL_Exception(TEXT("Premature end of MIME entity"));
00197
00198
00199 while (true)
00200 {
00201 boundary_begin_exp.search(buffer, bytes_read, offset, CL_RegExp::search_partial, match_result);
00202 if (match_result.is_match() || match_result.is_partial())
00203 {
00204 int start_match_offset = match_result.get_capture_pos(0);
00205 int boundary_begin_size = 4;
00206 int end_match_offset = start_match_offset + boundary_begin_size + next_mime_boundary.length();
00207 if (end_match_offset > bytes_read)
00208 {
00209
00210 int size = bytes_read-start_match_offset;
00211 memcpy(buffer, buffer+start_match_offset, size);
00212 bytes_read = file.read(buffer+size, 16*1024-size);
00213 if (bytes_read <= 0)
00214 throw CL_Exception(TEXT("Premature end of MIME entity"));
00215 bytes_read += size;
00216 offset = 0;
00217 continue;
00218 }
00219
00220 if (memcmp(
00221 buffer+start_match_offset+boundary_begin_size,
00222 next_mime_boundary.data(),
00223 next_mime_boundary.length()) == 0)
00224 {
00225
00226 file.seek(end_match_offset - bytes_read, CL_File::seek_cur);
00227 body_length = file.get_position()-boundary_begin_size - body_pos;
00228 break;
00229 }
00230
00231 offset = start_match_offset + boundary_begin_size;
00232 }
00233 else
00234 {
00235 offset = 0;
00236 bytes_read = file.read(buffer, 16*1024);
00237 if (bytes_read <= 0)
00238 throw CL_Exception(TEXT("Premature end of MIME entity"));
00239 }
00240 }
00241 }
00242 else
00243 {
00244 body_length = file.get_size()-body_pos;
00245 }
00246 }
00247
00248 CL_StringA CL_MimePart::get_content_id() const
00249 {
00250 CL_RegExp regexp("^Content-ID\\s*:\\s*(\\\"<.*?>\\\"|<\\.+?>)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00251 CL_RegExpMatch match = regexp.search(headers);
00252 if (match.is_match())
00253 {
00254 CL_StringA id = headers.substr(match.get_capture_pos(1)+1, match.get_capture_length(1)-2);
00255 if (id[0] == '<')
00256 id = id.substr(1, id.length()-2);
00257 return id;
00258 }
00259 return CL_StringA();
00260 }
00261
00262 bool CL_MimePart::has_content_id() const
00263 {
00264 CL_RegExp regexp("^Content-ID\\s*:\\s*(\\\"<.*?>\\\"|<\\.+?>)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00265 CL_RegExpMatch match = regexp.search(headers);
00266 return match.is_match();
00267 }
00268
00269 CL_StringA CL_MimePart::get_content_description() const
00270 {
00271 CL_RegExp regexp("^Content-Description\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00272 CL_RegExpMatch match = regexp.search(headers);
00273 if (match.is_match())
00274 {
00275 CL_StringA description = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00276 if (description[0] == '"')
00277 description = description.substr(1, description.length()-2);
00278 return description;
00279 }
00280 return CL_StringA();
00281 }
00282
00283 bool CL_MimePart::has_content_description() const
00284 {
00285 CL_RegExp regexp("^Content-Description\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00286 CL_RegExpMatch match = regexp.search(headers);
00287 return match.is_match();
00288 }
00289
00290 CL_StringA CL_MimePart::get_content_transfer_encoding() const
00291 {
00292 CL_RegExp regexp("^Content-Transfer-Encoding\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00293 CL_RegExpMatch match = regexp.search(headers);
00294 if (match.is_match())
00295 {
00296 CL_StringA encoding = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00297 if (encoding[0] == '"')
00298 encoding = encoding.substr(1, encoding.length()-2);
00299 return encoding;
00300 }
00301 return CL_StringA("7bit");
00302 }
00303
00304 CL_StringA CL_MimePart::get_content_type() const
00305 {
00306 CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00307 CL_RegExpMatch match = regexp.search(headers);
00308 if (match.is_match())
00309 {
00310 CL_StringA content_type = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00311 if (content_type[0] == '"')
00312 content_type = content_type.substr(1, content_type.length()-2);
00313 return content_type;
00314 }
00315 return CL_StringA("text/plain");
00316 }
00317
00318 bool CL_MimePart::has_content_type() const
00319 {
00320 CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00321 CL_RegExpMatch match = regexp.search(headers);
00322 if (match.is_match())
00323 return true;
00324 return false;
00325 }
00326
00327 CL_StringA CL_MimePart::get_content_type_parameters() const
00328 {
00329 CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)\\s*;\\s*", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00330 CL_RegExpMatch match = regexp.search(headers);
00331 if (!match.is_match())
00332 return CL_StringA();
00333 int parameters_offset = match.get_capture_pos(0)+match.get_capture_length(0);
00334 CL_RegExp regexp_next_line("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00335 CL_RegExpMatch match_next_line = regexp_next_line.search(headers, parameters_offset);
00336 if (match_next_line.is_match())
00337 return headers.substr(parameters_offset, match_next_line.get_capture_pos(0)-parameters_offset);
00338 else
00339 return headers.substr(parameters_offset);
00340 }
00341
00342 CL_StringA CL_MimePart::get_content_type_parameter_value(const CL_StringA &search_name) const
00343 {
00344 if (!has_content_type())
00345 {
00346
00347
00348 if (CL_StringHelp::compare(search_name, "charset", true) == 0)
00349 return CL_StringA("us-ascii");
00350 return CL_StringA();
00351 }
00352
00353 CL_StringA parameters = get_content_type_parameters();
00354
00355 CL_RegExp regexp("\\s*(\\w+)\\s*=(\".*?\"|\\w*)\\s*;*");
00356 int offset = 0;
00357 while (true)
00358 {
00359 CL_RegExpMatch match = regexp.search(parameters, offset);
00360 if (!match.is_match())
00361 break;
00362 CL_StringA name = parameters.substr(match.get_capture_pos(1), match.get_capture_length(1));
00363 CL_StringA value = parameters.substr(match.get_capture_pos(2), match.get_capture_length(2));
00364 if (value[0] == '"')
00365 value = value.substr(1, value.length()-2);
00366 if (CL_StringHelp::compare(search_name, name, true) == 0)
00367 return value;
00368 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00369 }
00370 return CL_StringA();
00371 }
00372
00373 bool CL_MimePart::has_content_type_parameter(const CL_StringA &search_name) const
00374 {
00375 if (!has_content_type())
00376 {
00377
00378
00379 if (CL_StringHelp::compare(search_name, "charset", true) == 0)
00380 return true;
00381 return false;
00382 }
00383
00384 CL_StringA parameters = get_content_type_parameters();
00385
00386 CL_RegExp regexp("\\s*(\\w+)\\s*=(\".*?\"|\\w*)\\s*;*");
00387 int offset = 0;
00388 while (true)
00389 {
00390 CL_RegExpMatch match = regexp.search(parameters, offset);
00391 if (!match.is_match())
00392 break;
00393 CL_StringA name = parameters.substr(match.get_capture_pos(1), match.get_capture_length(1));
00394 if (CL_StringHelp::compare(search_name, name, true) == 0)
00395 return true;
00396 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00397 }
00398
00399 return false;
00400 }
00401
00402 int CL_MimePart::get_header_lines_count() const
00403 {
00404 CL_RegExp regexp("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00405 int offset = 0;
00406 int matches = 0;
00407 while (true)
00408 {
00409 CL_RegExpMatch match = regexp.search(headers, offset);
00410 if (!match.is_match())
00411 break;
00412 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00413 matches++;
00414 }
00415 return matches;
00416 }
00417
00418 CL_StringA CL_MimePart::get_header_line(int line) const
00419 {
00420 CL_RegExp regexp("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00421 int offset = 0;
00422 int matches = 0;
00423 while (true)
00424 {
00425 CL_RegExpMatch match = regexp.search(headers, offset);
00426 if (!match.is_match())
00427 break;
00428 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00429 if (matches == line)
00430 {
00431 CL_RegExpMatch match_next_line = regexp.search(headers, offset);
00432 if (match.is_match())
00433 return headers.substr(
00434 match.get_capture_pos(0),
00435 match_next_line.get_capture_pos(0)-match.get_capture_pos(0));
00436 else
00437 return headers.substr(match.get_capture_pos(0));
00438 }
00439 matches++;
00440 }
00441 return CL_StringA();
00442 }
00443
00445