Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

mime_part.cpp

Go to the documentation of this file.
00001 /*
00002 **  ClanLib SDK
00003 **  Copyright (c) 1997-2005 The ClanLib Team
00004 **
00005 **  This software is provided 'as-is', without any express or implied
00006 **  warranty.  In no event will the authors be held liable for any damages
00007 **  arising from the use of this software.
00008 **
00009 **  Permission is granted to anyone to use this software for any purpose,
00010 **  including commercial applications, and to alter it and redistribute it
00011 **  freely, subject to the following restrictions:
00012 **
00013 **  1. The origin of this software must not be misrepresented; you must not
00014 **     claim that you wrote the original software. If you use this software
00015 **     in a product, an acknowledgment in the product documentation would be
00016 **     appreciated but is not required.
00017 **  2. Altered source versions must be plainly marked as such, and must not be
00018 **     misrepresented as being the original software.
00019 **  3. This notice may not be removed or altered from any source distribution.
00020 **
00021 **  Note: Some of the libraries ClanLib link to may have additional
00022 **  requirements or restrictions.
00023 **
00024 **  File Author(s):
00025 **
00026 **    Magnus Norddahl
00027 */
00028 
00029 #include "precomp.h"
00030 #include "mime_part.h"
00031 #include "regexp.h"
00032 #include "file.h"
00033 #include "exception.h"
00034 #include "string_help.h"
00035 
00037 // CL_MimePart Construction:
00038 
00039 CL_MimePart::CL_MimePart()
00040 : header_pos(0), header_length(0), body_pos(0), body_length(0)
00041 {
00042 }
00043 
00044 CL_MimePart::CL_MimePart(const CL_MimePart &copy)
00045 : headers(copy.headers), header_pos(copy.header_pos), header_length(copy.header_length),
00046         body_pos(copy.body_pos), body_length(copy.body_length), parts(copy.parts)
00047 {
00048 }
00049 
00051 // CL_MimePart Attributes:
00052 
00054 // CL_MimePart Operations:
00055 
00056 CL_MimePart &CL_MimePart::operator =(const CL_MimePart &copy)
00057 {
00058         headers = copy.headers;
00059         header_pos = copy.header_pos;
00060         header_length = copy.header_length;
00061         body_pos = copy.body_pos;
00062         body_length = copy.body_length;
00063         parts = copy.parts;
00064         return *this;
00065 }
00066 
00067 void CL_MimePart::load(CL_File &file, const CL_StringA &next_mime_boundary)
00068 {
00069         // Reset mime part:
00070         headers.clear();
00071         header_pos = file.get_position();
00072         header_length = 0;
00073         body_pos = 0;
00074         body_length = 0;
00075         parts.clear();
00076 
00077         // Look for header end:
00078         char buffer[16*1024];
00079         int bytes_read = file.read(buffer, 16*1024);
00080         if (bytes_read <= 0)
00081                 throw CL_Exception(TEXT("Premature end of MIME document"));
00082 
00083         CL_RegExp regexp_header_end("\\r\\n(\\r\\n)");
00084         CL_RegExpMatch match_header_end = regexp_header_end.search(buffer, bytes_read);
00085         if (!match_header_end.is_match())
00086                 throw CL_Exception(TEXT("Premature end of MIME entity header"));
00087 
00088         header_length = match_header_end.get_capture_pos(1);
00089         headers = CL_StringA(
00090                 buffer,
00091                 header_length);
00092 
00093         body_pos = header_pos + match_header_end.get_capture_pos(0)+match_header_end.get_capture_length(0);
00094         body_length = 0;
00095         file.seek(body_pos);
00096 
00097         // Examine header we got:
00098         CL_StringA content_type = get_content_type();
00099         CL_RegExp regexp_multipart("multipart\\/|message\\/", CL_RegExp::compile_caseless);
00100         bool is_multipart = regexp_multipart.search(content_type).is_match();
00101 
00102         // Read body:
00103         CL_RegExp boundary_begin_exp("\\r\\n--");
00104         CL_RegExp boundary_end_exp(".*?\\r\\n");
00105         CL_RegExpMatch match_result;
00106         if (is_multipart)
00107         {
00108                 CL_StringA boundary = get_content_type_parameter_value("boundary");
00109                 if (boundary.empty())
00110                         throw CL_Exception(CL_StringHelp::local8_to_text(content_type) + TEXT(" missing expected boundary parameter"));
00111 
00112                 // Find end of multipart descriptional body:
00113                 int offset = 0;
00114                 file.seek(body_pos-2);
00115                 bytes_read = file.read(buffer, 16*1024);
00116                 if (bytes_read <= 0)
00117                         throw CL_Exception(TEXT("Premature end of MIME entity"));
00118 
00119                 // Get offset to point at the end of the boundary, but not including any possible additional boundary whitespace.
00120                 while (true)
00121                 {
00122                         boundary_begin_exp.search(buffer, bytes_read, offset, 0, match_result);
00123                         if (!match_result.is_match())
00124                                 throw CL_Exception(TEXT("MIME boundary not found"));
00125                         offset = match_result.get_capture_pos(0) + match_result.get_capture_length(0);
00126                         if (offset + boundary.length() > 16*1024)
00127                                 throw CL_Exception(TEXT("MIME boundary not found"));
00128                         if (memcmp(boundary.data(), buffer+offset, boundary.length()) == 0)
00129                                 break;
00130                 }
00131 
00132                 // Body should be anything between header and body start boundary marker:
00133                 body_length = offset-4;
00134                 if (body_length < 0)
00135                         body_length = 0;
00136 
00137                 // Look for end of boundary line:
00138                 boundary_end_exp.search(buffer, bytes_read, offset, 0, match_result);
00139                 if (!match_result.is_match())
00140                         throw CL_Exception(TEXT("MIME boundary marker line never ended!"));
00141                 int multipart_start = body_pos-2+match_result.get_capture_pos(0)+match_result.get_capture_length(0);
00142                 file.seek(multipart_start);
00143 
00144                 // Read multipart parts:
00145                 while (true)
00146                 {
00147                         CL_MimePart child_part;
00148                         child_part.load(file, boundary);
00149                         parts.push_back(child_part);
00150 
00151                         // End of multiparts marker?
00152                         bytes_read = file.read(buffer, 4);
00153                         if (bytes_read < 4)
00154                                 throw CL_Exception(TEXT("Premature end of MIME entity"));
00155                         if (memcmp(buffer, "--\r\n", 4) == 0)
00156                                 break;
00157                         else
00158                                 file.seek(-4, CL_File::seek_cur);
00159                 }
00160 
00161                 // Find next mime boundary, if needed:
00162                 if (!next_mime_boundary.empty())
00163                 {
00164                         int pos = file.get_position();
00165                         int offset = 0;
00166                         bytes_read = file.read(buffer, 16*1024);
00167                         if (bytes_read <= 0)
00168                                 throw CL_Exception(TEXT("Premature end of MIME entity"));
00169 
00170                         // Get offset to point at the end of the boundary, but not including any possible additional boundary whitespace.
00171                         while (true)
00172                         {
00173                                 boundary_begin_exp.search(buffer, bytes_read, offset, 0, match_result);
00174                                 if (!match_result.is_match())
00175                                         throw CL_Exception(TEXT("MIME boundary not found"));
00176                                 offset = match_result.get_capture_pos(0) + match_result.get_capture_length(0);
00177                                 if (offset + boundary.length() > 16*1024)
00178                                         throw CL_Exception(TEXT("MIME boundary not found"));
00179                                 if (memcmp(boundary.data(), buffer+offset, boundary.length()) == 0)
00180                                         break;
00181                         }
00182 
00183                         // Look for end of boundary line:
00184                         boundary_end_exp.search(buffer, bytes_read, offset, 0, match_result);
00185                         if (!match_result.is_match())
00186                                 throw CL_Exception(TEXT("MIME boundary marker line never ended!"));
00187                         int multipart_end = pos + match_result.get_capture_pos(0)+match_result.get_capture_length(0);
00188                         file.seek(multipart_end);
00189                 }
00190         }
00191         else if (!next_mime_boundary.empty())
00192         {
00193                 int offset = 0;
00194                 bytes_read = file.read(buffer, 16*1024);
00195                 if (bytes_read <= 0)
00196                         throw CL_Exception(TEXT("Premature end of MIME entity"));
00197 
00198                 // Get offset to point at the end of the boundary, but not including any possible additional boundary whitespace.
00199                 while (true)
00200                 {
00201                         boundary_begin_exp.search(buffer, bytes_read, offset, CL_RegExp::search_partial, match_result);
00202                         if (match_result.is_match() || match_result.is_partial())
00203                         {
00204                                 int start_match_offset = match_result.get_capture_pos(0);
00205                                 int boundary_begin_size = 4;
00206                                 int end_match_offset = start_match_offset + boundary_begin_size + next_mime_boundary.length();
00207                                 if (end_match_offset > bytes_read)
00208                                 {
00209                                         // Partial match. Need more data to complete match.
00210                                         int size = bytes_read-start_match_offset;
00211                                         memcpy(buffer, buffer+start_match_offset, size);
00212                                         bytes_read = file.read(buffer+size, 16*1024-size);
00213                                         if (bytes_read <= 0)
00214                                                 throw CL_Exception(TEXT("Premature end of MIME entity"));
00215                                         bytes_read += size;
00216                                         offset = 0;
00217                                         continue;
00218                                 }
00219 
00220                                 if (memcmp(
00221                                         buffer+start_match_offset+boundary_begin_size,
00222                                         next_mime_boundary.data(),
00223                                         next_mime_boundary.length()) == 0)
00224                                 {
00225                                         // Full match.
00226                                         file.seek(end_match_offset - bytes_read, CL_File::seek_cur);
00227                                         body_length = file.get_position()-boundary_begin_size - body_pos;
00228                                         break;
00229                                 }
00230 
00231                                 offset = start_match_offset + boundary_begin_size;
00232                         }
00233                         else
00234                         {
00235                                 offset = 0;
00236                                 bytes_read = file.read(buffer, 16*1024);
00237                                 if (bytes_read <= 0)
00238                                         throw CL_Exception(TEXT("Premature end of MIME entity"));
00239                         }
00240                 }
00241         }
00242         else
00243         {
00244                 body_length = file.get_size()-body_pos;
00245         }
00246 }
00247 
00248 CL_StringA CL_MimePart::get_content_id() const
00249 {
00250         CL_RegExp regexp("^Content-ID\\s*:\\s*(\\\"<.*?>\\\"|<\\.+?>)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00251         CL_RegExpMatch match = regexp.search(headers);
00252         if (match.is_match())
00253         {
00254                 CL_StringA id = headers.substr(match.get_capture_pos(1)+1, match.get_capture_length(1)-2);
00255                 if (id[0] == '<')
00256                         id = id.substr(1, id.length()-2);
00257                 return id;
00258         }
00259         return CL_StringA();
00260 }
00261 
00262 bool CL_MimePart::has_content_id() const
00263 {
00264         CL_RegExp regexp("^Content-ID\\s*:\\s*(\\\"<.*?>\\\"|<\\.+?>)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00265         CL_RegExpMatch match = regexp.search(headers);
00266         return match.is_match();
00267 }
00268 
00269 CL_StringA CL_MimePart::get_content_description() const
00270 {
00271         CL_RegExp regexp("^Content-Description\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00272         CL_RegExpMatch match = regexp.search(headers);
00273         if (match.is_match())
00274         {
00275                 CL_StringA description = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00276                 if (description[0] == '"')
00277                         description = description.substr(1, description.length()-2);
00278                 return description;
00279         }
00280         return CL_StringA();
00281 }
00282 
00283 bool CL_MimePart::has_content_description() const
00284 {
00285         CL_RegExp regexp("^Content-Description\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00286         CL_RegExpMatch match = regexp.search(headers);
00287         return match.is_match();
00288 }
00289 
00290 CL_StringA CL_MimePart::get_content_transfer_encoding() const
00291 {
00292         CL_RegExp regexp("^Content-Transfer-Encoding\\s*:\\s*(\\\".*?\\\"|\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00293         CL_RegExpMatch match = regexp.search(headers);
00294         if (match.is_match())
00295         {
00296                 CL_StringA encoding = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00297                 if (encoding[0] == '"')
00298                         encoding = encoding.substr(1, encoding.length()-2);
00299                 return encoding;
00300         }
00301         return CL_StringA("7bit");
00302 }
00303 
00304 CL_StringA CL_MimePart::get_content_type() const
00305 {
00306         CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00307         CL_RegExpMatch match = regexp.search(headers);
00308         if (match.is_match())
00309         {
00310                 CL_StringA content_type = headers.substr(match.get_capture_pos(1), match.get_capture_length(1));
00311                 if (content_type[0] == '"')
00312                         content_type = content_type.substr(1, content_type.length()-2);
00313                 return content_type;
00314         }
00315         return CL_StringA("text/plain");
00316 }
00317 
00318 bool CL_MimePart::has_content_type() const
00319 {
00320         CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00321         CL_RegExpMatch match = regexp.search(headers);
00322         if (match.is_match())
00323                 return true;
00324         return false;
00325 }
00326 
00327 CL_StringA CL_MimePart::get_content_type_parameters() const
00328 {
00329         CL_RegExp regexp("^Content-Type\\s*:\\s*(\\\".*?\\\"|\\w+/\\w+)\\s*;\\s*", CL_RegExp::compile_multi_line|CL_RegExp::compile_caseless);
00330         CL_RegExpMatch match = regexp.search(headers);
00331         if (!match.is_match())
00332                 return CL_StringA();
00333         int parameters_offset = match.get_capture_pos(0)+match.get_capture_length(0);
00334         CL_RegExp regexp_next_line("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00335         CL_RegExpMatch match_next_line = regexp_next_line.search(headers, parameters_offset);
00336         if (match_next_line.is_match())
00337                 return headers.substr(parameters_offset, match_next_line.get_capture_pos(0)-parameters_offset);
00338         else
00339                 return headers.substr(parameters_offset);
00340 }
00341 
00342 CL_StringA CL_MimePart::get_content_type_parameter_value(const CL_StringA &search_name) const
00343 {
00344         if (!has_content_type())
00345         {
00346                 // If no Content-Type header exist, the default content type is:
00347                 //     Content-type: text/plain; charset=us-ascii
00348                 if (CL_StringHelp::compare(search_name, "charset", true) == 0)
00349                         return CL_StringA("us-ascii");
00350                 return CL_StringA();
00351         }
00352 
00353         CL_StringA parameters = get_content_type_parameters();
00354 
00355         CL_RegExp regexp("\\s*(\\w+)\\s*=(\".*?\"|\\w*)\\s*;*");
00356         int offset = 0;
00357         while (true)
00358         {
00359                 CL_RegExpMatch match = regexp.search(parameters, offset);
00360                 if (!match.is_match())
00361                         break;
00362                 CL_StringA name = parameters.substr(match.get_capture_pos(1), match.get_capture_length(1));
00363                 CL_StringA value = parameters.substr(match.get_capture_pos(2), match.get_capture_length(2));
00364                 if (value[0] == '"')
00365                         value = value.substr(1, value.length()-2);
00366                 if (CL_StringHelp::compare(search_name, name, true) == 0)
00367                         return value;
00368                 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00369         }
00370         return CL_StringA();
00371 }
00372 
00373 bool CL_MimePart::has_content_type_parameter(const CL_StringA &search_name) const
00374 {
00375         if (!has_content_type())
00376         {
00377                 // If no Content-Type header exist, the default content type is:
00378                 //     Content-type: text/plain; charset=us-ascii
00379                 if (CL_StringHelp::compare(search_name, "charset", true) == 0)
00380                         return true;
00381                 return false;
00382         }
00383 
00384         CL_StringA parameters = get_content_type_parameters();
00385 
00386         CL_RegExp regexp("\\s*(\\w+)\\s*=(\".*?\"|\\w*)\\s*;*");
00387         int offset = 0;
00388         while (true)
00389         {
00390                 CL_RegExpMatch match = regexp.search(parameters, offset);
00391                 if (!match.is_match())
00392                         break;
00393                 CL_StringA name = parameters.substr(match.get_capture_pos(1), match.get_capture_length(1));
00394                 if (CL_StringHelp::compare(search_name, name, true) == 0)
00395                         return true;
00396                 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00397         }
00398 
00399         return false;
00400 }
00401 
00402 int CL_MimePart::get_header_lines_count() const
00403 {
00404         CL_RegExp regexp("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00405         int offset = 0;
00406         int matches = 0;
00407         while (true)
00408         {
00409                 CL_RegExpMatch match = regexp.search(headers, offset);
00410                 if (!match.is_match())
00411                         break;
00412                 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00413                 matches++;
00414         }
00415         return matches;
00416 }
00417 
00418 CL_StringA CL_MimePart::get_header_line(int line) const
00419 {
00420         CL_RegExp regexp("^\\w+\\s*:", CL_RegExp::compile_multi_line);
00421         int offset = 0;
00422         int matches = 0;
00423         while (true)
00424         {
00425                 CL_RegExpMatch match = regexp.search(headers, offset);
00426                 if (!match.is_match())
00427                         break;
00428                 offset = match.get_capture_pos(0)+match.get_capture_length(0);
00429                 if (matches == line)
00430                 {
00431                         CL_RegExpMatch match_next_line = regexp.search(headers, offset);
00432                         if (match.is_match())
00433                                 return headers.substr(
00434                                         match.get_capture_pos(0),
00435                                         match_next_line.get_capture_pos(0)-match.get_capture_pos(0));
00436                         else
00437                                 return headers.substr(match.get_capture_pos(0));
00438                 }
00439                 matches++;
00440         }
00441         return CL_StringA();
00442 }
00443 
00445 // CL_MimePart Implementation:

Generated on Sat Feb 19 22:51:16 2005 for npcore by  doxygen 1.4.1