57#include "HTTPCacheResponse.h"
58#include "HTTPConnect.h"
59#include "HTTPResponse.h"
62#include "media_types.h"
// Tracing switch read by curl_debug() in this file: when non-zero, the "Data in", "Data out" and
// "Curl info" records are also written to cerr.
78int www_trace_extensive = 0;
// Flag initialised to 0. No reader is visible in this listing
// (presumably controls whether temporary files are kept — TODO confirm).
81int dods_keep_temps = 0;
// Inclusive range of HTTP client-error status codes covered by http_client_errors.
83#define CLIENT_ERR_MIN 400
84#define CLIENT_ERR_MAX 417
// Message table for client errors; http_status_to_string() indexes it with (status - CLIENT_ERR_MIN).
// NOTE(review): the array is declared with CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1 (= 18) slots but fewer
// initializers are visible in this listing — presumably lines were lost; confirm against the real file,
// because a short initializer list would leave the trailing slots as null pointers.
85static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
87 "Unauthorized: Contact the server administrator.",
89 "Forbidden: Contact the server administrator.",
90 "Not Found: The data source or server could not be found.\n\
91 Often this means that the OPeNDAP server is missing or needs attention.\n\
92 Please contact the server administrator.",
93 "Method Not Allowed.",
95 "Proxy Authentication Required.",
100 "Precondition Failed.",
101 "Request Entity Too Large.",
102 "Request URI Too Large.",
103 "Unsupported Media Type.",
104 "Requested Range Not Satisfiable.",
105 "Expectation Failed."};
// Inclusive range of HTTP server-error status codes covered by http_server_errors.
107#define SERVER_ERR_MIN 500
108#define SERVER_ERR_MAX 505
// Message table for server errors; http_status_to_string() indexes it with (status - SERVER_ERR_MIN).
// Six slots are declared and six messages are listed, one per code from 500 to 505.
109static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
110 "Internal Server Error.",
"Not Implemented.",
"Bad Gateway.",
111 "Service Unavailable.",
"Gateway Time-out.",
"HTTP Version Not Supported."};
// Translate an HTTP status code into a human-readable message.
//
// status: numeric HTTP status code.
// Returns a copy of the matching entry of http_client_errors or http_server_errors when the code lies
// in the corresponding [MIN, MAX] range. The last string literal below is the text for any other code
// (the statement that introduces it is not visible in this listing).
115static string http_status_to_string(
int status) {
// Range checks guard the table lookups, so the computed index is always within the declared bounds.
116 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
117 return string(http_client_errors[status - CLIENT_ERR_MIN]);
118 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
119 return string(http_server_errors[status - SERVER_ERR_MIN]);
122 "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
// Classify a Content-Type header value as an ObjectType.
//
// header_value: the value part of the header.
// The value returned by each branch is not visible in this listing; only the tests are documented here.
125static ObjectType determine_object_type(
const string &header_value) {
// Split "base+extension" at the first '+'. With no '+', the whole value is the base type and the
// extension stays empty.
129 string::size_type plus = header_value.find(
'+');
131 string type_extension =
"";
132 if (plus != string::npos) {
133 base_type = header_value.substr(0, plus);
134 type_extension = header_value.substr(plus + 1);
136 base_type = header_value;
// A base type matches either by exact comparison with the media-type constant or by containing both
// "application/" and the DAP4 marker substring. The "dap4.dataset-metadata" test must come before the
// "dap4.data" test, because the former string also contains the latter.
138 if (base_type == DMR_Content_Type ||
139 (base_type.find(
"application/") != string::npos && base_type.find(
"dap4.dataset-metadata") != string::npos)) {
140 if (type_extension ==
"xml")
144 }
else if (base_type == DAP4_DATA_Content_Type ||
145 (base_type.find(
"application/") != string::npos && base_type.find(
"dap4.data") != string::npos)) {
147 }
else if (header_value.find(
"text/html") != string::npos) {
// Function object applied to each response header line (fetch_url passes it to for_each). It
// accumulates the object type, server version, protocol version and redirect location.
// NOTE(review): if unary_function is std::unary_function, it was deprecated in C++11 and removed in
// C++17 — confirm the language standard this file is built with.
157class ParseHeader :
public unary_function<const string &, void> {
// Defaults: unknown object type, server "dods/0.0", protocol "2.0".
164 ParseHeader() : type(unknown_type), server(
"dods/0.0"), protocol(
"2.0") {}
// Process one header line. The statements that derive `name` and `value` from `line` are not visible
// here (presumably parse_mime_header() with a lower-cased name, since every comparison below uses
// lower-case header names — TODO confirm).
166 void operator()(
const string &line) {
170 DBG2(cerr << name <<
": " << value << endl);
// Only the first content-type header decides the type: once `type` is known this test is skipped.
175 if (type == unknown_type && name ==
"content-type") {
176 type = determine_object_type(value);
// content-description is consulted only when the type is not already one of the DAP4 types.
178 if (name ==
"content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
// The "xdods-server" and generic "server" headers are considered only while `server` still holds the
// default "dods/0.0"; "xopendap-server" has no such guard.
184 else if (name ==
"xdods-server" && server ==
"dods/0.0") {
186 }
else if (name ==
"xopendap-server") {
188 }
else if (name ==
"xdap") {
190 }
else if (server ==
"dods/0.0" && name ==
"server") {
192 }
else if (name ==
"location") {
// Accessors return copies of the collected values.
199 string get_server() {
return server; }
201 string get_protocol() {
return protocol; }
203 string get_location() {
return location; }
// libcurl header callback (installed with CURLOPT_HEADERFUNCTION in www_lib_init). Stores each
// received header line in a vector<string>.
//
// ptr:       start of the header data; size * nmemb bytes.
// size:      size of one element.
// nmemb:     number of elements.
// resp_hdrs: user pointer; cast to vector<string>* and appended to.
// The return statement is not visible in this listing.
221static size_t save_raw_http_headers(
void *ptr,
size_t size,
size_t nmemb,
void *resp_hdrs) {
222 DBG2(cerr <<
"Inside the header parser." << endl);
223 vector<string> *hdrs =
static_cast<vector<string> *
>(resp_hdrs);
// Strip the line terminator: when the second-to-last element is '\r' both trailing elements are
// dropped, otherwise only the last one is.
226 string complete_line;
227 if (nmemb > 1 && *(
static_cast<char *
>(ptr) + size * (nmemb - 2)) ==
'\r')
228 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 2));
230 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 1));
// Keep only non-empty lines that do not contain "HTTP". The test is a substring search, so it rejects
// any header with "HTTP" anywhere in it, not just the status line.
233 if (complete_line !=
"" && complete_line.find(
"HTTP") == string::npos) {
234 DBG(cerr <<
"Header line: " << complete_line << endl);
235 hdrs->push_back(complete_line);
// libcurl debug callback (installed with CURLOPT_DEBUGFUNCTION in www_lib_init). Writes each debug
// record to cerr behind a label chosen from `info`.
//
// info: record kind reported by libcurl.
// msg:  record bytes; not assumed to be NUL-terminated, so a string of exactly `size` bytes is built.
// size: number of bytes in msg.
// The CURL handle and the user pointer are unused. The switch header, several case labels and the
// return statement are not visible in this listing.
242static int curl_debug(CURL *, curl_infotype info,
char *msg,
size_t size,
void *) {
243 string message(msg, size);
247 cerr <<
"Text: " << message;
249 case CURLINFO_HEADER_IN:
250 cerr <<
"Header in: " << message;
252 case CURLINFO_HEADER_OUT:
253 cerr <<
"Header out: " << message;
// Payload records are printed only when extensive tracing is enabled.
255 case CURLINFO_DATA_IN:
256 if (www_trace_extensive)
257 cerr <<
"Data in: " << message;
259 case CURLINFO_DATA_OUT:
260 if (www_trace_extensive)
261 cerr <<
"Data out: " << message;
264 cerr <<
"End: " << message;
// The SSL record kinds are compiled in only when the corresponding names are defined as macros.
266#ifdef CURLINFO_SSL_DATA_IN
267 case CURLINFO_SSL_DATA_IN:
268 cerr <<
"SSL Data in: " << message;
271#ifdef CURLINFO_SSL_DATA_OUT
272 case CURLINFO_SSL_DATA_OUT:
273 cerr <<
"SSL Data out: " << message;
277 if (www_trace_extensive)
278 cerr <<
"Curl info: " << message;
// Initialise libcurl and configure the easy handle d_curl from the settings held by d_rcr.
// Throws InternalErr "Could not initialize libcurl." (the guarding test is not visible here).
// NOTE(review): several options below pass a plain int literal (0, 1, 5). libcurl documents these
// options as taking a `long`; because curl_easy_setopt is variadic, the literals should presumably be
// written 0L / 1L / 5L — confirm.
287void HTTPConnect::www_lib_init() {
288 curl_global_init(CURL_GLOBAL_DEFAULT);
290 d_curl = curl_easy_init();
292 throw InternalErr(__FILE__, __LINE__,
"Could not initialize libcurl.");
// libcurl writes human-readable error text into d_error_buffer; read_url throws Error with it.
294 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
296 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2);
// Proxy configuration, applied only when a proxy host is configured.
302 if (!d_rcr->get_proxy_server_host().empty()) {
303 DBG(cerr <<
"Setting up a proxy server." << endl);
304 DBG(cerr <<
"Proxy host: " << d_rcr->get_proxy_server_host() << endl);
305 DBG(cerr <<
"Proxy port: " << d_rcr->get_proxy_server_port() << endl);
// NOTE(review): this debug line prints the proxy user:password string to cerr.
306 DBG(cerr <<
"Proxy pwd : " << d_rcr->get_proxy_server_userpw() << endl);
307 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
308 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, d_rcr->get_proxy_server_port());
311#ifdef CURLOPT_PROXYAUTH
312 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (
long)CURLAUTH_ANY);
316 if (!d_rcr->get_proxy_server_userpw().empty())
317 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, d_rcr->get_proxy_server_userpw().c_str());
// FAILONERROR is off, so HTTP error statuses do not make the transfer itself fail; callers inspect
// the status that read_url returns.
322 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
327 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (
long)CURLAUTH_ANY);
329 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
330 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
331 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
// Follow redirects, at most five per request.
336 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
337 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
// Certificate and host-name verification are switched off when the configuration asks for it.
340 if (d_rcr->get_validate_ssl() == 0) {
341 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
342 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
347 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
// Cookie jar is set only when a cookie-jar path is configured.
353 if (!d_cookie_jar.empty()) {
354 DBG(cerr <<
"Setting the cookie jar to: " << d_cookie_jar << endl);
355 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
356 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
// Verbose tracing through curl_debug; the condition that enables it is not visible in this listing.
360 cerr <<
"Curl version: " << curl_version() << endl;
361 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
362 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
// Function object that appends each string it is called with to a libcurl string list (curl_slist).
// read_url passes it to for_each to build the request headers. The functor does not free the list:
// read_url calls curl_slist_free_all() on the result of get_headers().
369class BuildHeaders :
public unary_function<const string &, void> {
370 struct curl_slist *d_cl;
// Start with an empty list (null head).
373 BuildHeaders() : d_cl(0) {}
// Append one header. The list head is replaced by the pointer curl_slist_append returns.
375 void operator()(
const string &header) {
376 DBG(cerr <<
"Adding '" << header.c_str() <<
"' to the header list." << endl);
377 d_cl = curl_slist_append(d_cl, header.c_str());
// Return the head of the accumulated list; null if nothing was appended.
380 struct curl_slist *get_headers() {
return d_cl; }
// Perform one HTTP request with libcurl.
//
// url:       address to fetch.
// stream:    FILE* handed to libcurl as CURLOPT_WRITEDATA (receives the body).
// resp_hdrs: handed to libcurl as the header-callback user pointer; save_raw_http_headers appends to it.
// headers:   additional request headers appended after d_request_headers (dereferenced below; the
//            null check that presumably guards it is not visible in this listing).
// Throws Error carrying d_error_buffer (the guarding tests are not visible here).
// The declaration of `status` and the return statement are not visible; callers use the result as the
// HTTP status code.
397long HTTPConnect::read_url(
const string &url, FILE *stream, vector<string> *resp_hdrs,
const vector<string> *headers) {
398 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
// WRITEDATA is set twice in this listing; presumably these are alternative preprocessor branches,
// one of which also installs fwrite as the write callback — confirm.
408 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
409 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
411 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
414 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr,
"\n")));
// for_each returns the functor it was given, so assigning the result back keeps the accumulated list.
416 BuildHeaders req_hdrs;
417 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), req_hdrs);
419 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
421 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
// Temporarily clear the proxy for URLs matched by the no-proxy setting; it is restored after the
// transfer below.
424 bool temporary_proxy =
false;
425 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
426 DBG(cerr <<
"Suppress proxy for url: " << url << endl);
427 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
// Credentials embedded in the URL (the text before '@') are stored in d_upstring.
// NOTE(review): the offset 7 equals the length of "http://"; a URL starting with "https://" would
// presumably be sliced one character early — confirm. Any '@' in the URL triggers this, including one
// in the path or query.
430 string::size_type at_sign = url.find(
'@');
434 if (at_sign != url.npos)
435 d_upstring = url.substr(7, at_sign - 7);
437 if (!d_upstring.empty())
438 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
443 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
448 CURLcode res = curl_easy_perform(d_curl);
// Free the request-header list and detach it from the handle so the handle holds no stale pointer.
451 curl_slist_free_all(req_hdrs.get_headers());
452 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
455 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
456 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
459 throw Error(d_error_buffer);
462 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
464 throw Error(d_error_buffer);
// Remember the Content-Type libcurl reports; fetch_url adds it to the headers if none was captured.
467 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
468 if (res == CURLE_OK && ct_ptr)
469 d_content_type = ct_ptr;
// Test whether `url` matches the configured "proxy for" regular expression.
// When d_rcr->is_proxy_for_used() is true, returns whether a search of the URL for that pattern finds
// a match (a search result other than -1). The result for the other case is not visible here.
479bool HTTPConnect::url_uses_proxy_for(
const string &url) {
480 if (d_rcr->is_proxy_for_used()) {
484 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
485 int index = 0, matchlen;
486 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
// Test whether the proxy should be bypassed for `url`: true when the no-proxy setting is in use and
// the configured no-proxy host occurs anywhere in the URL (plain substring search, not a host
// comparison).
// NOTE(review): the dynamic exception specification `throw()` is deprecated since C++11 and removed in
// C++20; `noexcept` is the modern spelling — confirm the target standard before changing.
495bool HTTPConnect::url_uses_no_proxy_for(
const string &url)
throw() {
496 return d_rcr->is_no_proxy_for_used() && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
// Construct a connection object from the run-time configuration.
//
// rcr:     configuration reader; deflate and cookie-jar settings are read through it here, other
//          settings through d_rcr (its assignment is not visible in this listing).
// use_cpp: stored in d_use_cpp_streams; selects C++ streams over FILE* in fetch_url/plain_fetch_url.
// Defaults set here: empty username, password and cookie jar; DAP client protocol 2.0.
507HTTPConnect::HTTPConnect(
RCReader *rcr,
bool use_cpp)
508 : d_username(
""), d_password(
""), d_cookie_jar(
""), d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0),
509 d_use_cpp_streams(use_cpp)
512 d_accept_deflate = rcr->get_deflate();
// Default request headers. "Pragma:" has an empty value (presumably to suppress a libcurl default
// header — TODO confirm). User-Agent is built from the CNAME and CVER macros.
519 d_request_headers.push_back(
string(
"Pragma:"));
520 string user_agent = string(
"User-Agent: ") + string(CNAME) + string(
"/") + string(CVER);
521 d_request_headers.push_back(user_agent);
522 if (d_accept_deflate)
523 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
// Cache setup. The statement that creates d_http_cache and the guard around the setters are not
// visible; the setters copy the cache policy out of the configuration.
526 if (d_rcr->get_use_cache())
531 DBG2(cerr <<
"Cache object created (" << hex << d_http_cache << dec <<
")" << endl);
534 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
535 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
536 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
537 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
538 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
539 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
542 d_cookie_jar = rcr->get_cookie_jar();
// Release the libcurl easy handle created in www_lib_init.
// No curl_global_cleanup() call is visible in this listing.
547HTTPConnect::~HTTPConnect() {
548 DBG2(cerr <<
"Entering the HTTPConnect dtor" << endl);
550 curl_easy_cleanup(d_curl);
552 DBG2(cerr <<
"Leaving the HTTPConnect dtor" << endl);
// Predicate that is true when a string starts with a given header prefix (for example "Content-Type:").
// The comparison is a case-sensitive prefix test: find(...) == 0.
// NOTE(review): d_header is a reference member, so the string passed to the constructor must outlive
// the functor. The visible call sites build the functor from a literal inside the same expression that
// uses it, which keeps the temporary alive; storing a HeaderMatch for later use would dangle.
556class HeaderMatch :
public unary_function<const string &, bool> {
557 const string &d_header;
560 HeaderMatch(
const string &header) : d_header(header) {}
561 bool operator()(
const string &arg) {
return arg.find(d_header) == 0; }
// Fragment: the function signature is not visible in this listing (presumably the body of
// HTTPConnect::fetch_url(const string &url) — TODO confirm). It fetches `url` through the cache or
// directly, then parses the response headers to fill in the response's type, version and protocol.
578 cout <<
"GET " << url <<
" HTTP/1.0" << endl;
// Two alternative fetch paths; the condition that picks between them is not visible.
584 stream = caching_fetch_url(url);
586 stream = plain_fetch_url(url);
// Echo a status line followed by every response header into `ss`.
591 ss <<
"HTTP/1.0 " << stream->get_status() <<
" -" << endl;
592 for (
size_t i = 0; i < stream->get_headers()->size(); i++) {
593 ss << stream->get_headers()->at(i) << endl;
// If libcurl reported a content type (read_url stores it in d_content_type) but no captured header
// starts with "Content-Type:", add one so the parser below can determine the object type.
604 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
605 HeaderMatch(
"Content-Type:")) == stream->get_headers()->end())
606 stream->get_headers()->push_back(
"Content-Type: " + d_content_type);
// for_each returns the functor after it has seen every header line.
608 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
611 cout << endl << endl;
// Redirect check: compare the request URL with the Location header, both cut at the '?' position.
// NOTE(review): the Location string is truncated at the '?' offset found in `url`, not in the
// Location value itself — confirm this is intended.
615 if (parser.get_location() !=
"" &&
616 url.substr(0, url.find(
"?", 0)).compare(parser.get_location().substr(0, url.find(
"?", 0))) != 0) {
621 stream->set_type(parser.get_object_type());
623 stream->set_version(parser.get_server());
624 stream->set_protocol(parser.get_protocol());
626 if (d_use_cpp_streams) {
// Build the path template for a temporary file: pick a usable temporary directory and append
// `file_template` to it.
//
// file_template: file-name part of the template (get_temp_file passes "dodsXXXXXX").
// A candidate directory is accepted when it matches a whitelist regular expression and access()
// reports it readable and writable. The declaration of `c`, the preprocessor conditionals separating
// the Windows and POSIX variants, and the return statement are not visible in this listing.
643static string get_tempfile_template(
const string &file_template) {
// Variant using a backslash/colon whitelist (presumably the Windows branch — TODO confirm).
// The literal 6 passed to access() presumably means read+write permission — confirm.
649 Regex directory(
"[-a-zA-Z0-9_:\\]*");
654 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
655 goto valid_temp_directory;
658 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
659 goto valid_temp_directory;
664 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
665 goto valid_temp_directory;
// Variant using a forward-slash whitelist (presumably the POSIX branch — TODO confirm).
669 const Regex directory(
"[-a-zA-Z0-9_/]*");
// NOTE(review): if `c` is a std::string, assigning a null getenv() result (TMPDIR unset) is undefined
// behaviour — confirm that a guard exists on a line not shown here.
671 c = getenv(
"TMPDIR");
672 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
673 goto valid_temp_directory;
// Fall back to the system-defined P_tmpdir when it is readable and writable.
678 if (access(P_tmpdir, W_OK | R_OK) == 0) {
680 goto valid_temp_directory;
686 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
687 goto valid_temp_directory;
// Append the file template using the separator that matches the variant in use.
697 c +=
"\\" + file_template;
699 c +=
"/" + file_template;
// Fragment: the function signature is not visible in this listing (presumably the body of
// get_temp_file(FILE *&stream) — TODO confirm). It creates a temporary file from a "dodsXXXXXX"
// template, opens it into `stream` and records the real file name in dods_temp.
724 string dods_temp = get_tempfile_template((
string)
"dodsXXXXXX");
// The template is filled in place, so a writable copy is needed. The vector holds length + 1
// zero-initialised chars and strncpy copies exactly `length` chars, so the final element stays '\0'
// and the buffer is NUL-terminated.
726 vector<char> pathname(dods_temp.length() + 1);
728 strncpy(pathname.data(), dods_temp.c_str(), dods_temp.length());
730 DBG(cerr <<
"pathanme: " << pathname.data() <<
" (" << dods_temp.length() + 1 <<
")" << endl);
// Windows (or the test macro): generate a name with _mktemp, then open it for binary read/write.
733#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
734 stream = fopen(_mktemp(pathname.data()),
"w+b");
// Other platforms: set the umask to 077 before calling mkstemp, then wrap the returned descriptor in
// a FILE*. Restoring the previous mask is presumably done on a line not visible here.
737 int mask = umask(077);
739 throw Error(
"Could not set the file creation mask: " +
string(strerror(errno)));
740 int fd = mkstemp(pathname.data());
742 throw Error(
"Could not create a temporary file to store the response: " +
string(strerror(errno)));
744 stream = fdopen(fd,
"w+");
749 throw Error(
"Failed to open a temporary file for the data values (" + dods_temp +
")");
// pathname now holds the generated file name; copy it back over the template string.
751 dods_temp = pathname.data();
// Fragment: the function signature and the first call are not visible in this listing (presumably the
// body of close_temp(FILE *s, const string &name) — TODO confirm). Each step stores its result in
// `res` and throws InternalErr with the text "!FAIL! " plus that result; the tests guarding the throws
// are not visible. The visible step removes the file with unlink().
763 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
765 res = unlink(name.c_str());
767 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
// Fetch `url` through the HTTP cache.
//
// As far as this listing shows there are three outcomes:
//   1. not cached           -> fetch with plain_fetch_url() and store the response in the cache;
//   2. cached and valid     -> wrap the cached file in an HTTPCacheResponse with status 200;
//   3. cached but not valid -> issue a conditional request and act on the returned status.
// The cache lookup, the branch conditions, the case labels and the return statements are not visible
// in this listing. Ownership of the `headers` vector on each path is likewise not visible.
791HTTPResponse *HTTPConnect::caching_fetch_url(
const string &url) {
792 DBG(cerr <<
"Is this URL (" << url <<
") in the cache?... ");
794 vector<string> *headers =
new vector<string>;
// Outcome 1: record the request time before the fetch, then cache the fetched headers and body.
799 DBGN(cerr <<
"no; getting response and caching." << endl);
802 time_t now = time(0);
803 HTTPResponse *rs = plain_fetch_url(url);
804 d_http_cache->
cache_response(url, now, *(rs->get_headers()), rs->get_stream());
808 DBGN(cerr <<
"yes... ");
// Outcome 2: the cached entry is still valid.
810 if (d_http_cache->is_url_valid(url)) {
811 DBGN(cerr <<
"and it's valid; using cached response." << endl);
812 HTTPCacheResponse *crs =
new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
// Outcome 3: release the cached stream and revalidate with the cache's conditional request headers.
815 DBGN(cerr <<
"but it's not valid; validating... ");
817 d_http_cache->release_cached_response(s);
819 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
822 time_t now = time(0);
826 http_status = read_url(url, body, headers, &cond_hdrs);
834 switch (http_status) {
// New content was returned (presumably status 200 — case label not visible): cache it and hand back
// the fresh body.
836 DBGN(cerr <<
"read a new response; caching." << endl);
838 d_http_cache->cache_response(url, now, *headers, body);
839 HTTPResponse *rs =
new HTTPResponse(body, http_status, headers, dods_temp);
// Cached copy confirmed (the response below is built with status 304): update the cache's stored
// headers and serve the cached body.
845 DBGN(cerr <<
"cached response valid; updating." << endl);
848 d_http_cache->update_response(url, now, *headers);
850 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
851 HTTPCacheResponse *crs =
new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
// Any other status: 400 and above builds a message from http_status_to_string(); the remaining
// statuses are reported as an InternalErr.
857 if (http_status >= 400) {
860 string msg =
"Error while reading the URL: ";
862 msg +=
".\nThe OPeNDAP server returned the following message:\n";
863 msg += http_status_to_string(http_status);
868 throw InternalErr(__FILE__, __LINE__,
869 "Bad response from the HTTP server: " + long_to_string(http_status));
876 throw InternalErr(__FILE__, __LINE__,
"Should never get here");
// Fetch `url` directly, without consulting the cache, into a temporary file.
//
// Returns a newly allocated HTTPResponse built from the stream, the status, the response-header
// vector and the temporary file name. The creation of `stream` and `dods_temp`, the test that guards
// the error message, and the throw itself are not visible in this listing.
890HTTPResponse *HTTPConnect::plain_fetch_url(
const string &url) {
891 DBG(cerr <<
"Getting URL: " << url << endl);
// The header vector is heap-allocated and passed on to the HTTPResponse constructed below.
894 vector<string> *resp_hdrs =
new vector<string>;
898 status = read_url(url, stream, resp_hdrs);
// Error text for a failing status, built from http_status_to_string().
901 string msg =
"Error while reading the URL: ";
903 msg +=
".\nThe OPeNDAP server returned the following message:\n";
904 msg += http_status_to_string(status);
// With C++ streams enabled, the temporary file is reopened by name as a binary input fstream;
// otherwise the FILE* is handed over directly.
916 if (d_use_cpp_streams) {
918 fstream *in =
new fstream(dods_temp.c_str(), ios::in|ios::binary);
919 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
924 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
// Fragment: the function signature is not visible in this listing (presumably the body of
// HTTPConnect::set_accept_deflate(bool) — TODO confirm). It records the flag and keeps the
// Accept-Encoding request header in step with it.
942 d_accept_deflate = deflate;
// Enabled: add the header only if an identical string is not already in the list.
944 if (d_accept_deflate) {
945 if (find(d_request_headers.begin(), d_request_headers.end(),
"Accept-Encoding: deflate, gzip, compress") ==
946 d_request_headers.end())
947 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
948 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr,
"\n")));
// Removal path (presumably the else branch — not visible): remove_if followed by erase drops every
// header equal to the Accept-Encoding string.
951 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
952 [](
const string &header) { return header ==
"Accept-Encoding: deflate, gzip, compress"; });
953 d_request_headers.erase(i, d_request_headers.end());
// Fragment: the function signature is not visible in this listing (presumably the body of
// HTTPConnect::set_xdap_protocol(int major, int minor) — TODO confirm). It replaces the XDAP-Accept
// request header with one carrying the new protocol version.
// Only the first header starting with "XDAP-Accept:" is erased.
968 i = find_if(d_request_headers.begin(), d_request_headers.end(), HeaderMatch(
"XDAP-Accept:"));
969 if (i != d_request_headers.end())
970 d_request_headers.erase(i);
// Store the new version and append a header of the form "XDAP-Accept: <major>.<minor>".
973 d_dap_client_protocol_major = major;
974 d_dap_client_protocol_minor = minor;
975 ostringstream xdap_accept;
976 xdap_accept <<
"XDAP-Accept: " << major <<
"." << minor;
978 d_request_headers.push_back(xdap_accept.str());
980 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr,
"\n")));
// Fragment: the function signature is not visible in this listing (presumably the body of
// HTTPConnect::set_credentials(const string &u, const string &p) — TODO confirm).
// Builds the "user:password" string that read_url passes to libcurl as CURLOPT_USERPWD.
1006 d_upstring = u +
":" + p;
A class for error processing.
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Regular expression matching.
Top-level DAP object to house generic methods.
ObjectType get_description_type(const string &value)
void parse_mime_header(const string &header, string &name, string &value)
string get_temp_file(FILE *&stream)
void close_temp(FILE *s, const string &name)
ObjectType
The type of object in the stream coming from the data server.