libdap Updated for version 3.21.0
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28#ifdef HAVE_UNISTD_H
29#include <unistd.h>
30#endif
31
32#include <sys/stat.h>
33
34#ifdef WIN32
35#include <io.h>
36#endif
37
#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
48
49// #define DODS_DEBUG2
50// #define HTTP_TRACE
51// #define DODS_DEBUG
52
53#undef USE_GETENV
54
55#include "GNURegex.h"
56#include "HTTPCache.h"
57#include "HTTPCacheResponse.h"
58#include "HTTPConnect.h"
59#include "HTTPResponse.h"
60#include "RCReader.h"
61#include "debug.h"
62#include "media_types.h"
63#include "mime_util.h"
64
65using namespace std;
66
67namespace libdap {
68
69// These global variables are not MT-Safe, but I'm leaving them as is because
70// they are used only for debugging (set them in a debugger like gdb or ddd).
71// They are not static because I think that many debuggers cannot access
72// static variables. 08/07/02 jhrg
73
// Debug switch: when non-zero, www_lib_init() turns on libcurl's verbose
// mode and installs curl_debug() as the debug callback.
int www_trace{0};

// Debug switch: when non-zero, curl_debug() also prints the data payloads
// (and any message kinds it has no explicit label for).
int www_trace_extensive{0};

// Keep the temporary files; useful for debugging.
int dods_keep_temps{0};
82
// Messages for the HTTP client-error (4xx) status codes, indexed by
// (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n\
 Often this means that the OPeNDAP server is missing or needs attention.\n\
 Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone:.",
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."};

// Messages for the HTTP server-error (5xx) status codes, indexed by
// (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.", "Not Implemented.", "Bad Gateway.",
    "Service Unavailable.",   "Gateway Time-out.", "HTTP Version Not Supported."};

/// Translate an HTTP status code into a human-readable message.
///
/// @param status The HTTP status code.
/// @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
/// [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message for
/// every other value.
static std::string http_status_to_string(int status) {
    const bool is_client_error = CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX;
    if (is_client_error)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    const bool is_server_error = SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX;
    if (is_server_error)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string(
        "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
124
125static ObjectType determine_object_type(const string &header_value) {
126 // DAP4 Data: application/vnd.opendap.dap4.data
127 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
128
129 string::size_type plus = header_value.find('+');
130 string base_type;
131 string type_extension = "";
132 if (plus != string::npos) {
133 base_type = header_value.substr(0, plus);
134 type_extension = header_value.substr(plus + 1);
135 } else
136 base_type = header_value;
137
138 if (base_type == DMR_Content_Type ||
139 (base_type.find("application/") != string::npos && base_type.find("dap4.dataset-metadata") != string::npos)) {
140 if (type_extension == "xml")
141 return dap4_dmr;
142 else
143 return unknown_type;
144 } else if (base_type == DAP4_DATA_Content_Type ||
145 (base_type.find("application/") != string::npos && base_type.find("dap4.data") != string::npos)) {
146 return dap4_data;
147 } else if (header_value.find("text/html") != string::npos) {
148 return web_error;
149 } else
150 return unknown_type;
151}
152
156
157class ParseHeader : public unary_function<const string &, void> {
158 ObjectType type; // What type of object is in the stream?
159 string server; // Server's version string.
160 string protocol; // Server's protocol version.
161 string location; // Url returned by server
162
163public:
164 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") {}
165
166 void operator()(const string &line) {
167 string name, value;
168 parse_mime_header(line, name, value);
169
170 DBG2(cerr << name << ": " << value << endl);
171
172 // Content-Type is used to determine the content of DAP4 responses, but allow the
173 // Content-Description header to override CT o preserve operation with DAP2 servers.
174 // jhrg 11/12/13
175 if (type == unknown_type && name == "content-type") {
176 type = determine_object_type(value); // see above
177 }
178 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
179 type = get_description_type(value); // defined in mime_util.cc
180 }
181 // The second test (== "dods/0.0") tests if xopendap-server has already
182 // been seen. If so, use that header in preference to the old
183 // XDODS-Server header. jhrg 2/7/06
184 else if (name == "xdods-server" && server == "dods/0.0") {
185 server = value;
186 } else if (name == "xopendap-server") {
187 server = value;
188 } else if (name == "xdap") {
189 protocol = value;
190 } else if (server == "dods/0.0" && name == "server") {
191 server = value;
192 } else if (name == "location") {
193 location = value;
194 }
195 }
196
197 ObjectType get_object_type() { return type; }
198
199 string get_server() { return server; }
200
201 string get_protocol() { return protocol; }
202
203 string get_location() { return location; }
204};
205
220
221static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) {
222 DBG2(cerr << "Inside the header parser." << endl);
223 vector<string> *hdrs = static_cast<vector<string> *>(resp_hdrs);
224
225 // Grab the header, minus the trailing newline. Or \r\n pair.
226 string complete_line;
227 if (nmemb > 1 && *(static_cast<char *>(ptr) + size * (nmemb - 2)) == '\r')
228 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
229 else
230 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
231
232 // Store all non-empty headers that are not HTTP status codes
233 if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
234 DBG(cerr << "Header line: " << complete_line << endl);
235 hdrs->push_back(complete_line);
236 }
237
238 return size * nmemb;
239}
240
242static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) {
243 string message(msg, size);
244
245 switch (info) {
246 case CURLINFO_TEXT:
247 cerr << "Text: " << message;
248 break;
249 case CURLINFO_HEADER_IN:
250 cerr << "Header in: " << message;
251 break;
252 case CURLINFO_HEADER_OUT:
253 cerr << "Header out: " << message;
254 break;
255 case CURLINFO_DATA_IN:
256 if (www_trace_extensive)
257 cerr << "Data in: " << message;
258 break;
259 case CURLINFO_DATA_OUT:
260 if (www_trace_extensive)
261 cerr << "Data out: " << message;
262 break;
263 case CURLINFO_END:
264 cerr << "End: " << message;
265 break;
266#ifdef CURLINFO_SSL_DATA_IN
267 case CURLINFO_SSL_DATA_IN:
268 cerr << "SSL Data in: " << message;
269 break;
270#endif
271#ifdef CURLINFO_SSL_DATA_OUT
272 case CURLINFO_SSL_DATA_OUT:
273 cerr << "SSL Data out: " << message;
274 break;
275#endif
276 default:
277 if (www_trace_extensive)
278 cerr << "Curl info: " << message;
279 break;
280 }
281 return 0;
282}
283
286
287void HTTPConnect::www_lib_init() {
288 curl_global_init(CURL_GLOBAL_DEFAULT);
289
290 d_curl = curl_easy_init();
291 if (!d_curl)
292 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
293
294 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
295
296 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
297
298 // Now set options that will remain constant for the duration of this
299 // CURL object.
300
301 // Set the proxy host.
302 if (!d_rcr->get_proxy_server_host().empty()) {
303 DBG(cerr << "Setting up a proxy server." << endl);
304 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() << endl);
305 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() << endl);
306 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() << endl);
307 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
308 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, d_rcr->get_proxy_server_port());
309
310 // As of 4/21/08 only NTLM, Digest and Basic work.
311#ifdef CURLOPT_PROXYAUTH
312 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
313#endif
314
315 // Password might not be required. 06/21/04 jhrg
316 if (!d_rcr->get_proxy_server_userpw().empty())
317 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, d_rcr->get_proxy_server_userpw().c_str());
318 }
319
320 // We have to set FailOnError to false for any of the non-Basic
321 // authentication schemes to work. 07/28/03 jhrg
322 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
323
324 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
325 // choosing the the 'safest' one supported by the server.
326 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
327 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
328
329 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
330 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
331 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
332 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
333 // param of save_raw_http_headers to a vector<string> object.
334
335 // Follow 302 (redirect) responses
336 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
337 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
338
339 // If the user turns off SSL validation...
340 if (d_rcr->get_validate_ssl() == 0) {
341 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
342 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
343 }
344
345 // Set libcurl to use netrc to access data behind URS auth.
346 // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
347 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
348
349 // Look to see if cookies are turned on in the .dodsrc file. If so,
350 // activate here. We honor 'session cookies' (cookies without an
351 // expiration date) here so that session-based SSO systems will work as
352 // expected.
353 if (!d_cookie_jar.empty()) {
354 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
355 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
356 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
357 }
358
359 if (www_trace) {
360 cerr << "Curl version: " << curl_version() << endl;
361 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
362 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
363 }
364}
365
368
369class BuildHeaders : public unary_function<const string &, void> {
370 struct curl_slist *d_cl;
371
372public:
373 BuildHeaders() : d_cl(0) {}
374
375 void operator()(const string &header) {
376 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." << endl);
377 d_cl = curl_slist_append(d_cl, header.c_str());
378 }
379
380 struct curl_slist *get_headers() { return d_cl; }
381};
382
396
397long HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers) {
398 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
399
400#ifdef WIN32
401 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
402 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
403 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
404 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
405 // this issue is that one should not pass a FILE * to a windows DLL. Close
406 // inspection of libcurl yields that their default write function when using
407 // the CURLOPT_WRITEDATA is just "fwrite".
408 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
409 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
410#else
411 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
412#endif
413
414 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
415
416 BuildHeaders req_hdrs;
417 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), req_hdrs);
418 if (headers)
419 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
420
421 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
422
423 // Turn off the proxy for this URL?
424 bool temporary_proxy = false;
425 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
426 DBG(cerr << "Suppress proxy for url: " << url << endl);
427 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
428 }
429
430 string::size_type at_sign = url.find('@');
431 // Assume username:password present *and* assume it's an HTTP URL; it *is*
432 // HTTPConnect, after all. 7 is position after "http://"; the second arg
433 // to substr() is the sub string length.
434 if (at_sign != url.npos)
435 d_upstring = url.substr(7, at_sign - 7);
436
437 if (!d_upstring.empty())
438 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
439
440 // Pass save_raw_http_headers() a pointer to the vector<string> where the
441 // response headers may be stored. Callers can use the resp_hdrs
442 // value/result parameter to get the raw response header information .
443 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
444
445 // This is the call that causes curl to go and get the remote resource and "write it down"
446 // utilizing the configuration state that has been previously conditioned by various perturbations
447 // of calls to curl_easy_setopt().
448 CURLcode res = curl_easy_perform(d_curl);
449
450 // Free the header list and null the value in d_curl.
451 curl_slist_free_all(req_hdrs.get_headers());
452 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
453
454 // Reset the proxy?
455 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
456 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
457
458 if (res != 0)
459 throw Error(d_error_buffer);
460
461 long status;
462 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
463 if (res != 0)
464 throw Error(d_error_buffer);
465
466 char *ct_ptr = 0;
467 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
468 if (res == CURLE_OK && ct_ptr)
469 d_content_type = ct_ptr;
470 else
471 d_content_type = "";
472
473 return status;
474}
475
478
479bool HTTPConnect::url_uses_proxy_for(const string &url) {
480 if (d_rcr->is_proxy_for_used()) {
481 // NB: This could be improved by moving the Regex instance into
482 // the RCReader class, but the proxy stuff is all deprecated.
483 // jhrg 12/1/21
484 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
485 int index = 0, matchlen;
486 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
487 }
488
489 return false;
490}
491
494
495bool HTTPConnect::url_uses_no_proxy_for(const string &url) throw() {
496 return d_rcr->is_no_proxy_for_used() && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
497}
498
499// Public methods. Mostly...
500
506
507HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp)
508 : d_username(""), d_password(""), d_cookie_jar(""), d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0),
509 d_use_cpp_streams(use_cpp)
510
511{
512 d_accept_deflate = rcr->get_deflate();
513 d_rcr = rcr;
514
515 // Load in the default headers to send with a request. The empty Pragma
516 // headers overrides libcurl's default Pragma: no-cache header (which
517 // will disable caching by Squid, et c.). The User-Agent header helps
518 // make server logs more readable. 05/05/03 jhrg
519 d_request_headers.push_back(string("Pragma:"));
520 string user_agent = string("User-Agent: ") + string(CNAME) + string("/") + string(CVER);
521 d_request_headers.push_back(user_agent);
522 if (d_accept_deflate)
523 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
524
525 // HTTPCache::instance returns a valid ptr or 0.
526 if (d_rcr->get_use_cache())
527 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(), true);
528 else
529 d_http_cache = 0;
530
531 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec << ")" << endl);
532
533 if (d_http_cache) {
534 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
535 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
536 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
537 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
538 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
539 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
540 }
541
542 d_cookie_jar = rcr->get_cookie_jar();
543
544 www_lib_init(); // This may throw either Error or InternalErr
545}
546
547HTTPConnect::~HTTPConnect() {
548 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
549
550 curl_easy_cleanup(d_curl);
551
552 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
553}
554
/// Predicate that is true for strings that begin with a given header name.
///
/// The header name is stored by value, so the predicate stays valid when it
/// is built from a temporary. The test is case-sensitive. The class no
/// longer derives from std::unary_function, which was removed in C++17.
class HeaderMatch {
    const std::string d_header; // The prefix to look for, e.g. "Content-Type:".

public:
    /// @param header The text a matching string must start with.
    explicit HeaderMatch(const std::string &header) : d_header(header) {}

    /// @param arg The header line to test.
    /// @return True if arg starts with the stored header text.
    bool operator()(const std::string &arg) const {
        // Compare only the leading d_header.size() characters instead of
        // searching all of arg for a match that must be at position 0.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
563
575
577#ifdef HTTP_TRACE
578 cout << "GET " << url << " HTTP/1.0" << endl;
579#endif
580
581 HTTPResponse *stream;
582
583 if (/*d_http_cache && d_http_cache->*/ is_cache_enabled()) {
584 stream = caching_fetch_url(url);
585 } else {
586 stream = plain_fetch_url(url);
587 }
588
589#ifdef HTTP_TRACE
590 stringstream ss;
591 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
592 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
593 ss << stream->get_headers()->at(i) << endl;
594 }
595 cout << ss.str();
596#endif
597
598 ParseHeader parser;
599
600 // An apparent quirk of libcurl is that it does not pass the Content-type
601 // header to the callback used to save them, but check and add it from the
602 // saved state variable only if it's not there (without this a test failed
603 // in HTTPCacheTest). jhrg 11/12/13
604 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
605 HeaderMatch("Content-Type:")) == stream->get_headers()->end())
606 stream->get_headers()->push_back("Content-Type: " + d_content_type);
607
608 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
609
610#ifdef HTTP_TRACE
611 cout << endl << endl;
612#endif
613
614 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
615 if (parser.get_location() != "" &&
616 url.substr(0, url.find("?", 0)).compare(parser.get_location().substr(0, url.find("?", 0))) != 0) {
617 delete stream;
618 return fetch_url(parser.get_location());
619 }
620
621 stream->set_type(parser.get_object_type()); // uses the value of content-description
622
623 stream->set_version(parser.get_server());
624 stream->set_protocol(parser.get_protocol());
625
626 if (d_use_cpp_streams) {
627 stream->transform_to_cpp();
628 }
629
630 return stream;
631}
632
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h). If both come up empty, then use `./'.
637
638// Change this to a version that either returns a string or an open file
639// descriptor. Use information from https://buildsecurityin.us-cert.gov/
640// (see open()) to make it more secure. Ideal solution: get deserialize()
641// methods to read from a stream returned by libcurl, not from a temporary
642// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
643static string get_tempfile_template(const string &file_template) {
644 string c;
645
646 // Windows has one idea of the standard name(s) for a temporary files dir
647#ifdef WIN32
648 // white list for a WIN32 directory
649 Regex directory("[-a-zA-Z0-9_:\\]*");
650
651 // If we're OK to use getenv(), try it.
652#ifdef USE_GETENV
653 c = getenv("TEMP");
654 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
655 goto valid_temp_directory;
656
657 c = getenv("TMP");
658 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
659 goto valid_temp_directory;
660#endif // USE_GETENV
661
662 // The windows default
663 c = "c:\tmp";
664 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
665 goto valid_temp_directory;
666
667#else // Unix/Linux/OSX has another...
668 // white list for a directory
669 const Regex directory("[-a-zA-Z0-9_/]*");
670#ifdef USE_GETENV
671 c = getenv("TMPDIR");
672 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
673 goto valid_temp_directory;
674#endif // USE_GETENV
675
676 // Unix defines this sometimes - if present, use it.
677#ifdef P_tmpdir
678 if (access(P_tmpdir, W_OK | R_OK) == 0) {
679 c = P_tmpdir;
680 goto valid_temp_directory;
681 }
682#endif
683
684 // The Unix default
685 c = "/tmp";
686 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
687 goto valid_temp_directory;
688
689#endif // WIN32
690
691 // If we found nothing useful, use the current directory
692 c = ".";
693
694valid_temp_directory:
695
696#ifdef WIN32
697 c += "\\" + file_template;
698#else
699 c += "/" + file_template;
700#endif
701
702 return c;
703}
704
722
723string get_temp_file(FILE *&stream) throw(Error) {
724 string dods_temp = get_tempfile_template((string) "dodsXXXXXX");
725
726 vector<char> pathname(dods_temp.length() + 1);
727
728 strncpy(pathname.data(), dods_temp.c_str(), dods_temp.length());
729
730 DBG(cerr << "pathanme: " << pathname.data() << " (" << dods_temp.length() + 1 << ")" << endl);
731
732 // Open truncated for update. NB: mkstemp() returns a file descriptor.
733#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
734 stream = fopen(_mktemp(pathname.data()), "w+b");
735#else
736 // Make sure that temp files are accessible only by the owner.
737 int mask = umask(077);
738 if (mask < 0)
739 throw Error("Could not set the file creation mask: " + string(strerror(errno)));
740 int fd = mkstemp(pathname.data());
741 if (fd < 0)
742 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
743
744 stream = fdopen(fd, "w+");
745 umask(mask);
746#endif
747
748 if (!stream)
749 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
750
751 dods_temp = pathname.data();
752 return dods_temp;
753}
754
760void close_temp(FILE *s, const string &name) {
761 int res = fclose(s);
762 if (res)
763 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
764
765 res = unlink(name.c_str());
766 if (res != 0)
767 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
768}
769
790
791HTTPResponse *HTTPConnect::caching_fetch_url(const string &url) {
792 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
793
794 vector<string> *headers = new vector<string>;
795 string file_name;
796 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
797 if (!s) {
798 // url not in cache; get it and cache it
799 DBGN(cerr << "no; getting response and caching." << endl);
800 delete headers;
801 headers = 0;
802 time_t now = time(0);
803 HTTPResponse *rs = plain_fetch_url(url);
804 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
805
806 return rs;
807 } else { // url in cache
808 DBGN(cerr << "yes... ");
809
810 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
811 DBGN(cerr << "and it's valid; using cached response." << endl);
812 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
813 return crs;
814 } else { // url in cache but not valid; validate
815 DBGN(cerr << "but it's not valid; validating... ");
816
817 d_http_cache->release_cached_response(s); // This closes 's'
818 headers->clear();
819 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
820 FILE *body = 0;
821 string dods_temp = get_temp_file(body);
822 time_t now = time(0); // When was the request made (now).
823 long http_status;
824
825 try {
826 http_status = read_url(url, body, /*resp_hdrs*/ headers, &cond_hdrs);
827 rewind(body);
828 } catch (Error &e) {
829 close_temp(body, dods_temp);
830 delete headers;
831 throw;
832 }
833
834 switch (http_status) {
835 case 200: { // New headers and new body
836 DBGN(cerr << "read a new response; caching." << endl);
837
838 d_http_cache->cache_response(url, now, /* *resp_hdrs*/ *headers, body);
839 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/ headers, dods_temp);
840
841 return rs;
842 }
843
844 case 304: { // Just new headers, use cached body
845 DBGN(cerr << "cached response valid; updating." << endl);
846
847 close_temp(body, dods_temp);
848 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
849 string file_name;
850 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
851 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
852 return crs;
853 }
854
855 default: { // Oops.
856 close_temp(body, dods_temp);
857 if (http_status >= 400) {
858 delete headers;
859 headers = 0;
860 string msg = "Error while reading the URL: ";
861 msg += url;
862 msg += ".\nThe OPeNDAP server returned the following message:\n";
863 msg += http_status_to_string(http_status);
864 throw Error(msg);
865 } else {
866 delete headers;
867 headers = 0;
868 throw InternalErr(__FILE__, __LINE__,
869 "Bad response from the HTTP server: " + long_to_string(http_status));
870 }
871 }
872 }
873 }
874 }
875
876 throw InternalErr(__FILE__, __LINE__, "Should never get here");
877}
878
889
890HTTPResponse *HTTPConnect::plain_fetch_url(const string &url) {
891 DBG(cerr << "Getting URL: " << url << endl);
892 FILE *stream = 0;
893 string dods_temp = get_temp_file(stream);
894 vector<string> *resp_hdrs = new vector<string>;
895
896 int status = -1;
897 try {
898 status = read_url(url, stream, resp_hdrs); // Throws Error.
899 if (status >= 400) {
900 // delete resp_hdrs; resp_hdrs = 0;
901 string msg = "Error while reading the URL: ";
902 msg += url;
903 msg += ".\nThe OPeNDAP server returned the following message:\n";
904 msg += http_status_to_string(status);
905 throw Error(msg);
906 }
907 }
908
909 catch (Error &e) {
910 delete resp_hdrs;
911 close_temp(stream, dods_temp);
912 throw;
913 }
914
915#if 0
916 if (d_use_cpp_streams) {
917 fclose(stream);
918 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
919 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
920 }
921 else {
922#endif
923 rewind(stream);
924 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
925#if 0
926}
927#endif
928}
929
942 d_accept_deflate = deflate;
943
944 if (d_accept_deflate) {
945 if (find(d_request_headers.begin(), d_request_headers.end(), "Accept-Encoding: deflate, gzip, compress") ==
946 d_request_headers.end())
947 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
948 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
949 } else {
951 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
952 [](const string &header) { return header == "Accept-Encoding: deflate, gzip, compress"; });
953 d_request_headers.erase(i, d_request_headers.end());
954 }
955}
956
965void HTTPConnect::set_xdap_protocol(int major, int minor) {
966 // Look for, and remove if one exists, an XDAP-Accept header
968 i = find_if(d_request_headers.begin(), d_request_headers.end(), HeaderMatch("XDAP-Accept:"));
969 if (i != d_request_headers.end())
970 d_request_headers.erase(i);
971
972 // Record and add the new header value
973 d_dap_client_protocol_major = major;
974 d_dap_client_protocol_minor = minor;
975 ostringstream xdap_accept;
976 xdap_accept << "XDAP-Accept: " << major << "." << minor;
977
978 d_request_headers.push_back(xdap_accept.str());
979
980 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
981}
982
997
998void HTTPConnect::set_credentials(const string &u, const string &p) {
999 if (u.empty())
1000 return;
1001
1002 // Store the credentials locally.
1003 d_username = u;
1004 d_password = p;
1005
1006 d_upstring = u + ":" + p;
1007}
1008
1009} // namespace libdap
A class for error processing.
Definition Error.h:92
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition HTTPCache.cc:125
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition InternalErr.h:61
Regular expression matching.
Definition GNURegex.h:54
STL iterator class.
top level DAP object to house generic methods
Definition AISConnect.cc:30
ObjectType get_description_type(const string &value)
Definition mime_util.cc:309
void parse_mime_header(const string &header, string &name, string &value)
Definition mime_util.cc:848
string get_temp_file(FILE *&stream)
void close_temp(FILE *s, const string &name)
ObjectType
The type of object in the stream coming from the data server.
Definition ObjectType.h:57