libdap Updated for version 3.20.5
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27#include "config.h"
28
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32
33#include <sys/stat.h>
34
35#ifdef WIN32
36#include <io.h>
37#endif
38
39#include <string>
40#include <vector>
41#include <functional>
42#include <algorithm>
43#include <sstream>
44#include <fstream>
45#include <iterator>
46#include <cstdlib>
47#include <cstring>
48#include <cerrno>
49
50//#define DODS_DEBUG2
51//#define HTTP_TRACE
52//#define DODS_DEBUG
53
54#undef USE_GETENV
55
56
57#include "debug.h"
58#include "mime_util.h"
59#include "media_types.h"
60#include "GNURegex.h"
61#include "HTTPCache.h"
62#include "HTTPConnect.h"
63#include "RCReader.h"
64#include "HTTPResponse.h"
65#include "HTTPCacheResponse.h"
66
67using namespace std;
68
69namespace libdap {
70
// Debugging switches. These globals are not MT-Safe; they are left that way
// because they exist only for debugging (set them from a debugger such as
// gdb or ddd). They are deliberately not static, on the assumption that many
// debuggers cannot access static variables. 08/07/02 jhrg

// Non-zero turns on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Non-zero turns on libcurl's VERY verbose mode.
int www_trace_extensive = 0;

// Non-zero keeps the temporary files; useful for debugging.
int dods_keep_temps = 0;
84
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Messages for the HTTP client-error codes CLIENT_ERR_MIN..CLIENT_ERR_MAX,
// indexed by (status - CLIENT_ERR_MIN). Multi-line messages are built from
// adjacent string literals so that no source indentation ends up inside the
// text shown to the user.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention.\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Messages for the HTTP server-error codes SERVER_ERR_MIN..SERVER_ERR_MAX,
// indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human-readable message.

    @param status The HTTP status code.
    @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
    [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
135
136static ObjectType
137determine_object_type(const string &header_value)
138{
139 // DAP4 Data: application/vnd.opendap.dap4.data
140 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141
142 string::size_type plus = header_value.find('+');
143 string base_type;
144 string type_extension = "";
145 if (plus != string::npos) {
146 base_type= header_value.substr(0, plus);
147 type_extension = header_value.substr(plus+1);
148 }
149 else
150 base_type = header_value;
151
152 if (base_type == DMR_Content_Type
153 || (base_type.find("application/") != string::npos
154 && base_type.find("dap4.dataset-metadata") != string::npos)) {
155 if (type_extension == "xml")
156 return dap4_dmr;
157 else
158 return unknown_type;
159 }
160 else if (base_type == DAP4_DATA_Content_Type
161 || (base_type.find("application/") != string::npos
162 && base_type.find("dap4.data") != string::npos)) {
163 return dap4_data;
164 }
165 else if (header_value.find("text/html") != string::npos) {
166 return web_error;
167 }
168 else
169 return unknown_type;
170}
171
176class ParseHeader : public unary_function<const string &, void>
177{
178 ObjectType type; // What type of object is in the stream?
179 string server; // Server's version string.
180 string protocol; // Server's protocol version.
181 string location; // Url returned by server
182
183public:
184 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185 { }
186
187 void operator()(const string &line)
188 {
189 string name, value;
190 parse_mime_header(line, name, value);
191
192 DBG2(cerr << name << ": " << value << endl);
193
194 // Content-Type is used to determine the content of DAP4 responses, but allow the
195 // Content-Description header to override CT o preserve operation with DAP2 servers.
196 // jhrg 11/12/13
197 if (type == unknown_type && name == "content-type") {
198 type = determine_object_type(value); // see above
199 }
200 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201 type = get_description_type(value); // defined in mime_util.cc
202 }
203 // The second test (== "dods/0.0") tests if xopendap-server has already
204 // been seen. If so, use that header in preference to the old
205 // XDODS-Server header. jhrg 2/7/06
206 else if (name == "xdods-server" && server == "dods/0.0") {
207 server = value;
208 }
209 else if (name == "xopendap-server") {
210 server = value;
211 }
212 else if (name == "xdap") {
213 protocol = value;
214 }
215 else if (server == "dods/0.0" && name == "server") {
216 server = value;
217 }
218 else if (name == "location") {
219 location = value;
220 }
221 }
222
223 ObjectType get_object_type()
224 {
225 return type;
226 }
227
228 string get_server()
229 {
230 return server;
231 }
232
233 string get_protocol()
234 {
235 return protocol;
236 }
237
238 string get_location() {
239 return location;
240 }
241};
242
258static size_t
259save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
260{
261 DBG2(cerr << "Inside the header parser." << endl);
262 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
263
264 // Grab the header, minus the trailing newline. Or \r\n pair.
265 string complete_line;
266 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
267 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
268 else
269 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
270
271 // Store all non-empty headers that are not HTTP status codes
272 if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
273 DBG(cerr << "Header line: " << complete_line << endl);
274 hdrs->push_back(complete_line);
275 }
276
277 return size * nmemb;
278}
279
281static int
282curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283{
284 string message(msg, size);
285
286 switch (info) {
287 case CURLINFO_TEXT:
288 cerr << "Text: " << message; break;
289 case CURLINFO_HEADER_IN:
290 cerr << "Header in: " << message; break;
291 case CURLINFO_HEADER_OUT:
292 cerr << "Header out: " << message; break;
293 case CURLINFO_DATA_IN:
294 if (www_trace_extensive)
295 cerr << "Data in: " << message; break;
296 case CURLINFO_DATA_OUT:
297 if (www_trace_extensive)
298 cerr << "Data out: " << message; break;
299 case CURLINFO_END:
300 cerr << "End: " << message; break;
301#ifdef CURLINFO_SSL_DATA_IN
302 case CURLINFO_SSL_DATA_IN:
303 cerr << "SSL Data in: " << message; break;
304#endif
305#ifdef CURLINFO_SSL_DATA_OUT
306 case CURLINFO_SSL_DATA_OUT:
307 cerr << "SSL Data out: " << message; break;
308#endif
309 default:
310 if (www_trace_extensive)
311 cerr << "Curl info: " << message; break;
312 }
313 return 0;
314}
315
319void
320HTTPConnect::www_lib_init()
321{
322 d_curl = curl_easy_init();
323 if (!d_curl)
324 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
325
326 // Now set options that will remain constant for the duration of this
327 // CURL object.
328
329 // Set the proxy host.
330 if (!d_rcr->get_proxy_server_host().empty()) {
331 DBG(cerr << "Setting up a proxy server." << endl);
332 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
333 << endl);
334 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
335 << endl);
336 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
337 << endl);
338 curl_easy_setopt(d_curl, CURLOPT_PROXY,
339 d_rcr->get_proxy_server_host().c_str());
340 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
341 d_rcr->get_proxy_server_port());
342
343 // As of 4/21/08 only NTLM, Digest and Basic work.
344#ifdef CURLOPT_PROXYAUTH
345 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
346#endif
347
348 // Password might not be required. 06/21/04 jhrg
349 if (!d_rcr->get_proxy_server_userpw().empty())
350 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
351 d_rcr->get_proxy_server_userpw().c_str());
352 }
353
354 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
355 // We have to set FailOnError to false for any of the non-Basic
356 // authentication schemes to work. 07/28/03 jhrg
357 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
358
359 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
360 // choosing the the 'safest' one supported by the server.
361 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
362 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
363
364 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
365 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
366 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
367 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
368 // param of save_raw_http_headers to a vector<string> object.
369
370 // Follow 302 (redirect) responses
371 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
372 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
373
374 // If the user turns off SSL validation...
375 if (d_rcr->get_validate_ssl() == 0) {
376 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
377 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
378 }
379
380 // Set libcurl to use netrc to access data behind URS auth.
381 // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
382 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
383
384 // Look to see if cookies are turned on in the .dodsrc file. If so,
385 // activate here. We honor 'session cookies' (cookies without an
386 // expiration date) here so that session-based SSO systems will work as
387 // expected.
388 if (!d_cookie_jar.empty()) {
389 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
390 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
391 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
392 }
393
394 if (www_trace) {
395 cerr << "Curl version: " << curl_version() << endl;
396 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
397 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
398 }
399}
400
404class BuildHeaders : public unary_function<const string &, void>
405{
406 struct curl_slist *d_cl;
407
408public:
409 BuildHeaders() : d_cl(0)
410 {}
411
412 void operator()(const string &header)
413 {
414 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
415 << endl);
416 d_cl = curl_slist_append(d_cl, header.c_str());
417 }
418
419 struct curl_slist *get_headers()
420 {
421 return d_cl;
422 }
423};
424
439long
440HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
441{
442 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
443
444#ifdef WIN32
445 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
446 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
447 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
448 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
449 // this issue is that one should not pass a FILE * to a windows DLL. Close
450 // inspection of libcurl yields that their default write function when using
451 // the CURLOPT_WRITEDATA is just "fwrite".
452 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
453 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
454#else
455 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
456#endif
457
458 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
459 ostream_iterator<string>(cerr, "\n")));
460
461 BuildHeaders req_hdrs;
462 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
463 req_hdrs);
464 if (headers)
465 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
466
467 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
468
469 // Turn off the proxy for this URL?
470 bool temporary_proxy = false;
471 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
472 DBG(cerr << "Suppress proxy for url: " << url << endl);
473 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
474 }
475
476 string::size_type at_sign = url.find('@');
477 // Assume username:password present *and* assume it's an HTTP URL; it *is*
478 // HTTPConnect, after all. 7 is position after "http://"; the second arg
479 // to substr() is the sub string length.
480 if (at_sign != url.npos)
481 d_upstring = url.substr(7, at_sign - 7);
482
483 if (!d_upstring.empty())
484 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
485
486 // Pass save_raw_http_headers() a pointer to the vector<string> where the
487 // response headers may be stored. Callers can use the resp_hdrs
488 // value/result parameter to get the raw response header information .
489 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
490
491 // This is the call that causes curl to go and get the remote resource and "write it down"
492 // utilizing the configuration state that has been previously conditioned by various perturbations
493 // of calls to curl_easy_setopt().
494 CURLcode res = curl_easy_perform(d_curl);
495
496 // Free the header list and null the value in d_curl.
497 curl_slist_free_all(req_hdrs.get_headers());
498 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
499
500 // Reset the proxy?
501 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
502 curl_easy_setopt(d_curl, CURLOPT_PROXY,
503 d_rcr->get_proxy_server_host().c_str());
504
505 if (res != 0)
506 throw Error(d_error_buffer);
507
508 long status;
509 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
510 if (res != 0)
511 throw Error(d_error_buffer);
512
513 char *ct_ptr = 0;
514 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
515 if (res == CURLE_OK && ct_ptr)
516 d_content_type = ct_ptr;
517 else
518 d_content_type = "";
519
520 return status;
521}
522
526bool
527HTTPConnect::url_uses_proxy_for(const string &url)
528{
529 if (d_rcr->is_proxy_for_used()) {
530 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
531 int index = 0, matchlen;
532 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
533 }
534
535 return false;
536}
537
541bool
542HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
543{
544 return d_rcr->is_no_proxy_for_used()
545 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
546}
547
548// Public methods. Mostly...
549
556HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
557 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
558
559{
560 d_accept_deflate = rcr->get_deflate();
561 d_rcr = rcr;
562
563 // Load in the default headers to send with a request. The empty Pragma
564 // headers overrides libcurl's default Pragma: no-cache header (which
565 // will disable caching by Squid, et c.). The User-Agent header helps
566 // make server logs more readable. 05/05/03 jhrg
567 d_request_headers.push_back(string("Pragma:"));
568 string user_agent = string("User-Agent: ") + string(CNAME)
569 + string("/") + string(CVER);
570 d_request_headers.push_back(user_agent);
571 if (d_accept_deflate)
572 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
573
574 // HTTPCache::instance returns a valid ptr or 0.
575 if (d_rcr->get_use_cache())
576 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
577 else
578 d_http_cache = 0;
579
580 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
581 << ")" << endl);
582
583 if (d_http_cache) {
584 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
585 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
586 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
587 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
588 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
589 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
590 }
591
592 d_cookie_jar = rcr->get_cookie_jar();
593
594 www_lib_init(); // This may throw either Error or InternalErr
595}
596
597HTTPConnect::~HTTPConnect()
598{
599 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
600
601 curl_easy_cleanup(d_curl);
602
603 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
604}
605
/** Predicate: does a header line start with a given prefix?

    The prefix is stored by value. Call sites construct this functor from a
    string literal, which creates a temporary std::string; holding only a
    reference to that temporary would dangle as soon as the functor outlived
    the constructing expression.

    It no longer derives from std::unary_function (deprecated in C++11,
    removed in C++17); the algorithms used with it do not need that base. */
class HeaderMatch {
    std::string d_header;
public:
    /// @param header The prefix to look for, e.g. "Content-Type:".
    HeaderMatch(const std::string &header) : d_header(header) {}

    /// @return True if arg begins with the stored prefix (case-sensitive).
    bool operator()(const std::string &arg) const
    {
        // Compare only the leading d_header.size() characters rather than
        // searching the whole line for the prefix.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
613
626HTTPResponse *
627HTTPConnect::fetch_url(const string &url)
628{
629#ifdef HTTP_TRACE
630 cout << "GET " << url << " HTTP/1.0" << endl;
631#endif
632
633 HTTPResponse *stream;
634
635 if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
636 stream = caching_fetch_url(url);
637 }
638 else {
639 stream = plain_fetch_url(url);
640 }
641
642#ifdef HTTP_TRACE
643 stringstream ss;
644 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
645 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
646 ss << stream->get_headers()->at(i) << endl;
647 }
648 cout << ss.str();
649#endif
650
651 ParseHeader parser;
652
653 // An apparent quirk of libcurl is that it does not pass the Content-type
654 // header to the callback used to save them, but check and add it from the
655 // saved state variable only if it's not there (without this a test failed
656 // in HTTPCacheTest). jhrg 11/12/13
657 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
658 HeaderMatch("Content-Type:")) == stream->get_headers()->end())
659 stream->get_headers()->push_back("Content-Type: " + d_content_type);
660
661 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
662
663#ifdef HTTP_TRACE
664 cout << endl << endl;
665#endif
666
667 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
668 if (parser.get_location() != "" &&
669 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
670 delete stream;
671 return fetch_url(parser.get_location());
672 }
673
674 stream->set_type(parser.get_object_type()); // uses the value of content-description
675
676 stream->set_version(parser.get_server());
677 stream->set_protocol(parser.get_protocol());
678
679 if (d_use_cpp_streams) {
680 stream->transform_to_cpp();
681 }
682
683 return stream;
684}
685
686// Look around for a reasonable place to put a temporary file. Check first
687// the value of the TMPDIR env var. If that does not yeild a path that's
688// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
689// defined in stdio.h. If both come up empty, then use `./'.
690
691// Change this to a version that either returns a string or an open file
692// descriptor. Use information from https://buildsecurityin.us-cert.gov/
693// (see open()) to make it more secure. Ideal solution: get deserialize()
694// methods to read from a stream returned by libcurl, not from a temporary
695// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
696static string
697get_tempfile_template(const string &file_template)
698{
699 string c;
700
701 // Windows has one idea of the standard name(s) for a temporary files dir
702#ifdef WIN32
703 // white list for a WIN32 directory
704 Regex directory("[-a-zA-Z0-9_:\\]*");
705
706 // If we're OK to use getenv(), try it.
707#ifdef USE_GETENV
708 c = getenv("TEMP");
709 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710 goto valid_temp_directory;
711
712 c= getenv("TMP");
713 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
714 goto valid_temp_directory;
715#endif // USE_GETENV
716
717 // The windows default
718 c = "c:\tmp";
719 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
720 goto valid_temp_directory;
721
722#else // Unix/Linux/OSX has another...
723 // white list for a directory
724 Regex directory("[-a-zA-Z0-9_/]*");
725#ifdef USE_GETENV
726 c = getenv("TMPDIR");
727 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
728 goto valid_temp_directory;
729#endif // USE_GETENV
730
731 // Unix defines this sometimes - if present, use it.
732#ifdef P_tmpdir
733 if (access(P_tmpdir, W_OK | R_OK) == 0) {
734 c = P_tmpdir;
735 goto valid_temp_directory;
736 }
737#endif
738
739 // The Unix default
740 c = "/tmp";
741 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
742 goto valid_temp_directory;
743
744#endif // WIN32
745
746 // If we found nothing useful, use the current directory
747 c = ".";
748
749valid_temp_directory:
750
751#ifdef WIN32
752 c += "\\" + file_template;
753#else
754 c += "/" + file_template;
755#endif
756
757 return c;
758}
759
778string
779get_temp_file(FILE *&stream) throw(Error)
780{
781 string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
782
783 vector<char> pathname(dods_temp.length() + 1);
784
785 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
786
787 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
788
789 // Open truncated for update. NB: mkstemp() returns a file descriptor.
790#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
791 stream = fopen(_mktemp(&pathname[0]), "w+b");
792#else
793 // Make sure that temp files are accessible only by the owner.
794 int mask = umask(077);
795 if (mask < 0)
796 throw Error("Could not set the file creation mask: " + string(strerror(errno)));
797 int fd = mkstemp(&pathname[0]);
798 if (fd < 0)
799 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
800
801 stream = fdopen(fd, "w+");
802 umask(mask);
803#endif
804
805 if (!stream)
806 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
807
808 dods_temp = &pathname[0];
809 return dods_temp;
810}
811
812
818void
819close_temp(FILE *s, const string &name)
820{
821 int res = fclose(s);
822 if (res)
823 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
824
825 res = unlink(name.c_str());
826 if (res != 0)
827 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
828}
829
851HTTPResponse *
852HTTPConnect::caching_fetch_url(const string &url)
853{
854 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
855
856 vector<string> *headers = new vector<string>;
857 string file_name;
858 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
859 if (!s) {
860 // url not in cache; get it and cache it
861 DBGN(cerr << "no; getting response and caching." << endl);
862 delete headers; headers = 0;
863 time_t now = time(0);
864 HTTPResponse *rs = plain_fetch_url(url);
865 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
866
867 return rs;
868 }
869 else { // url in cache
870 DBGN(cerr << "yes... ");
871
872 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
873 DBGN(cerr << "and it's valid; using cached response." << endl);
874 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
875 return crs;
876 }
877 else { // url in cache but not valid; validate
878 DBGN(cerr << "but it's not valid; validating... ");
879
880 d_http_cache->release_cached_response(s); // This closes 's'
881 headers->clear();
882 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
883 FILE *body = 0;
884 string dods_temp = get_temp_file(body);
885 time_t now = time(0); // When was the request made (now).
886 long http_status;
887
888 try {
889 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
890 rewind(body);
891 }
892 catch (Error &e) {
893 close_temp(body, dods_temp);
894 delete headers;
895 throw ;
896 }
897
898 switch (http_status) {
899 case 200: { // New headers and new body
900 DBGN(cerr << "read a new response; caching." << endl);
901
902 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
903 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
904
905 return rs;
906 }
907
908 case 304: { // Just new headers, use cached body
909 DBGN(cerr << "cached response valid; updating." << endl);
910
911 close_temp(body, dods_temp);
912 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
913 string file_name;
914 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
915 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
916 return crs;
917 }
918
919 default: { // Oops.
920 close_temp(body, dods_temp);
921 if (http_status >= 400) {
922 delete headers; headers = 0;
923 string msg = "Error while reading the URL: ";
924 msg += url;
925 msg
926 += ".\nThe OPeNDAP server returned the following message:\n";
927 msg += http_status_to_string(http_status);
928 throw Error(msg);
929 }
930 else {
931 delete headers; headers = 0;
932 throw InternalErr(__FILE__, __LINE__,
933 "Bad response from the HTTP server: " + long_to_string(http_status));
934 }
935 }
936 }
937 }
938 }
939
940 throw InternalErr(__FILE__, __LINE__, "Should never get here");
941}
942
954HTTPResponse *
955HTTPConnect::plain_fetch_url(const string &url)
956{
957 DBG(cerr << "Getting URL: " << url << endl);
958 FILE *stream = 0;
959 string dods_temp = get_temp_file(stream);
960 vector<string> *resp_hdrs = new vector<string>;
961
962 int status = -1;
963 try {
964 status = read_url(url, stream, resp_hdrs); // Throws Error.
965 if (status >= 400) {
966 // delete resp_hdrs; resp_hdrs = 0;
967 string msg = "Error while reading the URL: ";
968 msg += url;
969 msg += ".\nThe OPeNDAP server returned the following message:\n";
970 msg += http_status_to_string(status);
971 throw Error(msg);
972 }
973 }
974
975 catch (Error &e) {
976 delete resp_hdrs;
977 close_temp(stream, dods_temp);
978 throw;
979 }
980
981#if 0
982 if (d_use_cpp_streams) {
983 fclose(stream);
984 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
985 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
986 }
987 else {
988#endif
989 rewind(stream);
990 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
991#if 0
992}
993#endif
994}
995
1007void
1009{
1010 d_accept_deflate = deflate;
1011
1012 if (d_accept_deflate) {
1013 if (find(d_request_headers.begin(), d_request_headers.end(),
1014 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1015 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1016 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1017 ostream_iterator<string>(cerr, "\n")));
1018 }
1019 else {
1020 vector<string>::iterator i;
1021 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1022 bind2nd(equal_to<string>(),
1023 string("Accept-Encoding: deflate, gzip, compress")));
1024 d_request_headers.erase(i, d_request_headers.end());
1025 }
1026}
1027
1036void
1038{
1039 // Look for, and remove if one exists, an XDAP-Accept header
1040 vector<string>::iterator i;
1041 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1042 HeaderMatch("XDAP-Accept:"));
1043 if (i != d_request_headers.end())
1044 d_request_headers.erase(i);
1045
1046 // Record and add the new header value
1047 d_dap_client_protocol_major = major;
1048 d_dap_client_protocol_minor = minor;
1049 ostringstream xdap_accept;
1050 xdap_accept << "XDAP-Accept: " << major << "." << minor;
1051
1052 d_request_headers.push_back(xdap_accept.str());
1053
1054 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1055 ostream_iterator<string>(cerr, "\n")));
1056}
1057
1073void
1074HTTPConnect::set_credentials(const string &u, const string &p)
1075{
1076 if (u.empty())
1077 return;
1078
1079 // Store the credentials locally.
1080 d_username = u;
1081 d_password = p;
1082
1083 d_upstring = u + ":" + p;
1084}
1085
1086} // namespace libdap
A class for error processing.
Definition Error.h:93
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition HTTPCache.cc:819
void release_cached_response(FILE *response)
vector< string > get_conditional_request_headers(const string &url)
void set_cache_enabled(bool mode)
Definition HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition HTTPCache.cc:772
bool is_url_valid(const string &url)
void set_always_validate(bool validate)
Definition HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
void set_max_size(unsigned long size)
Definition HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition InternalErr.h:65
string get_proxy_server_host() const
Get the proxy host.
Definition RCReader.h:181
int get_proxy_server_port() const
Get the proxy port.
Definition RCReader.h:186
string get_proxy_server_userpw() const
Get the proxy username and password.
Definition RCReader.h:191
string get_proxy_for_regexp() const
Definition RCReader.h:215
bool is_proxy_for_used()
Definition RCReader.h:210
top level DAP object to house generic methods
ObjectType get_description_type(const string &value)
Definition mime_util.cc:339
void parse_mime_header(const string &header, string &name, string &value)
Definition mime_util.cc:912
string get_temp_file(FILE *&stream)
void close_temp(FILE *s, const string &name)
ObjectType
The type of object in the stream coming from the data server.
Definition ObjectType.h:58