libdap Updated for version 3.21.0
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCache.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28// #define DODS_DEBUG
29// #define DODS_DEBUG2
30#undef USE_GETENV
31
32#include <limits.h>
33#include <pthread.h>
34#include <sys/stat.h>
35#include <sys/types.h> // for stat and mkdir
36#include <unistd.h> // for stat
37
38#include <cerrno>
39#include <cstring>
40
41#include <algorithm>
42#include <iostream>
43#include <iterator>
44#include <set>
45#include <sstream>
46
47#include "Error.h"
48#include "InternalErr.h"
49#include "ResponseTooBigErr.h"
50#ifndef WIN32
51#include "SignalHandler.h"
52#endif
53#include "HTTPCache.h"
54#include "HTTPCacheInterruptHandler.h"
55#include "HTTPCacheMacros.h"
56#include "HTTPCacheTable.h"
57#include "SignalHandlerRegisteredErr.h"
58
59#include "debug.h"
60#include "util_mit.h"
61
62using namespace std;
63
64namespace libdap {
65
66HTTPCache *HTTPCache::_instance = 0;
67
68// instance_mutex is used to ensure that only one instance is created.
69// That is, it protects the body of the HTTPCache::instance() method. This
70// mutex is initialized from within the static function once_init_routine()
71// and the call to that takes place using pthread_once_init() where the mutex
72// once_block is used to protect that call. All of this ensures that no matter
73// how many threads call the instance() method, only one instance is ever
74// made.
75static pthread_mutex_t instance_mutex;
76static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77
78#define NO_LM_EXPIRATION 24 * 3600 // 24 hours
79
80#define DUMP_FREQUENCY 10 // Dump index every x loads
81
82#define MEGA 0x100000L
83#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
84#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
85#define CACHE_GC_PCT 10 // 10% of cache size free after GC
86#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
87#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
88
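// Worked arithmetic for the defaults above (descriptive note, not in the original
// file): MEGA is 0x100000 = 1,048,576 bytes, so the default cache holds
// CACHE_TOTAL_SIZE * MEGA = 20 * 1,048,576 = 20,971,520 bytes and a single entry
// may not exceed MAX_CACHE_ENTRY_SIZE * MEGA = 3,145,728 bytes. set_max_size()
// reserves 10% of the total (CACHE_FOLDER_PCT) for metainfo and folders and leaves
// at least another 10% (CACHE_GC_PCT) free after each garbage collection.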
89static void once_init_routine() {
90 int status;
91 status = INIT(&instance_mutex);
92
93 if (status != 0)
94 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
95}
96
124
125HTTPCache *HTTPCache::instance(const string &cache_root, bool force) {
126 int status = pthread_once(&once_block, once_init_routine);
127 if (status != 0)
128 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
129
130 LOCK(&instance_mutex);
131
132 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
133
134 try {
135 if (!_instance) {
136 _instance = new HTTPCache(cache_root, force);
137
138 DBG(cerr << "New instance: " << _instance << ", cache root: " << _instance->d_cache_root << endl);
139
140 atexit(delete_instance);
141
142#ifndef WIN32
143 // Register the interrupt handler. If we've already registered
144 // one, barf. If this becomes a problem, hack SignalHandler so
145 // that we can chain these handlers... 02/10/04 jhrg
146 //
147 // Technically we're leaking memory here. However, since this
148 // class is a singleton, we know that only three objects will
149 // ever be created and they will all exist until the process
150 // exits. We can let this slide... 02/12/04 jhrg
151 EventHandler *old_eh =
152 SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
153 if (old_eh) {
154 SignalHandler::instance()->register_handler(SIGINT, old_eh);
 155                throw SignalHandlerRegisteredErr(
 156                    "Could not register event handler for SIGINT without superseding an existing one.");
157 }
158
159 old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
160 if (old_eh) {
161 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
 162                throw SignalHandlerRegisteredErr(
 163                    "Could not register event handler for SIGPIPE without superseding an existing one.");
164 }
165
166 old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
167 if (old_eh) {
168 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
 169                throw SignalHandlerRegisteredErr(
 170                    "Could not register event handler for SIGTERM without superseding an existing one.");
171 }
172#endif
173 }
174 } catch (...) {
175 DBG2(cerr << "The constructor threw an Error!" << endl);
176 UNLOCK(&instance_mutex);
177 throw;
178 }
179
180 UNLOCK(&instance_mutex);
181 DBGN(cerr << "returning " << hex << _instance << dec << endl);
182
183 return _instance;
184}
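// Illustrative usage sketch (not part of HTTPCache.cc): how a caller might obtain
// the singleton. The cache-root path is an assumed example value.
static void example_get_cache_instance() {
    // force == true removes a stale single-user lock left behind by another process.
    HTTPCache *cache = HTTPCache::instance("/tmp/dods_cache/", true);
    if (cache && cache->is_cache_enabled())
        cerr << "Using cache rooted at: " << cache->get_cache_root() << endl;
    // The pointer is owned by the library; delete_instance() runs via atexit().
}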
185
188
189void HTTPCache::delete_instance() {
190 DBG(cerr << "Entering delete_instance()..." << endl);
191
192 if (HTTPCache::_instance) {
193 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
194 delete HTTPCache::_instance;
195 HTTPCache::_instance = 0;
196
197 // Now remove the signal handlers
198 delete SignalHandler::instance()->remove_handler(SIGINT);
199 delete SignalHandler::instance()->remove_handler(SIGPIPE);
200 delete SignalHandler::instance()->remove_handler(SIGTERM);
201 }
202
203 DBG(cerr << "Exiting delete_instance()" << endl);
204}
205
219
220HTTPCache::HTTPCache(string cache_root, bool force)
221 : d_locked_open_file(0), d_cache_enabled(false), d_cache_protected(false),
222
223 d_cache_disconnected(DISCONNECT_NONE),
224
225 d_expire_ignored(false), d_always_validate(false), d_total_size(CACHE_TOTAL_SIZE * MEGA),
226 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
227 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), d_default_expiration(NO_LM_EXPIRATION), d_max_age(-1),
228 d_max_stale(-1), d_min_fresh(-1), d_http_cache_table(0) {
229 DBG(cerr << "Entering the constructor for " << this << "... ");
230#if 0
231 int status = pthread_once(&once_block, once_init_routine);
232 if (status != 0)
233 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
234#endif
235 INIT(&d_cache_mutex);
236
237 // This used to throw an Error object if we could not get the
238 // single user lock. However, that results in an invalid object. It's
239 // better to have an instance that has default values. If we cannot get
240 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
241 //
242 // I fixed this block so that the cache root is set before we try to get
243 // the single user lock. That was the fix for bug #661. To make that
244 // work, I had to move the call to create_cache_root out of
245 // set_cache_root(). 09/08/03 jhrg
246
247 set_cache_root(cache_root);
248 int block_size;
249
250 if (!get_single_user_lock(force))
251 throw Error(internal_error, "Could not get single user lock for the cache");
252
253#ifdef WIN32
254    // Windows is unable to provide us this information. 4096 appears to
255    // be a best guess. It is likely to be in the range [2048, 8192] on
256    // Windows, but the truth of that statement may vary over
257    // time.
258 block_size = 4096;
259#else
260 struct stat s;
261 if (stat(cache_root.c_str(), &s) == 0)
262 block_size = s.st_blksize;
263 else
264 throw Error(internal_error, "Could not set file system block size.");
265#endif
266 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
267 d_cache_enabled = true;
268
269 DBGN(cerr << "exiting" << endl);
270}
271
283
284HTTPCache::~HTTPCache() {
285    DBG(cerr << "Entering the destructor for " << this << "... ");
286
287 try {
288 if (startGC())
289 perform_garbage_collection();
290
291 d_http_cache_table->cache_index_write();
292 } catch (Error &e) {
293 // If the cache index cannot be written, we've got problems. However,
294 // unless we're debugging, still free up the cache table in memory.
295 // How should we let users know they cache index is not being
296 // written?? 10/03/02 jhrg
297 DBG(cerr << e.get_error_message() << endl);
298 }
299
300 delete d_http_cache_table;
301
302 release_single_user_lock();
303
304 DBGN(cerr << "exiting destructor." << endl);
305 DESTROY(&d_cache_mutex);
306}
307
311
314
315bool HTTPCache::stopGC() const {
316 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
317}
318
324
325bool HTTPCache::startGC() const {
326 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
327 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
328}
329
343
344void HTTPCache::perform_garbage_collection() {
345 DBG(cerr << "Performing garbage collection" << endl);
346
347 // Remove all the expired responses.
348 expired_gc();
349
350 // Remove entries larger than max_entry_size.
351 too_big_gc();
352
353 // Remove entries starting with zero hits, 1, ..., until stopGC()
354 // returns true.
355 hits_gc();
356}
357
362
363void HTTPCache::expired_gc() {
364 if (!d_expire_ignored) {
365 d_http_cache_table->delete_expired_entries();
366 }
367}
368
384
385void HTTPCache::hits_gc() {
386 int hits = 0;
387
388 if (startGC()) {
389 while (!stopGC()) {
390 d_http_cache_table->delete_by_hits(hits);
391 hits++;
392 }
393 }
394}
395
400void HTTPCache::too_big_gc() {
401 if (startGC())
402 d_http_cache_table->delete_by_size(d_max_entry_size);
403}
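// Worked example of the thresholds above (descriptive note, default 20 MB cache):
// startGC() fires once current_size + d_folder_size exceeds d_total_size
// (20,971,520 bytes), and hits_gc() keeps removing entries with the fewest hits
// until stopGC() holds, i.e. until current_size + d_folder_size drops below
// d_total_size - d_gc_buffer, leaving at least d_gc_buffer bytes of headroom
// after each collection.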
404
406
416
417bool HTTPCache::get_single_user_lock(bool force) {
418 if (!d_locked_open_file) {
419 FILE *fp = NULL;
420
421 try {
422 // It's OK to call create_cache_root if the directory already
423 // exists.
424 create_cache_root(d_cache_root);
425 } catch (Error &e) {
426 // We need to catch and return false because this method is
427 // called from a ctor and throwing at this point will result in a
428 // partially constructed object. 01/22/04 jhrg
429 DBG(cerr << "Failure to create the cache root" << endl);
430 return false;
431 }
432
433 // Try to read the lock file. If we can open for reading, it exists.
434 string lock = d_cache_root + CACHE_LOCK;
435 if ((fp = fopen(lock.c_str(), "r")) != NULL) {
436 int res = fclose(fp);
437 if (res) {
438 DBG(cerr << "Failed to close " << (void *)fp << endl);
439 }
440 if (force)
441 REMOVE(lock.c_str());
442 else
443 return false;
444 }
445
446 if ((fp = fopen(lock.c_str(), "w")) == NULL) {
447 DBG(cerr << "Could not open for write access" << endl);
448 return false;
449 }
450
451 d_locked_open_file = fp;
452 return true;
453 }
454
455 DBG(cerr << "locked_open_file is true" << endl);
456 return false;
457}
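// Note on the locking protocol above (descriptive, not in the original source):
// the single-user lock is simply the file d_cache_root + CACHE_LOCK. If that file
// already exists the cache is assumed to be in use by another process and this
// method returns false, unless 'force' is true, in which case the presumed-stale
// lock file is removed, recreated, and held open until release_single_user_lock().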
458
460
461void HTTPCache::release_single_user_lock() {
462 if (d_locked_open_file) {
463 int res = fclose(d_locked_open_file);
464 if (res) {
465 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl);
466 }
467 d_locked_open_file = 0;
468 }
469
470 string lock = d_cache_root + CACHE_LOCK;
471 REMOVE(lock.c_str());
472}
473
476
479
480string HTTPCache::get_cache_root() const { return d_cache_root; }
481
489
490void HTTPCache::create_cache_root(const string &cache_root) {
491#ifdef WIN32
492 string::size_type cur = cache_root[1] == ':' ? 3 : 1;
493 typedef int mode_t;
494
495 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
496 string dir = cache_root.substr(0, cur);
497 struct stat stat_info;
498 if (stat(dir.c_str(), &stat_info) == -1) {
499 DBG2(cerr << "Cache....... Creating " << dir << endl);
500 mode_t mask = UMASK(0);
501 if (MKDIR(dir.c_str(), 0777) < 0) {
502 DBG2(cerr << "Error: can't create." << endl);
503 UMASK(mask);
504 throw Error(string("Could not create the directory for the cache. Failed when building path at ") +
505 dir + string("."));
506 }
507 UMASK(mask);
508 } else {
509 DBG2(cerr << "Cache....... Found " << dir << endl);
510 }
511 cur++;
512 }
513#else
514 // OSX and Linux
515
516 // Save the mask
517 mode_t mask = umask(0);
518
519 // Ignore the error if the directory exists
520 errno = 0;
521 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
522 umask(mask);
523 throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
524 }
525
526    // Restore the mask
527 umask(mask);
528
529#endif
530}
531
545
546void HTTPCache::set_cache_root(const string &root) {
547 if (root != "") {
548 d_cache_root = root;
549 // cache root should end in /.
550 if (d_cache_root[d_cache_root.size() - 1] != DIR_SEPARATOR_CHAR)
551 d_cache_root += DIR_SEPARATOR_CHAR;
552 } else {
553 // If no cache root has been indicated then look for a suitable
554 // location.
555#ifdef USE_GETENV
556 char *cr = (char *)getenv("DODS_CACHE");
557 if (!cr)
558 cr = (char *)getenv("TMP");
559 if (!cr)
560 cr = (char *)getenv("TEMP");
561 if (!cr)
562 cr = (char *)CACHE_LOCATION;
563 d_cache_root = cr;
564#else
565 d_cache_root = CACHE_LOCATION;
566#endif
567
568 if (d_cache_root[d_cache_root.size() - 1] != DIR_SEPARATOR_CHAR)
569 d_cache_root += DIR_SEPARATOR_CHAR;
570
571 d_cache_root += CACHE_ROOT;
572 }
573
574    // Test d_http_cache_table because this method can be called before that
575 // instance is created and also can be called later to change the cache
576 // root. jhrg 05.14.08
577 if (d_http_cache_table)
578 d_http_cache_table->set_cache_root(d_cache_root);
579}
580
591
592void HTTPCache::set_cache_enabled(bool mode) {
593    lock_cache_interface();
594
595 d_cache_enabled = mode;
596
597 unlock_cache_interface();
598}
599
601
602bool HTTPCache::is_cache_enabled() const {
603    DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" << endl);
604 return d_cache_enabled;
605}
606
616void HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) {
617    lock_cache_interface();
618
619 d_cache_disconnected = mode;
620
621 unlock_cache_interface();
622}
623
625
626CacheDisconnectedMode HTTPCache::get_cache_disconnected() const { return d_cache_disconnected; }
627
635
636void HTTPCache::set_expire_ignored(bool mode) {
637    lock_cache_interface();
638
639 d_expire_ignored = mode;
640
641 unlock_cache_interface();
642}
643
644/* Is the cache ignoring Expires headers returned with responses that have
645 been cached? */
646
647bool HTTPCache::is_expire_ignored() const { return d_expire_ignored; }
648
663
664void HTTPCache::set_max_size(unsigned long size) {
665 lock_cache_interface();
666
667 try {
668 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
669 unsigned long old_size = d_total_size;
670 d_total_size = new_size;
671 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
672 d_gc_buffer = d_total_size / CACHE_GC_PCT;
673
674 if (new_size < old_size && startGC()) {
675 perform_garbage_collection();
676 d_http_cache_table->cache_index_write();
677 }
678 } catch (...) {
679 unlock_cache_interface();
680 DBGN(cerr << "Unlocking interface." << endl);
681 throw;
682 }
683
684 DBG2(cerr << "Cache....... Total cache size: " << d_total_size << " with " << d_folder_size
685 << " bytes for meta information and folders and at least " << d_gc_buffer << " bytes free after every gc"
686 << endl);
687
688 unlock_cache_interface();
689}
690
692
693unsigned long HTTPCache::get_max_size() const { return d_total_size / MEGA; }
694
702
703void HTTPCache::set_max_entry_size(unsigned long size) {
704 lock_cache_interface();
705
706 try {
707 unsigned long new_size = size * MEGA;
708 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
709 unsigned long old_size = d_max_entry_size;
710 d_max_entry_size = new_size;
711 if (new_size < old_size && startGC()) {
712 perform_garbage_collection();
713 d_http_cache_table->cache_index_write();
714 }
715 }
716 } catch (...) {
717 unlock_cache_interface();
718 throw;
719 }
720
721 DBG2(cerr << "Cache...... Max entry cache size is " << d_max_entry_size << endl);
722
723 unlock_cache_interface();
724}
725
729
730unsigned long HTTPCache::get_max_entry_size() const { return d_max_entry_size / MEGA; }
731
741
742void HTTPCache::set_default_expiration(const int exp_time) {
743 lock_cache_interface();
744
745 d_default_expiration = exp_time;
746
747 unlock_cache_interface();
748}
749
751
752int HTTPCache::get_default_expiration() const { return d_default_expiration; }
753
757
758void HTTPCache::set_always_validate(bool validate) { d_always_validate = validate; }
759
762
763bool HTTPCache::get_always_validate() const { return d_always_validate; }
764
780
781void HTTPCache::set_cache_control(const vector<string> &cc) {
782 lock_cache_interface();
783
784 try {
785 d_cache_control = cc;
786
787        vector<string>::const_iterator i;
788        for (i = cc.begin(); i != cc.end(); ++i) {
789 string header = (*i).substr(0, (*i).find(':'));
790 string value = (*i).substr((*i).find(": ") + 2);
791 if (header != "Cache-Control") {
792 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
793 } else {
794 if (value == "no-cache" || value == "no-store")
795 d_cache_enabled = false;
796 else if (value.find("max-age") != string::npos) {
797 string max_age = value.substr(value.find("=") + 1);
798 d_max_age = parse_time(max_age.c_str());
799 } else if (value == "max-stale")
800 d_max_stale = 0; // indicates will take anything;
801 else if (value.find("max-stale") != string::npos) {
802 string max_stale = value.substr(value.find("=") + 1);
803 d_max_stale = parse_time(max_stale.c_str());
804 } else if (value.find("min-fresh") != string::npos) {
805 string min_fresh = value.substr(value.find("=") + 1);
806 d_min_fresh = parse_time(min_fresh.c_str());
807 }
808 }
809 }
810 } catch (...) {
811 unlock_cache_interface();
812 throw;
813 }
814
815 unlock_cache_interface();
816}
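// Illustrative sketch (not part of HTTPCache.cc): request-side directives are
// passed as complete header strings; the values here are assumed examples.
static void example_set_cache_control(HTTPCache *cache) {
    vector<string> cc;
    cc.push_back("Cache-Control: max-age=3600"); // only use responses cached within the last hour
    cc.push_back("Cache-Control: max-stale=60"); // accept responses up to 60 s past expiration
    cache->set_cache_control(cc);                // a header not named "Cache-Control" throws InternalErr
}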
817
821
822vector<string> HTTPCache::get_cache_control() { return d_cache_control; }
823
825
833
834bool HTTPCache::is_url_in_cache(const string &url) {
835 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
836
837 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
838 bool status = entry != 0;
839 if (entry) {
840 entry->unlock_read_response();
841 }
842 return status;
843}
844
849
850bool is_hop_by_hop_header(const string &header) {
851 return header.find("Connection") != string::npos || header.find("Keep-Alive") != string::npos ||
852 header.find("Proxy-Authenticate") != string::npos || header.find("Proxy-Authorization") != string::npos ||
853 header.find("Transfer-Encoding") != string::npos || header.find("Upgrade") != string::npos;
854}
855
866
867void HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) {
868 string fname = cachename + CACHE_META;
869 d_open_files.push_back(fname);
870
871 FILE *dest = fopen(fname.c_str(), "w");
872 if (!dest) {
873 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry file.");
874 }
875
876 vector<string>::const_iterator i;
877 for (i = headers.begin(); i != headers.end(); ++i) {
878 if (!is_hop_by_hop_header(*i)) {
879 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
880 if (s != 1) {
881 fclose(dest);
882 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
883 }
884 s = fwrite("\n", 1, 1, dest);
885 if (s != 1) {
886 fclose(dest);
887 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
888 }
889 }
890 }
891
892 int res = fclose(dest);
893 if (res) {
894 DBG(cerr << "HTTPCache::write_metadata - Failed to close " << dest << endl);
895 }
896
897 d_open_files.pop_back();
898}
899
909
910void HTTPCache::read_metadata(const string &cachename, vector<string> &headers) {
911 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
912 if (!md) {
913 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry meta data file.");
914 }
915
916 const size_t line_buf_len = 1024;
917 char line[line_buf_len];
918 while (!feof(md) && fgets(line, line_buf_len, md)) {
919 line[std::min(line_buf_len, strnlen(line, line_buf_len)) - 1] = '\0'; // erase newline
920 headers.push_back(string(line));
921 }
922
923 int res = fclose(md);
924 if (res) {
925 DBG(cerr << "HTTPCache::read_metadata - Failed to close " << md << endl);
926 }
927}
928
949
950int HTTPCache::write_body(const string &cachename, const FILE *src) {
951 d_open_files.push_back(cachename);
952
953 FILE *dest = fopen(cachename.c_str(), "wb");
954 if (!dest) {
955 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry file.");
956 }
957
958 // Read and write in 1k blocks; an attempt at doing this efficiently.
959 // 09/30/02 jhrg
960 char line[1024];
961 size_t n;
962 int total = 0;
963 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
964 total += fwrite(line, 1, n, dest);
965 DBG2(sleep(3));
966 }
967
968 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
969 int res = fclose(dest);
970 res = res & unlink(cachename.c_str());
971 if (res) {
972 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " << dest << endl);
973 }
974 throw InternalErr(__FILE__, __LINE__, "I/O error transferring data to the cache.");
975 }
976
977 rewind(const_cast<FILE *>(src));
978
979 int res = fclose(dest);
980 if (res) {
981 DBG(cerr << "HTTPCache::write_body - Failed to close " << dest << endl);
982 }
983
984 d_open_files.pop_back();
985
986 return total;
987}
988
996
997FILE *HTTPCache::open_body(const string &cachename) {
998 DBG(cerr << "cachename: " << cachename << endl);
999
1000 FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1001 if (!src)
1002 throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1003
1004 return src;
1005}
1006
1031
1032bool HTTPCache::cache_response(const string &url, time_t request_time, const vector<string> &headers,
1033 const FILE *body) {
1034 lock_cache_interface();
1035
1036 DBG(cerr << "Caching url: " << url << "." << endl);
1037
1038 try {
1039 // If this is not an http or https URL, don't cache.
1040 if (url.find("http:") == string::npos && url.find("https:") == string::npos) {
1041 unlock_cache_interface();
1042 return false;
1043 }
1044
1045 // This does nothing if url is not already in the cache. It's
1046 // more efficient to do this than to first check and see if the entry
1047 // exists. 10/10/02 jhrg
1048 d_http_cache_table->remove_entry_from_cache_table(url);
1049
1050        HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
1051        entry->lock_write_response();
1052
1053 try {
1054 d_http_cache_table->parse_headers(entry, d_max_entry_size,
1055 headers); // etag, lm, date, age, expires, max_age.
1056 if (entry->is_no_cache()) {
1057 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url << ")"
1058 << endl);
1059 entry->unlock_write_response();
1060 delete entry;
1061 entry = 0;
1062 unlock_cache_interface();
1063 return false;
1064 }
1065
1066 // corrected_initial_age, freshness_lifetime, response_time.
1067 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1068
1069 d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1070 // move these write function to cache table
1071 entry->set_size(write_body(entry->get_cachename(), body));
1072 write_metadata(entry->get_cachename(), headers);
1073 d_http_cache_table->add_entry_to_cache_table(entry);
1074 entry->unlock_write_response();
1075 } catch (ResponseTooBigErr &e) {
1076 // Oops. Bummer. Clean up and exit.
1077 DBG(cerr << e.get_error_message() << endl);
1078 REMOVE(entry->get_cachename().c_str());
1079 REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1080 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url << ")" << endl);
1081 entry->unlock_write_response();
1082 delete entry;
1083 entry = 0;
1084 unlock_cache_interface();
1085 return false;
1086 }
1087
1088 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1089 if (startGC())
1090 perform_garbage_collection();
1091
1092 d_http_cache_table->cache_index_write(); // resets new_entries
1093 }
1094 } catch (...) {
1095 unlock_cache_interface();
1096 throw;
1097 }
1098
1099 unlock_cache_interface();
1100
1101 return true;
1102}
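// Illustrative sketch (not part of HTTPCache.cc): caching a freshly fetched
// response. The URL, response headers and body stream are assumed to come from
// an HTTP client such as HTTPConnect.
static void example_cache_a_response(HTTPCache *cache, const string &url, const vector<string> &resp_headers,
                                     FILE *resp_body) {
    time_t request_time = time(0); // when the request was sent
    if (!cache->is_url_in_cache(url)) {
        // cache_response() returns false when the response is marked no-cache,
        // is larger than the maximum entry size, or the URL is not http/https.
        if (!cache->cache_response(url, request_time, resp_headers, resp_body))
            cerr << "Response for " << url << " was not cached." << endl;
    }
}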
1103
1121
1122vector<string> HTTPCache::get_conditional_request_headers(const string &url) {
1123 lock_cache_interface();
1124
1125 HTTPCacheTable::CacheEntry *entry = 0;
1126 vector<string> headers;
1127
1128 DBG(cerr << "Getting conditional request headers for " << url << endl);
1129
1130 try {
1131 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1132 if (!entry)
1133 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1134
1135 if (entry->get_etag() != "")
1136 headers.push_back(string("If-None-Match: ") + entry->get_etag());
1137
1138 if (entry->get_lm() > 0) {
1139 time_t lm = entry->get_lm();
1140 headers.push_back(string("If-Modified-Since: ") + date_time_str(&lm));
1141 } else if (entry->get_max_age() > 0) {
1142 time_t max_age = entry->get_max_age();
1143 headers.push_back(string("If-Modified-Since: ") + date_time_str(&max_age));
1144 } else if (entry->get_expires() > 0) {
1145 time_t expires = entry->get_expires();
1146 headers.push_back(string("If-Modified-Since: ") + date_time_str(&expires));
1147 }
1148 entry->unlock_read_response();
1149 unlock_cache_interface();
1150 } catch (...) {
1151 unlock_cache_interface();
1152 if (entry) {
1153 entry->unlock_read_response();
1154 }
1155 throw;
1156 }
1157
1158 return headers;
1159}
1160
1163
1164struct HeaderLess : binary_function<const string &, const string &, bool> {
1165 bool operator()(const string &s1, const string &s2) const {
1166 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':'));
1167 }
1168};
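// Because HeaderLess compares headers by name only, a set<string, HeaderLess>
// treats "ETag: abc" and "ETag: xyz" as duplicates and keeps whichever was
// inserted first. update_response() below inserts the new headers before the
// old ones, so a new header shadows any cached header with the same name.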
1169
1182
1183void HTTPCache::update_response(const string &url, time_t request_time, const vector<string> &headers) {
1184 lock_cache_interface();
1185
1186 HTTPCacheTable::CacheEntry *entry = 0;
1187 DBG(cerr << "Updating the response headers for: " << url << endl);
1188
1189 try {
1190 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1191 if (!entry)
1192 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1193
1194        // Merge the new headers with the existing HTTPCacheTable::CacheEntry object.
1195 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1196
1197 // Update corrected_initial_age, freshness_lifetime, response_time.
1198 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1199
1200 // Merge the new headers with those in the persistent store. How:
1201 // Load the new headers into a set, then merge the old headers. Since
1202 // set<> ignores duplicates, old headers with the same name as a new
1203 // header will got into the bit bucket. Define a special compare
1204        // header will go into the bit bucket. Define a special compare
1205 // name and not their value too.
1206 set<string, HeaderLess> merged_headers;
1207
1208 // Load in the new headers
1209 copy(headers.begin(), headers.end(), inserter(merged_headers, merged_headers.begin()));
1210
1211 // Get the old headers and load them in.
1212 vector<string> old_headers;
1213 read_metadata(entry->get_cachename(), old_headers);
1214 copy(old_headers.begin(), old_headers.end(), inserter(merged_headers, merged_headers.begin()));
1215
1216 // Read the values back out. Use reverse iterators with back_inserter
1217 // to preserve header order. NB: vector<> does not support push_front
1218 // so we can't use front_inserter(). 01/09/03 jhrg
1219 vector<string> result;
1220 copy(merged_headers.rbegin(), merged_headers.rend(), back_inserter(result));
1221
1222 write_metadata(entry->get_cachename(), result);
1223 entry->unlock_write_response();
1224 unlock_cache_interface();
1225 } catch (...) {
1226 if (entry) {
1227 entry->unlock_read_response();
1228 }
1229 unlock_cache_interface();
1230 throw;
1231 }
1232}
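// Illustrative revalidation sketch (not part of HTTPCache.cc): when a cached entry
// is stale, the conditional headers go out with a new request; on "304 Not Modified"
// only the stored headers are refreshed. The HTTP plumbing and the got_304 flag are
// assumed to be supplied by the caller.
static void example_revalidate(HTTPCache *cache, const string &url, const vector<string> &new_resp_headers,
                               bool got_304) {
    if (!cache->is_url_valid(url)) {
        vector<string> cond = cache->get_conditional_request_headers(url);
        // ... send the request with the If-None-Match / If-Modified-Since headers in 'cond' ...
        if (got_304)
            cache->update_response(url, time(0), new_resp_headers); // merge headers, keep the cached body
    }
}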
1233
1244
1245bool HTTPCache::is_url_valid(const string &url) {
1246 lock_cache_interface();
1247
1248 bool freshness;
1249 HTTPCacheTable::CacheEntry *entry = 0;
1250
1251 DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1252
1253 try {
1254 if (d_always_validate) {
1255 unlock_cache_interface();
1256 return false; // force re-validation.
1257 }
1258
1259 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1260 if (!entry)
1261 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1262
1263 // If we supported range requests, we'd need code here to check if
1264 // there was only a partial response in the cache. 10/02/02 jhrg
1265
1266 // In case this entry is of type "must-revalidate" then we consider it
1267 // invalid.
1268 if (entry->get_must_revalidate()) {
1269 entry->unlock_read_response();
1270 unlock_cache_interface();
1271 return false;
1272 }
1273
1274 time_t resident_time = time(NULL) - entry->get_response_time();
1275 time_t current_age = entry->get_corrected_initial_age() + resident_time;
1276
1277 // Check that the max-age, max-stale, and min-fresh directives
1278        // given in the request Cache-Control header are followed.
1279 if (d_max_age >= 0 && current_age > d_max_age) {
1280 DBG(cerr << "Cache....... Max-age validation" << endl);
1281 entry->unlock_read_response();
1282 unlock_cache_interface();
1283 return false;
1284 }
1285 if (d_min_fresh >= 0 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1286 DBG(cerr << "Cache....... Min-fresh validation" << endl);
1287 entry->unlock_read_response();
1288 unlock_cache_interface();
1289 return false;
1290 }
1291
1292 freshness = (entry->get_freshness_lifetime() + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1293 entry->unlock_read_response();
1294 unlock_cache_interface();
1295 } catch (...) {
1296 if (entry) {
1297 entry->unlock_read_response();
1298 }
1299 unlock_cache_interface();
1300 throw;
1301 }
1302
1303 return freshness;
1304}
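// Worked example of the freshness test above (illustrative numbers): with a
// corrected_initial_age of 100 s, a resident_time of 500 s and a
// freshness_lifetime of 3600 s, current_age is 600 s and the entry is fresh
// because 3600 + max(0, d_max_stale) > 600; if the request carried
// "Cache-Control: max-age=300", the same entry would already have been rejected
// by the d_max_age check.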
1305
1332
1333FILE *HTTPCache::get_cached_response(const string &url, vector<string> &headers, string &cacheName) {
1334 lock_cache_interface();
1335
1336 FILE *body = 0;
1337 HTTPCacheTable::CacheEntry *entry = 0;
1338
1339 DBG(cerr << "Getting the cached response for " << url << endl);
1340
1341 try {
1342 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1343 if (!entry) {
1344 unlock_cache_interface();
1345 return 0;
1346 }
1347
1348 cacheName = entry->get_cachename();
1349 read_metadata(entry->get_cachename(), headers);
1350
1351 DBG(cerr << "Headers just read from cache: " << endl);
1352 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1353
1354 body = open_body(entry->get_cachename());
1355
1356 DBG(cerr << "Returning: " << url << " from the cache." << endl);
1357
1358 d_http_cache_table->bind_entry_to_data(entry, body);
1359 } catch (...) {
1360 // Why make this unlock operation conditional on entry?
1361 if (entry)
1362 unlock_cache_interface();
1363 if (body != 0)
1364 fclose(body);
1365 throw;
1366 }
1367
1368 unlock_cache_interface();
1369
1370 return body;
1371}
1372
1383
1384FILE *HTTPCache::get_cached_response(const string &url, vector<string> &headers) {
1385 string discard_name;
1386 return get_cached_response(url, headers, discard_name);
1387}
1388
1398
1399FILE *HTTPCache::get_cached_response(const string &url) {
1400 string discard_name;
1401 vector<string> discard_headers;
1402 return get_cached_response(url, discard_headers, discard_name);
1403}
1404
1416
1417void HTTPCache::release_cached_response(FILE *body) {
1418    lock_cache_interface();
1419
1420 try {
1421 // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1422 d_http_cache_table->uncouple_entry_from_data(body);
1423 } catch (...) {
1424 unlock_cache_interface();
1425 throw;
1426 }
1427
1428 unlock_cache_interface();
1429}
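// Illustrative sketch (not part of HTTPCache.cc): reading a response back from the
// cache. The returned stream must be handed back via release_cached_response(); per
// the note above, calling fclose() on it directly is unsafe.
static void example_read_cached_response(HTTPCache *cache, const string &url) {
    vector<string> headers;
    if (FILE *body = cache->get_cached_response(url, headers)) {
        // ... consume 'headers' and read the body stream here ...
        cache->release_cached_response(body); // unlocks the entry and uncouples the stream
    }
}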
1430
1442
1443void HTTPCache::purge_cache() {
1444    lock_cache_interface();
1445
1446 try {
1447 if (d_http_cache_table->is_locked_read_responses())
1448 throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1449
1450 d_http_cache_table->delete_all_entries();
1451 } catch (...) {
1452 unlock_cache_interface();
1453 throw;
1454 }
1455
1456 unlock_cache_interface();
1457}
1458
1459} // namespace libdap
A class for error processing.
Definition Error.h:92
std::string get_error_message() const
Definition Error.cc:212
CacheDisconnectedMode get_cache_disconnected() const
Definition HTTPCache.cc:626
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition HTTPCache.cc:125
vector< string > get_cache_control()
Definition HTTPCache.cc:822
void set_expire_ignored(bool mode)
Definition HTTPCache.cc:636
void set_default_expiration(int exp_time)
Definition HTTPCache.cc:742
string get_cache_root() const
Definition HTTPCache.cc:480
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition HTTPCache.cc:616
void release_cached_response(FILE *response)
vector< string > get_conditional_request_headers(const string &url)
unsigned long get_max_entry_size() const
Definition HTTPCache.cc:730
void set_cache_enabled(bool mode)
Definition HTTPCache.cc:592
unsigned long get_max_size() const
Definition HTTPCache.cc:693
void set_max_entry_size(unsigned long size)
Definition HTTPCache.cc:703
bool get_always_validate() const
Definition HTTPCache.cc:763
int get_default_expiration() const
Definition HTTPCache.cc:752
bool is_url_valid(const string &url)
void set_always_validate(bool validate)
Definition HTTPCache.cc:758
void update_response(const string &url, time_t request_time, const vector< string > &headers)
void set_max_size(unsigned long size)
Definition HTTPCache.cc:664
void set_cache_control(const vector< string > &cc)
Definition HTTPCache.cc:781
virtual ~HTTPCache()
Definition HTTPCache.cc:284
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
bool is_cache_enabled() const
Definition HTTPCache.cc:602
A class for software fault reporting.
Definition InternalErr.h:61
static EventHandler * register_handler(int signum, EventHandler *eh, bool ignore_by_default=false)
static SignalHandler * instance()
static EventHandler * remove_handler(int signum)
STL iterator class.
top level DAP object to house generic methods
Definition AISConnect.cc:30
bool is_hop_by_hop_header(const string &header)
Definition HTTPCache.cc:850
string date_time_str(time_t *calendar, bool local)
Definition util_mit.cc:260
time_t parse_time(const char *str, bool expand)
Definition util_mit.cc:144