48#include "InternalErr.h"
49#include "ResponseTooBigErr.h"
51#include "SignalHandler.h"
54#include "HTTPCacheInterruptHandler.h"
55#include "HTTPCacheMacros.h"
56#include "HTTPCacheTable.h"
57#include "SignalHandlerRegisteredErr.h"
// Mutex serializing access to the singleton: it is initialized by
// once_init_routine() and taken/released with LOCK()/UNLOCK() around the
// creation of _instance elsewhere in this file.
75static pthread_mutex_t instance_mutex;
// Control variable handed to pthread_once() so once_init_routine() runs once.
76static pthread_once_t once_block = PTHREAD_ONCE_INIT;
// Initial value of d_default_expiration (presumably seconds, i.e. one day --
// TODO confirm units).
// NOTE(review): the expansion is not parenthesized, so using this macro inside
// a larger arithmetic expression (e.g. as a divisor) would bind unexpectedly.
78#define NO_LM_EXPIRATION 24 * 3600
// Threshold compared against HTTPCacheTable::get_new_entries(); once exceeded,
// garbage collection runs and the cache index is written.
80#define DUMP_FREQUENCY 10
// Default total cache size; multiplied by MEGA when initializing d_total_size.
83#define CACHE_TOTAL_SIZE 20
// Divisor applied to the total size to obtain d_folder_size.
84#define CACHE_FOLDER_PCT 10
// Divisor applied to the total size to obtain d_gc_buffer.
85#define CACHE_GC_PCT 10
// Lower bound (before scaling by MEGA) applied to a requested total cache size.
86#define MIN_CACHE_TOTAL_SIZE 5
// Default per-entry size limit; multiplied by MEGA for d_max_entry_size.
87#define MAX_CACHE_ENTRY_SIZE 3
// One-time initializer passed to pthread_once(): sets up instance_mutex via
// the INIT() macro. The declaration of `status` and the test that guards the
// throw below are on lines not shown in this listing.
89static void once_init_routine() {
91 status = INIT(&instance_mutex);
// Reported as an InternalErr carrying this file/line; presumably only reached
// when INIT() returned a failure status -- the condition is not visible here.
94 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
126 int status = pthread_once(&once_block, once_init_routine);
128 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
130 LOCK(&instance_mutex);
132 DBG(cerr <<
"Entering instance(); (" << hex << _instance << dec <<
")" <<
"... ");
136 _instance =
new HTTPCache(cache_root, force);
138 DBG(cerr <<
"New instance: " << _instance <<
", cache root: " << _instance->d_cache_root << endl);
140 atexit(delete_instance);
156 "Could not register event handler for SIGINT without superseding an existing one.");
163 "Could not register event handler for SIGPIPE without superseding an existing one.");
170 "Could not register event handler for SIGTERM without superseding an existing one.");
175 DBG2(cerr <<
"The constructor threw an Error!" << endl);
176 UNLOCK(&instance_mutex);
180 UNLOCK(&instance_mutex);
181 DBGN(cerr <<
"returning " << hex << _instance << dec << endl);
// Destroys the singleton HTTPCache, if one exists. This function is the one
// registered with atexit() when the instance is created.
// Some lines of the body (between the reset of _instance and the final trace)
// are not shown in this listing.
189void HTTPCache::delete_instance() {
190 DBG(cerr <<
"Entering delete_instance()..." << endl);
// Nothing to do when no instance was ever created (or it was already deleted).
192 if (HTTPCache::_instance) {
193 DBG(cerr <<
"Deleting the cache: " << HTTPCache::_instance << endl);
194 delete HTTPCache::_instance;
// Reset the pointer so a repeated call sees no instance.
195 HTTPCache::_instance = 0;
203 DBG(cerr <<
"Exiting delete_instance()" << endl);
// Constructs the cache object.
//
// cache_root: directory to use for the cache; forwarded to set_cache_root().
// force:      forwarded to get_single_user_lock().
//
// Throws InternalErr if the one-time mutex initialization fails and Error if
// the single-user lock or the file system block size cannot be obtained (the
// guarding conditions for some of these throws are on lines not shown here).
220HTTPCache::HTTPCache(
string cache_root,
bool force)
221 : d_locked_open_file(0), d_cache_enabled(false), d_cache_protected(false),
223 d_cache_disconnected(DISCONNECT_NONE),
225 d_expire_ignored(false), d_always_validate(false), d_total_size(CACHE_TOTAL_SIZE * MEGA),
// NOTE(review): d_folder_size and d_gc_buffer are derived here from the
// unscaled CACHE_TOTAL_SIZE, whereas the code that later changes the total size
// derives them from d_total_size (already multiplied by MEGA). The two paths
// yield values in different units -- confirm which is intended.
226 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
227 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), d_default_expiration(NO_LM_EXPIRATION), d_max_age(-1),
228 d_max_stale(-1), d_min_fresh(-1), d_http_cache_table(0) {
229 DBG(cerr <<
"Entering the constructor for " <<
this <<
"... ");
// Make sure the file-scope instance_mutex has been initialized exactly once.
231 int status = pthread_once(&once_block, once_init_routine);
233 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
// Per-object mutex; destroyed with DESTROY() in the destructor.
235 INIT(&d_cache_mutex);
// Establishes d_cache_root from the argument (or from defaults).
247 set_cache_root(cache_root);
250 if (!get_single_user_lock(force))
251 throw Error(internal_error,
"Could not get single user lock for the cache");
// NOTE(review): stat() is applied to the `cache_root` argument, while the
// table below is built from d_cache_root; the two can differ after
// set_cache_root() -- confirm this is intended.
261 if (stat(cache_root.c_str(), &s) == 0)
262 block_size = s.st_blksize;
264 throw Error(internal_error,
"Could not set file system block size.");
266 d_http_cache_table =
new HTTPCacheTable(d_cache_root, block_size);
// The cache only becomes usable once the table exists.
267 d_cache_enabled =
true;
269 DBGN(cerr <<
"exiting" << endl);
285 DBG(cerr <<
"Entering the destructor for " <<
this <<
"... ");
289 perform_garbage_collection();
291 d_http_cache_table->cache_index_write();
300 delete d_http_cache_table;
302 release_single_user_lock();
304 DBGN(cerr <<
"exiting destructor." << endl);
305 DESTROY(&d_cache_mutex);
// Tells the garbage collector when it may stop: returns true once the table's
// current size plus d_folder_size has dropped below d_total_size minus
// d_gc_buffer.
315bool HTTPCache::stopGC()
const {
316 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
// Tells callers whether garbage collection is needed: returns true when the
// table's current size plus d_folder_size exceeds d_total_size.
325bool HTTPCache::startGC()
const {
326 DBG(cerr <<
"startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
327 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
// Entry point for garbage collection. Only the debug trace is visible in this
// listing; the collection steps themselves are on lines not shown.
// Callers in this file follow it with cache_index_write() on the table.
344void HTTPCache::perform_garbage_collection() {
345 DBG(cerr <<
"Performing garbage collection" << endl);
// Garbage-collection pass for expired entries: unless d_expire_ignored is set,
// asks the cache table to delete its expired entries.
363void HTTPCache::expired_gc() {
364 if (!d_expire_ignored) {
365 d_http_cache_table->delete_expired_entries();
// Garbage-collection pass driven by hit counts: delegates to the table's
// delete_by_hits(). How `hits` is chosen (and any loop around this call) is on
// lines not shown in this listing.
385void HTTPCache::hits_gc() {
390 d_http_cache_table->delete_by_hits(hits);
// Garbage-collection pass driven by entry size: delegates to the table's
// delete_by_size(), passing the current per-entry limit d_max_entry_size.
400void HTTPCache::too_big_gc() {
402 d_http_cache_table->delete_by_size(d_max_entry_size);
// Acquires the cache's single-user lock, represented by a lock file named
// d_cache_root + CACHE_LOCK that is kept open in d_locked_open_file.
//
// force: how this flag is consulted is on lines not shown in this listing;
//        presumably it permits removing a lock file left by another user --
//        TODO confirm.
// Returns a bool; the return statements are not visible here.
417bool HTTPCache::get_single_user_lock(
bool force) {
// Only attempt to take the lock when this object does not already hold it.
418 if (!d_locked_open_file) {
// Make sure the cache directory exists before touching the lock file.
424 create_cache_root(d_cache_root);
429 DBG(cerr <<
"Failure to create the cache root" << endl);
434 string lock = d_cache_root + CACHE_LOCK;
// A successful open for reading means a lock file already exists.
435 if ((fp = fopen(lock.c_str(),
"r")) != NULL) {
436 int res = fclose(fp);
438 DBG(cerr <<
"Failed to close " << (
void *)fp << endl);
// Removes the existing lock file (the condition guarding this is not shown).
441 REMOVE(lock.c_str());
// Create (or truncate) the lock file; failure to open it is only traced here.
446 if ((fp = fopen(lock.c_str(),
"w")) == NULL) {
447 DBG(cerr <<
"Could not open for write access" << endl);
// Holding the open FILE* is what marks the lock as owned by this object.
451 d_locked_open_file = fp;
455 DBG(cerr <<
"locked_open_file is true" << endl);
// Releases the single-user lock: closes the lock file held in
// d_locked_open_file, clears that member, and removes the lock file
// (d_cache_root + CACHE_LOCK) from disk.
461void HTTPCache::release_single_user_lock() {
462 if (d_locked_open_file) {
463 int res = fclose(d_locked_open_file);
// A failed close is only traced; the lock is still treated as released.
465 DBG(cerr <<
"Failed to close " << (
void *)d_locked_open_file << endl);
467 d_locked_open_file = 0;
470 string lock = d_cache_root + CACHE_LOCK;
471 REMOVE(lock.c_str());
// Creates the cache root directory named by `cache_root`, throwing Error when
// a directory cannot be created.
//
// Two variants are visible below: one that walks the path and creates each
// missing component, and one that issues a single mkdir(). The lines that
// select between them are not shown (presumably conditional compilation per
// platform -- TODO confirm).
490void HTTPCache::create_cache_root(
const string &cache_root) {
// Start searching after a leading "X:\"-style prefix (when the second
// character is ':') or after the leading separator otherwise.
// NOTE(review): cache_root[1] assumes the string is non-empty -- confirm the
// callers guarantee that.
492 string::size_type cur = cache_root[1] ==
':' ? 3 : 1;
// Visit every separator position so each leading sub-path is checked in turn.
495 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
496 string dir = cache_root.substr(0, cur);
497 struct stat stat_info;
// stat() failing is treated as "this component does not exist yet".
498 if (stat(dir.c_str(), &stat_info) == -1) {
499 DBG2(cerr <<
"Cache....... Creating " << dir << endl);
// Clear the umask so the 0777 mode below is applied as given; the previous
// mask is kept in `mask` (its restoration is not visible in this listing).
500 mode_t mask = UMASK(0);
501 if (MKDIR(dir.c_str(), 0777) < 0) {
502 DBG2(cerr <<
"Error: can't create." << endl);
504 throw Error(
string(
"Could not create the directory for the cache. Failed when building path at ") +
509 DBG2(cerr <<
"Cache....... Found " << dir << endl);
// Second variant: create the directory in one call.
517 mode_t mask = umask(0);
// An already-existing directory (EEXIST) is not an error.
521 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
523 throw Error(
"Could not create the directory for the cache at '" + cache_root +
"' (" + strerror(errno) +
").");
// Sets d_cache_root, either from `root` or from a default location, and
// forwards the result to the cache table when one exists.
//
// The branch conditions and the assignment from `root` are on lines not shown
// in this listing; the comments below describe only the visible statements.
546void HTTPCache::set_cache_root(
const string &root) {
// Ensure the stored root ends with a directory separator.
550 if (d_cache_root[d_cache_root.size() - 1] != DIR_SEPARATOR_CHAR)
551 d_cache_root += DIR_SEPARATOR_CHAR;
// Default-location lookup: candidates are the DODS_CACHE, TMP and TEMP
// environment variables and finally CACHE_LOCATION (the tests that chain these
// fallbacks are not visible).
556 char *cr = (
char *)getenv(
"DODS_CACHE");
558 cr = (
char *)getenv(
"TMP");
560 cr = (
char *)getenv(
"TEMP");
562 cr = (
char *)CACHE_LOCATION;
565 d_cache_root = CACHE_LOCATION;
568 if (d_cache_root[d_cache_root.size() - 1] != DIR_SEPARATOR_CHAR)
569 d_cache_root += DIR_SEPARATOR_CHAR;
// Append the cache's own sub-directory name to the chosen base location.
571 d_cache_root += CACHE_ROOT;
// Keep an already-constructed table in sync with the new root.
577 if (d_http_cache_table)
578 d_http_cache_table->set_cache_root(d_cache_root);
593 lock_cache_interface();
595 d_cache_enabled = mode;
597 unlock_cache_interface();
603 DBG2(cerr <<
"In HTTPCache::is_cache_enabled: (" << d_cache_enabled <<
")" << endl);
604 return d_cache_enabled;
617 lock_cache_interface();
619 d_cache_disconnected = mode;
621 unlock_cache_interface();
637 lock_cache_interface();
639 d_expire_ignored = mode;
641 unlock_cache_interface();
// Returns the current value of the d_expire_ignored flag (the flag that
// expired_gc() checks before deleting expired entries). Read without taking
// the cache interface lock.
647bool HTTPCache::is_expire_ignored()
const {
return d_expire_ignored; }
665 lock_cache_interface();
668 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
669 unsigned long old_size = d_total_size;
670 d_total_size = new_size;
671 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
672 d_gc_buffer = d_total_size / CACHE_GC_PCT;
674 if (new_size < old_size && startGC()) {
675 perform_garbage_collection();
676 d_http_cache_table->cache_index_write();
679 unlock_cache_interface();
680 DBGN(cerr <<
"Unlocking interface." << endl);
684 DBG2(cerr <<
"Cache....... Total cache size: " << d_total_size <<
" with " << d_folder_size
685 <<
" bytes for meta information and folders and at least " << d_gc_buffer <<
" bytes free after every gc"
688 unlock_cache_interface();
704 lock_cache_interface();
707 unsigned long new_size = size * MEGA;
708 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
709 unsigned long old_size = d_max_entry_size;
710 d_max_entry_size = new_size;
711 if (new_size < old_size && startGC()) {
712 perform_garbage_collection();
713 d_http_cache_table->cache_index_write();
717 unlock_cache_interface();
721 DBG2(cerr <<
"Cache...... Max entry cache size is " << d_max_entry_size << endl);
723 unlock_cache_interface();
743 lock_cache_interface();
745 d_default_expiration = exp_time;
747 unlock_cache_interface();
782 lock_cache_interface();
785 d_cache_control = cc;
788 for (i = cc.begin(); i != cc.end(); ++i) {
789 string header = (*i).substr(0, (*i).find(
':'));
790 string value = (*i).substr((*i).find(
": ") + 2);
791 if (header !=
"Cache-Control") {
792 throw InternalErr(__FILE__, __LINE__,
"Expected cache control header not found.");
794 if (value ==
"no-cache" || value ==
"no-store")
795 d_cache_enabled =
false;
796 else if (value.find(
"max-age") != string::npos) {
797 string max_age = value.substr(value.find(
"=") + 1);
799 }
else if (value ==
"max-stale")
801 else if (value.find(
"max-stale") != string::npos) {
802 string max_stale = value.substr(value.find(
"=") + 1);
804 }
else if (value.find(
"min-fresh") != string::npos) {
805 string min_fresh = value.substr(value.find(
"=") + 1);
811 unlock_cache_interface();
815 unlock_cache_interface();
// Reports whether `url` has an entry in the cache. The lookup that produces
// `entry` and the return statement are on lines not shown in this listing.
834bool HTTPCache::is_url_in_cache(
const string &url) {
835 DBG(cerr <<
"Is this url in the cache? (" << url <<
")" << endl);
// A non-null entry means the URL is cached.
838 bool status = entry != 0;
// Release the read lock on the entry (presumably taken by the lookup and only
// released when an entry was found -- the guarding condition is not visible).
840 entry->unlock_read_response();
851 return header.find(
"Connection") != string::npos || header.find(
"Keep-Alive") != string::npos ||
852 header.find(
"Proxy-Authenticate") != string::npos || header.find(
"Proxy-Authorization") != string::npos ||
853 header.find(
"Transfer-Encoding") != string::npos || header.find(
"Upgrade") != string::npos;
// Writes the response headers for a cache entry to the file
// cachename + CACHE_META, one header per line.
//
// cachename: base file name of the cache entry.
// headers:   header lines to store.
// Throws InternalErr when the file cannot be opened or a write fails (the
// tests guarding the throws, and any filtering of headers, are on lines not
// shown in this listing).
867void HTTPCache::write_metadata(
const string &cachename,
const vector<string> &headers) {
868 string fname = cachename + CACHE_META;
// Record the file name while it is being written; it is popped again at the
// end of this function.
869 d_open_files.push_back(fname);
871 FILE *dest = fopen(fname.c_str(),
"w");
873 throw InternalErr(__FILE__, __LINE__,
"Could not open named cache entry file.");
876 vector<string>::const_iterator i;
877 for (i = headers.begin(); i != headers.end(); ++i) {
// Written as a single item of size() bytes, so fwrite() reports 1 on success.
879 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
882 throw InternalErr(__FILE__, __LINE__,
"could not write header: '" + (*i) +
"' " + long_to_string(s));
// Terminate each header with a newline so read_metadata() can split on lines.
884 s = fwrite(
"\n", 1, 1, dest);
887 throw InternalErr(__FILE__, __LINE__,
"could not write header: " + long_to_string(s));
892 int res = fclose(dest);
894 DBG(cerr <<
"HTTPCache::write_metadata - Failed to close " << dest << endl);
897 d_open_files.pop_back();
// Reads the stored headers for a cache entry from cachename + CACHE_META and
// appends them, one string per line, to `headers`.
//
// cachename: base file name of the cache entry.
// headers:   output vector; existing contents are kept and new lines appended.
// Throws InternalErr when the metadata file cannot be opened (the test guarding
// the throw is on a line not shown in this listing).
910void HTTPCache::read_metadata(
const string &cachename, vector<string> &headers) {
911 FILE *md = fopen(
string(cachename + CACHE_META).c_str(),
"r");
913 throw InternalErr(__FILE__, __LINE__,
"Could not open named cache entry meta data file.");
// Lines are read into a fixed 1024-byte buffer.
916 const size_t line_buf_len = 1024;
917 char line[line_buf_len];
918 while (!feof(md) && fgets(line, line_buf_len, md)) {
// Overwrite the last character read (expected to be the trailing newline).
// NOTE(review): if a line is longer than the buffer or the file's last line
// has no newline, this drops a real character; a zero-length string would
// index line[-1] -- confirm neither can occur with files written by
// write_metadata().
919 line[std::min(line_buf_len, strnlen(line, line_buf_len)) - 1] =
'\0';
920 headers.push_back(
string(line));
923 int res = fclose(md);
925 DBG(cerr <<
"HTTPCache::read_metadata - Failed to close " << md << endl);
// Copies the response body from the stream `src` into the file `cachename`.
//
// cachename: name of the cache file to create (opened in binary write mode).
// src:       source stream; const is cast away for reading and it is rewound
//            after the copy.
// Returns an int; from the visible code `total` accumulates the bytes written
// and is presumably the value returned -- the return statement is not shown.
// Throws InternalErr when the destination cannot be opened or an I/O error is
// flagged on either stream.
950int HTTPCache::write_body(
const string &cachename,
const FILE *src) {
// Record the file name while it is being written; popped again at the end.
951 d_open_files.push_back(cachename);
953 FILE *dest = fopen(cachename.c_str(),
"wb");
955 throw InternalErr(__FILE__, __LINE__,
"Could not open named cache entry file.");
// Copy in chunks of up to 1024 bytes until fread() reports no more data.
963 while ((n = fread(line, 1, 1024,
const_cast<FILE *
>(src))) > 0) {
964 total += fwrite(line, 1, n, dest);
// On an error from either stream, discard the partially written file.
968 if (ferror(
const_cast<FILE *
>(src)) || ferror(dest)) {
969 int res = fclose(dest);
// NOTE(review): this combines the two status codes with a bitwise AND, so the
// result is non-zero only when both calls report a non-zero status -- confirm
// that a single failure is meant to go unreported.
970 res = res & unlink(cachename.c_str());
972 DBG(cerr <<
"HTTPCache::write_body - Failed to close/unlink " << dest << endl);
974 throw InternalErr(__FILE__, __LINE__,
"I/O error transferring data to the cache.");
// Put the source stream back at its start so the caller can read it again.
977 rewind(
const_cast<FILE *
>(src));
979 int res = fclose(dest);
981 DBG(cerr <<
"HTTPCache::write_body - Failed to close " << dest << endl);
984 d_open_files.pop_back();
// Opens the cached body file `cachename` for binary reading and hands back the
// FILE pointer (the return statement is on a line not shown in this listing).
// Throws InternalErr when the file cannot be opened; the test guarding the
// throw is likewise not shown.
997FILE *HTTPCache::open_body(
const string &cachename) {
998 DBG(cerr <<
"cachename: " << cachename << endl);
1000 FILE *src = fopen(cachename.c_str(),
"rb");
1002 throw InternalErr(__FILE__, __LINE__,
"Could not open cache file.");
1034 lock_cache_interface();
1036 DBG(cerr <<
"Caching url: " << url <<
"." << endl);
1040 if (url.find(
"http:") == string::npos && url.find(
"https:") == string::npos) {
1041 unlock_cache_interface();
1048 d_http_cache_table->remove_entry_from_cache_table(url);
1051 entry->lock_write_response();
1054 d_http_cache_table->parse_headers(entry, d_max_entry_size,
1056 if (entry->is_no_cache()) {
1057 DBG(cerr <<
"Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url <<
")"
1059 entry->unlock_write_response();
1062 unlock_cache_interface();
1067 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1069 d_http_cache_table->create_location(entry);
1071 entry->set_size(write_body(entry->get_cachename(), body));
1072 write_metadata(entry->get_cachename(), headers);
1073 d_http_cache_table->add_entry_to_cache_table(entry);
1074 entry->unlock_write_response();
1078 REMOVE(entry->get_cachename().c_str());
1079 REMOVE(
string(entry->get_cachename() + CACHE_META).c_str());
1080 DBG(cerr <<
"Too big; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url <<
")" << endl);
1081 entry->unlock_write_response();
1084 unlock_cache_interface();
1088 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1090 perform_garbage_collection();
1092 d_http_cache_table->cache_index_write();
1095 unlock_cache_interface();
1099 unlock_cache_interface();
1123 lock_cache_interface();
1126 vector<string> headers;
1128 DBG(cerr <<
"Getting conditional request headers for " << url << endl);
1131 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1133 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1135 if (entry->get_etag() !=
"")
1136 headers.push_back(
string(
"If-None-Match: ") + entry->get_etag());
1138 if (entry->get_lm() > 0) {
1139 time_t lm = entry->get_lm();
1140 headers.push_back(
string(
"If-Modified-Since: ") +
date_time_str(&lm));
1141 }
else if (entry->get_max_age() > 0) {
1142 time_t max_age = entry->get_max_age();
1143 headers.push_back(
string(
"If-Modified-Since: ") +
date_time_str(&max_age));
1144 }
else if (entry->get_expires() > 0) {
1145 time_t expires = entry->get_expires();
1146 headers.push_back(
string(
"If-Modified-Since: ") +
date_time_str(&expires));
1148 entry->unlock_read_response();
1149 unlock_cache_interface();
1151 unlock_cache_interface();
1153 entry->unlock_read_response();
// Ordering functor for header strings that compares only the text before the
// first ':' in each string (the whole string when there is no ':').
// It is used as the comparator of a set<string, HeaderLess> in this file, so
// two headers with the same name count as equivalent there and only the first
// one inserted is kept.
// NOTE(review): std::binary_function was deprecated in C++11 and removed in
// C++17; the base class would need to go when building with a newer standard.
1164struct HeaderLess : binary_function<const string &, const string &, bool> {
1165 bool operator()(
const string &s1,
const string &s2)
const {
1166 return s1.substr(0, s1.find(
':')) < s2.substr(0, s2.find(
':'));
1184 lock_cache_interface();
1187 DBG(cerr <<
"Updating the response headers for: " << url << endl);
1190 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1192 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1195 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1198 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1206 set<string, HeaderLess> merged_headers;
1209 copy(headers.begin(), headers.end(), inserter(merged_headers, merged_headers.begin()));
1212 vector<string> old_headers;
1213 read_metadata(entry->get_cachename(), old_headers);
1214 copy(old_headers.begin(), old_headers.end(), inserter(merged_headers, merged_headers.begin()));
1219 vector<string> result;
1220 copy(merged_headers.rbegin(), merged_headers.rend(), back_inserter(result));
1222 write_metadata(entry->get_cachename(), result);
1223 entry->unlock_write_response();
1224 unlock_cache_interface();
1227 entry->unlock_read_response();
1229 unlock_cache_interface();
1246 lock_cache_interface();
1251 DBG(cerr <<
"Is this URL valid? (" << url <<
")" << endl);
1254 if (d_always_validate) {
1255 unlock_cache_interface();
1259 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1261 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1268 if (entry->get_must_revalidate()) {
1269 entry->unlock_read_response();
1270 unlock_cache_interface();
1274 time_t resident_time = time(NULL) - entry->get_response_time();
1275 time_t current_age = entry->get_corrected_initial_age() + resident_time;
1279 if (d_max_age >= 0 && current_age > d_max_age) {
1280 DBG(cerr <<
"Cache....... Max-age validation" << endl);
1281 entry->unlock_read_response();
1282 unlock_cache_interface();
1285 if (d_min_fresh >= 0 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1286 DBG(cerr <<
"Cache....... Min-fresh validation" << endl);
1287 entry->unlock_read_response();
1288 unlock_cache_interface();
1292 freshness = (entry->get_freshness_lifetime() + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1293 entry->unlock_read_response();
1294 unlock_cache_interface();
1297 entry->unlock_read_response();
1299 unlock_cache_interface();
1334 lock_cache_interface();
1339 DBG(cerr <<
"Getting the cached response for " << url << endl);
1342 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1344 unlock_cache_interface();
1348 cacheName = entry->get_cachename();
1349 read_metadata(entry->get_cachename(), headers);
1351 DBG(cerr <<
"Headers just read from cache: " << endl);
1352 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr,
"\n")));
1354 body = open_body(entry->get_cachename());
1356 DBG(cerr <<
"Returning: " << url <<
" from the cache." << endl);
1358 d_http_cache_table->bind_entry_to_data(entry, body);
1362 unlock_cache_interface();
1368 unlock_cache_interface();
1385 string discard_name;
1400 string discard_name;
1401 vector<string> discard_headers;
1418 lock_cache_interface();
1422 d_http_cache_table->uncouple_entry_from_data(body);
1424 unlock_cache_interface();
1428 unlock_cache_interface();
1444 lock_cache_interface();
1447 if (d_http_cache_table->is_locked_read_responses())
1448 throw Error(internal_error,
"Attempt to purge the cache with entries in use.");
1450 d_http_cache_table->delete_all_entries();
1452 unlock_cache_interface();
1456 unlock_cache_interface();
A class for error processing.
std::string get_error_message() const
CacheDisconnectedMode get_cache_disconnected() const
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
vector< string > get_cache_control()
void set_expire_ignored(bool mode)
void set_default_expiration(int exp_time)
string get_cache_root() const
void set_cache_disconnected(CacheDisconnectedMode mode)
void release_cached_response(FILE *response)
vector< string > get_conditional_request_headers(const string &url)
unsigned long get_max_entry_size() const
void set_cache_enabled(bool mode)
unsigned long get_max_size() const
void set_max_entry_size(unsigned long size)
bool get_always_validate() const
int get_default_expiration() const
bool is_url_valid(const string &url)
void set_always_validate(bool validate)
void update_response(const string &url, time_t request_time, const vector< string > &headers)
void set_max_size(unsigned long size)
void set_cache_control(const vector< string > &cc)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
bool is_cache_enabled() const
A class for software fault reporting.
static EventHandler * register_handler(int signum, EventHandler *eh, bool ignore_by_default=false)
static SignalHandler * instance()
static EventHandler * remove_handler(int signum)
top level DAP object to house generic methods
bool is_hop_by_hop_header(const string &header)
string date_time_str(time_t *calendar, bool local)
time_t parse_time(const char *str, bool expand)