libdap Updated for version 3.21.0
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCacheTable.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28#include <limits.h>
29#include <sys/stat.h>
30#include <sys/types.h> // for stat and mkdir
31#include <unistd.h> // for stat
32
33#include <cerrno>
34#include <cstring>
35
36#include <algorithm>
37#include <iostream>
38#include <iterator>
39#include <set>
40#include <sstream>
41
42#include "Error.h"
43#include "InternalErr.h"
44#include "ResponseTooBigErr.h"
45#ifndef WIN32
46#include "SignalHandler.h"
47#endif
48#include "HTTPCacheInterruptHandler.h"
49#include "HTTPCacheMacros.h"
50#include "HTTPCacheTable.h"
51
52#include "debug.h"
53#include "util_mit.h"
54
55#ifdef WIN32
56#include <direct.h>
57#include <fcntl.h>
58#include <time.h>
59#define MKDIR(a, b) _mkdir((a))
60#define REMOVE(a) \
61 do { \
62 int s = remove((a)); \
63 if (s != 0) \
64 throw InternalErr(__FILE__, __LINE__, "Cache error; could not remove file: " + long_to_string(s)); \
65 } while (0);
66#define MKSTEMP(a) _open(_mktemp((a)), _O_CREAT, _S_IREAD | _S_IWRITE)
67#define DIR_SEPARATOR_CHAR '\\'
68#define DIR_SEPARATOR_STR "\\"
69#else
70#define MKDIR(a, b) mkdir((a), (b))
71#define MKSTEMP(a) mkstemp((a))
72#define DIR_SEPARATOR_CHAR '/'
73#define DIR_SEPARATOR_STR "/"
74#endif
75
76#define CACHE_META ".meta"
77#define CACHE_INDEX ".index"
78#define CACHE_EMPTY_ETAG "@cache@"
79
80#define NO_LM_EXPIRATION 24 * 3600 // 24 hours
81#define MAX_LM_EXPIRATION 48 * 3600 // Max expiration from LM
82
83// If using LM to find the expiration then take 10% and no more than
84// MAX_LM_EXPIRATION.
85#ifndef LM_EXPIRATION
86#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
87#endif
88
89const int CACHE_TABLE_SIZE = 1499;
90
91using namespace std;
92
93namespace libdap {
94
98int get_hash(const string &url) {
99 int hash = 0;
100
101 for (const char *ptr = url.c_str(); *ptr; ptr++)
102 hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
103
104 return hash;
105}
106
107HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size)
108 : d_cache_root(cache_root), d_block_size(block_size), d_current_size(0), d_new_entries(0) {
109 d_cache_index = cache_root + CACHE_INDEX;
110
111 d_cache_table = new CacheEntries *[CACHE_TABLE_SIZE];
112
113 // Initialize the cache table.
114 for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
115 d_cache_table[i] = 0;
116
117 cache_index_read();
118}
119
122
123static inline void delete_cache_entry(HTTPCacheTable::CacheEntry *e) {
124 DBG2(cerr << "Deleting CacheEntry: " << e << endl);
125 delete e;
126}
127
128HTTPCacheTable::~HTTPCacheTable() {
129 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
130 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
131 if (cp) {
132 // delete each entry
133 for_each(cp->begin(), cp->end(), delete_cache_entry);
134
135 // now delete the vector that held the entries
136 delete get_cache_table()[i];
137 get_cache_table()[i] = 0;
138 }
139 }
140
141 delete[] d_cache_table;
142}
143
150
151class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
152 time_t d_time;
153 HTTPCacheTable &d_table;
154
155public:
156 DeleteExpired(HTTPCacheTable &table, time_t t) : d_time(t), d_table(table) {
157 if (!t)
158 d_time = time(0); // 0 == now
159 }
160
161 void operator()(HTTPCacheTable::CacheEntry *&e) {
162 if (e && !e->readers && (e->freshness_lifetime < (e->corrected_initial_age + (d_time - e->response_time)))) {
163 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
164 d_table.remove_cache_entry(e);
165 delete e;
166 e = 0;
167 }
168 }
169};
170
171// @param time base deletes againt this time, defaults to 0 (now)
172void HTTPCacheTable::delete_expired_entries(time_t time) {
173 // Walk through and delete all the expired entries.
174 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
175 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
176 if (slot) {
177 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
178 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
179 }
180 }
181}
182
188
189class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
190 HTTPCacheTable &d_table;
191 int d_hits;
192
193public:
194 DeleteByHits(HTTPCacheTable &table, int hits) : d_table(table), d_hits(hits) {}
195
196 void operator()(HTTPCacheTable::CacheEntry *&e) {
197 if (e && !e->readers && e->hits <= d_hits) {
198 DBG(cerr << "Deleting cache entry: " << e->url << endl);
199 d_table.remove_cache_entry(e);
200 delete e;
201 e = 0;
202 }
203 }
204};
205
206void HTTPCacheTable::delete_by_hits(int hits) {
207 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
208 if (get_cache_table()[cnt]) {
209 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
210 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
211 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
212 }
213 }
214}
215
219
220class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
221 HTTPCacheTable &d_table;
222 unsigned int d_size;
223
224public:
225 DeleteBySize(HTTPCacheTable &table, unsigned int size) : d_table(table), d_size(size) {}
226
227 void operator()(HTTPCacheTable::CacheEntry *&e) {
228 if (e && !e->readers && e->size > d_size) {
229 DBG(cerr << "Deleting cache entry: " << e->url << endl);
230 d_table.remove_cache_entry(e);
231 delete e;
232 e = 0;
233 }
234 }
235};
236
237void HTTPCacheTable::delete_by_size(unsigned int size) {
238 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
239 if (get_cache_table()[cnt]) {
240 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
241 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
242 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
243 }
244 }
245}
246
251
253
259
261 d_new_entries = 0;
262
263 return (REMOVE_BOOL(d_cache_index.c_str()) == 0);
264}
265
273
275 FILE *fp = fopen(d_cache_index.c_str(), "r");
276 // If the cache index can't be opened that's OK; start with an empty
277 // cache. 09/05/02 jhrg
278 if (!fp) {
279 return false;
280 }
281
282 char line[1024];
283 while (!feof(fp) && fgets(line, 1024, fp)) {
285 DBG2(cerr << line << endl);
286 }
287
288 int res = fclose(fp);
289 if (res) {
290 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
291 }
292
293 d_new_entries = 0;
294
295 return true;
296}
297
304
306 // Read the line and create the cache object
308 istringstream iss(line);
309 iss >> entry->url;
310 iss >> entry->cachename;
311
312 iss >> entry->etag;
313 if (entry->etag == CACHE_EMPTY_ETAG)
314 entry->etag = "";
315
316 iss >> entry->lm;
317 iss >> entry->expires;
318 iss >> entry->size;
319 iss >> entry->range; // range is not used. 10/02/02 jhrg
320
321 iss >> entry->hash;
322 iss >> entry->hits;
323 iss >> entry->freshness_lifetime;
324 iss >> entry->response_time;
325 iss >> entry->corrected_initial_age;
326
327 iss >> entry->must_revalidate;
328
329 return entry;
330}
331
333
334class WriteOneCacheEntry : public unary_function<HTTPCacheTable::CacheEntry *, void> {
335
336 FILE *d_fp;
337
338public:
339 WriteOneCacheEntry(FILE *fp) : d_fp(fp) {}
340
341 void operator()(HTTPCacheTable::CacheEntry *e) {
342 if (e && fprintf(d_fp, "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n", e->url.c_str(), e->cachename.c_str(),
343 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(), (long)(e->lm), (long)(e->expires), e->size,
344 e->range ? '1' : '0', // not used. 10/02/02 jhrg
345 e->hash, e->hits, (long)(e->freshness_lifetime), (long)(e->response_time),
346 (long)(e->corrected_initial_age), e->must_revalidate ? '1' : '0') < 0)
347 throw Error(internal_error, "Cache Index. Error writing cache index\n");
348 }
349};
350
361 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
362
363 // Open the file for writing.
364 FILE *fp = NULL;
365 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
366 throw Error(string("Cache Index. Can't open `") + d_cache_index + string("' for writing"));
367 }
368
369 // Walk through the list and write it out. The format is really
370 // simple as we keep it all in ASCII.
371
372 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
373 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
374 if (cp)
375 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
376 }
377
378 /* Done writing */
379 int res = fclose(fp);
380 if (res) {
381 DBG(cerr << "HTTPCache::cache_index_write - Failed to close " << (void *)fp << endl);
382 }
383
384 d_new_entries = 0;
385}
386
388
400
402 ostringstream path;
403 path << d_cache_root << hash;
404
405 // Save the mask
406 mode_t mask = umask(0);
407
408 // Ignore the error if the directory exists
409 errno = 0;
410 if (mkdir(path.str().c_str(), 0777) < 0 && errno != EEXIST) {
411 umask(mask);
412 throw Error(internal_error,
413 "Could not create the directory for the cache at '" + path.str() + "' (" + strerror(errno) + ").");
414 }
415
416 // Restore the mask
417 umask(mask);
418
419 return path.str();
420}
421
435
437 string hash_dir = create_hash_directory(entry->hash);
438#ifdef WIN32
439 hash_dir += "\\dodsXXXXXX";
440#else
441 hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
442#endif
443
444 // mkstemp uses the storage passed to it; must be writable and local.
445 // char *templat = new char[hash_dir.size() + 1];
446 vector<char> templat(hash_dir.size() + 1);
447 strncpy(templat.data(), hash_dir.c_str(), hash_dir.size() + 1);
448
449 // Open truncated for update. NB: mkstemp() returns a file descriptor.
450 // man mkstemp says "... The file is opened with the O_EXCL flag,
451 // guaranteeing that when mkstemp returns successfully we are the only
452 // user." 09/19/02 jhrg
453#ifndef WIN32
454 // Make sure that temp files are accessible only by the owner.
455 umask(077);
456#endif
457 int fd = MKSTEMP(templat.data()); // fd mode is 666 or 600 (Unix)
458 if (fd < 0) {
459 // delete[] templat; templat = 0;
460 // close(fd); Calling close() when fd is < 0 is a bad idea! jhrg 7/2/15
461 throw Error(internal_error,
462 "The HTTP Cache could not create a file to hold the response; it will not be cached.");
463 }
464
465 entry->cachename = templat.data();
466 // delete[] templat; templat = 0;
467 close(fd);
468}
469
471static inline int entry_disk_space(int size, unsigned int block_size) {
472 unsigned int num_of_blocks = (size + block_size) / block_size;
473
474 DBG(cerr << "size: " << size << ", block_size: " << block_size << ", num_of_blocks: " << num_of_blocks << endl);
475
476 return num_of_blocks * block_size;
477}
478
480
482
489 int hash = entry->hash;
490 if (hash > CACHE_TABLE_SIZE - 1 || hash < 0)
491 throw InternalErr(__FILE__, __LINE__, "Hash value too large!");
492
493 if (!d_cache_table[hash])
494 d_cache_table[hash] = new CacheEntries;
495
496 d_cache_table[hash]->push_back(entry);
497
498 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << ", entry->size: " << entry->size
499 << ", block size: " << d_block_size << endl);
500
501 d_current_size += entry_disk_space(entry->size, d_block_size);
502
503 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
504
505 increment_new_entries();
506}
507
511HTTPCacheTable::CacheEntry *HTTPCacheTable::get_locked_entry_from_cache_table(const string &url) /*const*/
512{
513 return get_locked_entry_from_cache_table(get_hash(url), url);
514}
515
523HTTPCacheTable::CacheEntry *HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url) /*const*/
524{
525 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
526 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
527 if (d_cache_table[hash]) {
528 CacheEntries *cp = d_cache_table[hash];
529 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
530 // Must test *i because perform_garbage_collection may have
531 // removed this entry; the CacheEntry will then be null.
532 if ((*i) && (*i)->url == url) {
533 (*i)->lock_read_response(); // Lock the response
534 return *i;
535 }
536 }
537 }
538
539 return 0;
540}
541
549 int hash = get_hash(url);
550 if (d_cache_table[hash]) {
551 CacheEntries *cp = d_cache_table[hash];
552 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
553 // Must test *i because perform_garbage_collection may have
554 // removed this entry; the CacheEntry will then be null.
555 if ((*i) && (*i)->url == url) {
556 (*i)->lock_write_response(); // Lock the response
557 return *i;
558 }
559 }
560 }
561
562 return 0;
563}
564
573 // This should never happen; all calls to this method are protected by
574 // the caller, hence the InternalErr.
575 if (entry->readers)
576 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
577
578 REMOVE(entry->cachename.c_str());
579 REMOVE(string(entry->cachename + CACHE_META).c_str());
580
581 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
582
583 unsigned int eds = entry_disk_space(entry->size, get_block_size());
584 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
585
586 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
587}
588
591class DeleteCacheEntry : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
592 string d_url;
593 HTTPCacheTable *d_cache_table;
594
595public:
596 DeleteCacheEntry(HTTPCacheTable *c, const string &url) : d_url(url), d_cache_table(c) {}
597
598 void operator()(HTTPCacheTable::CacheEntry *&e) {
599 if (e && e->url == d_url) {
600 e->lock_write_response();
601 d_cache_table->remove_cache_entry(e);
602 e->unlock_write_response();
603 delete e;
604 e = 0;
605 }
606 }
607};
608
616 int hash = get_hash(url);
617 if (d_cache_table[hash]) {
618 CacheEntries *cp = d_cache_table[hash];
619 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
620 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)), cp->end());
621 }
622}
623
625
626class DeleteUnlockedCacheEntry : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
627 HTTPCacheTable &d_table;
628
629public:
630 DeleteUnlockedCacheEntry(HTTPCacheTable &t) : d_table(t) {}
631 void operator()(HTTPCacheTable::CacheEntry *&e) {
632 if (e) {
633 d_table.remove_cache_entry(e);
634 delete e;
635 e = 0;
636 }
637 }
638};
639
640void HTTPCacheTable::delete_all_entries() {
641 // Walk through the cache table and, for every entry in the cache, delete
642 // it on disk and in the cache table.
643 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
644 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
645 if (slot) {
646 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
647 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
648 }
649 }
650
651 cache_index_delete();
652}
653
666
667void HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time) {
668 entry->response_time = time(NULL);
669 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
670 time_t corrected_received_age = max(apparent_age, entry->age);
671 time_t response_delay = entry->response_time - request_time;
672 entry->corrected_initial_age = corrected_received_age + response_delay;
673
674 // Estimate an expires time using the max-age and expires time. If we
675 // don't have an explicit expires time then set it to 10% of the LM date
676 // (although max 24 h). If no LM date is available then use 24 hours.
677 time_t freshness_lifetime = entry->max_age;
678 if (freshness_lifetime < 0) {
679 if (entry->expires < 0) {
680 if (entry->lm < 0) {
681 freshness_lifetime = default_expiration;
682 } else {
683 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
684 }
685 } else
686 freshness_lifetime = entry->expires - entry->date;
687 }
688
689 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
690
691 DBG2(cerr << "Cache....... Received Age " << entry->age << ", corrected " << entry->corrected_initial_age
692 << ", freshness lifetime " << entry->freshness_lifetime << endl);
693}
694
705
706void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
707 const vector<string> &headers) {
709 for (i = headers.begin(); i != headers.end(); ++i) {
710 // skip a blank header.
711 if ((*i).empty())
712 continue;
713
714 string::size_type colon = (*i).find(':');
715
716 // skip a header with no colon in it.
717 if (colon == string::npos)
718 continue;
719
720 string header = (*i).substr(0, (*i).find(':'));
721 string value = (*i).substr((*i).find(": ") + 2);
722 DBG2(cerr << "Header: " << header << endl);
723 DBG2(cerr << "Value: " << value << endl);
724
725 if (header == "ETag") {
726 entry->etag = value;
727 } else if (header == "Last-Modified") {
728 entry->lm = parse_time(value.c_str());
729 } else if (header == "Expires") {
730 entry->expires = parse_time(value.c_str());
731 } else if (header == "Date") {
732 entry->date = parse_time(value.c_str());
733 } else if (header == "Age") {
734 entry->age = parse_time(value.c_str());
735 } else if (header == "Content-Length") {
736 unsigned long clength = strtoul(value.c_str(), 0, 0);
737 if (clength > max_entry_size)
738 entry->set_no_cache(true);
739 } else if (header == "Cache-Control") {
740 // Ignored Cache-Control values: public, private, no-transform,
741 // proxy-revalidate, s-max-age. These are used by shared caches.
742 // See section 14.9 of RFC 2612. 10/02/02 jhrg
743 if (value == "no-cache" || value == "no-store")
744 // Note that we *can* store a 'no-store' response in volatile
745 // memory according to RFC 2616 (section 14.9.2) but those
746 // will be rare coming from DAP servers. 10/02/02 jhrg
747 entry->set_no_cache(true);
748 else if (value == "must-revalidate")
749 entry->must_revalidate = true;
750 else if (value.find("max-age") != string::npos) {
751 string max_age = value.substr(value.find("=") + 1);
752 entry->max_age = parse_time(max_age.c_str());
753 }
754 }
755 }
756}
757
759
760// @TODO Change name to record locked response
761void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
762 entry->hits++; // Mark hit
763 d_locked_entries[body] = entry; // record lock, see release_cached_r...
764}
765
766void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
767
768 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
769 if (!entry)
770 throw InternalErr("There is no cache entry for the response given.");
771
772 d_locked_entries.erase(body);
773 entry->unlock_read_response();
774
775 if (entry->readers < 0)
776 throw InternalErr("An unlocked entry was released");
777}
778
779bool HTTPCacheTable::is_locked_read_responses() { return !d_locked_entries.empty(); }
780
781} // namespace libdap
A class for error processing.
Definition Error.h:92
void create_location(CacheEntry *entry)
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
string create_hash_directory(int hash)
CacheEntry * cache_index_parse_line(const char *line)
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
void add_entry_to_cache_table(CacheEntry *entry)
void remove_entry_from_cache_table(const string &url)
A class for software fault reporting.
Definition InternalErr.h:61
STL iterator class.
top level DAP object to house generic methods
Definition AISConnect.cc:30
int get_hash(const string &url)
time_t parse_time(const char *str, bool expand)
Definition util_mit.cc:144