bes  Updated for version 3.20.10
RemoteResource.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES http package, part of the Hyrax data server.
4 
5 // Copyright (c) 2020 OPeNDAP, Inc.
6 // Author: Nathan Potter <ndp@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 // Authors:
25 // ndp Nathan Potter <ndp@opendap.org>
26 
27 #include "config.h"
28 
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 
33 #include <sstream>
34 #include <fstream>
35 #include <string>
36 #include <iostream>
37 #include <utility>
38 
39 #include "rapidjson/document.h"
40 
41 #include "BESInternalError.h"
42 #include "BESForbiddenError.h"
43 #include "BESSyntaxUserError.h"
44 #include "BESNotFoundError.h"
45 #include "BESTimeoutError.h"
46 
47 #include "BESDebug.h"
48 #include "BESUtil.h"
49 
50 #include "HttpCache.h"
51 #include "HttpUtils.h"
52 #include "CurlUtils.h"
53 #include "HttpNames.h"
54 #include "RemoteResource.h"
55 #include "TheBESKeys.h"
56 #include "BESStopWatch.h"
57 #include "BESLog.h"
58 
59 using namespace std;
60 
61 #define BES_CATALOG_ROOT_KEY "BES.Catalog.catalog.RootDirectory"
62 
63 #define prolog std::string("RemoteResource::").append(__func__).append("() - ")
64 #define MODULE HTTP_MODULE
65 
66 namespace http {
67 
68 RemoteResource::RemoteResource(
69  std::shared_ptr<http::url> target_url,
70  const std::string &uid,
71  long long expiredInterval)
72  : d_remoteResourceUrl(std::move(target_url)){
73 
74  d_fd = 0;
75  d_initialized = false;
76 
77  d_uid = uid;
78 
79  d_resourceCacheFileName.clear();
80  d_response_headers = new vector<string>();
81  d_http_response_headers = new map<string, string>();
82 
83  d_expires_interval = expiredInterval;
84 
85 
86  if(d_remoteResourceUrl->protocol() == FILE_PROTOCOL){
87  BESDEBUG(MODULE,prolog << "Found FILE protocol." << endl);
88  d_resourceCacheFileName = d_remoteResourceUrl->path();
89  while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
90  // Strip trailing slashes, because this about files, not directories
91  d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
92  }
93  // Now we check that the data is in the BES_CATALOG_ROOT
94  string catalog_root;
95  bool found;
96  TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
97  if(!found){
98  throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
99  }
100  if(d_resourceCacheFileName.find(catalog_root) !=0 ){
101  d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
102  }
103  BESDEBUG(MODULE,"d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
104  d_initialized =true;
105  }
106  else if( d_remoteResourceUrl->protocol() == HTTPS_PROTOCOL || d_remoteResourceUrl->protocol() == HTTP_PROTOCOL ){
107  BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl->str() << endl);
108 #if 0
109 
110  if (!d_uid.empty()){
111  string client_id_hdr = "User-Id: " + d_uid;
112  BESDEBUG(MODULE, prolog << client_id_hdr << endl);
113  d_request_headers.push_back(client_id_hdr);
114  }
115  if (!d_echo_token.empty()){
116  string echo_token_hdr = "Echo-Token: " + d_echo_token;
117  BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
118  d_request_headers.push_back(echo_token_hdr);
119  }
120 #endif
121 
122  }
123  else {
124  string err = prolog + "Unsupported protocol: " + d_remoteResourceUrl->protocol();
125  throw BESInternalError(err, __FILE__, __LINE__);
126  }
127 
128  // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
129 
130 }
131 
132 
133 #if 0
139  RemoteResource::RemoteResource(const std::string &url, const std::string &uid, const std::string &echo_token) {
140 
141  d_fd = 0;
142  d_initialized = false;
143 
144  d_uid = uid;
145  d_echo_token = echo_token;
146 
147  // d_curl = curl::init(url);
148 
149  d_resourceCacheFileName.clear();
150  d_response_headers = new vector<string>();
151  d_request_headers = new vector<string>();
152  d_http_response_headers = new map<string, string>();
153 
154  if (url.empty()) {
155  throw BESInternalError(prolog + "Remote resource URL is empty.", __FILE__, __LINE__);
156  }
157 
158  if(url.find(FILE_PROTOCOL) == 0){
159  d_resourceCacheFileName = url.substr(strlen(FILE_PROTOCOL));
160  while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
161  // Strip trailing slashes, because this about files, not directories
162  d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
163  }
164  // Now we check that the data is in the BES_CATALOG_ROOT
165  string catalog_root;
166  bool found;
167  TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
168  if(!found){
169  throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
170  }
171  if(d_resourceCacheFileName.find(catalog_root) !=0 ){
172  d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
173  }
174  d_initialized =true;
175  }
176  else if(url.find(HTTPS_PROTOCOL) == 0 || url.find(HTTP_PROTOCOL) == 0){
177  d_remoteResourceUrl = url;
178  BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl << endl);
179 
180  if (!d_uid.empty()){
181  string client_id_hdr = "User-Id: " + d_uid;
182  BESDEBUG(MODULE, prolog << client_id_hdr << endl);
183  d_request_headers->push_back(client_id_hdr);
184  }
185  if (!d_echo_token.empty()){
186  string echo_token_hdr = "Echo-Token: " + d_echo_token;
187  BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
188  d_request_headers->push_back(echo_token_hdr);
189  }
190  }
191  else {
192  string err = prolog + "Unsupported protocol: " + url;
193  throw BESInternalError(err, __FILE__, __LINE__);
194  }
195 
196 
197 
198  // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
199  }
200 #endif
201 
202 
207 RemoteResource::~RemoteResource() {
208  BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl->str() << endl);
209 
210  delete d_response_headers;
211  d_response_headers = 0;
212  BESDEBUG(MODULE, prolog << "Deleted d_response_headers." << endl);
213 
214  if (!d_resourceCacheFileName.empty()) {
215  HttpCache *cache = HttpCache::get_instance();
216  if (cache) {
217  cache->unlock_and_close(d_resourceCacheFileName);
218  BESDEBUG(MODULE, prolog << "Closed and unlocked " << d_resourceCacheFileName << endl);
219  d_resourceCacheFileName.clear();
220  }
221  }
222  BESDEBUG(MODULE, prolog << "END" << endl);
223 }
224 
229 std::string RemoteResource::getCacheFileName() {
230  if (!d_initialized) {
231  throw BESInternalError(prolog + "STATE ERROR: Remote Resource " + d_remoteResourceUrl->str() +
232  " has Not Been Retrieved.", __FILE__, __LINE__);
233  }
234  return d_resourceCacheFileName;
235 }
236 
244 void RemoteResource::retrieveResource() {
245  std::map<std::string, std::string> content_filters;
246  retrieveResource(content_filters);
247 }
248 
260 void RemoteResource::retrieveResource(const std::map<std::string, std::string> &content_filters) {
261  BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl->str() << endl);
262  bool mangle = true;
263 
264  // TODO come back and visit this condition and determine if it is still needed jhrg/sbl 4.14.21
265  if (d_initialized) {
266  BESDEBUG(MODULE, prolog << "END Already initialized." << endl);
267  return;
268  }
269  // Get a pointer to the singleton cache instance for this process.
270  HttpCache *cache = HttpCache::get_instance();
271  if (!cache) {
272  ostringstream oss;
273  oss << prolog << "FAILED to get local cache. ";
274  oss << "Unable to proceed with request for " << this->d_remoteResourceUrl->str();
275  oss << " The server MUST have a valid HTTP cache configuration to operate." << endl;
276  BESDEBUG(MODULE, oss.str());
277  throw BESInternalError(oss.str(), __FILE__, __LINE__);
278  }
279 
280  // Get the name of the file in the cache (either the code finds this file or
281  // or it makes it).
282  d_resourceCacheFileName = cache->get_cache_file_name(d_uid, d_remoteResourceUrl->str(), mangle);
283  BESDEBUG(MODULE, prolog << "d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
284 
285  // @TODO MAKE THIS RETRIEVE THE CACHED DATA TYPE IF THE CACHED RESPONSE IF FOUND
286  // We need to know the type of the resource. HTTP headers are the preferred way to determine the type.
287  // Unfortunately, the current code losses both the HTTP headers sent from the request and the derived type
288  // to subsequent accesses of the cached object. Since we have to have a type, for now we just set the type
289  // from the url. If down below we DO an HTTP GET then the headers will be evaluated and the type set by setType()
290  // But really - we gotta fix this.
291  http::get_type_from_url(d_remoteResourceUrl->str(), d_type);
292  BESDEBUG(MODULE, prolog << "d_type: " << d_type << endl);
293 
294  try {
295  if (cache->get_exclusive_lock(d_resourceCacheFileName, d_fd)) {
296  BESDEBUG(MODULE,
297  prolog << "Remote resource is already in cache. cache_file_name: " << d_resourceCacheFileName
298  << endl);
299 
300  if (cached_resource_is_expired()) {
301  BESDEBUG(MODULE, prolog << "EXISTS - UPDATING " << endl);
302  update_file_and_headers(content_filters);
303  cache->exclusive_to_shared_lock(d_fd);
304  } else {
305  BESDEBUG(MODULE, prolog << "EXISTS - LOADING " << endl);
306  cache->exclusive_to_shared_lock(d_fd);
307  load_hdrs_from_file();
308  }
309  d_initialized = true;
310  return;
311  } else {
312  // Now we actually need to reach out across the interwebs and retrieve the remote resource and put it's
313  // content into a local cache file, given that it's not in the cache.
314  // First make an empty file and get an exclusive lock on it.
315  if (cache->create_and_lock(d_resourceCacheFileName, d_fd)) {
316  BESDEBUG(MODULE, prolog << "DOESN'T EXIST - CREATING " << endl);
317  update_file_and_headers(content_filters);
318  } else {
319  BESDEBUG(MODULE, prolog << " WAS CREATED - LOADING " << endl);
320  cache->get_read_lock(d_resourceCacheFileName, d_fd);
321  load_hdrs_from_file();
322  }
323  d_initialized = true;
324  return;
325  }
326 
327  stringstream msg;
328  msg << prolog + "Failed to acquire cache read lock for remote resource: '";
329  msg << d_remoteResourceUrl->str() << endl;
330  throw BESInternalError(msg.str(), __FILE__, __LINE__);
331 
332  }
333  catch (BESError &besError) {
334  BESDEBUG(MODULE, prolog << "Caught BESError. type: " << besError.get_bes_error_type() <<
335  " message: '" << besError.get_message() <<
336  "' file: " << besError.get_file() << " line: " << besError.get_line() <<
337  " Will unlock cache and re-throw." << endl);
338  cache->unlock_cache();
339  throw;
340  }
341  catch (...) {
342  BESDEBUG(MODULE, prolog << "Caught unknown exception. Will unlock cache and re-throw." << endl);
343  cache->unlock_cache();
344  throw;
345  }
346 
347 } //end RemoteResource::retrieveResource()
348 
352 void RemoteResource::update_file_and_headers(){
353  std::map<std::string, std::string> content_filters;
354  update_file_and_headers(content_filters);
355 }
356 
362 void RemoteResource::update_file_and_headers(const std::map<std::string, std::string> &content_filters){
363 
364  // Get a pointer to the singleton cache instance for this process.
365  HttpCache *cache = HttpCache::get_instance();
366  if (!cache) {
367  ostringstream oss;
368  oss << prolog << "FAILED to get local cache. ";
369  oss << "Unable to proceed with request for " << this->d_remoteResourceUrl->str();
370  oss << " The server MUST have a valid HTTP cache configuration to operate." << endl;
371  BESDEBUG(MODULE, oss.str());
372  throw BESInternalError(oss.str(), __FILE__, __LINE__);
373  }
374 
375  // Write the remote resource to the cache file.
376  try {
377  writeResourceToFile(d_fd);
378  }
379  catch (...) {
380  // If things went south then we need to dump the file because we'll end up with an empty/bogus file clogging the cache
381  unlink(d_resourceCacheFileName.c_str());
382  throw;
383  }
384 
385  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
386  // Filter the response file - If content_filters map is empty then nothing is done.
387  filter_retrieved_resource(content_filters);
388 
389  // Write the headers to the appropriate cache file.
390  string hdr_filename = d_resourceCacheFileName + ".hdrs";
391  std::ofstream hdr_out(hdr_filename.c_str());
392  try {
393  for (size_t i = 0; i < this->d_response_headers->size(); i++) {
394  hdr_out << (*d_response_headers)[i] << endl;
395  }
396  }
397  catch (...) {
398  // If this fails for any reason we:
399  hdr_out.close(); // Close the stream
400  unlink(hdr_filename.c_str()); // unlink the file
401  unlink(d_resourceCacheFileName.c_str()); // unlink the primary cache file.
402  throw;
403  }
404 
405  // #########################################################################################################
406 
407  // Change the exclusive lock on the new file to a shared lock. This keeps
408  // other processes from purging the new file and ensures that the reading
409  // process can use it.
410  cache->exclusive_to_shared_lock(d_fd);
411  BESDEBUG(MODULE, prolog << "Converted exclusive cache lock to shared lock." << endl);
412 
413  // Now update the total cache size info and purge if needed. The new file's
414  // name is passed into the purge method because this process cannot detect its
415  // own lock on the file.
416  unsigned long long size = cache->update_cache_info(d_resourceCacheFileName);
417  BESDEBUG(MODULE, prolog << "Updated cache info" << endl);
418 
419  if (cache->cache_too_big(size)) {
420  cache->update_and_purge(d_resourceCacheFileName);
421  BESDEBUG(MODULE, prolog << "Updated and purged cache." << endl);
422  }
423  BESDEBUG(MODULE, prolog << "END" << endl);
424 
425  return;
426 } //end RemoteResource::update_file_and_headers()
427 
431 void RemoteResource::load_hdrs_from_file(){
432  string hdr_filename = d_resourceCacheFileName + ".hdrs";
433  std::ifstream hdr_ifs(hdr_filename.c_str());
434 
435  if(!hdr_ifs.is_open()){
436  stringstream msg;
437  msg << "ERROR. Internal state error. The headers file: " << hdr_filename << " could not be opened for reading.";
438  BESDEBUG(MODULE, prolog << msg.str() << endl);
439  throw BESInternalError(msg.str(), __FILE__, __LINE__);
440  }
441 
442  BESDEBUG(MODULE, prolog << "Reading response headers from: " << hdr_filename << endl);
443  for (std::string line; std::getline(hdr_ifs, line);) {
444  (*d_response_headers).push_back(line);
445  BESDEBUG(MODULE, prolog << "header: " << line << endl);
446  }
447  ingest_http_headers_and_type();
448 } //end RemoteResource::load_hdrs_from_file()
449 
457 bool RemoteResource::cached_resource_is_expired(){
458  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
459 
460  struct stat statbuf;
461  if (stat(d_resourceCacheFileName.c_str(), &statbuf) == -1){
462  throw BESNotFoundError(strerror(errno), __FILE__, __LINE__);
463  }//end if
464  BESDEBUG(MODULE, prolog << "File exists" << endl);
465 
466  time_t cacheTime = statbuf.st_ctime;
467  BESDEBUG(MODULE, prolog << "Cache file creation time: " << cacheTime << endl);
468  time_t nowTime = time(0);
469  BESDEBUG(MODULE, prolog << "Time now: " << nowTime << endl);
470  double diffSeconds = difftime(nowTime,cacheTime);
471  BESDEBUG(MODULE, prolog << "Time difference between cacheTime and nowTime: " << diffSeconds << endl);
472 
473  if (diffSeconds > d_expires_interval){
474  BESDEBUG(MODULE, prolog << " refresh = TRUE " << endl);
475  return true;
476  }
477  else{
478  BESDEBUG(MODULE, prolog << " refresh = FALSE " << endl);
479  return false;
480  }
481 } //end RemoteResource::is_cache_resource_expired()
482 
491 void RemoteResource::writeResourceToFile(int fd) {
492 
493  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
494  try {
495 
496  BESStopWatch besTimer;
497  if (BESDebug::IsSet("rr") || BESDebug::IsSet(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
498  besTimer.start(prolog + "source url: " + d_remoteResourceUrl->str());
499  }
500 
501  int status = lseek(fd, 0, SEEK_SET);
502  if (-1 == status)
503  throw BESNotFoundError("Could not seek within the response file.", __FILE__, __LINE__);
504  BESDEBUG(MODULE, prolog << "Reset file descriptor to start of file." << endl);
505 
506  status = ftruncate(fd, 0);
507  if (-1 == status)
508  throw BESInternalError("Could not truncate the file prior to updating from remote. ", __FILE__, __LINE__);
509  BESDEBUG(MODULE, prolog << "Truncated file, length is zero." << endl);
510 
511  BESDEBUG(MODULE, prolog << "Saving resource " << d_remoteResourceUrl << " to cache file " << d_resourceCacheFileName << endl);
512  curl::http_get_and_write_resource(d_remoteResourceUrl, fd, d_response_headers); // Throws BESInternalError if there is a curl error.
513 
514  BESDEBUG(MODULE, prolog << "Resource " << d_remoteResourceUrl->str() << " saved to cache file " << d_resourceCacheFileName << endl);
515 
516  // rewind the file
517  // FIXME I think the idea here is that we have the file open and we should just keep
518  // reading from it. But the container mechanism works with file names, so we will
519  // likely have to open the file again. If that's true, lets remove this call. jhrg 3.2.18
520  status = lseek(fd, 0, SEEK_SET);
521  if (-1 == status)
522  throw BESNotFoundError("Could not seek within the response file.", __FILE__, __LINE__);
523  BESDEBUG(MODULE, prolog << "Reset file descriptor to start of file." << endl);
524 
525  // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
526  ingest_http_headers_and_type();
527  }
528  catch (BESError &e) {
529  throw;
530  }
531  BESDEBUG(MODULE, prolog << "END" << endl);
532 }
533 
537 void RemoteResource::ingest_http_headers_and_type() {
538  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
539 
540  const string colon_space = ": ";
541  for (size_t i = 0; i < this->d_response_headers->size(); i++) {
542  string header = (*d_response_headers)[i];
543  BESDEBUG(MODULE, prolog << "Processing header " << header << endl);
544  size_t colon_index = header.find(colon_space);
545  if(colon_index == string::npos){
546  BESDEBUG(MODULE, prolog << "Unable to locate the colon space \": \" delimiter in the header " <<
547  "string: '" << header << "' SKIPPING!" << endl);
548  }
549  else {
550  string key = BESUtil::lowercase(header.substr(0, colon_index));
551  string value = header.substr(colon_index + colon_space.length());
552  BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
553  (*d_http_response_headers)[key] = value;
554  }
555  }
556  BESDEBUG(MODULE, prolog << "Ingested " << d_http_response_headers->size() << " response headers." << endl);
557 
558  std::map<string, string>::iterator it;
559  string type;
560 
561  // Try and figure out the file type first from the
562  // Content-Disposition in the http header response.
563  BESDEBUG(MODULE, prolog << "Checking Content-Disposition headers for type information." << endl);
564  string content_disp_hdr;
565  content_disp_hdr = get_http_response_header("content-disposition");
566  if (!content_disp_hdr.empty()) {
567  // Content disposition exists, grab the filename
568  // attribute
569  http::get_type_from_disposition(content_disp_hdr, type);
570  BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << content_disp_hdr << "' matched type: \"" << type << "\"" << endl);
571  }
572 
573  // still haven't figured out the type. Check the content-type
574  // next, translate to the BES MODULE name. It's also possible
575  // that even though Content-disposition was available, we could
576  // not determine the type of the file.
577  BESDEBUG(MODULE, prolog << "Checking Content-Type headers for type information." << endl);
578  string content_type = get_http_response_header("content-type");
579  if (type.empty() && !content_type.empty()) {
580  http::get_type_from_content_type(content_type, type);
581  BESDEBUG(MODULE,prolog << "Evaluated content-type '" << content_type << "' matched type \"" << type << "\"" << endl);
582  }
583 
584  // still haven't figured out the type. Now check the actual URL
585  // and see if we can't match the URL to a MODULE name
586  BESDEBUG(MODULE, prolog << "Checking URL path for type information." << endl);
587  if (type.empty()) {
588  http::get_type_from_url(d_remoteResourceUrl->str(), type);
589  BESDEBUG(MODULE, prolog << "Evaluated url '" << d_remoteResourceUrl->str() << "' matched type: \"" << type << "\"" << endl);
590  }
591  // still couldn't figure it out, punt
592  if (type.empty()) {
593  string err = prolog + "Unable to determine the type of data"
594  + " returned from '" + d_remoteResourceUrl->str() + "' Setting type to 'unknown'";
595  BESDEBUG(MODULE, err << endl);
596  type = "unknown";
597  //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
598  }
599  d_type = type;
600  BESDEBUG(MODULE, prolog << "END (dataset type: " << d_type << ")" << endl);
601 }
602 
608 std::string
609 RemoteResource::get_http_response_header(const std::string header_name) {
610  string value("");
611  std::map<string, string>::iterator it;
612  it = d_http_response_headers->find(BESUtil::lowercase(header_name));
613  if (it != d_http_response_headers->end())
614  value = it->second;
615  return value;
616 }
617 
629 void RemoteResource::filter_retrieved_resource(const std::map<std::string, std::string> &content_filters){
630 
631  // No filters?
632  if(content_filters.empty()){
633  // No problem...
634  return;
635  }
636  string resource_content;
637  {
638  std::stringstream buffer;
639  // - - - - - - - - - - - - - - - - - - - - - - - -
640  // Read the cached file into a string object
641  std::ifstream cr_istrm(d_resourceCacheFileName);
642  if (!cr_istrm.is_open()) {
643  string msg = "Could not open '" + d_resourceCacheFileName + "' to read cached response.";
644  BESDEBUG(MODULE, prolog << msg << endl);
645  throw BESInternalError(msg, __FILE__, __LINE__);
646  }
647  buffer << cr_istrm.rdbuf();
648 
649  // FIXME Do we need to make a copy here? Could we pass buffer.str() to replace_all??
650  resource_content = buffer.str();
651  } // cr_istrm is closed here.
652 
653  for (const auto& apair : content_filters) {
654  unsigned int replace_count = BESUtil::replace_all(resource_content,apair.first, apair.second);
655  BESDEBUG(MODULE, prolog << "Replaced " << replace_count << " instance(s) of template(" <<
656  apair.first << ") with " << apair.second << " in cached RemoteResource" << endl);
657  }
658 
659 
660  // - - - - - - - - - - - - - - - - - - - - - - - -
661  // Replace the contents of the cached file with the modified string.
662  std::ofstream cr_ostrm(d_resourceCacheFileName);
663  if (!cr_ostrm.is_open()) {
664  string msg = "Could not open '" + d_resourceCacheFileName + "' to write modified cached response.";
665  BESDEBUG(MODULE, prolog << msg << endl);
666  throw BESInternalError(msg, __FILE__, __LINE__);
667  }
668  cr_ostrm << resource_content;
669 
670 }
671 
675 std::string RemoteResource::get_response_as_string() {
676 
677  if(!d_initialized){
678  stringstream msg;
679  msg << "ERROR. Internal state error. " << __PRETTY_FUNCTION__ << " was called prior to retrieving resource.";
680  BESDEBUG(MODULE, prolog << msg.str() << endl);
681  throw BESInternalError(msg.str(), __FILE__, __LINE__);
682  }
683  string cache_file = getCacheFileName();
684  // - - - - - - - - - - - - - - - - - - - - - - - - - - -
685  // Set up cache file input stream.
686  std::ifstream file_istream(cache_file, std::ofstream::in);
687 
688  // If the cache filename is not valid, the stream will not open. Empty is not valid.
689  if(file_istream.is_open()){
690  // If it's open we've got a valid input stream.
691  BESDEBUG(MODULE, prolog << "Using cached file: " << cache_file << endl);
692  std::stringstream buffer;
693  buffer << file_istream.rdbuf();
694  return buffer.str();
695  }
696  else {
697  stringstream msg;
698  msg << "ERROR. Failed to open cache file " << cache_file << " for reading.";
699  BESDEBUG(MODULE, prolog << msg.str() << endl);
700  throw BESInternalError(msg.str(), __FILE__, __LINE__);
701  }
702 
703 }
704 
712 rapidjson::Document RemoteResource::get_as_json() {
713  string response = get_response_as_string();
715  d.Parse(response.c_str());
716  return d;
717 }
718 
722 vector<string> *RemoteResource::getResponseHeaders() {
723  if (!d_initialized){
724  throw BESInternalError(prolog +"STATE ERROR: Remote Resource Has Not Been Retrieved.",__FILE__,__LINE__);
725  }
726  return d_response_headers;
727 }
728 
729 
730 #if 0
731 void RemoteResource::setType(const vector<string> *resp_hdrs) {
732 
733  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
734 
735  string type = "";
736 
737  // Try and figure out the file type first from the
738  // Content-Disposition in the http header response.
739  string disp;
740  string ctype;
741 
742  if (resp_hdrs) {
743  vector<string>::const_iterator i = resp_hdrs->begin();
744  vector<string>::const_iterator e = resp_hdrs->end();
745  for (; i != e; i++) {
746  string hdr_line = (*i);
747 
748  BESDEBUG(MODULE, prolog << "Evaluating header: " << hdr_line << endl);
749 
750  hdr_line = BESUtil::lowercase(hdr_line);
751 
752  string colon_space = ": ";
753  int index = hdr_line.find(colon_space);
754  string hdr_name = hdr_line.substr(0, index);
755  string hdr_value = hdr_line.substr(index + colon_space.length());
756 
757  BESDEBUG(MODULE, prolog << "hdr_name: '" << hdr_name << "' hdr_value: '" << hdr_value << "' " << endl);
758 
759  if (hdr_name.find("content-disposition") != string::npos) {
760  // Content disposition exists
761  BESDEBUG(MODULE, prolog << "Located content-disposition header." << endl);
762  disp = hdr_value;
763  }
764  if (hdr_name.find("content-type") != string::npos) {
765  BESDEBUG(MODULE, prolog << "Located content-type header." << endl);
766  ctype = hdr_value;
767  }
768  }
769  }
770 
771  if (!disp.empty()) {
772  // Content disposition exists, grab the filename
773  // attribute
774  HttpUtils::Get_type_from_disposition(disp, type);
775  BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << disp << "' matched type: \"" << type << "\"" << endl);
776  }
777 
778  // still haven't figured out the type. Check the content-type
779  // next, translate to the BES MODULE name. It's also possible
780  // that even though Content-disposition was available, we could
781  // not determine the type of the file.
782  if (type.empty() && !ctype.empty()) {
783  HttpUtils::Get_type_from_content_type(ctype, type);
784  BESDEBUG(MODULE,prolog << "Evaluated content-type '" << ctype << "' matched type \"" << type << "\"" << endl);
785  }
786 
787  // still haven't figured out the type. Now check the actual URL
788  // and see if we can't match the URL to a MODULE name
789  if (type.empty()) {
790  HttpUtils::Get_type_from_url(d_remoteResourceUrl, type);
791  BESDEBUG(MODULE,prolog << "Evaluated url '" << d_remoteResourceUrl << "' matched type: \"" << type << "\"" << endl);
792  }
793 
794  // still couldn't figure it out, punt
795  if (type.empty()) {
796  string err = prolog + "Unable to determine the type of data"
797  + " returned from '" + d_remoteResourceUrl + "' Setting type to 'unknown'";
798  BESDEBUG(MODULE, err << endl);
799  type = "unknown";
800  //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
801  }
802 
803  // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
804 
805  d_type = type;
806  }
807 #endif
808 
809 
810 } // namespace http
811 
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
virtual void unlock_and_close(const std::string &target)
virtual bool create_and_lock(const std::string &target, int &fd)
Create a file in the cache and lock it for write access.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
virtual bool get_read_lock(const std::string &target, int &fd)
Get a read-only lock on the file if it exists.
virtual bool get_exclusive_lock(const std::string &target, int &fd)
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:961
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with the value of the string 'replace_with_this'.
Definition: BESUtil.cc:1015
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:791
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:282
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:2585
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
void get_type_from_disposition(const string &disp, string &type)
Definition: HttpUtils.cc:109