33 #include <curl/curl.h>
35 #include <libdap/util.h>
36 #include <libdap/debug.h>
39 #include "rapidjson/writer.h"
40 #include "rapidjson/prettywriter.h"
41 #include "rapidjson/stringbuffer.h"
42 #include "rapidjson/filereadstream.h"
45 #include "BESNotFoundError.h"
46 #include "BESSyntaxUserError.h"
49 #include "BESStopWatch.h"
51 #include "TheBESKeys.h"
52 #include "CurlUtils.h"
54 #include "RemoteResource.h"
57 #include "NgapNames.h"
58 #include "NgapError.h"
// Message prefix used by the log and error messages in this file: expands to the
// string "NgapApi::" + the enclosing function's name (__func__) + "() - ".
62 #define prolog string("NgapApi::").append(__func__).append("() - ")
// Minimum remaining lifetime of a signed URL: signed_url_is_expired() reports a URL
// as expired when (expires - now) is below this value.
// Presumably seconds (it is compared against a time_t difference), i.e. one hour - TODO confirm.
66 const unsigned int REFRESH_THRESHOLD = 3600;
/**
 * Constructs an NgapApi whose CMR hostname and CMR search endpoint path start out as
 * DEFAULT_CMR_ENDPOINT_URL and DEFAULT_CMR_SEARCH_ENDPOINT_PATH.
 *
 * Both members are then assigned from the locals cmr_hostname and
 * cmr_search_endpoint_path.
 * NOTE(review): the statements that populate those locals, and any condition guarding
 * the assignments, are not visible in this view - presumably a configuration lookup
 * that overrides the defaults only when a value is found; confirm.
 */
69 NgapApi::NgapApi() : d_cmr_hostname(DEFAULT_CMR_ENDPOINT_URL), d_cmr_search_endpoint_path(DEFAULT_CMR_SEARCH_ENDPOINT_PATH) {
// Replace the default hostname with the value held in cmr_hostname.
74 d_cmr_hostname = cmr_hostname;
77 string cmr_search_endpoint_path;
// Replace the default search endpoint path with the value held in cmr_search_endpoint_path.
80 d_cmr_search_endpoint_path = cmr_search_endpoint_path;
/**
 * Returns the CMR search endpoint URL as a std::string. The query-building methods in
 * this file append "?" and the query parameters to the returned value.
 *
 * NOTE(review): the body is not visible in this view; presumably it combines
 * d_cmr_hostname and d_cmr_search_endpoint_path - confirm.
 */
86 std::string NgapApi::get_cmr_search_endpoint_url(){
/**
 * Builds a CMR query URL from a restified path in the older layout, in which the path
 * starts with NGAP_PROVIDERS_KEY, followed by a provider value, then either
 * NGAP_COLLECTIONS_KEY or NGAP_CONCEPTS_KEY with a collection value, then
 * NGAP_GRANULES_KEY with the granule value (everything to the end of the path).
 *
 * The resulting URL is get_cmr_search_endpoint_url() + "?" followed by the URL-escaped
 * provider, collection and granule values as query parameters.
 *
 * @param restified_path The path to convert; a leading '/' is added when missing.
 * @return A std::string; the return statement is not visible in this view,
 *         presumably cmr_url - confirm.
 *
 * NOTE(review): each validation below composes an error message in msg, but the
 * statements that declare msg and raise the error are not visible in this view, so the
 * exception type cannot be stated here.
 */
99 std::string NgapApi::build_cmr_query_url_old_rpath_format(
const std::string &restified_path) {
// Normalize: make sure the working copy of the path begins with '/'.
102 string r_path = ( restified_path[0] !=
'/' ?
"/" :
"") + restified_path;
// The providers key is required and must be the very first path element.
104 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
105 if(provider_index == string::npos){
107 msg << prolog <<
"The specified path '" << r_path <<
"'";
108 msg <<
" does not contain the required path element '" << NGAP_PROVIDERS_KEY <<
"'";
111 if(provider_index != 0){
113 msg << prolog <<
"The specified path '" << r_path <<
"'";
114 msg <<
" has the path element '" << NGAP_PROVIDERS_KEY <<
"' located in the incorrect position (";
115 msg << provider_index <<
") expected 0.";
// Step past the key so provider_index now marks the first character of the provider value.
118 provider_index += string(NGAP_PROVIDERS_KEY).length();
// Look for the collections key first; when it is absent, fall back to the concepts key
// and remember that the collection value is then a collection concept id.
120 bool use_collection_concept_id =
false;
121 size_t collection_index = r_path.find(NGAP_COLLECTIONS_KEY);
122 if(collection_index == string::npos) {
123 size_t concepts_index = r_path.find(NGAP_CONCEPTS_KEY);
124 if (concepts_index == string::npos) {
126 msg << prolog <<
"The specified path '" << r_path <<
"'";
127 msg <<
" contains neither the '" << NGAP_COLLECTIONS_KEY <<
"'";
128 msg <<
" nor the '" << NGAP_CONCEPTS_KEY <<
"'";
129 msg <<
" key, one must be provided.";
132 collection_index = concepts_index;
133 use_collection_concept_id =
true;
// Reject paths in which the collections/concepts key starts at or before provider_index + 1.
135 if(collection_index <= provider_index+1){
137 msg << prolog <<
"The specified path '" << r_path <<
"'";
138 msg <<
" has the path element '" << (use_collection_concept_id?NGAP_CONCEPTS_KEY:NGAP_COLLECTIONS_KEY) <<
"' located in the incorrect position (";
139 msg << collection_index <<
") expected at least " << provider_index+1;
// The provider value is the text between the providers key and the collections/concepts key.
142 string provider = r_path.substr(provider_index,collection_index - provider_index);
// Step past whichever key was matched so collection_index marks the start of the collection value.
143 collection_index += use_collection_concept_id?string(NGAP_CONCEPTS_KEY).length():string(NGAP_COLLECTIONS_KEY).length();
// The granules key is required and must start after collection_index + 1.
146 size_t granule_index = r_path.find(NGAP_GRANULES_KEY);
147 if(granule_index == string::npos){
149 msg << prolog <<
"The specified path '" << r_path <<
"'";
150 msg <<
" does not contain the required path element '" << NGAP_GRANULES_KEY <<
"'";
153 if(granule_index <= collection_index+1){
155 msg << prolog <<
"The specified path '" << r_path <<
"'";
156 msg <<
" has the path element '" << NGAP_GRANULES_KEY <<
"' located in the incorrect position (";
157 msg << granule_index <<
") expected at least " << collection_index+1;
// The collection value is the text between the collections/concepts key and the granules key.
160 string collection = r_path.substr(collection_index,granule_index - collection_index);
161 granule_index += string(NGAP_GRANULES_KEY).length();
// The granule value is everything after the granules key.
164 string granule = r_path.substr(granule_index);
// Assemble the query URL: endpoint, '?', then the escaped parameters.
167 string cmr_url = get_cmr_search_endpoint_url() +
"?";
// The curl easy handle is used only for curl_easy_escape(); each escaped buffer is
// released with curl_free() after it has been appended.
// NOTE(review): the results of curl_easy_init() and curl_easy_escape() are not checked.
// libcurl documents that curl_easy_escape() returns NULL on failure, and appending a
// null pointer to a std::string is undefined behavior. The handle and buffer would also
// leak if an append threw. Confirm and consider checking the results / using RAII.
170 CURL *ceh = curl_easy_init();
171 char *esc_url_content;
174 esc_url_content = curl_easy_escape(ceh, provider.c_str(), provider.size());
175 cmr_url += string(CMR_PROVIDER).append(
"=").append(esc_url_content).append(
"&");
176 curl_free(esc_url_content);
178 esc_url_content = curl_easy_escape(ceh, collection.c_str(), collection.size());
// The collection value is sent as CMR_COLLECTION_CONCEPT_ID when the concepts key was
// used; the CMR_ENTRY_TITLE form below is presumably the else branch (the else line
// itself is not visible in this view) - confirm.
179 if(use_collection_concept_id){
181 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append(
"=").append(esc_url_content).append(
"&");
185 cmr_url += string(CMR_ENTRY_TITLE).append(
"=").append(esc_url_content).append(
"&");
188 curl_free(esc_url_content);
190 esc_url_content = curl_easy_escape(ceh, granule.c_str(), granule.size());
191 cmr_url += string(CMR_GRANULE_UR).append(
"=").append(esc_url_content);
192 curl_free(esc_url_content);
194 curl_easy_cleanup(ceh);
/**
 * Builds a CMR query URL from a restified path.
 *
 * If the path contains NGAP_PROVIDERS_KEY anywhere, the work is delegated to
 * build_cmr_query_url_old_rpath_format() with the original restified_path. Otherwise
 * the path must start with NGAP_COLLECTIONS_KEY followed by a collection name, then
 * NGAP_GRANULES_KEY followed by the granule name (everything to the end of the path).
 * Anything in the collection name from its first '/' onward is treated as an optional
 * component: it is logged and removed from the collection name.
 *
 * The resulting URL is get_cmr_search_endpoint_url() + "?" followed by the URL-escaped
 * collection name (as CMR_COLLECTION_CONCEPT_ID) and granule name (as CMR_GRANULE_UR).
 *
 * @param restified_path The path to convert; a leading '/' is added when missing.
 * @return A std::string; the final return statement is not visible in this view,
 *         presumably cmr_url - confirm.
 *
 * NOTE(review): each validation below composes an error message in msg, but the
 * statements that declare msg and raise the error are not visible in this view, so the
 * exception type cannot be stated here.
 */
215 std::string NgapApi::build_cmr_query_url(
const std::string &restified_path) {
// Normalize: make sure the working copy of the path begins with '/'.
218 string r_path = ( restified_path[0] !=
'/' ?
"/" :
"") + restified_path;
// A providers key anywhere in the path selects the older path layout.
220 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
221 if(provider_index != string::npos){
222 return build_cmr_query_url_old_rpath_format(restified_path);
// The collections key is required and must be the very first path element.
// NOTE(review): the message below also names NGAP_CONCEPTS_KEY, but only
// NGAP_COLLECTIONS_KEY is searched for in the code visible here - confirm the wording.
225 size_t collections_key_index = r_path.find(NGAP_COLLECTIONS_KEY);
226 if(collections_key_index == string::npos) {
228 msg << prolog <<
"The specified path '" << r_path <<
"'";
229 msg <<
" contains neither the '" << NGAP_COLLECTIONS_KEY <<
"'";
230 msg <<
" nor the '" << NGAP_CONCEPTS_KEY <<
"'";
231 msg <<
" one must be provided.";
// NOTE(review): provider_index is string::npos whenever this point is reached (the
// non-npos case returned above), so "provider_index + 1" in the message wraps around
// to 0. The condition actually enforced is "index must be 0"; the "expected at least"
// wording looks copied from the older-format function - confirm and reword.
234 if(collections_key_index != 0){
236 msg << prolog <<
"The specified path '" << r_path <<
"'";
237 msg <<
" has the path element '" << NGAP_COLLECTIONS_KEY <<
"' located in the incorrect position (";
238 msg << collections_key_index <<
") expected at least " << provider_index + 1;
// Index of the first character after the collections key (start of the collection name).
242 size_t collections_index = collections_key_index + string(NGAP_COLLECTIONS_KEY).length();
// The granules key is required and must start after collections_index + 1.
244 size_t granules_key_index = r_path.find(NGAP_GRANULES_KEY);
245 if(granules_key_index == string::npos){
247 msg << prolog <<
"The specified path '" << r_path <<
"'";
248 msg <<
" does not contain the required path element '" << NGAP_GRANULES_KEY <<
"'";
254 if(granules_key_index <= collections_index + 1){
256 msg << prolog <<
"The specified path '" << r_path <<
"'";
257 msg <<
" has the path element '" << NGAP_GRANULES_KEY <<
"' located in the incorrect position (";
258 msg << granules_key_index <<
") expected at least " << collections_index + 1;
261 size_t granules_index = granules_key_index + string(NGAP_GRANULES_KEY).length();
// The granule name is everything after the granules key.
263 string granule_name = r_path.substr(granules_index);
// The collection name is the text between the collections key and the granules key.
267 string collection_name = r_path.substr(collections_index, granules_key_index - collections_index);
// Split off an optional trailing component: everything from the first '/' in the
// collection name is logged and then dropped from collection_name.
273 string optional_part;
274 size_t slash_pos = collection_name.find(
'/');
275 if(slash_pos != string::npos){
276 optional_part = collection_name.substr(slash_pos);
277 BESDEBUG(MODULE, prolog <<
"Found optional collections name component: " << optional_part << endl);
278 collection_name = collection_name.substr(0,slash_pos);
280 BESDEBUG(MODULE, prolog <<
"Found collection_name (aka collection_concept_id): " << collection_name << endl);
// Assemble the query URL: endpoint, '?', then the escaped parameters.
283 string cmr_url = get_cmr_search_endpoint_url() +
"?";
// The curl easy handle is used only for curl_easy_escape(); each escaped buffer is
// released with curl_free() after it has been appended.
// NOTE(review): the results of curl_easy_init() and curl_easy_escape() are not checked.
// libcurl documents that curl_easy_escape() returns NULL on failure, and appending a
// null pointer to a std::string is undefined behavior - confirm and consider checking.
286 CURL *ceh = curl_easy_init();
287 char *esc_url_content;
289 esc_url_content = curl_easy_escape(ceh, collection_name.c_str(), collection_name.size());
290 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append(
"=").append(esc_url_content).append(
"&");
291 curl_free(esc_url_content);
293 esc_url_content = curl_easy_escape(ceh, granule_name.c_str(), granule_name.size());
294 cmr_url += string(CMR_GRANULE_UR).append(
"=").append(esc_url_content);
295 curl_free(esc_url_content);
297 curl_easy_cleanup(ceh);
/**
 * Locates the data access URL in a CMR granules response (a rapidjson document).
 *
 * The visible code walks the "umm" / "RelatedUrls" members of an item and inspects each
 * RelatedUrls entry. An entry is a candidate when its "Type" equals
 * CMR_URL_TYPE_GET_DATA and it has no "Subtype" member; the candidate's "URL" is
 * accepted when it begins with "https://" or "http://".
 *
 * @param restified_path The path that produced the CMR query; used only in error messages.
 * @param cmr_granule_response The parsed CMR response to search.
 * @return The data access URL that was found.
 * @throws BESNotFoundError When the path does not identify a granule in CMR (the
 *         guarding condition is not visible in this view - presumably a zero "hits"
 *         count; confirm), or when RelatedUrls is not an array.
 * @throws BESInternalError When the umm/RelatedUrls member is missing, or when no data
 *         access URL was found.
 *
 * NOTE(review): several statements (the declarations of val, items, items_obj, umm,
 * related_urls, obj, r_url, r_type, the loops, and the raising of the "does not
 * contain the URL/Type object" errors) are not visible in this view. In particular,
 * whether the search stops at the first accepted URL cannot be determined from here.
 */
312 std::string NgapApi::find_get_data_url_in_granules_umm_json_v1_4(
const std::string &restified_path,
rapidjson::Document &cmr_granule_response)
315 string data_access_url;
318 int hits = val.GetInt();
320 throw BESNotFoundError(
string(
"The specified path '").append(restified_path).append(
321 "' does not identify a granule in CMR."), __FILE__, __LINE__);
325 if (items.IsArray()) {
// Debug aid: names for rapidjson value types, indexed by the value returned from
// GetType(), used to log the type of each entry in items.
328 const string RJ_TYPE_NAMES[] = {string(
"kNullType"),string(
"kFalseType"),string(
"kTrueType"),
329 string(
"kObjectType"),string(
"kArrayType"),string(
"kStringType"),string(
"kNumberType")};
331 ss <<
"items[" << i <<
"]: " << RJ_TYPE_NAMES[items[i].GetType()] << endl;
332 BESDEBUG(MODULE, prolog <<
"items size: " << items.Size() << endl << ss.str() << endl);
// Descend into the item: first its "umm" member, then umm's "RelatedUrls" member.
337 auto mitr = items_obj.FindMember(
"umm");
340 mitr = umm.FindMember(
"RelatedUrls");
341 if (mitr == umm.MemberEnd()) {
342 throw BESInternalError(
"Error! The umm/RelatedUrls object was not located!", __FILE__, __LINE__);
346 if (!related_urls.IsArray()) {
347 throw BESNotFoundError(
"Error! The RelatedUrls object in the CMR response is not an array!", __FILE__,
351 BESDEBUG(MODULE, prolog <<
" Found RelatedUrls array in CMR response." << endl);
// Each RelatedUrls entry must carry both a "URL" and a "Type" member.
356 mitr = obj.FindMember(
"URL");
357 if (mitr == obj.MemberEnd()) {
359 err <<
"Error! The umm/RelatedUrls[" << i <<
"] does not contain the URL object";
364 mitr = obj.FindMember(
"Type");
365 if (mitr == obj.MemberEnd()) {
367 err <<
"Error! The umm/RelatedUrls[" << i <<
"] does not contain the Type object";
// Only entries without a "Subtype" member are considered for data access.
372 noSubtype = obj.FindMember(
"Subtype") == obj.MemberEnd();
374 BESDEBUG(MODULE, prolog <<
"RelatedUrl Object:" <<
375 " URL: '" << r_url.GetString() <<
"'" <<
376 " Type: '" << r_type.GetString() <<
"'" <<
377 " SubType: '" << (noSubtype ?
"Absent" :
"Present") <<
"'" << endl);
// Candidate entry: Type matches CMR_URL_TYPE_GET_DATA and there is no Subtype.
379 if ((r_type.GetString() ==
string(CMR_URL_TYPE_GET_DATA)) && noSubtype) {
// Accept the candidate only when it is an http(s) URL.
383 string candidate_url = r_url.GetString();
384 if(candidate_url.substr(0,8) ==
"https://" || candidate_url.substr(0,7) ==
"http://"){
385 data_access_url = candidate_url;
// Nothing acceptable was found: report an internal error that names the path.
391 if (data_access_url.empty()) {
392 throw BESInternalError(
string(
"ERROR! Failed to locate a data access URL for the path: ") + restified_path,
396 return data_access_url;
/**
 * Converts an NGAP restified path into a data access URL.
 *
 * Builds the CMR query URL with build_cmr_query_url(), wraps it in a
 * std::shared_ptr<http::url>, and passes the CMR response to
 * find_get_data_url_in_granules_umm_json_v1_4() to extract the data access URL.
 *
 * @param restified_path The restified path to convert.
 * @param uid NOTE(review): no use of this parameter is visible in this view - confirm
 *        how it is used.
 * @return The data access URL extracted from the CMR response.
 *
 * NOTE(review): the statements that issue the CMR request and produce cmr_response,
 * and the declaration of besTimer, are not visible in this view.
 */
423 string NgapApi::convert_ngap_resty_path_to_data_access_url(
424 const std::string &restified_path,
425 const std::string &uid
427 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
428 string data_access_url;
// Translate the restified path into the CMR query to run.
430 string cmr_query_url = build_cmr_query_url(restified_path);
432 BESDEBUG(MODULE, prolog <<
"CMR Request URL: " << cmr_query_url << endl);
434 BESDEBUG(MODULE, prolog <<
"Building new RemoteResource." << endl);
435 std::shared_ptr<http::url> cmr_query_url_ptr(
new http::url(cmr_query_url));
// Start the timer only when module debugging, the timing log key, or verbose logging is on.
439 if (BESISDEBUG(MODULE) ||
BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
440 besTimer.
start(
"CMR Query: " + cmr_query_url);
// Pull the data access URL out of the CMR response document.
446 data_access_url = find_get_data_url_in_granules_umm_json_v1_4(restified_path, cmr_response);
448 BESDEBUG(MODULE, prolog <<
"END (data_access_url: "<< data_access_url <<
")" << endl);
450 return data_access_url;
/**
 * Decides whether a signed URL is expired, or close enough to expiring that it should
 * be treated as expired.
 *
 * The expiration time starts out as now. When cf_expires is non-empty it is parsed with
 * stoll() and used directly as the expiration time. Otherwise, when aws_expires is
 * non-empty, the expiration time is a start time plus stoll(aws_expires); the start
 * time is the URL's ingest_time() unless aws_date is non-empty, in which case it is
 * computed from the date fields parsed out of aws_date. The URL is reported as expired
 * when (expires - now) is less than REFRESH_THRESHOLD.
 *
 * @param signed_url The URL to examine.
 * @return A bool; the return statement is not visible in this view, presumably
 *         is_expired - confirm.
 *
 * NOTE(review): the statements that obtain now, cf_expires, aws_expires and aws_date are
 * not visible in this view; presumably they come from the current time and from
 * signed_url's query parameters - confirm.
 */
456 bool NgapApi::signed_url_is_expired(
const http::url &signed_url)
461 BESDEBUG(MODULE, prolog <<
"now: " << now << endl);
// Default: with no expiry information the URL is considered to expire now.
463 time_t expires = now;
466 time_t ingest_time = signed_url.ingest_time();
// Case 1: cf_expires holds the expiration time itself.
// NOTE(review): stoll() throws on non-numeric input - confirm callers tolerate that.
468 if(!cf_expires.empty()){
469 expires = stoll(cf_expires);
470 BESDEBUG(MODULE, prolog <<
"Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY <<
": " << expires << endl);
// Case 2: aws_expires is a duration added to a start time.
472 else if(!aws_expires.empty()){
// The start time defaults to when the URL was ingested.
476 time_t start_time = ingest_time;
// When aws_date is present, the start time is derived from it instead. The offsets
// below read a YYYYMMDD date, skip one character, then read HHMMSS.
// NOTE(review): substr() throws std::out_of_range when its start offset exceeds the
// string length, so a short aws_date value is not handled here.
480 if(!aws_date.empty()){
481 string date = aws_date;
482 string year = date.substr(0,4);
483 string month = date.substr(4,2);
484 string day = date.substr(6,2);
485 string hour = date.substr(9,2);
486 string minute = date.substr(11,2);
487 string second = date.substr(13,2);
489 BESDEBUG(MODULE, prolog <<
"date: "<< date <<
490 " year: " << year <<
" month: " << month <<
" day: " << day <<
491 " hour: " << hour <<
" minute: " << minute <<
" second: " << second << endl);
// gmtime(&now) supplies a struct tm to fill in; every date/time field is then overwritten.
// NOTE(review): gmtime() returns a pointer to shared static storage, so this is not
// safe if other threads call gmtime()/localtime() concurrently - consider gmtime_r().
493 struct tm *ti = gmtime(&now);
494 ti->tm_year = stoll(year) - 1900;
495 ti->tm_mon = stoll(month) - 1;
496 ti->tm_mday = stoll(day);
497 ti->tm_hour = stoll(hour);
498 ti->tm_min = stoll(minute);
499 ti->tm_sec = stoll(second);
501 BESDEBUG(MODULE, prolog <<
"ti->tm_year: "<< ti->tm_year <<
502 " ti->tm_mon: " << ti->tm_mon <<
503 " ti->tm_mday: " << ti->tm_mday <<
504 " ti->tm_hour: " << ti->tm_hour <<
505 " ti->tm_min: " << ti->tm_min <<
506 " ti->tm_sec: " << ti->tm_sec << endl);
// NOTE(review): mktime() interprets the struct tm as local time. If aws_date is a UTC
// timestamp (presumably it is - confirm), start_time is off by the host's UTC offset
// unless the process runs in UTC; timegm() would be the UTC counterpart.
509 start_time = mktime(ti);
510 BESDEBUG(MODULE, prolog <<
"AWS (computed) start_time: "<< start_time << endl);
512 expires = start_time + stoll(aws_expires);
513 BESDEBUG(MODULE, prolog <<
"Using "<< AMS_EXPIRES_HEADER_KEY <<
": " << aws_expires <<
514 " (expires: " << expires <<
")" << endl);
// Time left before expiry; negative when the expiration time is already in the past.
516 time_t remaining = expires - now;
517 BESDEBUG(MODULE, prolog <<
"expires_time: " << expires <<
518 " remaining_time: " << remaining <<
519 " refresh_threshold: " << REFRESH_THRESHOLD << endl);
// Treat the URL as expired once less than REFRESH_THRESHOLD remains.
521 is_expired = remaining < REFRESH_THRESHOLD;
522 BESDEBUG(MODULE, prolog <<
"is_expired: " << (is_expired?
"true":
"false") << endl);
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
error thrown if there is a user syntax error in the request or any other user error
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
rapidjson::Document get_as_json()
get_as_json() This function returns the cached resource parsed into a JSON document.
virtual std::string query_parameter_value(const std::string &key) const
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)