bes  Updated for version 3.20.10
BESCatalogUtils.cc
1 // BESCatalogUtils.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <dirent.h>
38 
39 #include <cerrno>
40 #include <iostream>
41 #include <sstream>
42 #include <list>
43 #include <cstring>
44 
45 #include "BESCatalogUtils.h"
46 #include "BESCatalogList.h"
47 #include "TheBESKeys.h"
48 #include "BESInternalError.h"
49 #include "BESSyntaxUserError.h"
50 #include "BESNotFoundError.h"
51 #include "BESRegex.h"
52 #include "BESUtil.h"
53 #include "BESInfo.h"
54 #include "BESContainerStorageList.h"
55 #include "BESContainerStorage.h"
56 #include "BESCatalogEntry.h"
57 
58 using namespace std;
59 
83 BESCatalogUtils::BESCatalogUtils(const string &n, bool strict) :
84  d_name(n), d_follow_syms(false)
85 {
86  string key = "BES.Catalog." + n + ".RootDirectory";
87  bool found = false;
88  TheBESKeys::TheKeys()->get_value(key, d_root_dir, found);
89  if (strict && (!found || d_root_dir == "")) {
90  string s = key + " not defined in BES configuration file";
91  throw BESSyntaxUserError(s, __FILE__, __LINE__);
92  }
93 
94  if(d_root_dir != "UNUSED"){
95  // TODO access() or stat() would test for existence faster. jhrg 2.25.18
96  DIR *dip = opendir(d_root_dir.c_str());
97  if (dip == NULL) {
98  string serr = "BESCatalogDirectory - root directory " + d_root_dir + " does not exist";
99  throw BESNotFoundError(serr, __FILE__, __LINE__);
100  }
101  closedir(dip);
102  }
103 
104  found = false;
105  key = (string) "BES.Catalog." + n + ".Exclude";
106  vector<string> vals;
107  TheBESKeys::TheKeys()->get_values(key, vals, found);
108  vector<string>::iterator ei = vals.begin();
109  vector<string>::iterator ee = vals.end();
110  for (; ei != ee; ei++) {
111  string e_str = (*ei);
112  if (!e_str.empty() && e_str != ";") BESUtil::explode(';', e_str, d_exclude);
113  }
114 
115  key = (string) "BES.Catalog." + n + ".Include";
116  vals.clear();
117  TheBESKeys::TheKeys()->get_values(key, vals, found);
118  vector<string>::iterator ii = vals.begin();
119  vector<string>::iterator ie = vals.end();
120  for (; ii != ie; ii++) {
121  string i_str = (*ii);
122  if (!i_str.empty() && i_str != ";") BESUtil::explode(';', i_str, d_include);
123  }
124 
125  key = "BES.Catalog." + n + ".TypeMatch";
126  list<string> match_list;
127  vals.clear();
128  TheBESKeys::TheKeys()->get_values(key, vals, found);
129  if (strict && (!found || vals.size() == 0)) {
130  string s = key + " not defined in key file";
131  throw BESInternalError(s, __FILE__, __LINE__);
132  }
133  vector<string>::iterator vi = vals.begin();
134  vector<string>::iterator ve = vals.end();
135  for (; vi != ve; vi++) {
136  BESUtil::explode(';', (*vi), match_list);
137  }
138 
139  list<string>::iterator mli = match_list.begin();
140  list<string>::iterator mle = match_list.end();
141  for (; mli != mle; mli++) {
142  if (!((*mli).empty()) && *(mli) != ";") {
143  list<string> amatch;
144  BESUtil::explode(':', (*mli), amatch);
145  if (amatch.size() != 2) {
146  string s = (string) "Catalog type match malformed, " + "looking for type:regexp;[type:regexp;]";
147  throw BESInternalError(s, __FILE__, __LINE__);
148  }
149  list<string>::iterator ami = amatch.begin();
150  handler_regex newval;
151  newval.handler = (*ami);
152  ami++;
153  newval.regex = (*ami);
154  d_match_list.push_back(newval);
155  }
156  }
157 
158  key = (string) "BES.Catalog." + n + ".FollowSymLinks";
159  string s_str;
160  TheBESKeys::TheKeys()->get_value(key, s_str, found);
161  s_str = BESUtil::lowercase(s_str);
162  if (s_str == "yes" || s_str == "on" || s_str == "true") {
163  d_follow_syms = true;
164  }
165 }
166 
178 bool BESCatalogUtils::include(const string &inQuestion) const
179 {
180  bool toInclude = false;
181 
182  // First check the file against the include list. If the file should be
183  // included then check the exclude list to see if there are exceptions
184  // to the include list.
185  if (d_include.size() == 0) {
186  toInclude = true;
187  }
188  else {
189  list<string>::const_iterator i_iter = d_include.begin();
190  list<string>::const_iterator i_end = d_include.end();
191  for (; i_iter != i_end; i_iter++) {
192  string reg = *i_iter;
193  if (!reg.empty()) {
194  try {
195  // must match exactly, meaning result is = to length of string
196  // in question
197  BESRegex reg_expr(reg.c_str());
198  if (reg_expr.match(inQuestion.c_str(), inQuestion.length())
199  == static_cast<int>(inQuestion.length())) {
200  toInclude = true;
201  }
202  }
203  catch (BESError &e) {
204  string serr = (string) "Unable to get catalog information, "
205  + "malformed Catalog Include parameter " + "in bes configuration file around " + reg + ": "
206  + e.get_message();
207  throw BESInternalError(serr, __FILE__, __LINE__);
208  }
209  }
210  }
211  }
212 
213  if (toInclude == true) {
214  if (exclude(inQuestion)) {
215  toInclude = false;
216  }
217  }
218 
219  return toInclude;
220 }
221 
229 bool BESCatalogUtils::exclude(const string &inQuestion) const
230 {
231  list<string>::const_iterator e_iter = d_exclude.begin();
232  list<string>::const_iterator e_end = d_exclude.end();
233  for (; e_iter != e_end; e_iter++) {
234  string reg = *e_iter;
235  if (!reg.empty()) {
236  try {
237  BESRegex reg_expr(reg.c_str());
238  if (reg_expr.match(inQuestion.c_str(), inQuestion.length()) == static_cast<int>(inQuestion.length())) {
239  return true;
240  }
241  }
242  catch (BESError &e) {
243  string serr = (string) "Unable to get catalog information, " + "malformed Catalog Exclude parameter "
244  + "in bes configuration file around " + reg + ": " + e.get_message();
245  throw BESInternalError(serr, __FILE__, __LINE__);
246  }
247  }
248  }
249  return false;
250 }
251 
257 BESCatalogUtils::match_citer BESCatalogUtils::match_list_begin() const
258 {
259  return d_match_list.begin();
260 }
261 
267 BESCatalogUtils::match_citer BESCatalogUtils::match_list_end() const
268 {
269  return d_match_list.end();
270 }
271 
282 unsigned int BESCatalogUtils::get_entries(DIR *dip, const string &fullnode, const string &use_node,
283  BESCatalogEntry *entry, bool dirs_only)
284 {
285  unsigned int cnt = 0;
286 
287  struct stat cbuf;
288  int statret = stat(fullnode.c_str(), &cbuf);
289  if (statret != 0) {
290  if (errno == ENOENT) { // ENOENT means that the path or part of the path does not exist
291  char *s_err = strerror(errno);
292  throw BESNotFoundError((s_err) ? string(s_err) : string("Node ") + use_node + " does not exist", __FILE__,
293  __LINE__);
294  }
295  // any other error means that access is denied for some reason
296  else {
297  char *s_err = strerror(errno);
298  throw BESNotFoundError((s_err) ? string(s_err) : string("Access denied for node ") + use_node, __FILE__,
299  __LINE__);
300  }
301  }
302 
303  struct dirent *dit;
304  while ((dit = readdir(dip)) != NULL) {
305  string dirEntry = dit->d_name;
306  if (dirEntry == "." || dirEntry == "..") {
307  continue;
308  }
309 
310  string fullPath = fullnode + "/" + dirEntry;
311 
312  // Skip this dir entry if it is a sym link and follow links is false
313  if (follow_sym_links() == false) {
314  struct stat lbuf;
315  (void) lstat(fullPath.c_str(), &lbuf);
316  if (S_ISLNK(lbuf.st_mode))
317  continue;
318  }
319 
320  // look at the mode and determine if this is a
321  // directory or a regular file. If it is not
322  // accessible, the stat fails, is not a directory
323  // or regular file, then simply do not include it.
324  struct stat buf;
325  statret = stat(fullPath.c_str(), &buf);
326  if (statret == 0 && S_ISDIR(buf.st_mode)) {
327  if (exclude(dirEntry) == false) {
328  BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
329 
330  bes_add_stat_info(curr_entry, buf);
331 
332  entry->add_entry(curr_entry);
333 
334  // we don't go further than this, so we need
335  // to add a blank node here so that we know
336  // it's a node (collection)
337  BESCatalogEntry *blank_entry = new BESCatalogEntry(".blank", entry->get_catalog());
338  curr_entry->add_entry(blank_entry);
339  }
340  }
341  else if (statret == 0 && S_ISREG(buf.st_mode)) {
342  if (!dirs_only && include(dirEntry)) {
343  BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
344  bes_add_stat_info(curr_entry, buf);
345 
346  list<string> services;
347  // TODO use the d_utils object? jhrg 2.26.18
348  isData(fullPath, d_name, services);
349  curr_entry->set_service_list(services);
350 
351  bes_add_stat_info(curr_entry, buf);
352 
353  entry->add_entry(curr_entry);
354  }
355  }
356  } // end of the while loop
357 
358  // TODO this always return zero. FIXME jhrg 2.26.18
359  return cnt;
360 }
361 
362 void BESCatalogUtils::display_entry(BESCatalogEntry *entry, BESInfo *info)
363 {
364  string defcatname = BESCatalogList::TheCatalogList()->default_catalog_name();
365 
366  // start with the external entry
367  map<string, string> props;
368  if (entry->get_catalog() == defcatname) {
369  props["name"] = entry->get_name();
370  }
371  else {
372  string name = entry->get_catalog() + "/";
373  if (entry->get_name() != "/") {
374  name = name + entry->get_name();
375  }
376  props["name"] = name;
377  }
378  props["catalog"] = entry->get_catalog();
379  props["size"] = entry->get_size();
380  props["lastModified"] = entry->get_mod_date() + "T" + entry->get_mod_time();
381  if (entry->is_collection()) {
382  props["node"] = "true";
383  ostringstream strm;
384  strm << entry->get_count();
385  props["count"] = strm.str();
386  }
387  else {
388  props["node"] = "false";
389  }
390  info->begin_tag("dataset", &props);
391 
392  list<string> services = entry->get_service_list();
393  if (services.size()) {
394  list<string>::const_iterator si = services.begin();
395  list<string>::const_iterator se = services.end();
396  for (; si != se; si++) {
397  info->add_tag("serviceRef", (*si));
398  }
399  }
400 }
401 
416 std::string
417 BESCatalogUtils::get_handler_name(const std::string &item) const
418 {
419  for (auto i = match_list_begin(), e = match_list_end(); i != e; ++i) {
420  BESRegex expr((*i).regex.c_str());
421  if (expr.match(item.c_str(), item.length()) == (int)item.length()) {
422  return (*i).handler;
423  }
424  }
425 
426  return "";
427 }
428 
441 bool
442 BESCatalogUtils::is_data(const std::string &item) const
443 {
444  for (auto i = match_list_begin(), e = match_list_end(); i != e; ++i) {
445  BESRegex expr((*i).regex.c_str());
446  if (expr.match(item.c_str(), item.length()) == (int)item.length()) {
447  return true;
448  }
449  }
450 
451  return false;
452 }
453 
461 void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, const string &fullnode)
462 {
463  struct stat cbuf;
464  int statret = stat(fullnode.c_str(), &cbuf);
465  if (statret == 0) {
466  bes_add_stat_info(entry, cbuf);
467  }
468 }
469 
470 void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, struct stat &buf)
471 {
472  off_t sz = buf.st_size;
473  entry->set_size(sz);
474 
475  // %T = %H:%M:%S
476  // %F = %Y-%m-%d
477  time_t mod = buf.st_mtime;
478  struct tm stm;
479  gmtime_r(&mod, &stm);
480  char mdate[64];
481  strftime(mdate, 64, "%Y-%m-%d", &stm);
482  char mtime[64];
483  strftime(mtime, 64, "%T", &stm);
484 
485  ostringstream sdt;
486  sdt << mdate;
487  entry->set_mod_date(sdt.str());
488 
489  ostringstream stt;
490  stt << mtime;
491  entry->set_mod_time(stt.str());
492 }
493 
494 bool BESCatalogUtils::isData(const string &inQuestion, const string &catalog, list<string> &services)
495 {
496  BESContainerStorage *store = BESContainerStorageList::TheList()->find_persistence(catalog);
497  if (!store) return false;
498 
499  return store->isData(inQuestion, services);
500 }
501 
502 void BESCatalogUtils::dump(ostream &strm) const
503 {
504  strm << BESIndent::LMarg << "BESCatalogUtils::dump - (" << (void *) this << ")" << endl;
505  BESIndent::Indent();
506 
507  strm << BESIndent::LMarg << "root directory: " << d_root_dir << endl;
508 
509  if (d_include.size()) {
510  strm << BESIndent::LMarg << "include list:" << endl;
511  BESIndent::Indent();
512  list<string>::const_iterator i_iter = d_include.begin();
513  list<string>::const_iterator i_end = d_include.end();
514  for (; i_iter != i_end; i_iter++) {
515  if (!(*i_iter).empty()) {
516  strm << BESIndent::LMarg << *i_iter << endl;
517  }
518  }
519  BESIndent::UnIndent();
520  }
521  else {
522  strm << BESIndent::LMarg << "include list: empty" << endl;
523  }
524 
525  if (d_exclude.size()) {
526  strm << BESIndent::LMarg << "exclude list:" << endl;
527  BESIndent::Indent();
528  list<string>::const_iterator e_iter = d_exclude.begin();
529  list<string>::const_iterator e_end = d_exclude.end();
530  for (; e_iter != e_end; e_iter++) {
531  if (!(*e_iter).empty()) {
532  strm << BESIndent::LMarg << *e_iter << endl;
533  }
534  }
535  BESIndent::UnIndent();
536  }
537  else {
538  strm << BESIndent::LMarg << "exclude list: empty" << endl;
539  }
540 
541  if (d_match_list.size()) {
542  strm << BESIndent::LMarg << "type matches:" << endl;
543  BESIndent::Indent();
544  BESCatalogUtils::match_citer i = d_match_list.begin();
545  BESCatalogUtils::match_citer ie = d_match_list.end();
546  for (; i != ie; i++) {
547  handler_regex match = (*i);
548  strm << BESIndent::LMarg << match.handler << " : " << match.regex << endl;
549  }
550  BESIndent::UnIndent();
551  }
552  else {
553  strm << BESIndent::LMarg << " type matches: empty" << endl;
554  }
555 
556  if (d_follow_syms) {
557  strm << BESIndent::LMarg << " follow symbolic links: on" << endl;
558  }
559  else {
560  strm << BESIndent::LMarg << " follow symbolic links: off" << endl;
561  }
562 
563  BESIndent::UnIndent();
564 }
565 
566 #if 0
568 BESCatalogUtils::Utils(const string &cat_name)
569 {
570  BESCatalogUtils *utils = BESCatalogUtils::_instances[cat_name];
571  if (!utils) {
572  utils = new BESCatalogUtils(cat_name);
573  BESCatalogUtils::_instances[cat_name] = utils;
574  }
575  return utils;
576 }
577 #endif
578 
579 
#if 0
// Added 12/24/12
// Compiled out by the surrounding #if 0. Deletes every BESCatalogUtils
// instance held in the _instances map; the map entries themselves are
// left in place.
void BESCatalogUtils::delete_all_catalogs()
{
    typedef map<string, BESCatalogUtils*>::iterator instance_iter;

    const instance_iter stop = BESCatalogUtils::_instances.end();
    for (instance_iter cursor = BESCatalogUtils::_instances.begin(); cursor != stop; ++cursor) {
        delete cursor->second;
    }
}

#endif
592 
virtual std::string default_catalog_name() const
The name of the default catalog.
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
virtual bool exclude(const std::string &inQuestion) const
Should this file/directory be excluded in the catalog?
virtual unsigned int get_entries(DIR *dip, const std::string &fullnode, const std::string &use_node, BESCatalogEntry *entry, bool dirs_only)
virtual bool include(const std::string &inQuestion) const
Should this file/directory be included in the catalog?
std::string get_handler_name(const std::string &item) const
Find the name of the handler that will process the given item.
bool is_data(const std::string &item) const
Is there a handler that can process this item?
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
virtual BESContainerStorage * find_persistence(const std::string &persist_name)
find the persistence store with the given name
provides persistent storage for data storage information represented by a container.
virtual bool isData(const std::string &inQuestion, std::list< std::string > &provides)=0
determine if the given container is data and what services are available for it
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
informational response object
Definition: BESInfo.h:63
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
error thrown if there is a user syntax error in the request or any other user error
static void explode(char delim, const std::string &str, std::list< std::string > &values)
Definition: BESUtil.cc:580
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371