SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #include <config.h> 00039 00040 #include <stdio.h> 00041 #include <stdlib.h> 00042 #include <string.h> 00043 #ifdef HAVE_UNISTD_H 00044 #include <unistd.h> 00045 #endif 00046 #ifdef HAVE_SYS_STAT_H 00047 #include <sys/stat.h> 00048 #endif 00049 #ifdef HAVE_SYS_TYPES_H 00050 #include <sys/types.h> 00051 #endif 00052 #include <assert.h> 00053 00054 #include "sphinxbase/pio.h" 00055 #include "sphinxbase/filename.h" 00056 #include "sphinxbase/err.h" 00057 #include "sphinxbase/strfuncs.h" 00058 #include "sphinxbase/ckd_alloc.h" 00059 00060 #ifndef EXEEXT 00061 #define EXEEXT "" 00062 #endif 00063 00064 enum { 00065 COMP_NONE, 00066 COMP_COMPRESS, 00067 COMP_GZIP, 00068 COMP_BZIP2 00069 }; 00070 00071 static void 00072 guess_comptype(char const *file, int32 *ispipe, int32 *isgz) 00073 { 00074 int k; 00075 00076 k = strlen(file); 00077 *ispipe = 0; 00078 *isgz = COMP_NONE; 00079 if ((k > 2) 00080 && ((strcmp(file + k - 2, ".Z") == 0) 00081 || (strcmp(file + k - 2, ".z") == 0))) { 00082 *ispipe = 1; 00083 *isgz = COMP_COMPRESS; 00084 } 00085 else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0) 00086 || (strcmp(file + k - 3, ".GZ") == 0))) { 00087 *ispipe = 1; 00088 *isgz = COMP_GZIP; 00089 } 00090 else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0) 00091 || (strcmp(file + k - 4, ".BZ2") == 0))) { 00092 *ispipe = 1; 00093 *isgz = COMP_BZIP2; 00094 } 00095 } 00096 00097 FILE * 00098 fopen_comp(const char *file, const char *mode, int32 * ispipe) 00099 { 00100 FILE *fp; 00101 00102 #ifndef HAVE_POPEN 00103 *ispipe = 0; /* No popen() on WinCE */ 00104 #else /* HAVE_POPEN */ 00105 int32 isgz; 00106 guess_comptype(file, ispipe, &isgz); 00107 #endif /* HAVE_POPEN */ 00108 00109 if (*ispipe) { 00110 #ifndef HAVE_POPEN 00111 /* Shouldn't get here, anyway */ 00112 E_FATAL("No popen() on WinCE\n"); 00113 #else 00114 if (strcmp(mode, "r") == 0) { 00115 char *command; 00116 switch (isgz) { 00117 case COMP_GZIP: 00118 command = string_join("gunzip" EXEEXT, " -c ", file, NULL); 00119 break; 00120 case COMP_COMPRESS: 00121 command = string_join("zcat" EXEEXT, " ", file, NULL); 00122 break; 00123 case COMP_BZIP2: 00124 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL); 00125 break; 00126 default: 00127 command = NULL; /* Make compiler happy. */ 00128 E_FATAL("Unknown compression type %d\n", isgz); 00129 } 00130 if ((fp = popen(command, mode)) == NULL) { 00131 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode); 00132 ckd_free(command); 00133 return NULL; 00134 } 00135 ckd_free(command); 00136 } 00137 else if (strcmp(mode, "w") == 0) { 00138 char *command; 00139 switch (isgz) { 00140 case COMP_GZIP: 00141 command = string_join("gzip" EXEEXT, " > ", file, NULL); 00142 break; 00143 case COMP_COMPRESS: 00144 command = string_join("compress" EXEEXT, " -c > ", file, NULL); 00145 break; 00146 case COMP_BZIP2: 00147 command = string_join("bzip2" EXEEXT, " > ", file, NULL); 00148 break; 00149 default: 00150 command = NULL; /* Make compiler happy. */ 00151 E_FATAL("Unknown compression type %d\n", isgz); 00152 } 00153 if ((fp = popen(command, mode)) == NULL) { 00154 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode); 00155 ckd_free(command); 00156 return NULL; 00157 } 00158 ckd_free(command); 00159 } 00160 else { 00161 E_ERROR("fopen_comp not implemented for mode = %s\n", mode); 00162 return NULL; 00163 } 00164 #endif /* HAVE_POPEN */ 00165 } 00166 else { 00167 fp = fopen(file, mode); 00168 } 00169 00170 return (fp); 00171 } 00172 00173 00174 void 00175 fclose_comp(FILE * fp, int32 ispipe) 00176 { 00177 if (ispipe) { 00178 #ifdef HAVE_POPEN 00179 #if defined(_WIN32) && (!defined(__SYMBIAN32__)) 00180 _pclose(fp); 00181 #else 00182 pclose(fp); 00183 #endif 00184 #endif 00185 } 00186 else 00187 fclose(fp); 00188 } 00189 00190 00191 FILE * 00192 fopen_compchk(const char *file, int32 * ispipe) 00193 { 00194 #ifndef HAVE_POPEN 00195 *ispipe = 0; /* No popen() on WinCE */ 00196 /* And therefore the rest of this function is useless. */ 00197 return (fopen_comp(file, "r", ispipe)); 00198 #else /* HAVE_POPEN */ 00199 int32 isgz; 00200 FILE *fh; 00201 00202 /* First just try to fopen_comp() it */ 00203 if ((fh = fopen_comp(file, "r", ispipe)) != NULL) 00204 return fh; 00205 else { 00206 char *tmpfile; 00207 int k; 00208 00209 /* File doesn't exist; try other compressed/uncompressed form, as appropriate */ 00210 guess_comptype(file, ispipe, &isgz); 00211 k = strlen(file); 00212 tmpfile = ckd_calloc(k+5, 1); 00213 strcpy(tmpfile, file); 00214 switch (isgz) { 00215 case COMP_GZIP: 00216 tmpfile[k - 3] = '\0'; 00217 break; 00218 case COMP_BZIP2: 00219 tmpfile[k - 4] = '\0'; 00220 break; 00221 case COMP_COMPRESS: 00222 tmpfile[k - 2] = '\0'; 00223 break; 00224 case COMP_NONE: 00225 strcpy(tmpfile + k, ".gz"); 00226 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00227 E_WARN("Using %s instead of %s\n", tmpfile, file); 00228 ckd_free(tmpfile); 00229 return fh; 00230 } 00231 strcpy(tmpfile + k, ".bz2"); 00232 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00233 E_WARN("Using %s instead of %s\n", tmpfile, file); 00234 ckd_free(tmpfile); 00235 return fh; 00236 } 00237 strcpy(tmpfile + k, ".Z"); 00238 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00239 E_WARN("Using %s instead of %s\n", tmpfile, file); 00240 ckd_free(tmpfile); 00241 return fh; 00242 } 00243 ckd_free(tmpfile); 00244 return NULL; 00245 } 00246 E_WARN("Using %s instead of %s\n", tmpfile, file); 00247 fh = fopen_comp(tmpfile, "r", ispipe); 00248 ckd_free(tmpfile); 00249 return NULL; 00250 } 00251 #endif /* HAVE_POPEN */ 00252 } 00253 00254 lineiter_t * 00255 lineiter_start(FILE *fh) 00256 { 00257 lineiter_t *li; 00258 00259 li = ckd_calloc(1, sizeof(*li)); 00260 li->buf = ckd_malloc(128); 00261 li->buf[0] = '\0'; 00262 li->bsiz = 128; 00263 li->len = 0; 00264 li->fh = fh; 00265 00266 return lineiter_next(li); 00267 } 00268 00269 lineiter_t * 00270 lineiter_next(lineiter_t *li) 00271 { 00272 /* Read a line and check for EOF. */ 00273 if (fgets(li->buf, li->bsiz, li->fh) == NULL) { 00274 lineiter_free(li); 00275 return NULL; 00276 } 00277 /* If we managed to read the whole thing, then we are done 00278 * (this will be by far the most common result). */ 00279 li->len = strlen(li->buf); 00280 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') 00281 return li; 00282 00283 /* Otherwise we have to reallocate and keep going. */ 00284 while (1) { 00285 li->bsiz *= 2; 00286 li->buf = ckd_realloc(li->buf, li->bsiz); 00287 /* If we get an EOF, we are obviously done. */ 00288 if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) { 00289 li->len += strlen(li->buf + li->len); 00290 return li; 00291 } 00292 li->len += strlen(li->buf + li->len); 00293 /* If we managed to read the whole thing, then we are done. */ 00294 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') 00295 return li; 00296 } 00297 00298 /* Shouldn't get here. */ 00299 return li; 00300 } 00301 00302 void 00303 lineiter_free(lineiter_t *li) 00304 { 00305 if (li == NULL) 00306 return; 00307 ckd_free(li->buf); 00308 ckd_free(li); 00309 } 00310 00311 char * 00312 fread_line(FILE *stream, size_t *out_len) 00313 { 00314 char *output, *outptr; 00315 char buf[128]; 00316 00317 output = outptr = NULL; 00318 while (fgets(buf, sizeof(buf), stream)) { 00319 size_t len = strlen(buf); 00320 /* Append this data to the buffer. */ 00321 if (output == NULL) { 00322 output = ckd_malloc(len + 1); 00323 outptr = output; 00324 } 00325 else { 00326 size_t cur = outptr - output; 00327 output = ckd_realloc(output, cur + len + 1); 00328 outptr = output + cur; 00329 } 00330 memcpy(outptr, buf, len + 1); 00331 outptr += len; 00332 /* Stop on a short read or end of line. */ 00333 if (len < sizeof(buf)-1 || buf[len-1] == '\n') 00334 break; 00335 } 00336 if (out_len) *out_len = outptr - output; 00337 return output; 00338 } 00339 00340 00341 #define FREAD_RETRY_COUNT 60 00342 00343 int32 00344 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream) 00345 { 00346 char *data; 00347 uint32 n_items_read; 00348 uint32 n_items_rem; 00349 uint32 n_retry_rem; 00350 int32 loc; 00351 00352 n_retry_rem = FREAD_RETRY_COUNT; 00353 00354 data = pointer; 00355 loc = 0; 00356 n_items_rem = num_items; 00357 00358 do { 00359 n_items_read = fread(&data[loc], size, n_items_rem, stream); 00360 00361 n_items_rem -= n_items_read; 00362 00363 if (n_items_rem > 0) { 00364 /* an incomplete read occurred */ 00365 00366 if (n_retry_rem == 0) 00367 return -1; 00368 00369 if (n_retry_rem == FREAD_RETRY_COUNT) { 00370 E_ERROR_SYSTEM("fread() failed; retrying...\n"); 00371 } 00372 00373 --n_retry_rem; 00374 00375 loc += n_items_read * size; 00376 #ifdef HAVE_UNISTD_H 00377 sleep(1); 00378 #endif 00379 } 00380 } while (n_items_rem > 0); 00381 00382 return num_items; 00383 } 00384 00385 00386 /* Silvio Moioli: updated to use Unicode */ 00387 #ifdef _WIN32_WCE /* No stat() on WinCE */ 00388 int32 00389 stat_retry(const char *file, struct stat * statbuf) 00390 { 00391 WIN32_FIND_DATAW file_data; 00392 HANDLE *h; 00393 wchar_t *wfile; 00394 size_t len; 00395 00396 len = mbstowcs(NULL, file, 0) + 1; 00397 wfile = ckd_calloc(len, sizeof(*wfile)); 00398 mbstowcs(wfile, file, len); 00399 if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) { 00400 ckd_free(wfile); 00401 return -1; 00402 } 00403 ckd_free(wfile); 00404 memset(statbuf, 0, sizeof(statbuf)); 00405 statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime; 00406 statbuf->st_size = file_data.nFileSizeLow; 00407 FindClose(h); 00408 00409 return 0; 00410 } 00411 00412 00413 int32 00414 stat_mtime(const char *file) 00415 { 00416 struct stat statbuf; 00417 00418 if (stat_retry(file, &statbuf) != 0) 00419 return -1; 00420 00421 return ((int32) statbuf.st_mtime); 00422 } 00423 #else 00424 #define STAT_RETRY_COUNT 10 00425 int32 00426 stat_retry(const char *file, struct stat * statbuf) 00427 { 00428 int32 i; 00429 00430 00431 00432 for (i = 0; i < STAT_RETRY_COUNT; i++) { 00433 00434 #ifndef HAVE_SYS_STAT_H 00435 FILE *fp; 00436 00437 if ((fp=(FILE *)fopen(file, "r"))!= 0) 00438 { 00439 fseek( fp, 0, SEEK_END); 00440 statbuf->st_size = ftell( fp ); 00441 fclose(fp); 00442 return 0; 00443 } 00444 00445 #else /* HAVE_SYS_STAT_H */ 00446 if (stat(file, statbuf) == 0) 00447 return 0; 00448 #endif 00449 if (i == 0) { 00450 E_ERROR_SYSTEM("stat(%s) failed; retrying...\n", file); 00451 } 00452 #ifdef HAVE_UNISTD_H 00453 sleep(1); 00454 #endif 00455 } 00456 00457 return -1; 00458 } 00459 00460 int32 00461 stat_mtime(const char *file) 00462 { 00463 struct stat statbuf; 00464 00465 #ifdef HAVE_SYS_STAT_H 00466 if (stat(file, &statbuf) != 0) 00467 return -1; 00468 #else /* HAVE_SYS_STAT_H */ 00469 if (stat_retry(file, &statbuf) != 0) 00470 return -1; 00471 #endif /* HAVE_SYS_STAT_H */ 00472 00473 return ((int32) statbuf.st_mtime); 00474 } 00475 #endif /* !_WIN32_WCE */ 00476 00477 struct bit_encode_s { 00478 FILE *fh; 00479 unsigned char buf, bbits; 00480 int16 refcount; 00481 }; 00482 00483 bit_encode_t * 00484 bit_encode_attach(FILE *outfh) 00485 { 00486 bit_encode_t *be; 00487 00488 be = ckd_calloc(1, sizeof(*be)); 00489 be->refcount = 1; 00490 be->fh = outfh; 00491 return be; 00492 } 00493 00494 bit_encode_t * 00495 bit_encode_retain(bit_encode_t *be) 00496 { 00497 ++be->refcount; 00498 return be; 00499 } 00500 00501 int 00502 bit_encode_free(bit_encode_t *be) 00503 { 00504 if (be == NULL) 00505 return 0; 00506 if (--be->refcount > 0) 00507 return be->refcount; 00508 ckd_free(be); 00509 00510 return 0; 00511 } 00512 00513 int 00514 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits) 00515 { 00516 int tbits; 00517 00518 tbits = nbits + be->bbits; 00519 if (tbits < 8) { 00520 /* Append to buffer. */ 00521 be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits)); 00522 } 00523 else { 00524 int i = 0; 00525 while (tbits >= 8) { 00526 /* Shift bits out of the buffer and splice with high-order bits */ 00527 fputc(be->buf | ((bits[i]) >> be->bbits), be->fh); 00528 /* Put low-order bits back into buffer */ 00529 be->buf = (bits[i] << (8 - be->bbits)) & 0xff; 00530 tbits -= 8; 00531 ++i; 00532 } 00533 } 00534 /* tbits contains remaining number of bits. */ 00535 be->bbits = tbits; 00536 00537 return nbits; 00538 } 00539 00540 int 00541 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits) 00542 { 00543 unsigned char bits[4]; 00544 codeword <<= (32 - nbits); 00545 bits[0] = (codeword >> 24) & 0xff; 00546 bits[1] = (codeword >> 16) & 0xff; 00547 bits[2] = (codeword >> 8) & 0xff; 00548 bits[3] = codeword & 0xff; 00549 return bit_encode_write(be, bits, nbits); 00550 } 00551 00552 int 00553 bit_encode_flush(bit_encode_t *be) 00554 { 00555 if (be->bbits) { 00556 fputc(be->buf, be->fh); 00557 be->bbits = 0; 00558 } 00559 return 0; 00560 } 00561 00562 #ifdef HAVE_SYS_STAT_H /* Unix, Cygwin */ 00563 int 00564 build_directory(const char *path) 00565 { 00566 int rv; 00567 00568 /* Utterly failed... */ 00569 if (strlen(path) == 0) 00570 return -1; 00571 /* Utterly succeeded... */ 00572 else if ((rv = mkdir(path, 0777)) == 0) 00573 return 0; 00574 /* Or, it already exists... */ 00575 else if (errno == EEXIST) 00576 return 0; 00577 else if (errno != ENOENT) { 00578 E_ERROR_SYSTEM("Failed to create %s"); 00579 return -1; 00580 } 00581 else { 00582 char *dirname = ckd_salloc(path); 00583 path2dirname(path, dirname); 00584 build_directory(dirname); 00585 ckd_free(dirname); 00586 return mkdir(path, 0777); 00587 } 00588 } 00589 #elif defined(_WIN32) 00590 /* FIXME: Implement this. */ 00591 int 00592 build_directory(const char *path) 00593 { 00594 E_ERROR("build_directory() unimplemented on your platform!\n"); 00595 return -1; 00596 } 00597 #else 00598 int 00599 build_directory(const char *path) 00600 { 00601 E_ERROR("build_directory() unimplemented on your platform!\n"); 00602 return -1; 00603 } 00604 #endif