From 5b4cfc7fe92c7a40aef013e8af96fdbba28216a5 Mon Sep 17 00:00:00 2001 From: badasahog <52379863+badasahog@users.noreply.github.com> Date: Wed, 21 May 2025 11:18:52 -0400 Subject: [PATCH 1/6] copied commit from local branch --- src/fread.c | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/fread.c b/src/fread.c index a8c2a49cf2..dc60dccdc6 100644 --- a/src/fread.c +++ b/src/fread.c @@ -205,27 +205,21 @@ static inline int64_t clamp_i64t(int64_t x, int64_t lower, int64_t upper) { * is constructed manually (using say snprintf) that warning(), stop() * and Rprintf() are all called as warning(_("%s"), msg) and not warning(msg). */ -static const char* strlim(const char *ch, size_t limit) { - static char buf[1002]; - static int flip = 0; - char *ptr = buf + 501 * flip; - flip = 1 - flip; - char *ch2 = ptr; - limit = imin(limit, 500); +static const char* strlim(const char *ch, char buf[static 500], size_t limit) { + char *ch2 = buf; size_t width = 0; while ((*ch>'\r' || (*ch!='\0' && *ch!='\r' && *ch!='\n')) && width++nLetters) INTERNAL_STOP("NUMTYPE(%d) > nLetters(%d)", NUMTYPE, nLetters); // # nocov - static char str[101]; int i=0; if (ncol<=100) { for (; i> shift) == 0) continue; @@ -426,18 +419,18 @@ static const char* filesize_to_str(const size_t fsize) } if (ndigits == 0 || (fsize == (fsize >> shift << shift))) { if (i < sizeof(suffixes)) { - snprintf(output, sizeof(output), "%"PRIu64"%cB (%"PRIu64" bytes)", // # notranslate + snprintf(output, 100, "%"PRIu64"%cB (%"PRIu64" bytes)", // # notranslate (fsize >> shift), suffixes[i], fsize); return output; } } else { - snprintf(output, sizeof(output), "%.*f%cB (%"PRIu64" bytes)", // # notranslate + snprintf(output, 100, "%.*f%cB (%"PRIu64" bytes)", // # notranslate ndigits, (double)fsize / (1LL << shift), suffixes[i], fsize); return output; } } if (fsize == 1) return "1 byte"; - snprintf(output, sizeof(output), "%"PRIu64" bytes", fsize); // # notranslate + snprintf(output, 100, "%"PRIu64" bytes", fsize); // # notranslate return output; } @@ -1423,7 +1416,7 @@ int freadMain(freadMainArgs _args) { } fileSize = (size_t) stat_buf.st_size; if (fileSize == 0) {close(fd); STOP(_("File is empty: %s"), fnam);} - if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize)); + if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize)); // No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage // Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried). @@ -1455,7 +1448,7 @@ int freadMain(freadMainArgs _args) { if (GetFileSizeEx(hFile,&liFileSize)==0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); } fileSize = (size_t)liFileSize.QuadPart; if (fileSize<=0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); } - if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize)); + if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize)); HANDLE hMap=CreateFileMapping(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL); if (hMap==NULL) { CloseHandle(hFile); STOP(_("This is Windows, CreateFileMapping returned error %lu for file %s"), GetLastError(), fnam); } mmp = MapViewOfFile(hMap,FILE_MAP_COPY,0,0,fileSize); // fileSize must be <= hilo passed to CreateFileMapping above. @@ -1464,7 +1457,7 @@ int freadMain(freadMainArgs _args) { if (mmp == NULL) { #endif int nbit = 8*sizeof(char *); // #nocov - STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str(fileSize), nbit, // # nocov + STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str((char[100]) {}, fileSize), nbit, // # nocov nbit<=32 ? _("Please upgrade to 64bit") : _("There is probably not enough contiguous virtual memory available")); // # nocov } sof = (const char*) mmp; @@ -1561,7 +1554,7 @@ int freadMain(freadMainArgs _args) { // # nocov start if (!verbose) DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg); - STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize)); // # nocov end } if (verbose) @@ -1642,7 +1635,7 @@ int freadMain(freadMainArgs _args) { if (ch>=eof) STOP(_("Input is either empty, fully whitespace, or skip has been set after the last non-whitespace.")); if (verbose) { if (lineStart>ch) DTPRINT(_(" Moved forward to first non-blank line (%d)\n"), row1line); - DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, 30)); + DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[500]) {}, 30)); } ch = pos = lineStart; } @@ -1832,7 +1825,7 @@ int freadMain(freadMainArgs _args) { if (!fill && tt!=ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov if (verbose) { DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"), - tt, row1line, strlim(pos, 30)); + tt, row1line, strlim(pos, (char[500]) {}, 30)); DTPRINT(_(" Quote rule picked = %d\n"), quoteRule); DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill?"true":"false", ncol); } @@ -1849,7 +1842,7 @@ int freadMain(freadMainArgs _args) { // # nocov start if (!verbose) DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg); - STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize)); // # nocov end } if (verbose) @@ -1995,7 +1988,7 @@ int freadMain(freadMainArgs _args) { memcpy(type, tmpType, ncol); } if (verbose && (bumped || jump==0 || jump==nJumps-1)) { - DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString(ncol), quoteRule); + DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString((char[101]) {}, ncol), quoteRule); } } @@ -2090,7 +2083,7 @@ int freadMain(freadMainArgs _args) { type[j] = tmpType[j]; } } - if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString(ncol), quoteRule); + if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString((char[101]) {}, ncol), quoteRule); } estnrow=1; @@ -2222,7 +2215,7 @@ int freadMain(freadMainArgs _args) { rowSize8 += (size[j] & 8); if (type[j] == CT_STRING) nStringCols++; else nNonStringCols++; } - if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString(ncol)); + if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString((char[101]) {}, ncol)); tColType = wallclock(); } @@ -2689,7 +2682,7 @@ int freadMain(freadMainArgs _args) { for (int i=0; i>"), strlim(skippedFooter,500)); + DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {}, 500)); } else { ch = headPos; int tt = countfields(&ch); if (fill>0) { DTWARN(_("Stopped early on line %"PRIu64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"), - (uint64_t)DTi+row1line, ncol, tt, tt, strlim(skippedFooter,500)); + (uint64_t)DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500)); } else { DTWARN(_("Stopped early on line %"PRIu64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"), - (uint64_t)DTi+row1line, ncol, tt, strlim(skippedFooter,500)); + (uint64_t)DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500)); } } } } if (quoteRuleBumpedCh!=NULL && quoteRuleBumpedCh>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), (uint64_t)quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, 500)); + DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRIu64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), (uint64_t)quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {}, 500)); } if (verbose) { From b1d4cd07ed4f4e7f98971e4d53e27035a0feab51 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 27 May 2025 17:17:46 +0000 Subject: [PATCH 2/6] restore uint64_t --- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 5ed582734d..81efa83c99 100644 --- a/src/fread.c +++ b/src/fread.c @@ -409,7 +409,7 @@ double wallclock(void) * multiple threads at the same time, or hold on to the value returned for * extended periods of time. */ -static const char* filesize_to_str(char output[static 100], const size_t fsize) +static const char* filesize_to_str(char output[static 100], const uint64_t fsize) { static const char suffixes[] = {'T', 'G', 'M', 'K'}; for (int i = 0; i <= sizeof(suffixes); i++) { From 21a2dba07a9b96a9138b23a745754d10a927a473 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 27 May 2025 17:21:19 +0000 Subject: [PATCH 3/6] new signature in two new call sites --- src/fread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fread.c b/src/fread.c index 81efa83c99..fcf99fc89a 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1418,7 +1418,7 @@ int freadMain(freadMainArgs _args) { } if (stat_buf.st_size > SIZE_MAX) { close(fd); // # nocov - STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(stat_buf.st_size), fnam); // # nocov + STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, stat_buf.st_size), fnam); // # nocov } fileSize = (size_t) stat_buf.st_size; if (fileSize == 0) {close(fd); STOP(_("File is empty: %s"), fnam);} @@ -1454,7 +1454,7 @@ int freadMain(freadMainArgs _args) { if (GetFileSizeEx(hFile,&liFileSize)==0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); } if (liFileSize.QuadPart > SIZE_MAX) { CloseHandle(hFile); // # nocov - STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(liFileSize.QuadPart), fnam); // # nocov + STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, liFileSize.QuadPart), fnam); // # nocov } fileSize = (size_t)liFileSize.QuadPart; if (fileSize==0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); } From d9de32502048ea336384eabab6df0bf32bbec9d8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 28 May 2025 09:07:57 -0700 Subject: [PATCH 4/6] restore diff lost to sloppy merge --- src/fread.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/fread.c b/src/fread.c index 247c53a5c4..d6c63276c6 100644 --- a/src/fread.c +++ b/src/fread.c @@ -2683,8 +2683,11 @@ int freadMain(freadMainArgs _args) { } // else nrowLimit applied and stopped early normally } - - + + // tell progress meter to finish up; e.g. write final newline + // if there's a reread, the progress meter will start again from 0 + if (args.showProgress) progress(100, 0); + if (firstTime) { tReread = tRead = wallclock(); From 7807adb4492fdaaebd72ab756066d1ccf34ccee2 Mon Sep 17 00:00:00 2001 From: badasahog <52379863+badasahog@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:44:08 -0400 Subject: [PATCH 5/6] corrected error from conflict resolution --- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 17aba5d5f3..962ddec9ec 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1822,7 +1822,7 @@ int freadMain(freadMainArgs _args) { if (!fill && tt != ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov if (verbose) { DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"), - tt, row1line, strlim(pos, 30)); + tt, row1line, strlim(pos, (char[500]) {}, 30)); DTPRINT(_(" Quote rule picked = %d\n"), quoteRule); DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol); } From 75542b5b69e0c33578c3f388756405200c05e2bb Mon Sep 17 00:00:00 2001 From: badasahog <52379863+badasahog@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:55:50 -0400 Subject: [PATCH 6/6] scrapped and redone --- src/fread.c | 110 +++++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/src/fread.c b/src/fread.c index 962ddec9ec..8879eec33e 100644 --- a/src/fread.c +++ b/src/fread.c @@ -222,9 +222,10 @@ static const char* strlim(const char *ch, char buf[static 500], size_t limit) { static const char *typeLetter = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; -static char *typesAsString(char str[static 101], int ncol) { +static char *typesAsString(int ncol) { int nLetters = strlen(typeLetter); if (NUMTYPE > nLetters) INTERNAL_STOP("NUMTYPE(%d) > nLetters(%d)", NUMTYPE, nLetters); // # nocov + static char str[101]; int i = 0; if (ncol <= 100) { for (; i < ncol; i++) str[i] = typeLetter[IGNORE_BUMP(type[i])]; @@ -404,9 +405,10 @@ double wallclock(void) * multiple threads at the same time, or hold on to the value returned for * extended periods of time. */ -static const char* filesize_to_str(char output[static 100], const uint64_t fsize) +static const char* filesize_to_str(const uint64_t fsize) { - static const char suffixes[] = {'T', 'G', 'M', 'K'}; + static const char suffixes[] = { 'T', 'G', 'M', 'K' }; + static char output[100]; for (int i = 0; i <= sizeof(suffixes); i++) { int shift = (sizeof(suffixes) - i) * 10; if ((fsize >> shift) == 0) continue; @@ -416,18 +418,18 @@ static const char* filesize_to_str(char output[static 100], const uint64_t fsize } if (ndigits == 0 || (fsize == (fsize >> shift << shift))) { if (i < sizeof(suffixes)) { - snprintf(output, 100, "%"PRIu64"%ciB (%"PRIu64" bytes)", // # notranslate - (fsize >> shift), suffixes[i], fsize); + snprintf(output, sizeof(output), "%"PRIu64"%ciB (%"PRIu64" bytes)", // # notranslate + fsize >> shift, suffixes[i], fsize); return output; } } else { - snprintf(output, 100, "%.*f%ciB (%"PRIu64" bytes)", // # notranslate + snprintf(output, sizeof(output), "%.*f%ciB (%"PRIu64" bytes)", // # notranslate ndigits, (double)fsize / (1LL << shift), suffixes[i], fsize); return output; } } if (fsize == 1) return "1 byte"; - snprintf(output, 100, "%"PRIu64" bytes", fsize); // # notranslate + snprintf(output, sizeof(output), "%"PRIu64" bytes", fsize); // # notranslate return output; } @@ -1405,11 +1407,11 @@ int freadMain(freadMainArgs _args) { } if (stat_buf.st_size > SIZE_MAX) { close(fd); // # nocov - STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, stat_buf.st_size), fnam); // # nocov + STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(stat_buf.st_size), fnam); // # nocov } fileSize = (size_t) stat_buf.st_size; if (fileSize == 0) {close(fd); STOP(_("File is empty: %s"), fnam);} - if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize)); + if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize)); // No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage // Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried). @@ -1441,20 +1443,20 @@ int freadMain(freadMainArgs _args) { if (GetFileSizeEx(hFile, &liFileSize)==0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); } if (liFileSize.QuadPart > SIZE_MAX) { CloseHandle(hFile); // # nocov - STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, liFileSize.QuadPart), fnam); // # nocov + STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(liFileSize.QuadPart), fnam); // # nocov } fileSize = (size_t)liFileSize.QuadPart; - if (fileSize == 0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); } - if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize)); - HANDLE hMap=CreateFileMapping(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL); + if (fileSize==0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); } + if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize)); + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL); if (hMap == NULL) { CloseHandle(hFile); STOP(_("This is Windows, CreateFileMapping returned error %lu for file %s"), GetLastError(), fnam); } - mmp = MapViewOfFile(hMap, FILE_MAP_COPY, 0, 0, fileSize); // fileSize must be <= hilo passed to CreateFileMapping above. + mmp = MapViewOfFile(hMap,FILE_MAP_COPY,0,0,fileSize); // fileSize must be <= hilo passed to CreateFileMapping above. CloseHandle(hMap); // we don't need to keep the file open; the MapView keeps an internal reference; CloseHandle(hFile); // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa366537(v=vs.85).aspx if (mmp == NULL) { #endif int nbit = 8 * sizeof(char *); // #nocov - STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str((char[100]) {}, fileSize), nbit, // # nocov + STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str(fileSize), nbit, // # nocov nbit <= 32 ? _("Please upgrade to 64bit") : _("There is probably not enough contiguous virtual memory available")); // # nocov } sof = (const char*) mmp; @@ -1550,7 +1552,7 @@ int freadMain(freadMainArgs _args) { // # nocov start if (!verbose) DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg); - STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize)); + STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); // # nocov end } if (verbose) @@ -1826,33 +1828,31 @@ int freadMain(freadMainArgs _args) { DTPRINT(_(" Quote rule picked = %d\n"), quoteRule); DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol); } - } - - if (ncol < 1 || row1line < 1) INTERNAL_STOP("ncol==%d line==%d after detecting sep, ncol and first line", ncol, row1line); // # nocov - int tt = countfields(&ch); - ch = pos; // move back to start of line since countfields() moved to next - if (!fill && tt!=ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov - if (verbose) { - DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"), - tt, row1line, strlim(pos, (char[500]) {}, 30)); - DTPRINT(_(" Quote rule picked = %d\n"), quoteRule); - DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill?"true":"false", ncol); - } - - if (ncol == 1 && lastEOLreplaced && (eof[-1]=='\n' || eof[-1]=='\r')) { - // Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end) - if (fileSize % 4096 == 0) { - const char *msg = _("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too"); - if (verbose) - DTPRINT(_(" Copying file in RAM. %s\n"), msg); - ASSERT(mmp_copy == NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov - double time_taken = copyFile(fileSize); - if (time_taken == -1.0) { - // # nocov start - if (!verbose) - DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg); - STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize)); - // # nocov end + + if (ncol == 1 && lastEOLreplaced && (eof[-1] == '\n' || eof[-1] == '\r')) { + // Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end) + if (fileSize % 4096 == 0) { + const char *msg = _("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too"); + if (verbose) + DTPRINT(_(" Copying file in RAM. %s\n"), msg); + ASSERT(mmp_copy == NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov + double time_taken = copyFile(fileSize); + if (time_taken == -1.0) { + // # nocov start + if (!verbose) + DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg); + STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + // # nocov end + } + if (verbose) + DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken); + else if (tt > 0.5) // # nocov + DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded + pos = sof + (pos - (const char *)mmp); + firstJumpEnd = sof + (firstJumpEnd - (const char *)mmp); + } else { + if (verbose) DTPRINT(_(" 1-column file ends with 2 or more end-of-line. Restoring last eol using extra byte in cow page.\n")); + eof++; } *const_cast(eof - 1) = eol_one_r ? '\r' : '\n'; *const_cast(eof) = '\0'; @@ -1986,10 +1986,8 @@ int freadMain(freadMainArgs _args) { ASSERT(jump > 0, "jump(%d)>0", jump); memcpy(type, tmpType, ncol); } - if (verbose && (bumped || jump == 0 || jump == nJumps - 1)) { - DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString((char[101]) {}, ncol), quoteRule); - + DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString(ncol), quoteRule); } } @@ -2084,7 +2082,7 @@ int freadMain(freadMainArgs _args) { type[j] = tmpType[j]; } } - if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString((char[101]) {}, ncol), quoteRule); + if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString(ncol), quoteRule); } estnrow = 1; @@ -2216,8 +2214,7 @@ int freadMain(freadMainArgs _args) { rowSize8 += (size[j] & 8); if (type[j] == CT_STRING) nStringCols++; else nNonStringCols++; } - - if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString((char[101]) {}, ncol)); + if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString(ncol)); tColType = wallclock(); } @@ -2680,11 +2677,11 @@ int freadMain(freadMainArgs _args) { } // else nrowLimit applied and stopped early normally } - + // tell progress meter to finish up; e.g. write final newline // if there's a reread, the progress meter will start again from 0 if (args.showProgress) progress(100, 0); - + if (firstTime) { tReread = tRead = wallclock(); @@ -2694,7 +2691,7 @@ int freadMain(freadMainArgs _args) { for (int i = 0; i < ncol; i++) typeCounts[IGNORE_BUMP(type[i])]++; if (nTypeBump) { - if (verbose) DTPRINT(_(" %d out-of-sample type bumps: %s\n"), nTypeBump, typesAsString((char[101]) {}, ncol)); + if (verbose) DTPRINT(_(" %d out-of-sample type bumps: %s\n"), nTypeBump, typesAsString(ncol)); rowSize1 = rowSize4 = rowSize8 = 0; nStringCols = 0; nNonStringCols = 0; @@ -2732,7 +2729,7 @@ int freadMain(freadMainArgs _args) { } double tTot = tReread - t0; // tReread==tRead when there was no reread if (verbose) DTPRINT(_("Read %"PRIu64" rows x %d columns from %s file in %02d:%06.3f wall clock time\n"), - (uint64_t)DTi, ncol - ndrop, filesize_to_str((char[100]) {}, fileSize), (int)tTot / 60, fmod(tTot, 60.0)); + (uint64_t)DTi, ncol - ndrop, filesize_to_str(fileSize), (int)tTot / 60, fmod(tTot, 60.0)); //********************************************************************************************* // [12] Finalize the datatable @@ -2758,22 +2755,21 @@ int freadMain(freadMainArgs _args) { while (ch < eof && isspace(*ch)) ch++; if (ch == eof) { DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {}, 500)); - } else { ch = headPos; int tt = countfields(&ch); if (fill > 0) { DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"), - DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500)); + DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500)); } else { DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"), - DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500)); + DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500)); } } } } - if (quoteRuleBumpedCh!=NULL && quoteRuleBumpedCh>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {}, 500)); }