#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <error.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <pthread.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
Go to the source code of this file.
Data Structures | |
struct | rlimit |
struct | line |
struct | buffer |
Input buffers. More... | |
struct | mergefile |
Defines | |
#define | _FILE_OFFSET_BITS 64 |
Program for merging some sorted files with large sizes. | |
#define | getrlimit(Resource, Rlp) (-1) |
#define | program_name "for_merge" |
#define | NONZERO(x) ((x) != 0) |
#define | MAX(a, b) ((a) > (b) ? (a) : (b)) |
#define | MIN(a, b) (((a) < (b)) ? (a) : (b)) |
#define | MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) |
#define | MIN_MERGE_SIZE (16 * MIN_MERGE_BUFFER_SIZE) |
#define | EXIT_SUCCESS 0 |
#define | EXIT_FAILURE 1 |
Enumerations | |
enum | { SORT_OUT_OF_ORDER = 1, MERGE_FAILURE = 2 } |
Functions | |
static void | die (char const *message, char const *file) |
void * | xmalloc (size_t n, size_t s) |
void | usage (int status) |
static void | write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file) |
double | physmem_total () |
double | physmem_available () |
static size_t | get_memory_available (void) |
static void | initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) |
static struct line * | buffer_linelim (struct buffer const *buf) |
void * | fill_input_buffer_thread (void *args) |
static int | compare (const struct line *a, const struct line *b) |
void | reconstruct_table (size_t nfiles, struct line const **cur, size_t *ord) |
static size_t | open_input_files (struct mergefile *files, size_t nfiles, FILE ***pfps, char *const *f) |
void | fill_input_buffer (struct buffer *buf, struct buffer *buf2, FILE *fp, char const *file) |
bool | swap_buffers (struct buffer *from, struct buffer *to) |
static void | merge_fps (struct mergefile *files, size_t nfiles, FILE *ofp, char const *output_file, FILE **fps) |
int | main (int argc, char **argv) |
Variables | |
static char | eolchar = '\n' |
static size_t | merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024) |
static size_t | out_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024) |
bool | flag_fcst_buffer_ready |
pthread_cond_t | flag_buffer_ready_cv |
pthread_mutex_t | flag_buffer_ready_mutex |
bool | flag_fcst_buffer_start |
pthread_cond_t | flag_fillbuf_start_cv |
pthread_mutex_t | flag_fillbuf_start_mutex |
buffer * | fillbuf_buf |
buffer * | fillbuf_buf2 |
FILE * | fillbuf_fp |
char const * | fillbuf_file |
bool | fillbuf_ret |
static char const | short_options [] = "o:s:S:zh" |
static struct option const | long_options [] |
#define _FILE_OFFSET_BITS 64 |
Program for merging some sorted files with large sizes.
Use a Forecasting merge algorithm, in the style suggested by Knuth volume 3 (2nd edition), exercise 5.4.6F. Use the optimization suggested by exercise 5.4.9.
This method keeps track of the buffer that will be emptied first and uses an extra buffer to read the appropriate next part from the disk, while the contents of the remaining buffers continue to be processed.
Definition at line 11 of file for_merge.c.
#define EXIT_FAILURE 1 |
Definition at line 116 of file for_merge.c.
#define EXIT_SUCCESS 0 |
Some systems do not define EXIT_*, despite otherwise supporting C89.
Definition at line 115 of file for_merge.c.
#define getrlimit | ( | Resource, | |||
Rlp | ) | (-1) |
#define MAX | ( | a, | |||
b | ) | ((a) > (b) ? (a) : (b)) |
#define MIN | ( | a, | |||
b | ) | (((a) < (b)) ? (a) : (b)) |
#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) |
Minimum size for a buffer.
Definition at line 103 of file for_merge.c.
#define MIN_MERGE_SIZE (16 * MIN_MERGE_BUFFER_SIZE) |
Minimum size for a merge.
Definition at line 106 of file for_merge.c.
Referenced by get_memory_available(), and main().
#define NONZERO | ( | x | ) | ((x) != 0) |
#define program_name "for_merge" |
anonymous enum |
Exit statuses.
Definition at line 48 of file for_merge.c.
/*! Exit statuses. */
enum
{
  /*! POSIX says to exit with status 1 if invoked with -c and the
      input is not properly sorted. */
  SORT_OUT_OF_ORDER = 1,

  /*! POSIX says any other irregular exit must exit with a status
      code greater than 1. */
  MERGE_FAILURE = 2
};
Return one past the limit of the line array.
Definition at line 312 of file for_merge.c.
References buffer::alloc, and buffer::buf.
Referenced by fill_input_buffer_thread(), and merge_fps().
00313 { 00314 return (struct line *) (buf->buf + buf->alloc); 00315 }
Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B.
Definition at line 432 of file for_merge.c.
References line::length, MIN, NONZERO, and line::text.
Referenced by merge_fps(), and reconstruct_table().
00433 { 00434 int diff; 00435 size_t alen, blen; 00436 00437 /* If the keys all compare equal (or no keys were specified) 00438 fall through to the default comparison. */ 00439 alen = a->length - 1, blen = b->length - 1; 00440 00441 if (alen == 0) 00442 diff = -NONZERO (blen); 00443 else if (blen == 0) 00444 diff = 1; 00445 else if (!(diff = memcmp (a->text, b->text, MIN (alen, blen)))) 00446 diff = alen < blen ? -1 : alen != blen; 00447 00448 return diff; 00449 }
static void die | ( | char const * | message, | |
char const * | file | |||
) | [static] |
Report MESSAGE for FILE together with the current errno description, then exit with status MERGE_FAILURE. If FILE is null, it represents standard output.
Definition at line 135 of file for_merge.c.
References MERGE_FAILURE.
Referenced by fill_input_buffer_thread(), main(), merge_fps(), and write_bytes().
00136 { 00137 error (MERGE_FAILURE, errno, "%s: %s", message, file ? file : ("standard output")); 00138 }
Start filling the input buffer BUF in the background thread. The second buffer BUF2 is checked for unprocessed data; if it has any, that data is copied to the beginning of the first buffer.
Definition at line 503 of file for_merge.c.
References fillbuf_buf, fillbuf_buf2, fillbuf_file, fillbuf_fp, flag_fcst_buffer_start, flag_fillbuf_start_cv, and flag_fillbuf_start_mutex.
Referenced by merge_fps().
00504 { 00505 pthread_mutex_lock (&flag_fillbuf_start_mutex); 00506 fillbuf_buf = buf; 00507 fillbuf_buf2 = buf2; 00508 fillbuf_fp = fp; 00509 fillbuf_file = file; 00510 flag_fcst_buffer_start = true; 00511 pthread_cond_signal (&flag_fillbuf_start_cv); 00512 pthread_mutex_unlock (&flag_fillbuf_start_mutex); 00513 }
void* fill_input_buffer_thread | ( | void * | args | ) |
Fill forecast buffer in background.
Definition at line 320 of file for_merge.c.
References buffer::buf, buffer_linelim(), die(), buffer::eof, eolchar, fillbuf_buf, fillbuf_buf2, fillbuf_file, fillbuf_fp, fillbuf_ret, flag_buffer_ready_cv, flag_buffer_ready_mutex, flag_fcst_buffer_ready, flag_fcst_buffer_start, flag_fillbuf_start_cv, flag_fillbuf_start_mutex, buffer::left, line::length, buffer::line_bytes, buffer::nlines, line::text, and buffer::used.
Referenced by main().
00321 { 00322 char eol = eolchar; 00323 00324 while (1) 00325 { 00326 00327 pthread_mutex_lock (&flag_fillbuf_start_mutex); 00328 while (!flag_fcst_buffer_start) 00329 pthread_cond_wait (&flag_fillbuf_start_cv, &flag_fillbuf_start_mutex); 00330 00331 struct buffer *buf = fillbuf_buf; 00332 struct buffer *buf2 = fillbuf_buf2; 00333 FILE *fp = fillbuf_fp; 00334 char const *file = fillbuf_file; 00335 flag_fcst_buffer_start = false; 00336 fillbuf_ret = false; 00337 pthread_mutex_unlock(&flag_fillbuf_start_mutex); 00338 00339 size_t line_bytes = buf->line_bytes; 00340 00341 if (buf->eof) 00342 goto fillbuf_ret_false; 00343 00344 // check unprocessed symbols 00345 if (buf2->used != buf2->left) 00346 { 00347 memmove (buf->buf, buf2->buf + buf2->used - buf2->left, buf2->left); 00348 buf->used = buf2->left; 00349 buf->nlines = 0; 00350 buf->left = buf2->left; 00351 } 00352 00353 for (;;) 00354 { 00355 char *ptr = buf->buf + buf->used; 00356 struct line *linelim = buffer_linelim (buf); 00357 struct line *line = linelim - buf->nlines; 00358 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; 00359 char *line_start = buf->nlines ? line->text + line->length : buf->buf; 00360 00361 while (line_bytes + 1 < avail) 00362 { 00363 /* Read as many bytes as possible, but do not read so many 00364 bytes that there might not be enough room for the 00365 corresponding line array. */ 00366 size_t readsize = (avail - 1) / (line_bytes + 1); 00367 00368 size_t bytes_read = fread (ptr, 1, readsize, fp); 00369 char *ptrlim = ptr + bytes_read; 00370 char *p; 00371 avail -= bytes_read; 00372 00373 if (bytes_read != readsize) 00374 { 00375 if (ferror (fp)) 00376 die (("read failed"), file); 00377 if (feof (fp)) 00378 { 00379 buf->eof = true; 00380 if (buf->buf == ptrlim) 00381 goto fillbuf_ret_false; 00382 if (ptrlim[-1] != eol) 00383 *ptrlim++ = eol; 00384 } 00385 } 00386 00387 /* Find and record each line in the just-read input. 
*/ 00388 while ((p = memchr (ptr, eol, ptrlim - ptr))) 00389 { 00390 ptr = p + 1; 00391 line--; 00392 line->text = line_start; 00393 line->length = ptr - line_start; 00394 avail -= line_bytes; 00395 00396 line_start = ptr; 00397 } 00398 00399 ptr = ptrlim; 00400 if (buf->eof) 00401 break; 00402 } 00403 00404 buf->used = ptr - buf->buf; 00405 buf->nlines = buffer_linelim (buf) - line; 00406 if (buf->nlines != 0) 00407 { 00408 buf->left = ptr - line_start; 00409 goto fillbuf_ret_true; 00410 } 00411 } 00412 00413 fillbuf_ret_true: 00414 00415 fillbuf_ret = true; 00416 00417 fillbuf_ret_false: 00418 00419 pthread_mutex_lock (&flag_buffer_ready_mutex); 00420 flag_fcst_buffer_ready = true; 00421 pthread_cond_signal (&flag_buffer_ready_cv); 00422 pthread_mutex_unlock (&flag_buffer_ready_mutex); 00423 00424 } // while (1) 00425 }
static size_t get_memory_available | ( | void | ) | [static] |
Return size of the available memory.
Definition at line 252 of file for_merge.c.
References getrlimit, MAX, MIN_MERGE_SIZE, physmem_available(), physmem_total(), and rlimit::rlim_cur.
Referenced by main().
00253 { 00254 00255 /* Let MEM be available memory or 1/8 of total memory, whichever 00256 is greater. */ 00257 double avail = physmem_available (); 00258 double total = physmem_total (); 00259 double mem = MAX (avail, total / 8); 00260 struct rlimit rlimit; 00261 00262 /* Let SIZE be MEM, but no more than the maximum object size or 00263 system resource limits. Avoid the MIN macro here, as it is not 00264 quite right when only one argument is floating point. Don't 00265 bother to check for values like RLIM_INFINITY since in practice 00266 they are not much less than SIZE_MAX. */ 00267 size_t size = SIZE_MAX; 00268 if (mem < size) 00269 size = mem; 00270 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) 00271 size = rlimit.rlim_cur; 00272 #ifdef RLIMIT_AS 00273 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) 00274 size = rlimit.rlim_cur; 00275 #endif 00276 00277 /* Leave a large safety margin for the above limits, as failure can occur when they are exceeded. */ 00278 size /= 2; 00279 00280 #ifdef RLIMIT_RSS 00281 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. 00282 Exceeding RSS is not fatal, but can be quite slow. */ 00283 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) 00284 size = rlimit.rlim_cur / 16 * 15; 00285 #endif 00286 00287 /* Use no less than the minimum. */ 00288 return MAX (size, MIN_MERGE_SIZE); 00289 }
static void initbuf | ( | struct buffer * | buf, | |
size_t | line_bytes, | |||
size_t | alloc | |||
) | [static] |
Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES must be at least sizeof (struct line). Allocate ALLOC bytes initially.
Definition at line 295 of file for_merge.c.
References buffer::alloc, buffer::buf, buffer::eof, buffer::left, buffer::line_bytes, MERGE_FAILURE, buffer::nlines, and buffer::used.
Referenced by merge_fps().
00296 { 00297 /* Ensure that the line array is properly aligned. */ 00298 alloc += sizeof (struct line) - alloc % sizeof (struct line); 00299 buf->buf = malloc (alloc); 00300 if (!buf->buf) 00301 error (MERGE_FAILURE, 0, "-memory exhausted"); 00302 00303 buf->line_bytes = line_bytes; 00304 buf->alloc = alloc; 00305 buf->used = buf->left = buf->nlines = 0; 00306 buf->eof = false; 00307 }
int main | ( | int | argc, | |
char ** | argv | |||
) |
Definition at line 720 of file for_merge.c.
References die(), eolchar, EXIT_SUCCESS, fill_input_buffer_thread(), fillbuf_buf, fillbuf_buf2, fillbuf_file, fillbuf_fp, fillbuf_ret, flag_buffer_ready_cv, flag_buffer_ready_mutex, flag_fcst_buffer_ready, flag_fcst_buffer_start, flag_fillbuf_start_cv, flag_fillbuf_start_mutex, get_memory_available(), long_options, MAX, merge_buffer_size, MERGE_FAILURE, merge_fps(), MIN_MERGE_SIZE, mergefile::name, open_input_files(), out_buffer_size, short_options, usage(), and xmalloc().
00721 { 00722 int c = 0; 00723 size_t nfiles = 0; 00724 char **ifiles; 00725 char const *output_file = NULL; 00726 size_t merge_size = 0; 00727 size_t out_size = 0; 00728 FILE **fps; 00729 00730 ifiles = (char **) xmalloc (argc, sizeof *ifiles); 00731 00732 for (;;) 00733 { 00734 int oi = -1; 00735 00736 if (c == -1 || ((c = getopt_long (argc, argv, short_options, long_options, &oi)) == -1)) 00737 { 00738 if (argc <= optind) 00739 break; 00740 ifiles[nfiles++] = argv[optind++]; 00741 } 00742 else 00743 switch (c) 00744 { 00745 case 1: 00746 ifiles[nfiles++] = optarg; 00747 break; 00748 00749 case 'o': 00750 if (output_file && !strcmp (output_file, optarg)) 00751 error (MERGE_FAILURE, 0, ("multiple output files specified")); 00752 output_file = optarg; 00753 break; 00754 00755 case 's': 00756 { 00757 char *endptr; 00758 merge_size = strtol (optarg, &endptr, 10); 00759 if ((errno == ERANGE && (merge_size == LONG_MAX || merge_size == LONG_MIN)) || (errno != 0 && merge_size == 0)) 00760 { 00761 error (MERGE_FAILURE, 0, ("strtol")); 00762 } 00763 break; 00764 } 00765 00766 case 'S': 00767 { 00768 char *endptr; 00769 out_size = strtol (optarg, &endptr, 10); 00770 if ((errno == ERANGE && (out_size == LONG_MAX || out_size == LONG_MIN)) || (errno != 0 && out_size == 0)) 00771 { 00772 error (MERGE_FAILURE, 0, ("strtol")); 00773 } 00774 break; 00775 } 00776 00777 case 'z': 00778 eolchar = 0; 00779 break; 00780 00781 case 'h': 00782 usage (EXIT_SUCCESS); 00783 break; 00784 00785 default: 00786 usage (EXIT_SUCCESS); 00787 } 00788 } 00789 00790 if (nfiles < 2) 00791 error (MERGE_FAILURE, 0, ("More input files are required for merging")); 00792 00793 setenv ("LC_ALL", "C", 1); 00794 00795 size_t memory_available = get_memory_available (); 00796 if (merge_size > 0) 00797 { 00798 // the user has specified a size 00799 merge_size = MAX (merge_size, MIN_MERGE_SIZE); 00800 merge_buffer_size = merge_size; 00801 } 00802 else 00803 { 00804 merge_buffer_size = MAX (merge_buffer_size, 
memory_available / (2 * nfiles + 2)); 00805 } 00806 00807 if (out_size > 0) 00808 { 00809 // the user has specified a size 00810 out_size = MAX (out_size, out_buffer_size); 00811 out_buffer_size = out_size; 00812 } 00813 else 00814 { 00815 out_buffer_size = MAX (out_buffer_size, memory_available / (2 * nfiles + 2)); 00816 } 00817 00818 fprintf (stdout, " merge_size = %ld\n", (long int) merge_size); 00819 fprintf (stdout, " out_size = %ld\n", (long int) out_size); 00820 fprintf (stdout, " memory_available = %ld\n", (long int) memory_available); 00821 fprintf (stdout, " merge_buffer_size = %ld\n", (long int) merge_buffer_size); 00822 fprintf (stdout, " out_buffer_size = %ld\n", (long int) out_buffer_size); 00823 00824 00825 flag_fcst_buffer_ready = false; 00826 pthread_cond_init (&flag_buffer_ready_cv, NULL); 00827 pthread_mutex_init (&flag_buffer_ready_mutex, NULL); 00828 00829 flag_fcst_buffer_start = false; 00830 fillbuf_buf = NULL; 00831 fillbuf_buf2 = NULL; 00832 fillbuf_fp = NULL; 00833 fillbuf_file = NULL; 00834 fillbuf_ret = false; 00835 pthread_cond_init (&flag_fillbuf_start_cv, NULL); 00836 pthread_mutex_init (&flag_fillbuf_start_mutex, NULL); 00837 00838 pthread_t fillbuf_thread_id; 00839 pthread_create (&fillbuf_thread_id, NULL, &fill_input_buffer_thread, NULL); 00840 00841 00842 struct mergefile *files = (struct mergefile *) calloc (nfiles, sizeof *files); 00843 if (!files) 00844 error (MERGE_FAILURE, 0, "memory exhausted"); 00845 00846 size_t i; 00847 for (i = 0; i < nfiles; ++i) 00848 files[i].name = ifiles[i]; 00849 00850 size_t nopened = open_input_files (files, nfiles, &fps, ifiles); 00851 00852 if (nopened == nfiles) 00853 { 00854 FILE *ofp = fopen (output_file, "w"); 00855 if (ofp) 00856 { 00857 merge_fps (files, nfiles, ofp, output_file, fps); 00858 } 00859 else if (errno != EMFILE || nopened <= 2) 00860 die (("open failed"), output_file); 00861 } 00862 else if (nopened <= 2) 00863 die (("open failed"), files[nopened].name); 00864 00865 
pthread_cancel (fillbuf_thread_id); 00866 pthread_join (fillbuf_thread_id, NULL); 00867 00868 free (files); 00869 00870 exit (EXIT_SUCCESS); 00871 }
static void merge_fps | ( | struct mergefile * | files, | |
size_t | nfiles, | |||
FILE * | ofp, | |||
char const * | output_file, | |||
FILE ** | fps | |||
) | [static] |
Merge lines from FILES onto OFP. NFILES is the number of files; FPS is the vector of open streams corresponding to the files. Close input and output streams before returning. OUTPUT_FILE gives the name of the output file.
< Input buffers for each file.
< Current line in each line table.
< Base of each line table.
< Table representing a permutation of fps, such that cur[ord[0]] is the smallest line and will be next output.
< Buffers for forecasting.
< Table representing a permutation of buffers' ends, such that buffer_fcst[ord_base[0]] is the buffer with smallest line and will be emptied first (and should be filled first)
output buffer
Definition at line 544 of file for_merge.c.
References buffer_linelim(), compare(), die(), fill_input_buffer(), initbuf(), line::length, merge_buffer_size, MERGE_FAILURE, out_buffer_size, reconstruct_table(), swap_buffers(), line::text, write_bytes(), and xmalloc().
Referenced by main().
00545 { 00546 struct buffer *buffer = (struct buffer *) xmalloc (nfiles, sizeof *buffer); /*!< Input buffers for each file. */ 00547 struct line const **cur = (struct line const **) xmalloc (nfiles, sizeof *cur); /*!< Current line in each line table. */ 00548 struct line const **base = (struct line const **) xmalloc (nfiles, sizeof *base); /*!< Base of each line table. */ 00549 size_t *ord = (size_t *) xmalloc (nfiles, sizeof *ord); /*!< Table representing a permutation of fps, such that cur[ord[0]] is the smallest line and will be next output. */ 00550 struct buffer *buffer_fcst = (struct buffer *) xmalloc (nfiles, sizeof *buffer); /*!< Buffers for forecasting. */ 00551 size_t *ord_base = (size_t *) xmalloc (nfiles, sizeof *ord_base); /*!< Table representing a permutation of buffers' ends, such that buffer_fcst[ord_base[0]] is the buffer with smallest line and will be emptied first (and should be filled first) */ 00552 00553 00554 size_t i; 00555 size_t j; 00556 size_t t; 00557 00558 /*! output buffer */ 00559 char *out_buf = malloc (out_buffer_size); 00560 if (!out_buf) 00561 error (MERGE_FAILURE, 0, "memory exhausted"); 00562 char *out_buf_cur = out_buf; 00563 size_t out_buf_len_cur = out_buffer_size; 00564 00565 /* Prepare buffers and read initial lines from each input file. */ 00566 for (i = 0; i < nfiles;) 00567 { 00568 initbuf (&buffer[i], sizeof (struct line), merge_buffer_size); 00569 initbuf (&buffer_fcst[i], sizeof (struct line), merge_buffer_size); 00570 00571 fill_input_buffer (&buffer_fcst[i], &buffer[i], fps[i], files[i].name); 00572 00573 if (swap_buffers (&buffer_fcst[i], &buffer[i])) 00574 { 00575 struct line const *linelim = buffer_linelim (&buffer[i]); 00576 cur[i] = linelim - 1; 00577 base[i] = linelim - buffer[i].nlines; 00578 i++; 00579 } 00580 else 00581 { 00582 // fps[i] is empty; eliminate it from future consideration. 
00583 if (fclose (fps[i]) != 0) 00584 die (("close failed"), files[i].name); 00585 free (buffer[i].buf); 00586 free (buffer_fcst[i].buf); 00587 --nfiles; 00588 for (j = i; j < nfiles; ++j) 00589 { 00590 files[j] = files[j + 1]; 00591 fps[j] = fps[j + 1]; 00592 } 00593 } 00594 00595 } 00596 00597 /* Set up the ord table according to comparisons among input lines. 00598 Since this only reorders two items if one is strictly greater than 00599 the other, it is stable. */ 00600 for (i = 0; i < nfiles; ++i) { 00601 ord[i] = i; 00602 } 00603 for (i = 1; i < nfiles; ++i) { 00604 if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) 00605 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; 00606 } 00607 00608 /* Set up the ord_base table according to comparisons among input lines. */ 00609 for (i = 0; i < nfiles; ++i) 00610 ord_base[i] = i; 00611 for (i = 1; i < nfiles; ++i) 00612 if (0 < compare (base[ord_base[i - 1]], base[ord_base[i]])) 00613 t = ord_base[i - 1], ord_base[i - 1] = ord_base[i], ord_base[i] = t, i = 0; 00614 00615 fill_input_buffer (&buffer_fcst[ord_base[0]], &buffer[ord_base[0]], fps[ord_base[0]], files[ord_base[0]].name); 00616 00617 /* Repeatedly output the smallest line until no input remains. */ 00618 while (nfiles) 00619 { 00620 struct line const *smallest = cur[ord[0]]; 00621 00622 if (out_buf_len_cur > smallest->length) 00623 { 00624 memmove (out_buf_cur, smallest->text, smallest->length); 00625 out_buf_len_cur -= smallest->length; 00626 out_buf_cur += smallest->length; 00627 } 00628 else 00629 { 00630 write_bytes (out_buf, out_buffer_size - out_buf_len_cur, ofp, output_file); 00631 out_buf_len_cur = out_buffer_size; 00632 out_buf_cur = out_buf; 00633 00634 memmove (out_buf_cur, smallest->text, smallest->length); 00635 out_buf_len_cur -= smallest->length; 00636 out_buf_cur += smallest->length; 00637 } 00638 00639 /* Check if we need to read more lines into core. 
*/ 00640 if (base[ord[0]] < smallest) 00641 cur[ord[0]] = smallest - 1; 00642 else 00643 { 00644 if (buffer[ord[0]].eof) 00645 { 00646 //We reached EOF on fps[ord[0]]. 00647 for (i = 1; i < nfiles; ++i) 00648 if (ord[i] > ord[0]) 00649 --ord[i]; 00650 00651 for (i = 1; i < nfiles; ++i) 00652 if (ord_base[i] > ord_base[0]) 00653 --ord_base[i]; 00654 00655 --nfiles; 00656 if (fclose (fps[ord[0]]) != 0) 00657 die (("close failed"), files[ord[0]].name); 00658 00659 free (buffer[ord[0]].buf); 00660 for (i = ord[0]; i < nfiles; ++i) 00661 { 00662 fps[i] = fps[i + 1]; 00663 files[i] = files[i + 1]; 00664 buffer[i] = buffer[i + 1]; 00665 cur[i] = cur[i + 1]; 00666 base[i] = base[i + 1]; 00667 } 00668 for (i = 0; i < nfiles; ++i) 00669 ord[i] = ord[i + 1]; 00670 00671 free (buffer_fcst[ord_base[0]].buf); 00672 for (i = ord_base[0]; i < nfiles; ++i) 00673 { 00674 buffer_fcst[i] = buffer_fcst[i + 1]; 00675 } 00676 for (i = 0; i < nfiles; ++i) 00677 ord_base[i] = ord_base[i + 1]; 00678 00679 continue; 00680 } 00681 else 00682 { 00683 /* The end of buffer has just been reached. 00684 Forecasted buffer has already been filled and ready to use. 
*/ 00685 bool return_code = swap_buffers (&buffer_fcst[ord_base[0]], &buffer[ord[0]]); 00686 00687 struct line const *linelim = buffer_linelim (&buffer[ord[0]]); 00688 cur[ord[0]] = linelim - 1; 00689 base[ord[0]] = linelim - buffer[ord[0]].nlines; 00690 00691 reconstruct_table (nfiles, base, ord_base); 00692 00693 if (return_code) 00694 fill_input_buffer (&buffer_fcst[ord_base[0]], &buffer[ord_base[0]], fps[ord_base[0]], files[ord_base[0]].name); 00695 } 00696 } 00697 reconstruct_table (nfiles, cur, ord); 00698 } // while (nfiles) 00699 00700 00701 if (out_buffer_size != out_buf_len_cur) 00702 { 00703 write_bytes (out_buf, out_buffer_size - out_buf_len_cur, ofp, output_file); 00704 } 00705 00706 if (fclose (ofp) != 0) 00707 die (("close failed"), output_file); 00708 free (fps); 00709 free (buffer); 00710 free (buffer_fcst); 00711 free (ord); 00712 free (ord_base); 00713 free (base); 00714 free (cur); 00715 free (out_buf); 00716 }
static size_t open_input_files | ( | struct mergefile * | files, | |
size_t | nfiles, | |||
FILE *** | pfps, | |||
char *const * | f | |||
) | [static] |
Open FILES (there are NFILES of them) and store the resulting array of stream pointers into (*PFPS). Allocate the array. Return the number of successfully opened files, setting errno if this value is less than NFILES.
Definition at line 483 of file for_merge.c.
References xmalloc().
Referenced by main().
00484 { 00485 FILE **fps = *pfps = xmalloc (nfiles, sizeof *fps); 00486 int i; 00487 00488 /* Open input files with mmap for reading. */ 00489 for (i = 0; i < nfiles; i++) 00490 { 00491 fps[i] = fopen (files[i].name, "rm"); 00492 if (!fps[i]) 00493 break; 00494 } 00495 00496 return i; 00497 }
double physmem_available | ( | ) |
Return the amount of physical memory available.
Definition at line 236 of file for_merge.c.
Referenced by get_memory_available().
/*! Return the amount of physical memory available, in bytes, as
    reported by sysconf; return 0 when the value cannot be determined
    on this system. */
double
physmem_available (void)
{
#if defined _SC_AVPHYS_PAGES && defined _SC_PAGESIZE
  { /* This works on linux-gnu, solaris2 and cygwin.  */
    double pages = sysconf (_SC_AVPHYS_PAGES);
    double pagesize = sysconf (_SC_PAGESIZE);

    /* sysconf returns -1 on failure; only trust non-negative values.  */
    if (0 <= pages && 0 <= pagesize)
      return pages * pagesize;
  }
#endif

  return 0;
}
double physmem_total | ( | ) |
Return the total amount of physical memory.
Definition at line 220 of file for_merge.c.
Referenced by get_memory_available().
/*! Return the total amount of physical memory, in bytes, as reported
    by sysconf; return 0 when the value cannot be determined on this
    system. */
double
physmem_total (void)
{
#if defined _SC_PHYS_PAGES && defined _SC_PAGESIZE
  { /* This works on linux-gnu, solaris2 and cygwin.  */
    double pages = sysconf (_SC_PHYS_PAGES);
    double pagesize = sysconf (_SC_PAGESIZE);

    /* sysconf returns -1 on failure; only trust non-negative values.  */
    if (0 <= pages && 0 <= pagesize)
      return pages * pagesize;
  }
#endif

  return 0;
}
void reconstruct_table | ( | size_t | nfiles, | |
struct line const ** | cur, | |||
size_t * | ord | |||
) | [inline] |
The new line just read in may be larger than other lines already in main memory; push it back in the queue until we encounter a line larger than it. Optimize for the common case where the new line is smallest.
Definition at line 454 of file for_merge.c.
References compare().
Referenced by merge_fps().
/*! Restore the ordering of ORD after the line designated by ORD[0]
    has changed.

    ORD[1..NFILES-1] is assumed to be ordered already.  The new line
    CUR[ORD[0]] may be larger than other lines already in main memory;
    a binary search finds how many entries are smaller, those entries
    move one slot towards the front and ORD[0] is re-inserted after
    them.  On equal lines the entry with the lower file index goes
    first.  Optimized for the common case where the new line is still
    the smallest. */
void
reconstruct_table (size_t nfiles, struct line const **cur, size_t *ord)
{
  size_t j;
  size_t lo = 1;
  size_t hi = nfiles;
  size_t probe = lo;
  size_t ord0 = ord[0];
  size_t count_of_smaller_lines;

  while (lo < hi)
    {
      int cmp = compare (cur[ord0], cur[ord[probe]]);
      if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
        hi = probe;
      else
        lo = probe + 1;
      probe = (lo + hi) / 2;
    }

  /* Shift the smaller entries forward and drop ORD0 behind them.  */
  count_of_smaller_lines = lo - 1;
  for (j = 0; j < count_of_smaller_lines; j++)
    ord[j] = ord[j + 1];
  ord[count_of_smaller_lines] = ord0;
}
Swap buffers: first one has new data, second has old data.
Definition at line 517 of file for_merge.c.
References buffer::alloc, buffer::buf, buffer::eof, fillbuf_ret, flag_buffer_ready_cv, flag_buffer_ready_mutex, flag_fcst_buffer_ready, buffer::left, buffer::line_bytes, buffer::nlines, and buffer::used.
Referenced by merge_fps().
00518 { 00519 pthread_mutex_lock (&flag_buffer_ready_mutex); 00520 while (!flag_fcst_buffer_ready) 00521 pthread_cond_wait (&flag_buffer_ready_cv, &flag_buffer_ready_mutex); 00522 flag_fcst_buffer_ready = false; 00523 bool return_code = fillbuf_ret; 00524 00525 to->line_bytes = from->line_bytes; 00526 to->alloc = from->alloc; 00527 to->used = from->used; 00528 to->left = from->left; 00529 to->nlines = from->nlines; 00530 to->eof = from->eof; 00531 char *tmp = to->buf; 00532 to->buf = from->buf; 00533 from->buf = tmp; 00534 00535 pthread_mutex_unlock (&flag_buffer_ready_mutex); 00536 00537 return return_code; 00538 }
void usage | ( | int | status | ) |
Definition at line 152 of file for_merge.c.
References EXIT_SUCCESS, and program_name.
Referenced by main().
00153 { 00154 if (status != EXIT_SUCCESS) 00155 fprintf (stderr, "Try `%s --help' for more information.\n", program_name); 00156 else 00157 { 00158 printf (("\ 00159 Usage: %s [OPTION]... [FILE1]...\n\ 00160 "), program_name); 00161 fputs (("\ 00162 Write concatenation of sorted FILEs to file.\n\ 00163 \n\ 00164 "), stdout); 00165 fputs (("\ 00166 Mandatory arguments to long options are mandatory for short options too.\n\ 00167 "), stdout); 00168 fputs (("\ 00169 Options:\n\ 00170 \n\ 00171 "), stdout); 00172 fputs (("\ 00173 -o, --output=OFILE MANDATORY - write result to OFILE\n\ 00174 -s, --buffer-size=SIZE use SIZE bytes for input buffer (2*SIZE for each input file)\n\ 00175 -S, --output-size=SIZE use SIZE bytes for output buffer\n\ 00176 -z, --zero-terminated end lines with 0 byte, not newline\n\ 00177 -h, --help print this help\n\ 00178 "), stdout); 00179 fputs (("\ 00180 \n\ 00181 \n\ 00182 *** WARNING ***\n\ 00183 The locale specified by the environment doesn't affect sort order.\n\ 00184 LC_ALL=C is set to get the traditional sort order that uses\n\ 00185 native byte values.\n\ 00186 *** NOTE ***\n\ 00187 Forecasting merge algorithm (P-way merge) \n\ 00188 is required 2*P input buffers, where P - number of sorted input files.\n\ 00189 "), stdout); 00190 } 00191 00192 exit (status); 00193 }
static void write_bytes | ( | const char * | buf, | |
size_t | n_bytes, | |||
FILE * | fp, | |||
const char * | output_file | |||
) | [static] |
Definition at line 212 of file for_merge.c.
References die().
Referenced by merge_fps().
/*! Write N_BYTES bytes from BUF to FP.  OUTPUT_FILE is the name used
    in the diagnostic; a short write terminates the program through
    die(). */
static void
write_bytes (const char *buf, size_t n_bytes, FILE *fp,
             const char *output_file)
{
  size_t written = fwrite (buf, 1, n_bytes, fp);

  if (written != n_bytes)
    die ("write failed", output_file);
}
void* xmalloc | ( | size_t | n, | |
size_t | s | |||
) |
Allocate memory for an array of N objects of S bytes each (N * S bytes in total) dynamically, with error checking.
Definition at line 142 of file for_merge.c.
References MERGE_FAILURE.
Referenced by main(), merge_fps(), and open_input_files().
00143 { 00144 size_t ns = n * s; 00145 void *p = malloc (ns); 00146 if (!p && ns != 0) 00147 error (MERGE_FAILURE, 0, "memory exhausted"); 00148 return p; 00149 }
char eolchar = '\n' [static] |
The character marking end of line. Defaults to '\n' (newline).
Definition at line 60 of file for_merge.c.
Referenced by fill_input_buffer_thread(), and main().
struct buffer* fillbuf_buf |
Definition at line 127 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
struct buffer* fillbuf_buf2 |
Definition at line 128 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
char const* fillbuf_file |
Definition at line 130 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
FILE* fillbuf_fp |
Definition at line 129 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
bool fillbuf_ret |
Definition at line 131 of file for_merge.c.
Referenced by fill_input_buffer_thread(), main(), and swap_buffers().
pthread_cond_t flag_buffer_ready_cv |
Definition at line 120 of file for_merge.c.
Referenced by fill_input_buffer_thread(), main(), and swap_buffers().
pthread_mutex_t flag_buffer_ready_mutex |
Definition at line 121 of file for_merge.c.
Referenced by fill_input_buffer_thread(), main(), and swap_buffers().
This flag says that input buffer was filled and ready to use.
Definition at line 119 of file for_merge.c.
Referenced by fill_input_buffer_thread(), main(), and swap_buffers().
This flag wakes up thread to fill input forecasted buffer from disk.
Definition at line 124 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
pthread_cond_t flag_fillbuf_start_cv |
Definition at line 125 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
pthread_mutex_t flag_fillbuf_start_mutex |
Definition at line 126 of file for_merge.c.
Referenced by fill_input_buffer(), fill_input_buffer_thread(), and main().
struct option const long_options[] [static] |
Initial value:
{ {"output", required_argument, NULL, 'o'}, {"buffer-size", required_argument, NULL, 's'}, {"output-size", required_argument, NULL, 'S'}, {"zero-terminated", no_argument, NULL, 'z'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0}, }
Definition at line 197 of file for_merge.c.
Referenced by main().
size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024) [static] |
The number of bytes needed for a merge.
Definition at line 109 of file for_merge.c.
Referenced by main(), and merge_fps().
size_t out_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024) [static] |
The number of bytes needed for output buffer.
Definition at line 112 of file for_merge.c.
Referenced by main(), and merge_fps().
char const short_options[] = "o:s:S:zh" [static] |