1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved	*/
28
29/*
30 * Copyright (c) 2018, Joyent, Inc.
31 */
32
33/*
34 * csplit - Context or line file splitter
35 * Compile: cc -O -s -o csplit csplit.c
36 */
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <unistd.h>
41#include <string.h>
42#include <ctype.h>
43#include <errno.h>
44#include <limits.h>
45#include <regexpr.h>
46#include <signal.h>
47#include <locale.h>
48#include <libintl.h>
49
/* Argument to to_line() meaning "copy everything up to end of input" */
#define	LAST	0LL
/* Failure return of asc_to_ll() */
#define	ERR	-1
/* Boolean values used for flags and for getaline()'s bump argument */
#define	FALSE	0
#define	TRUE	1
/* Kind of the previous operand, consulted by num_arg() for "{n}" repeats */
#define	EXPMODE	2
#define	LINMODE	3
#define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */
57
58	/* Globals */
59
char linbuf[LINSIZ];		/* Input line buffer */
char *expbuf;			/* Compiled R.E. returned by compile() */
char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
char file[8192] = "xx";		/* File name buffer */
char *targ;			/* Arg ptr for error messages */
char *sptr;			/* Scratch ptr; findline's newline strip */
FILE *infile, *outfile;		/* I/O file streams */
int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
int errflg;			/* Count of bad options seen by getopt */
int fiwidth = 2;		/* file index width (output file names) */
extern int optind;
extern char *optarg;
offset_t offset;		/* Regular expression offset value */
offset_t curline;		/* Current line in input file */
74
/*
 * PERROR reports a regular expression that failed to compile and exits
 * through fatal().  Its argument (the regexp error code) is not used; the
 * message names the operand currently being processed (targ).  The
 * expansion deliberately has no trailing semicolon so that the macro
 * behaves like an ordinary statement, including inside if/else.
 */
#define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ)
79
/* Forward declarations of the file-local helpers defined below. */
static int asc_to_ll(char *, long long *);
static void closefile(void);
static void fatal(char *, char *);
static offset_t findline(char *, offset_t);
static void flush(void);
static FILE *getfile(void);
static char *getaline(int);
static void line_arg(char *);
static void num_arg(char *, int);
static void re_arg(char *);
static void sig(int);
static void to_line(offset_t);
static void usage(void);
93
94int
95main(int argc, char **argv)
96{
97	int ch, mode;
98	char *ptr;
99
100	(void) setlocale(LC_ALL, "");
101#if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
102#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
103#endif
104	(void) textdomain(TEXT_DOMAIN);
105
106	while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
107		switch (ch) {
108			case 'f':
109				(void) strcpy(file, optarg);
110				if ((ptr = strrchr(optarg, '/')) == NULL)
111					ptr = optarg;
112				else
113					ptr++;
114
115				break;
116			case 'n':		/* POSIX.2 */
117				for (ptr = optarg; *ptr != '\0'; ptr++)
118					if (!isdigit((int)*ptr))
119						fatal("-n num\n", NULL);
120				fiwidth = atoi(optarg);
121				break;
122			case 'k':
123				keep++;
124				break;
125			case 's':
126				silent++;
127				break;
128			case '?':
129				errflg++;
130		}
131	}
132
133	argv = &argv[optind];
134	argc -= optind;
135	if (argc <= 1 || errflg)
136		usage();
137
138	if (strcmp(*argv, "-") == 0) {
139		infile = tmpfile();
140
141		while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
142			if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
143				if (errno == ENOSPC) {
144					(void) fprintf(stderr, "csplit: ");
145					(void) fprintf(stderr, gettext(
146					    "No space left on device\n"));
147					exit(1);
148				} else {
149					(void) fprintf(stderr, "csplit: ");
150					(void) fprintf(stderr, gettext(
151					    "Bad write to temporary "
152					    "file\n"));
153					exit(1);
154				}
155
156	/* clear the buffer to get correct size when writing buffer */
157
158			(void) memset(tmpbuf, '\0', sizeof (tmpbuf));
159		}
160		rewind(infile);
161	} else if ((infile = fopen(*argv, "r")) == NULL)
162		fatal("Cannot open %s\n", *argv);
163	++argv;
164	curline = (offset_t)1;
165	(void) signal(SIGINT, sig);
166
167	/*
168	 * The following for loop handles the different argument types.
169	 * A switch is performed on the first character of the argument
170	 * and each case calls the appropriate argument handling routine.
171	 */
172
173	for (; *argv; ++argv) {
174		targ = *argv;
175		switch (**argv) {
176		case '/':
177			mode = EXPMODE;
178			create = TRUE;
179			re_arg(*argv);
180			break;
181		case '%':
182			mode = EXPMODE;
183			create = FALSE;
184			re_arg(*argv);
185			break;
186		case '{':
187			num_arg(*argv, mode);
188			mode = FALSE;
189			break;
190		default:
191			mode = LINMODE;
192			create = TRUE;
193			line_arg(*argv);
194			break;
195		}
196	}
197	create = TRUE;
198	to_line(LAST);
199	return (0);
200}
201
/*
 * asc_to_ll converts the decimal string "str" to a long long stored
 * through "plc".  Leading blanks and tabs are skipped and a single
 * leading '+' or '-' is accepted; everything after that must be a digit.
 * An empty digit string converts to 0.
 *
 * The status is returned separately from the value because every long
 * long is a legal result: TRUE on success, ERR if a non-digit is found
 * or if the magnitude does not fit in a long long (the latter used to
 * overflow silently).  On ERR the value left in *plc is unspecified.
 */

static int
asc_to_ll(char *str, long long *plc)
{
	int negative = 0;
	int digit;

	*plc = 0;

	while (*str == ' ' || *str == '\t')
		str++;

	if (*str == '-') {
		negative = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			return (ERR);
		digit = *str - '0';
		/* Would *plc * 10 + digit exceed LLONG_MAX? */
		if (*plc > (LLONG_MAX - digit) / 10)
			return (ERR);
		*plc = *plc * 10 + digit;
	}

	if (negative)
		*plc = -(*plc);
	return (TRUE);	/* not error */
}
237
238/*
239 * Closefile prints the byte count of the file created,(via fseeko
240 * and ftello), if the create flag is on and the silent flag is not on.
241 * If the create flag is on closefile then closes the file(fclose).
242 */
243
244static void
245closefile()
246{
247	if (!silent && create) {
248		(void) fseeko(outfile, (offset_t)0, SEEK_END);
249		(void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
250	}
251	if (create)
252		(void) fclose(outfile);
253}
254
255/*
256 * Fatal handles error messages and cleanup.
257 * Because "arg" can be the global file, and the cleanup processing
258 * uses the global file, the error message is printed first.  If the
259 * "keep" flag is not set, fatal unlinks all created files.  If the
260 * "keep" flag is set, fatal closes the current file(if there is one).
261 * Fatal exits with a value of 1.
262 */
263
264static void
265fatal(char *string, char *arg)
266{
267	char *fls;
268	int num;
269
270	(void) fprintf(stderr, "csplit: ");
271
272	/* gettext dynamically replaces string */
273
274	(void) fprintf(stderr, gettext(string), arg);
275	if (!keep) {
276		if (outfile) {
277			(void) fclose(outfile);
278			for (fls = file; *fls != '\0'; fls++)
279				continue;
280			fls -= fiwidth;
281			for (num = atoi(fls); num >= 0; num--) {
282				(void) sprintf(fls, "%.*d", fiwidth, num);
283				(void) unlink(file);
284			}
285		}
286	} else
287		if (outfile)
288			closefile();
289	exit(1);
290}
291
292/*
293 * Findline returns the line number referenced by the current argument.
294 * Its arguments are a pointer to the compiled regular expression(expr),
295 * and an offset(oset).  The variable lncnt is used to count the number
296 * of lines searched.  First the current stream location is saved via
297 * ftello(), and getaline is called so that R.E. searching starts at the
298 * line after the previously referenced line.  The while loop checks
299 * that there are more lines(error if none), bumps the line count, and
300 * checks for the R.E. on each line.  If the R.E. matches on one of the
301 * lines the old stream location is restored, and the line number
302 * referenced by the R.E. and the offset is returned.
303 */
304
305static offset_t
306findline(char *expr, offset_t oset)
307{
308	static int benhere = 0;
309	offset_t lncnt = 0, saveloc;
310
311	saveloc = ftello(infile);
312	if (curline != (offset_t)1 || benhere)	/* If first line, first time, */
313		(void) getaline(FALSE);		/* then don't skip */
314	else
315		lncnt--;
316	benhere = 1;
317	while (getaline(FALSE) != NULL) {
318		lncnt++;
319		if ((sptr = strrchr(linbuf, '\n')) != NULL)
320			*sptr = '\0';
321		if (step(linbuf, expr)) {
322			(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323			return (curline+lncnt+oset);
324		}
325	}
326	(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
327	return (curline+lncnt+oset+2);
328}
329
330/*
331 * Flush uses fputs to put lines on the output file stream(outfile)
332 * Since fputs does its own buffering, flush doesn't need to.
333 * Flush does nothing if the create flag is not set.
334 */
335
336static void
337flush()
338{
339	if (create)
340		(void) fputs(linbuf, outfile);
341}
342
343/*
344 * Getfile does nothing if the create flag is not set.  If the create
345 * flag is set, getfile positions the file pointer(fptr) at the end of
346 * the file name prefix on the first call(fptr=0).  The file counter is
347 * stored in the file name and incremented.  If the subsequent fopen
348 * fails, the file name is copied to tfile for the error message, the
349 * previous file name is restored for cleanup, and fatal is called.  If
350 * the fopen succeeds, the stream(opfil) is returned.
351 */
352
353FILE *
354getfile()
355{
356	static char *fptr;
357	static int ctr;
358	FILE *opfil;
359	char tfile[15];
360	char *delim;
361	char savedelim;
362
363	if (create) {
364		if (fptr == 0)
365			for (fptr = file; *fptr != '\0'; fptr++)
366				continue;
367		(void) sprintf(fptr, "%.*d", fiwidth, ctr++);
368
369		/* check for suffix length overflow */
370		if (strlen(fptr) > fiwidth) {
371			fatal("Suffix longer than %ld chars; increase -n\n",
372			    (char *)fiwidth);
373		}
374
375		/* check for filename length overflow */
376
377		delim = strrchr(file, '/');
378		if (delim == (char *)NULL) {
379			if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
380				fatal("Name too long: %s\n", file);
381			}
382		} else {
383			/* truncate file at pathname delim to do pathconf */
384			savedelim = *delim;
385			*delim = '\0';
386			/*
387			 * file: pppppppp\0fffff\0
388			 * ..... ^ file
389			 * ............. ^ delim
390			 */
391			if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
392				fatal("Name too long: %s\n", delim + 1);
393			}
394			*delim = savedelim;
395		}
396
397		if ((opfil = fopen(file, "w")) == NULL) {
398			(void) strlcpy(tfile, file, sizeof (tfile));
399			(void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
400			fatal("Cannot create %s\n", tfile);
401		}
402		return (opfil);
403	}
404	return (NULL);
405}
406
407/*
408 * Getline gets a line via fgets from the input stream "infile".
409 * The line is put into linbuf and may not be larger than LINSIZ.
410 * If getaline is called with a non-zero value, the current line
411 * is bumped, otherwise it is not(for R.E. searching).
412 */
413
414static char *
415getaline(int bumpcur)
416{
417	char *ret;
418	if (bumpcur)
419		curline++;
420	ret = fgets(linbuf, LINSIZ, infile);
421	return (ret);
422}
423
424/*
425 * Line_arg handles line number arguments.
426 * line_arg takes as its argument a pointer to a character string
427 * (assumed to be a line number).  If that character string can be
428 * converted to a number(long long), to_line is called with that number,
429 * otherwise error.
430 */
431
432static void
433line_arg(char *line)
434{
435	long long to;
436
437	if (asc_to_ll(line, &to) == ERR)
438		fatal("%s: bad line number\n", line);
439	to_line(to);
440}
441
442/*
443 * Num_arg handles repeat arguments.
444 * Num_arg copies the numeric argument to "rep" (error if number is
445 * larger than 20 characters or } is left off).  Num_arg then converts
446 * the number and checks for validity.  Next num_arg checks the mode
447 * of the previous argument, and applys the argument the correct number
448 * of times. If the mode is not set properly its an error.
449 */
450
451static void
452num_arg(char *arg, int md)
453{
454	offset_t repeat, toline;
455	char rep[21];
456	char *ptr;
457	int		len;
458
459	ptr = rep;
460	for (++arg; *arg != '}'; arg += len) {
461		if (*arg == '\0')
462			fatal("%s: missing '}'\n", targ);
463		if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
464			len = 1;
465		if ((ptr + len) >= &rep[20])
466			fatal("%s: Repeat count too large\n", targ);
467		(void) memcpy(ptr, arg, len);
468		ptr += len;
469	}
470	*ptr = '\0';
471	if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
472		fatal("Illegal repeat count: %s\n", targ);
473	if (md == LINMODE) {
474		toline = offset = curline;
475		for (; repeat > 0LL; repeat--) {
476			toline += offset;
477			to_line(toline);
478		}
479	} else	if (md == EXPMODE)
480			for (; repeat > 0LL; repeat--)
481				to_line(findline(expbuf, offset));
482		else
483			fatal("No operation for %s\n", targ);
484}
485
486/*
487 * Re_arg handles regular expression arguments.
488 * Re_arg takes a csplit regular expression argument.  It checks for
489 * delimiter balance, computes any offset, and compiles the regular
490 * expression.  Findline is called with the compiled expression and
491 * offset, and returns the corresponding line number, which is used
492 * as input to the to_line function.
493 */
494
495static void
496re_arg(char *string)
497{
498	char *ptr;
499	char ch;
500	int		len;
501
502	ch = *string;
503	ptr = string;
504	ptr++;
505	while (*ptr != ch) {
506		if (*ptr == '\\')
507			++ptr;
508
509		if (*ptr == '\0')
510			fatal("%s: missing delimiter\n", targ);
511
512		if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
513			len = 1;
514		ptr += len;
515	}
516
517	/*
518	 * The line below was added because compile no longer supports
519	 * the fourth argument being passed.  The fourth argument used
520	 * to be '/' or '%'.
521	 */
522
523	*ptr = '\0';
524	if (asc_to_ll(++ptr, &offset) == ERR)
525		fatal("%s: illegal offset\n", string);
526
527	/*
528	 * The line below was added because INIT which did this for us
529	 * was removed from compile in regexp.h
530	 */
531
532	string++;
533	expbuf = compile(string, (char *)0, (char *)0);
534	if (regerrno)
535		PERROR(regerrno);
536	to_line(findline(expbuf, offset));
537}
538
539/*
540 * Sig handles breaks.  When a break occurs the signal is reset,
541 * and fatal is called to clean up and print the argument which
542 * was being processed at the time the interrupt occured.
543 */
544
545/* ARGSUSED */
546static void
547sig(int s)
548{
549	(void) signal(SIGINT, sig);
550	fatal("Interrupt - program aborted at arg '%s'\n", targ);
551}
552
553/*
554 * To_line creates split files.
555 * To_line gets as its argument the line which the current argument
556 * referenced.  To_line calls getfile for a new output stream, which
557 * does nothing if create is False.  If to_line's argument is not LAST
558 * it checks that the current line is not greater than its argument.
559 * While the current line is less than the desired line to_line gets
560 * lines and flushes(error if EOF is reached).
561 * If to_line's argument is LAST, it checks for more lines, and gets
562 * and flushes lines till the end of file.
563 * Finally, to_line calls closefile to close the output stream.
564 */
565
566static void
567to_line(offset_t ln)
568{
569	outfile = getfile();
570	if (ln != LAST) {
571		if (curline > ln)
572			fatal("%s - out of range\n", targ);
573		while (curline < ln) {
574			if (getaline(TRUE) == NULL)
575				fatal("%s - out of range\n", targ);
576			flush();
577		}
578	} else		/* last file */
579		if (getaline(TRUE) != NULL) {
580			flush();
581			for (;;) {
582				if (getaline(TRUE) == NULL)
583					break;
584				flush();
585			}
586		} else
587			fatal("%s - out of range\n", targ);
588	closefile();
589}
590
/*
 * Usage prints the (localized) command synopsis on standard error and
 * exits with status 1.
 */
static void
usage(void)
{
	const char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
599