1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       file_io.c
4/// \brief      File opening, unlinking, and closing
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "private.h"
14
15#include <fcntl.h>
16
17#ifdef TUKLIB_DOSLIKE
18#	include <io.h>
19#else
20#	include <poll.h>
21static bool warn_fchown;
22#endif
23
24#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25#	include <sys/time.h>
26#elif defined(HAVE__FUTIME)
27#	include <sys/utime.h>
28#elif defined(HAVE_UTIME)
29#	include <utime.h>
30#endif
31
32#ifdef HAVE_CAPSICUM
33#	ifdef HAVE_SYS_CAPSICUM_H
34#		include <sys/capsicum.h>
35#	else
36#		include <sys/capability.h>
37#	endif
38#endif
39
40#include "tuklib_open_stdxxx.h"
41
// Fallback definitions for open() flags that not every platform provides.
// Defining a missing flag as 0 lets the code below OR it into the flags
// unconditionally: it simply has no effect where it doesn't exist.
#ifndef O_BINARY
#	define O_BINARY 0
#endif

#ifndef O_NOCTTY
#	define O_NOCTTY 0
#endif

// Evaluates to true if the errno value e is EAGAIN or EWOULDBLOCK.
//
// Using this macro to silence a warning from gcc -Wlogical-op: when both
// names have the same value, only a single comparison is emitted instead
// of two identical ones joined with ||.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif
57
58
/// Result of io_wait().
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;
64
65
/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false; io_open_dest_real() reads it.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
/// Set by io_allow_sandbox() and checked by io_sandbox_enter().
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
static int stdin_flags;

/// True if io_open_src_real() changed the flags of standard input
/// (enabled O_NONBLOCK) and io_close_src() must write stdin_flags back.
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;

/// True if io_open_dest_real() changed the flags of standard output
/// (enabled O_NONBLOCK and/or disabled O_APPEND) and io_close_dest()
/// must write stdout_flags back.
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. io_init() creates the pipe,
/// io_write_to_user_abort_pipe() writes to user_abort_pipe[1], and
/// io_wait() polls user_abort_pipe[0].
static int user_abort_pipe[2];
#endif
89
90
91static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
92
93
94extern void
95io_init(void)
96{
97	// Make sure that stdin, stdout, and stderr are connected to
98	// a valid file descriptor. Exit immediately with exit code ERROR
99	// if we cannot make the file descriptors valid. Maybe we should
100	// print an error message, but our stderr could be screwed anyway.
101	tuklib_open_stdxxx(E_ERROR);
102
103#ifndef TUKLIB_DOSLIKE
104	// If fchown() fails setting the owner, we warn about it only if
105	// we are root.
106	warn_fchown = geteuid() == 0;
107
108	// Create a pipe for the self-pipe trick.
109	if (pipe(user_abort_pipe))
110		message_fatal(_("Error creating a pipe: %s"),
111				strerror(errno));
112
113	// Make both ends of the pipe non-blocking.
114	for (unsigned i = 0; i < 2; ++i) {
115		int flags = fcntl(user_abort_pipe[i], F_GETFL);
116		if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
117				flags | O_NONBLOCK) == -1)
118			message_fatal(_("Error creating a pipe: %s"),
119					strerror(errno));
120	}
121#endif
122
123#ifdef __DJGPP__
124	// Avoid doing useless things when statting files.
125	// This isn't important but doesn't hurt.
126	_djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
127#endif
128
129	return;
130}
131
132
133#ifndef TUKLIB_DOSLIKE
134extern void
135io_write_to_user_abort_pipe(void)
136{
137	// If the write() fails, it's probably due to the pipe being full.
138	// Failing in that case is fine. If the reason is something else,
139	// there's not much we can do since this is called in a signal
140	// handler. So ignore the errors and try to avoid warnings with
141	// GCC and glibc when _FORTIFY_SOURCE=2 is used.
142	uint8_t b = '\0';
143	const int ret = write(user_abort_pipe[1], &b, 1);
144	(void)ret;
145	return;
146}
147#endif
148
149
150extern void
151io_no_sparse(void)
152{
153	try_sparse = false;
154	return;
155}
156
157
158#ifdef ENABLE_SANDBOX
159extern void
160io_allow_sandbox(void)
161{
162	sandbox_allowed = true;
163	return;
164}
165
166
167/// Enables operating-system-specific sandbox if it is possible.
168/// src_fd is the file descriptor of the input file.
169static void
170io_sandbox_enter(int src_fd)
171{
172	if (!sandbox_allowed) {
173		// This message is more often annoying than useful so
174		// it's commented out. It can be useful when developing
175		// the sandboxing code.
176		//message(V_DEBUG, _("Sandbox is disabled due "
177		//		"to incompatible command line arguments"));
178		return;
179	}
180
181	const char dummy_str[] = "x";
182
183	// Try to ensure that both libc and xz locale files have been
184	// loaded when NLS is enabled.
185	snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL));
186
187	// Try to ensure that iconv data files needed for handling multibyte
188	// characters have been loaded. This is needed at least with glibc.
189	tuklib_mbstr_width(dummy_str, NULL);
190
191#ifdef HAVE_CAPSICUM
192	// Capsicum needs FreeBSD 10.0 or later.
193	cap_rights_t rights;
194
195	if (cap_rights_limit(src_fd, cap_rights_init(&rights,
196			CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)))
197		goto error;
198
199	if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
200			CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP,
201			CAP_WRITE, CAP_SEEK)))
202		goto error;
203
204	if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights,
205			CAP_EVENT)))
206		goto error;
207
208	if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights,
209			CAP_WRITE)))
210		goto error;
211
212	if (cap_enter())
213		goto error;
214
215#else
216#	error ENABLE_SANDBOX is defined but no sandboxing method was found.
217#endif
218
219	// This message is annoying in xz -lvv.
220	//message(V_DEBUG, _("Sandbox was successfully enabled"));
221	return;
222
223error:
224	message(V_DEBUG, _("Failed to enable the sandbox"));
225}
226#endif // ENABLE_SANDBOX
227
228
229#ifndef TUKLIB_DOSLIKE
230/// \brief      Waits for input or output to become available or for a signal
231///
232/// This uses the self-pipe trick to avoid a race condition that can occur
233/// if a signal is caught after user_abort has been checked but before e.g.
234/// read() has been called. In that situation read() could block unless
235/// non-blocking I/O is used. With non-blocking I/O something like select()
236/// or poll() is needed to avoid a busy-wait loop, and the same race condition
237/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
238/// POSIX) but neither is portable enough in 2013. The self-pipe trick is
239/// old and very portable.
240static io_wait_ret
241io_wait(file_pair *pair, int timeout, bool is_reading)
242{
243	struct pollfd pfd[2];
244
245	if (is_reading) {
246		pfd[0].fd = pair->src_fd;
247		pfd[0].events = POLLIN;
248	} else {
249		pfd[0].fd = pair->dest_fd;
250		pfd[0].events = POLLOUT;
251	}
252
253	pfd[1].fd = user_abort_pipe[0];
254	pfd[1].events = POLLIN;
255
256	while (true) {
257		const int ret = poll(pfd, 2, timeout);
258
259		if (user_abort)
260			return IO_WAIT_ERROR;
261
262		if (ret == -1) {
263			if (errno == EINTR || errno == EAGAIN)
264				continue;
265
266			message_error(_("%s: poll() failed: %s"),
267					is_reading ? pair->src_name
268						: pair->dest_name,
269					strerror(errno));
270			return IO_WAIT_ERROR;
271		}
272
273		if (ret == 0)
274			return IO_WAIT_TIMEOUT;
275
276		if (pfd[0].revents != 0)
277			return IO_WAIT_MORE;
278	}
279}
280#endif
281
282
283/// \brief      Unlink a file
284///
285/// This tries to verify that the file being unlinked really is the file that
286/// we want to unlink by verifying device and inode numbers. There's still
287/// a small unavoidable race, but this is much better than nothing (the file
288/// could have been moved/replaced even hours earlier).
289static void
290io_unlink(const char *name, const struct stat *known_st)
291{
292#if defined(TUKLIB_DOSLIKE)
293	// On DOS-like systems, st_ino is meaningless, so don't bother
294	// testing it. Just silence a compiler warning.
295	(void)known_st;
296#else
297	struct stat new_st;
298
299	// If --force was used, use stat() instead of lstat(). This way
300	// (de)compressing symlinks works correctly. However, it also means
301	// that xz cannot detect if a regular file foo is renamed to bar
302	// and then a symlink foo -> bar is created. Because of stat()
303	// instead of lstat(), xz will think that foo hasn't been replaced
304	// with another file. Thus, xz will remove foo even though it no
305	// longer is the same file that xz used when it started compressing.
306	// Probably it's not too bad though, so this doesn't need a more
307	// complex fix.
308	const int stat_ret = opt_force
309			? stat(name, &new_st) : lstat(name, &new_st);
310
311	if (stat_ret
312#	ifdef __VMS
313			// st_ino is an array, and we don't want to
314			// compare st_dev at all.
315			|| memcmp(&new_st.st_ino, &known_st->st_ino,
316				sizeof(new_st.st_ino)) != 0
317#	else
318			// Typical POSIX-like system
319			|| new_st.st_dev != known_st->st_dev
320			|| new_st.st_ino != known_st->st_ino
321#	endif
322			)
323		// TRANSLATORS: When compression or decompression finishes,
324		// and xz is going to remove the source file, xz first checks
325		// if the source file still exists, and if it does, does its
326		// device and inode numbers match what xz saw when it opened
327		// the source file. If these checks fail, this message is
328		// shown, %s being the filename, and the file is not deleted.
329		// The check for device and inode numbers is there, because
330		// it is possible that the user has put a new file in place
331		// of the original file, and in that case it obviously
332		// shouldn't be removed.
333		message_error(_("%s: File seems to have been moved, "
334				"not removing"), name);
335	else
336#endif
337		// There's a race condition between lstat() and unlink()
338		// but at least we have tried to avoid removing wrong file.
339		if (unlink(name))
340			message_error(_("%s: Cannot remove: %s"),
341					name, strerror(errno));
342
343	return;
344}
345
346
347/// \brief      Copies owner/group and permissions
348///
349/// \todo       ACL and EA support
350///
351static void
352io_copy_attrs(const file_pair *pair)
353{
354	// Skip chown and chmod on Windows.
355#ifndef TUKLIB_DOSLIKE
356	// This function is more tricky than you may think at first.
357	// Blindly copying permissions may permit users to access the
358	// destination file who didn't have permission to access the
359	// source file.
360
361	// Try changing the owner of the file. If we aren't root or the owner
362	// isn't already us, fchown() probably doesn't succeed. We warn
363	// about failing fchown() only if we are root.
364	if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
365			&& warn_fchown)
366		message_warning(_("%s: Cannot set the file owner: %s"),
367				pair->dest_name, strerror(errno));
368
369	mode_t mode;
370
371	if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) {
372		message_warning(_("%s: Cannot set the file group: %s"),
373				pair->dest_name, strerror(errno));
374		// We can still safely copy some additional permissions:
375		// `group' must be at least as strict as `other' and
376		// also vice versa.
377		//
378		// NOTE: After this, the owner of the source file may
379		// get additional permissions. This shouldn't be too bad,
380		// because the owner would have had permission to chmod
381		// the original file anyway.
382		mode = ((pair->src_st.st_mode & 0070) >> 3)
383				& (pair->src_st.st_mode & 0007);
384		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
385	} else {
386		// Drop the setuid, setgid, and sticky bits.
387		mode = pair->src_st.st_mode & 0777;
388	}
389
390	if (fchmod(pair->dest_fd, mode))
391		message_warning(_("%s: Cannot set the file permissions: %s"),
392				pair->dest_name, strerror(errno));
393#endif
394
395	// Copy the timestamps. We have several possible ways to do this, of
396	// which some are better in both security and precision.
397	//
398	// First, get the nanosecond part of the timestamps. As of writing,
399	// it's not standardized by POSIX, and there are several names for
400	// the same thing in struct stat.
401	long atime_nsec;
402	long mtime_nsec;
403
404#	if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
405	// GNU and Solaris
406	atime_nsec = pair->src_st.st_atim.tv_nsec;
407	mtime_nsec = pair->src_st.st_mtim.tv_nsec;
408
409#	elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
410	// BSD
411	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
412	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
413
414#	elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
415	// GNU and BSD without extensions
416	atime_nsec = pair->src_st.st_atimensec;
417	mtime_nsec = pair->src_st.st_mtimensec;
418
419#	elif defined(HAVE_STRUCT_STAT_ST_UATIME)
420	// Tru64
421	atime_nsec = pair->src_st.st_uatime * 1000;
422	mtime_nsec = pair->src_st.st_umtime * 1000;
423
424#	elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
425	// UnixWare
426	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
427	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
428
429#	else
430	// Safe fallback
431	atime_nsec = 0;
432	mtime_nsec = 0;
433#	endif
434
435	// Construct a structure to hold the timestamps and call appropriate
436	// function to set the timestamps.
437#if defined(HAVE_FUTIMENS)
438	// Use nanosecond precision.
439	struct timespec tv[2];
440	tv[0].tv_sec = pair->src_st.st_atime;
441	tv[0].tv_nsec = atime_nsec;
442	tv[1].tv_sec = pair->src_st.st_mtime;
443	tv[1].tv_nsec = mtime_nsec;
444
445	(void)futimens(pair->dest_fd, tv);
446
447#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
448	// Use microsecond precision.
449	struct timeval tv[2];
450	tv[0].tv_sec = pair->src_st.st_atime;
451	tv[0].tv_usec = atime_nsec / 1000;
452	tv[1].tv_sec = pair->src_st.st_mtime;
453	tv[1].tv_usec = mtime_nsec / 1000;
454
455#	if defined(HAVE_FUTIMES)
456	(void)futimes(pair->dest_fd, tv);
457#	elif defined(HAVE_FUTIMESAT)
458	(void)futimesat(pair->dest_fd, NULL, tv);
459#	else
460	// Argh, no function to use a file descriptor to set the timestamp.
461	(void)utimes(pair->dest_name, tv);
462#	endif
463
464#elif defined(HAVE__FUTIME)
465	// Use one-second precision with Windows-specific _futime().
466	// We could use utime() too except that for some reason the
467	// timestamp will get reset at close(). With _futime() it works.
468	// This struct cannot be const as _futime() takes a non-const pointer.
469	struct _utimbuf buf = {
470		.actime = pair->src_st.st_atime,
471		.modtime = pair->src_st.st_mtime,
472	};
473
474	// Avoid warnings.
475	(void)atime_nsec;
476	(void)mtime_nsec;
477
478	(void)_futime(pair->dest_fd, &buf);
479
480#elif defined(HAVE_UTIME)
481	// Use one-second precision. utime() doesn't support using file
482	// descriptor either. Some systems have broken utime() prototype
483	// so don't make this const.
484	struct utimbuf buf = {
485		.actime = pair->src_st.st_atime,
486		.modtime = pair->src_st.st_mtime,
487	};
488
489	// Avoid warnings.
490	(void)atime_nsec;
491	(void)mtime_nsec;
492
493	(void)utime(pair->dest_name, &buf);
494#endif
495
496	return;
497}
498
499
/// \brief      Opens the source file
///
/// For standard input (pair->src_name == stdin_filename) nothing is opened:
/// pair->src_fd is set to STDIN_FILENO and, on non-DOS-like systems, an
/// attempt is made to switch it to non-blocking mode. pair->src_st is not
/// filled in that case.
///
/// For other files, the file is opened, pair->src_st is filled, and the
/// file type and mode bits are checked against what the command line
/// options allow. If a check fails after the file was opened, the file
/// descriptor is closed before returning.
///
/// The caller is expected to have blocked the signals that have a signal
/// handler; they are unblocked only while waiting in io_wait().
///
/// \return     False on success, true on error. On error, an error or
///             warning message has been printed, except when io_wait()
///             returned because of user_abort.
static bool
io_open_src_real(file_pair *pair)
{
	// There's nothing to open when reading from stdin.
	if (pair->src_name == stdin_filename) {
		pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDIN_FILENO, O_BINARY);
#else
		// Try to set stdin to non-blocking mode. It won't work
		// e.g. on OpenBSD if stdout is e.g. /dev/null. In such
		// case we proceed as if stdin were non-blocking anyway
		// (in case of /dev/null it will be in practice). The
		// same applies to stdout in io_open_dest_real().
		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
		if (stdin_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard input: %s"),
					strerror(errno));
			return true;
		}

		// Remember to restore the original flags in io_close_src()
		// only if we actually changed them.
		if ((stdin_flags & O_NONBLOCK) == 0
				&& fcntl(STDIN_FILENO, F_SETFL,
					stdin_flags | O_NONBLOCK) != -1)
			restore_stdin_flags = true;
#endif
#ifdef HAVE_POSIX_FADVISE
		// It will fail if stdin is a pipe and that's fine.
		(void)posix_fadvise(STDIN_FILENO, 0, 0,
				opt_mode == MODE_LIST
					? POSIX_FADV_RANDOM
					: POSIX_FADV_SEQUENTIAL);
#endif
		return false;
	}

	// Symlinks are not followed unless writing to stdout or --force
	// was used.
	const bool follow_symlinks = opt_stdout || opt_force;

	// We accept only regular files if we are writing the output
	// to disk too. bzip2 allows overriding this with --force but
	// gzip and xz don't.
	const bool reg_files_only = !opt_stdout;

	// Flags for open()
	int flags = O_RDONLY | O_BINARY | O_NOCTTY;

#ifndef TUKLIB_DOSLIKE
	// Use non-blocking I/O:
	//   - It prevents blocking when opening FIFOs and some other
	//     special files, which is good if we want to accept only
	//     regular files.
	//   - It can help avoiding some race conditions with signal handling.
	flags |= O_NONBLOCK;
#endif

#if defined(O_NOFOLLOW)
	if (!follow_symlinks)
		flags |= O_NOFOLLOW;
#elif !defined(TUKLIB_DOSLIKE)
	// Some POSIX-like systems lack O_NOFOLLOW (it's not required
	// by POSIX). Check for symlinks with a separate lstat() on
	// these systems.
	if (!follow_symlinks) {
		struct stat st;
		if (lstat(pair->src_name, &st)) {
			message_error("%s: %s", pair->src_name,
					strerror(errno));
			return true;

		} else if (S_ISLNK(st.st_mode)) {
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
			return true;
		}
	}
#else
	// Avoid warnings.
	(void)follow_symlinks;
#endif

	// Try to open the file. Signals have been blocked so EINTR shouldn't
	// be possible.
	pair->src_fd = open(pair->src_name, flags);

	if (pair->src_fd == -1) {
		// Signals (that have a signal handler) have been blocked.
		assert(errno != EINTR);

#ifdef O_NOFOLLOW
		// Give an understandable error message if the reason
		// for failing was that the file was a symbolic link.
		//
		// Note that at least Linux, OpenBSD, Solaris, and Darwin
		// use ELOOP to indicate that O_NOFOLLOW was the reason
		// that open() failed. Because there may be
		// directories in the pathname, ELOOP may occur also
		// because of a symlink loop in the directory part.
		// So ELOOP doesn't tell us what actually went wrong,
		// and this stupidity went into POSIX-1.2008 too.
		//
		// FreeBSD associates EMLINK with O_NOFOLLOW and
		// Tru64 uses ENOTSUP. We use these directly here
		// and skip the lstat() call and the associated race.
		// I want to hear if there are other kernels that
		// fail with something else than ELOOP with O_NOFOLLOW.
		bool was_symlink = false;

#	if defined(__FreeBSD__) || defined(__DragonFly__)
		if (errno == EMLINK)
			was_symlink = true;

#	elif defined(__digital__) && defined(__unix__)
		if (errno == ENOTSUP)
			was_symlink = true;

#	elif defined(__NetBSD__)
		if (errno == EFTYPE)
			was_symlink = true;

#	else
		if (errno == ELOOP && !follow_symlinks) {
			// lstat() may change errno, but the message below
			// needs the value set by open().
			const int saved_errno = errno;
			struct stat st;
			if (lstat(pair->src_name, &st) == 0
					&& S_ISLNK(st.st_mode))
				was_symlink = true;

			errno = saved_errno;
		}
#	endif

		if (was_symlink)
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
		else
#endif
			// Something else than O_NOFOLLOW failing
			// (assuming that the race conditions didn't
			// confuse us).
			message_error("%s: %s", pair->src_name,
					strerror(errno));

		return true;
	}

	// Stat the source file. We need the result also when we copy
	// the permissions, and when unlinking.
	//
	// NOTE: Use stat() instead of fstat() with DJGPP, because
	// then we have a better chance to get st_ino value that can
	// be used in io_open_dest_real() to prevent overwriting the
	// source file.
#ifdef __DJGPP__
	if (stat(pair->src_name, &pair->src_st))
		goto error_msg;
#else
	if (fstat(pair->src_fd, &pair->src_st))
		goto error_msg;
#endif

	if (S_ISDIR(pair->src_st.st_mode)) {
		message_warning(_("%s: Is a directory, skipping"),
				pair->src_name);
		goto error;
	}

	if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
		message_warning(_("%s: Not a regular file, skipping"),
				pair->src_name);
		goto error;
	}

#ifndef TUKLIB_DOSLIKE
	if (reg_files_only && !opt_force) {
		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
			// gzip rejects setuid and setgid files even
			// when --force was used. bzip2 doesn't check
			// for them, but calls fchown() after fchmod(),
			// and many systems automatically drop setuid
			// and setgid bits there.
			//
			// We accept setuid and setgid files if
			// --force was used. We drop these bits
			// explicitly in io_copy_attrs().
			message_warning(_("%s: File has setuid or "
					"setgid bit set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_mode & S_ISVTX) {
			message_warning(_("%s: File has sticky bit "
					"set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_nlink > 1) {
			message_warning(_("%s: Input file has more "
					"than one hard link, "
					"skipping"), pair->src_name);
			goto error;
		}
	}

	// If it is something else than a regular file, wait until
	// there is input available. This way reading from FIFOs
	// will work when open() is used with O_NONBLOCK.
	if (!S_ISREG(pair->src_st.st_mode)) {
		// The wait may take long, so let the signal handlers
		// run while waiting.
		signals_unblock();
		const io_wait_ret ret = io_wait(pair, -1, true);
		signals_block();

		if (ret != IO_WAIT_MORE)
			goto error;
	}
#endif

#ifdef HAVE_POSIX_FADVISE
	// It will fail with some special files like FIFOs but that is fine.
	(void)posix_fadvise(pair->src_fd, 0, 0,
			opt_mode == MODE_LIST
				? POSIX_FADV_RANDOM
				: POSIX_FADV_SEQUENTIAL);
#endif

	return false;

	// error_msg: print a message based on errno, then clean up.
	// error: a message has already been printed (if one was needed).
error_msg:
	message_error("%s: %s", pair->src_name, strerror(errno));
error:
	(void)close(pair->src_fd);
	return true;
}
738
739
740extern file_pair *
741io_open_src(const char *src_name)
742{
743	if (is_empty_filename(src_name))
744		return NULL;
745
746	// Since we have only one file open at a time, we can use
747	// a statically allocated structure.
748	static file_pair pair;
749
750	pair = (file_pair){
751		.src_name = src_name,
752		.dest_name = NULL,
753		.src_fd = -1,
754		.dest_fd = -1,
755		.src_eof = false,
756		.src_has_seen_input = false,
757		.flush_needed = false,
758		.dest_try_sparse = false,
759		.dest_pending_sparse = 0,
760	};
761
762	// Block the signals, for which we have a custom signal handler, so
763	// that we don't need to worry about EINTR.
764	signals_block();
765	const bool error = io_open_src_real(&pair);
766	signals_unblock();
767
768#ifdef ENABLE_SANDBOX
769	if (!error)
770		io_sandbox_enter(pair.src_fd);
771#endif
772
773	return error ? NULL : &pair;
774}
775
776
777/// \brief      Closes source file of the file_pair structure
778///
779/// \param      pair    File whose src_fd should be closed
780/// \param      success If true, the file will be removed from the disk if
781///                     closing succeeds and --keep hasn't been used.
782static void
783io_close_src(file_pair *pair, bool success)
784{
785#ifndef TUKLIB_DOSLIKE
786	if (restore_stdin_flags) {
787		assert(pair->src_fd == STDIN_FILENO);
788
789		restore_stdin_flags = false;
790
791		if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
792			message_error(_("Error restoring the status flags "
793					"to standard input: %s"),
794					strerror(errno));
795	}
796#endif
797
798	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
799		// Close the file before possibly unlinking it. On DOS-like
800		// systems this is always required since unlinking will fail
801		// if the file is open. On POSIX systems it usually works
802		// to unlink open files, but in some cases it doesn't and
803		// one gets EBUSY in errno.
804		//
805		// xz 5.2.2 and older unlinked the file before closing it
806		// (except on DOS-like systems). The old code didn't handle
807		// EBUSY and could fail e.g. on some CIFS shares. The
808		// advantage of unlinking before closing is negligible
809		// (avoids a race between close() and stat()/lstat() and
810		// unlink()), so let's keep this simple.
811		(void)close(pair->src_fd);
812
813		if (success && !opt_keep_original)
814			io_unlink(pair->src_name, &pair->src_st);
815	}
816
817	return;
818}
819
820
/// \brief      Opens the destination file
///
/// If writing to standard output (--stdout, or the input is standard
/// input), pair->dest_fd is set to STDOUT_FILENO and pair->dest_name to
/// a constant string that must not be freed. Otherwise the name is
/// derived from pair->src_name with suffix_get_dest_name() and a new file
/// is created with O_EXCL; in that case pair->dest_name is allocated
/// memory that is freed here on error.
///
/// On non-DOS-like systems this also fills pair->dest_st and decides
/// whether sparse output may be attempted (pair->dest_try_sparse).
///
/// \return     False on success, true on error. On error, an error
///             message has been printed, except possibly when
///             suffix_get_dest_name() fails (that function is expected
///             to report its own errors).
static bool
io_open_dest_real(file_pair *pair)
{
	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
		// We don't modify or free() this.
		pair->dest_name = (char *)"(stdout)";
		pair->dest_fd = STDOUT_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDOUT_FILENO, O_BINARY);
#else
		// Try to set O_NONBLOCK if it isn't already set.
		// If it fails, we assume that stdout is non-blocking
		// in practice. See the comments in io_open_src_real()
		// for similar situation with stdin.
		//
		// NOTE: O_APPEND may be unset later in this function
		// and it relies on stdout_flags being set here.
		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
		if (stdout_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard output: %s"),
					strerror(errno));
			return true;
		}

		// Remember to restore the original flags in
		// io_close_dest() only if we actually changed them.
		if ((stdout_flags & O_NONBLOCK) == 0
				&& fcntl(STDOUT_FILENO, F_SETFL,
					stdout_flags | O_NONBLOCK) != -1)
				restore_stdout_flags = true;
#endif
	} else {
		pair->dest_name = suffix_get_dest_name(pair->src_name);
		if (pair->dest_name == NULL)
			return true;

#ifdef __DJGPP__
		struct stat st;
		if (stat(pair->dest_name, &st) == 0) {
			// Check that it isn't a special file like "prn".
			if (st.st_dev == -1) {
				message_error("%s: Refusing to write to "
						"a DOS special file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}

			// Check that we aren't overwriting the source file.
			if (st.st_dev == pair->src_st.st_dev
					&& st.st_ino == pair->src_st.st_ino) {
				message_error("%s: Output file is the same "
						"as the input file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}
		}
#endif

		// If --force was used, unlink the target file first.
		// A missing target file (ENOENT) is not an error.
		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
			message_error(_("%s: Cannot remove: %s"),
					pair->dest_name, strerror(errno));
			free(pair->dest_name);
			return true;
		}

		// Open the file. O_EXCL makes open() fail if the file
		// already exists. The file is created readable and
		// writable by the owner only.
		int flags = O_WRONLY | O_BINARY | O_NOCTTY
				| O_CREAT | O_EXCL;
#ifndef TUKLIB_DOSLIKE
		flags |= O_NONBLOCK;
#endif
		const mode_t mode = S_IRUSR | S_IWUSR;
		pair->dest_fd = open(pair->dest_name, flags, mode);

		if (pair->dest_fd == -1) {
			message_error("%s: %s", pair->dest_name,
					strerror(errno));
			free(pair->dest_name);
			return true;
		}
	}

#ifndef TUKLIB_DOSLIKE
	// dest_st isn't used on DOS-like systems except as a dummy
	// argument to io_unlink(), so don't fstat() on such systems.
	if (fstat(pair->dest_fd, &pair->dest_st)) {
		// If fstat() really fails, we have a safe fallback here.
#	if defined(__VMS)
		pair->dest_st.st_ino[0] = 0;
		pair->dest_st.st_ino[1] = 0;
		pair->dest_st.st_ino[2] = 0;
#	else
		pair->dest_st.st_dev = 0;
		pair->dest_st.st_ino = 0;
#	endif
	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
		// When writing to standard output, we need to be extra
		// careful:
		//  - It may be connected to something else than
		//    a regular file.
		//  - We aren't necessarily writing to a new empty file
		//    or to the end of an existing file.
		//  - O_APPEND may be active.
		//
		// TODO: I'm keeping this disabled for DOS-like systems
		// for now. FAT doesn't support sparse files, but NTFS
		// does, so maybe this should be enabled on Windows after
		// some testing.
		//
		// Every "return false" below means: the file was opened
		// successfully but sparse output won't be attempted.
		if (pair->dest_fd == STDOUT_FILENO) {
			if (!S_ISREG(pair->dest_st.st_mode))
				return false;

			if (stdout_flags & O_APPEND) {
				// Creating a sparse file is not possible
				// when O_APPEND is active (it's used by
				// shell's >> redirection). As I understand
				// it, it is safe to temporarily disable
				// O_APPEND in xz, because if someone
				// happened to write to the same file at the
				// same time, results would be bad anyway
				// (users shouldn't assume that xz uses any
				// specific block size when writing data).
				//
				// The write position may be something else
				// than the end of the file, so we must fix
				// it to start writing at the end of the file
				// to imitate O_APPEND.
				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
					return false;

				// Construct the new file status flags.
				// If O_NONBLOCK was set earlier in this
				// function, it must be kept here too.
				int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}
988
989
990extern bool
991io_open_dest(file_pair *pair)
992{
993	signals_block();
994	const bool ret = io_open_dest_real(pair);
995	signals_unblock();
996	return ret;
997}
998
999
1000/// \brief      Closes destination file of the file_pair structure
1001///
1002/// \param      pair    File whose dest_fd should be closed
1003/// \param      success If false, the file will be removed from the disk.
1004///
1005/// \return     Zero if closing succeeds. On error, -1 is returned and
1006///             error message printed.
1007static bool
1008io_close_dest(file_pair *pair, bool success)
1009{
1010#ifndef TUKLIB_DOSLIKE
1011	// If io_open_dest() has disabled O_APPEND, restore it here.
1012	if (restore_stdout_flags) {
1013		assert(pair->dest_fd == STDOUT_FILENO);
1014
1015		restore_stdout_flags = false;
1016
1017		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
1018			message_error(_("Error restoring the O_APPEND flag "
1019					"to standard output: %s"),
1020					strerror(errno));
1021			return true;
1022		}
1023	}
1024#endif
1025
1026	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1027		return false;
1028
1029	if (close(pair->dest_fd)) {
1030		message_error(_("%s: Closing the file failed: %s"),
1031				pair->dest_name, strerror(errno));
1032
1033		// Closing destination file failed, so we cannot trust its
1034		// contents. Get rid of junk:
1035		io_unlink(pair->dest_name, &pair->dest_st);
1036		free(pair->dest_name);
1037		return true;
1038	}
1039
1040	// If the operation using this file wasn't successful, we git rid
1041	// of the junk file.
1042	if (!success)
1043		io_unlink(pair->dest_name, &pair->dest_st);
1044
1045	free(pair->dest_name);
1046
1047	return false;
1048}
1049
1050
1051extern void
1052io_close(file_pair *pair, bool success)
1053{
1054	// Take care of sparseness at the end of the output file.
1055	if (success && pair->dest_try_sparse
1056			&& pair->dest_pending_sparse > 0) {
1057		// Seek forward one byte less than the size of the pending
1058		// hole, then write one zero-byte. This way the file grows
1059		// to its correct size. An alternative would be to use
1060		// ftruncate() but that isn't portable enough (e.g. it
1061		// doesn't work with FAT on Linux; FAT isn't that important
1062		// since it doesn't support sparse files anyway, but we don't
1063		// want to create corrupt files on it).
1064		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1065				SEEK_CUR) == -1) {
1066			message_error(_("%s: Seeking failed when trying "
1067					"to create a sparse file: %s"),
1068					pair->dest_name, strerror(errno));
1069			success = false;
1070		} else {
1071			const uint8_t zero[1] = { '\0' };
1072			if (io_write_buf(pair, zero, 1))
1073				success = false;
1074		}
1075	}
1076
1077	signals_block();
1078
1079	// Copy the file attributes. We need to skip this if destination
1080	// file isn't open or it is standard output.
1081	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1082		io_copy_attrs(pair);
1083
1084	// Close the destination first. If it fails, we must not remove
1085	// the source file!
1086	if (io_close_dest(pair, success))
1087		success = false;
1088
1089	// Close the source file, and unlink it if the operation using this
1090	// file pair was successful and we haven't requested to keep the
1091	// source file.
1092	io_close_src(pair, success);
1093
1094	signals_unblock();
1095
1096	return;
1097}
1098
1099
1100extern void
1101io_fix_src_pos(file_pair *pair, size_t rewind_size)
1102{
1103	assert(rewind_size <= IO_BUFFER_SIZE);
1104
1105	if (rewind_size > 0) {
1106		// This doesn't need to work on unseekable file descriptors,
1107		// so just ignore possible errors.
1108		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1109	}
1110
1111	return;
1112}
1113
1114
1115extern size_t
1116io_read(file_pair *pair, io_buf *buf, size_t size)
1117{
1118	// We use small buffers here.
1119	assert(size < SSIZE_MAX);
1120
1121	size_t pos = 0;
1122
1123	while (pos < size) {
1124		const ssize_t amount = read(
1125				pair->src_fd, buf->u8 + pos, size - pos);
1126
1127		if (amount == 0) {
1128			pair->src_eof = true;
1129			break;
1130		}
1131
1132		if (amount == -1) {
1133			if (errno == EINTR) {
1134				if (user_abort)
1135					return SIZE_MAX;
1136
1137				continue;
1138			}
1139
1140#ifndef TUKLIB_DOSLIKE
1141			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1142				// Disable the flush-timeout if no input has
1143				// been seen since the previous flush and thus
1144				// there would be nothing to flush after the
1145				// timeout expires (avoids busy waiting).
1146				const int timeout = pair->src_has_seen_input
1147						? mytime_get_flush_timeout()
1148						: -1;
1149
1150				switch (io_wait(pair, timeout, true)) {
1151				case IO_WAIT_MORE:
1152					continue;
1153
1154				case IO_WAIT_ERROR:
1155					return SIZE_MAX;
1156
1157				case IO_WAIT_TIMEOUT:
1158					pair->flush_needed = true;
1159					return pos;
1160
1161				default:
1162					message_bug();
1163				}
1164			}
1165#endif
1166
1167			message_error(_("%s: Read error: %s"),
1168					pair->src_name, strerror(errno));
1169
1170			return SIZE_MAX;
1171		}
1172
1173		pos += (size_t)(amount);
1174
1175		if (!pair->src_has_seen_input) {
1176			pair->src_has_seen_input = true;
1177			mytime_set_flush_time();
1178		}
1179	}
1180
1181	return pos;
1182}
1183
1184
1185extern bool
1186io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
1187{
1188	// Using lseek() and read() is more portable than pread() and
1189	// for us it is as good as real pread().
1190	if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
1191		message_error(_("%s: Error seeking the file: %s"),
1192				pair->src_name, strerror(errno));
1193		return true;
1194	}
1195
1196	const size_t amount = io_read(pair, buf, size);
1197	if (amount == SIZE_MAX)
1198		return true;
1199
1200	if (amount != size) {
1201		message_error(_("%s: Unexpected end of file"),
1202				pair->src_name);
1203		return true;
1204	}
1205
1206	return false;
1207}
1208
1209
1210static bool
1211is_sparse(const io_buf *buf)
1212{
1213	assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1214
1215	for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1216		if (buf->u64[i] != 0)
1217			return false;
1218
1219	return true;
1220}
1221
1222
1223static bool
1224io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1225{
1226	assert(size < SSIZE_MAX);
1227
1228	while (size > 0) {
1229		const ssize_t amount = write(pair->dest_fd, buf, size);
1230		if (amount == -1) {
1231			if (errno == EINTR) {
1232				if (user_abort)
1233					return true;
1234
1235				continue;
1236			}
1237
1238#ifndef TUKLIB_DOSLIKE
1239			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1240				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1241					continue;
1242
1243				return true;
1244			}
1245#endif
1246
1247			// Handle broken pipe specially. gzip and bzip2
1248			// don't print anything on SIGPIPE. In addition,
1249			// gzip --quiet uses exit status 2 (warning) on
1250			// broken pipe instead of whatever raise(SIGPIPE)
1251			// would make it return. It is there to hide "Broken
1252			// pipe" message on some old shells (probably old
1253			// GNU bash).
1254			//
1255			// We don't do anything special with --quiet, which
1256			// is what bzip2 does too. If we get SIGPIPE, we
1257			// will handle it like other signals by setting
1258			// user_abort, and get EPIPE here.
1259			if (errno != EPIPE)
1260				message_error(_("%s: Write error: %s"),
1261					pair->dest_name, strerror(errno));
1262
1263			return true;
1264		}
1265
1266		buf += (size_t)(amount);
1267		size -= (size_t)(amount);
1268	}
1269
1270	return false;
1271}
1272
1273
1274extern bool
1275io_write(file_pair *pair, const io_buf *buf, size_t size)
1276{
1277	assert(size <= IO_BUFFER_SIZE);
1278
1279	if (pair->dest_try_sparse) {
1280		// Check if the block is sparse (contains only zeros). If it
1281		// sparse, we just store the amount and return. We will take
1282		// care of actually skipping over the hole when we hit the
1283		// next data block or close the file.
1284		//
1285		// Since io_close() requires that dest_pending_sparse > 0
1286		// if the file ends with sparse block, we must also return
1287		// if size == 0 to avoid doing the lseek().
1288		if (size == IO_BUFFER_SIZE) {
1289			// Even if the block was sparse, treat it as non-sparse
1290			// if the pending sparse amount is large compared to
1291			// the size of off_t. In practice this only matters
1292			// on 32-bit systems where off_t isn't always 64 bits.
1293			const off_t pending_max
1294				= (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
1295			if (is_sparse(buf) && pair->dest_pending_sparse
1296					< pending_max) {
1297				pair->dest_pending_sparse += (off_t)(size);
1298				return false;
1299			}
1300		} else if (size == 0) {
1301			return false;
1302		}
1303
1304		// This is not a sparse block. If we have a pending hole,
1305		// skip it now.
1306		if (pair->dest_pending_sparse > 0) {
1307			if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1308					SEEK_CUR) == -1) {
1309				message_error(_("%s: Seeking failed when "
1310						"trying to create a sparse "
1311						"file: %s"), pair->dest_name,
1312						strerror(errno));
1313				return true;
1314			}
1315
1316			pair->dest_pending_sparse = 0;
1317		}
1318	}
1319
1320	return io_write_buf(pair, buf->u8, size);
1321}
1322