xref: /illumos-gate/usr/src/cmd/sgs/ld/common/ld.c (revision 92a02081)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include	<stdio.h>
28 #include	<stdlib.h>
29 #include	<unistd.h>
30 #include	<stdarg.h>
31 #include	<string.h>
32 #include	<strings.h>
33 #include	<errno.h>
34 #include	<fcntl.h>
35 #include	<libintl.h>
36 #include	<locale.h>
37 #include	<fcntl.h>
38 #include	"conv.h"
39 #include	"libld.h"
40 #include	"machdep.h"
41 #include	"msg.h"
42 
/*
 * The following declaration lets us avoid including ctype.h, which defines
 * these functions as macros that reference the __ctype[] array.  Go through
 * .plt's to get to these functions in libc rather than have every invocation
 * of ld suffer the R_SPARC_COPY overhead of the __ctype[] array.
 */
49 extern int	isspace(int);
50 
51 /*
52  * Print a message to stdout
53  */
54 /* VARARGS3 */
55 void
56 eprintf(Lm_list *lml, Error error, const char *format, ...)
57 {
58 	va_list			args;
59 	static const char	*strings[ERR_NUM] = { MSG_ORIG(MSG_STR_EMPTY) };
60 
61 #if	defined(lint)
62 	/*
63 	 * The lml argument is only meaningful for diagnostics sent to ld.so.1.
64 	 * Supress the lint error by making a dummy assignment.
65 	 */
66 	lml = 0;
67 #endif
68 	if (error > ERR_NONE) {
69 		if (error == ERR_WARNING) {
70 			if (strings[ERR_WARNING] == 0)
71 				strings[ERR_WARNING] =
72 				    MSG_INTL(MSG_ERR_WARNING);
73 		} else if (error == ERR_FATAL) {
74 			if (strings[ERR_FATAL] == 0)
75 				strings[ERR_FATAL] = MSG_INTL(MSG_ERR_FATAL);
76 		} else if (error == ERR_ELF) {
77 			if (strings[ERR_ELF] == 0)
78 				strings[ERR_ELF] = MSG_INTL(MSG_ERR_ELF);
79 		}
80 		(void) fputs(MSG_ORIG(MSG_STR_LDDIAG), stderr);
81 	}
82 	(void) fputs(strings[error], stderr);
83 
84 	va_start(args, format);
85 	(void) vfprintf(stderr, format, args);
86 	if (error == ERR_ELF) {
87 		int	elferr;
88 
89 		if ((elferr = elf_errno()) != 0)
90 			(void) fprintf(stderr, MSG_ORIG(MSG_STR_ELFDIAG),
91 			    elf_errmsg(elferr));
92 	}
93 	(void) fprintf(stderr, MSG_ORIG(MSG_STR_NL));
94 	(void) fflush(stderr);
95 	va_end(args);
96 }
97 
98 
99 /*
100  * Determine:
101  *	- ELFCLASS of resulting object (aoutclass)
102  *	- Whether we need the 32 or 64-bit libld (ldclass)
103  *	- ELF machine type of resulting object (m_mach)
104  */
105 static int
106 process_args(int argc, char **argv, uchar_t *aoutclass, uchar_t *ldclass,
107     Half *mach)
108 {
109 #if	defined(_LP64)
110 	uchar_t lclass = ELFCLASS64;
111 #else
112 	uchar_t	lclass = ELFCLASSNONE;
113 #endif
114 	uchar_t	aclass = ELFCLASSNONE;
115 	Half	mach32 = EM_NONE, mach64 = EM_NONE;
116 	int	c;
117 
118 	/*
119 	 * In general, libld.so is responsible for processing the
120 	 * command line options. The exception to this are those options
121 	 * that contain information about which linker to run and the
122 	 * class/machine of the output object. We examine the options
123 	 * here looking for the following:
124 	 *
125 	 *	-64
126 	 *		Produce an ELFCLASS64 object. Use the 64-bit linker.
127 	 *
128 	 *	-z altexec64
129 	 *		Use the 64-bit linker regardless of the class
130 	 *		of the output object.
131 	 *
132 	 *	-z target=platform
133 	 *		Produce output object for the specified platform.
134 	 *
135 	 * The -64 and -ztarget options are used when the only input to
136 	 * ld() is a mapfile or archive, and a 64-bit or non-native output
137 	 * object is required.
138 	 *
139 	 * If we've already processed a 32-bit object and we find -64, we have
140 	 * an error condition, but let this fall through to libld to obtain the
141 	 * default error message.
142 	 */
143 	opterr = 0;
144 	optind = 1;
145 getmore:
146 	while ((c = ld_getopt(0, optind, argc, argv)) != -1) {
147 		switch (c) {
148 		case '6':
149 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_FOUR),
150 			    MSG_ARG_FOUR_SIZE) == 0)
151 				aclass = ELFCLASS64;
152 			break;
153 
154 		case 'z':
155 #if	!defined(_LP64)
156 			/* -z altexec64 */
157 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_ALTEXEC64),
158 			    MSG_ARG_ALTEXEC64_SIZE) == 0) {
159 				lclass = ELFCLASS64;
160 				break;
161 			}
162 #endif
163 			/* -z target=platform */
164 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_TARGET),
165 			    MSG_ARG_TARGET_SIZE) == 0) {
166 				char *pstr = optarg + MSG_ARG_TARGET_SIZE;
167 
168 				if (strcasecmp(pstr,
169 				    MSG_ORIG(MSG_TARG_SPARC)) == 0) {
170 					mach32 = EM_SPARC;
171 					mach64 = EM_SPARCV9;
172 				} else if (strcasecmp(pstr,
173 				    MSG_ORIG(MSG_TARG_X86)) == 0) {
174 					mach32 = EM_386;
175 					mach64 = EM_AMD64;
176 				} else {
177 					eprintf(0, ERR_FATAL,
178 					    MSG_INTL(MSG_ERR_BADTARG), pstr);
179 					return (1);
180 				}
181 			}
182 			break;
183 		}
184 	}
185 
186 	/*
187 	 * Continue to look for the first ELF object to determine the class of
188 	 * objects to operate on.
189 	 */
190 	for (; optind < argc; optind++) {
191 		int		fd;
192 		Elf32_Ehdr	ehdr32;
193 
194 		/*
195 		 * If we detect some more options return to getopt().
196 		 * Checking argv[optind][1] against null prevents a forever
197 		 * loop if an unadorned `-' argument is passed to us.
198 		 */
199 		if (argv[optind][0] == '-') {
200 			if (argv[optind][1] == '\0')
201 				continue;
202 			else
203 				goto getmore;
204 		}
205 
206 		/*
207 		 * If we've already determined the object class and
208 		 * machine type, continue to the next argument. Only
209 		 * the first object contributes to this decision, and
210 		 * there's no value to opening or examing the subsequent
211 		 * ones. We do need to keep going though, because there
212 		 * may be additional options that might affect our
213 		 * class/machine decision.
214 		 */
215 		if ((aclass != ELFCLASSNONE) && (mach32 != EM_NONE))
216 			continue;
217 
218 		/*
219 		 * Open the file and determine the files ELF class.
220 		 */
221 		if ((fd = open(argv[optind], O_RDONLY)) == -1) {
222 			int err = errno;
223 
224 			eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN),
225 			    argv[optind], strerror(err));
226 			return (1);
227 		}
228 
229 		/*
230 		 * Note that we read an entire 32-bit ELF header struct
231 		 * here, even though we have yet to determine that the
232 		 * file is an ELF object or that it is ELFCLASS32. We
233 		 * do this because:
234 		 *	- Any valid ELF object of any class must
235 		 *		have at least this number of bytes in it,
236 		 *		since an ELF header is manditory, and since
237 		 *		a 32-bit header is smaller than a 64-bit one.
238 		 *	- The 32 and 64-bit ELF headers are identical
239 		 *		up through the e_version field, so we can
240 		 *		obtain the e_machine value of a 64-bit
241 		 *		object via the e_machine value we read into
242 		 *		the 32-bit version. This cannot change, because
243 		 *		the layout of an ELF header is fixed by the ABI.
244 		 *
245 		 * Note however that we do have to worry about the byte
246 		 * order difference between the object and the system
247 		 * running this program when we read the e_machine value,
248 		 * since it is a multi-byte value;
249 		 */
250 		if ((read(fd, &ehdr32, sizeof (ehdr32)) == sizeof (ehdr32)) &&
251 		    (ehdr32.e_ident[EI_MAG0] == ELFMAG0) &&
252 		    (ehdr32.e_ident[EI_MAG1] == ELFMAG1) &&
253 		    (ehdr32.e_ident[EI_MAG2] == ELFMAG2) &&
254 		    (ehdr32.e_ident[EI_MAG3] == ELFMAG3)) {
255 			if (aclass == ELFCLASSNONE) {
256 				aclass = ehdr32.e_ident[EI_CLASS];
257 				if ((aclass != ELFCLASS32) &&
258 				    (aclass != ELFCLASS64))
259 					aclass = ELFCLASSNONE;
260 			}
261 
262 			if (mach32 == EM_NONE) {
263 				int	one = 1;
264 				uchar_t	*one_p = (uchar_t *)&one;
265 				int	ld_elfdata;
266 
267 				ld_elfdata = (one_p[0] == 1) ?
268 				    ELFDATA2LSB : ELFDATA2MSB;
269 				/*
270 				 * Both the 32 and 64-bit versions get the
271 				 * type from the object. If the user has
272 				 * asked for an inconsistant class/machine
273 				 * combination, libld will catch it.
274 				 */
275 				mach32 = mach64 =
276 				    (ld_elfdata == ehdr32.e_ident[EI_DATA]) ?
277 				    ehdr32.e_machine :
278 				    BSWAP_HALF(ehdr32.e_machine);
279 			}
280 		}
281 
282 		(void) close(fd);
283 	}
284 
285 	/*
286 	 * If we couldn't establish a class, default to 32-bit.
287 	 */
288 	if (aclass == ELFCLASSNONE)
289 		aclass = ELFCLASS32;
290 	*aoutclass = aclass;
291 
292 	if (lclass == ELFCLASSNONE)
293 		lclass = ELFCLASS32;
294 	*ldclass = lclass;
295 
296 	/*
297 	 * Use the machine type that goes with the class we've determined.
298 	 * If we didn't find a usable machine type, use the native
299 	 * machine.
300 	 */
301 	*mach = (aclass == ELFCLASS64) ? mach64 : mach32;
302 	if (*mach == EM_NONE)
303 		*mach = (aclass == ELFCLASS64) ? M_MACH_64 : M_MACH_32;
304 
305 	return (0);
306 }
307 
/*
 * Process an LD_OPTIONS environment string.  This routine is first called to
 * count the number of options, and second to initialize a new argument array
 * with each option.
 *
 *	str	the option string; options are separated by runs of white
 *		space.  The caller is expected to have removed any leading
 *		white space.
 *	nargv	NULL to only count the options (str is left untouched), or
 *		an array with room for every option.  In the latter case str
 *		is modified in place: the first white space character after
 *		each option is overwritten with a null terminator, and the
 *		array entries point into str.
 *
 * Returns the number of options found.
 */
static int
process_ldoptions(char *str, char **nargv)
{
	int	argc = 0;
	char	*arg = str;

	/*
	 * Walk the environment string processing any arguments that are
	 * separated by white space.  Characters are converted to unsigned
	 * char before being handed to isspace(), as passing a negative
	 * value (other than EOF) to the ctype routines is undefined.
	 */
	while (*str != '\0') {
		if (!isspace((unsigned char)*str)) {
			str++;
			continue;
		}

		/*
		 * If a new argument array has been provided, terminate
		 * the original environment string, and initialize the
		 * appropriate argument array entry.
		 */
		if (nargv != NULL) {
			*str++ = '\0';
			nargv[argc] = arg;
		}
		argc++;

		/*
		 * Skip the remainder of this run of white space.  The next
		 * argument, if there is one, starts here.
		 */
		while (isspace((unsigned char)*str))
			str++;
		arg = str;
	}

	if (arg != str) {
		/*
		 * The string did not end in white space, so a final argument
		 * is still outstanding.  If a new argument array has been
		 * provided, initialize the final argument array entry.
		 */
		if (nargv != NULL)
			nargv[argc] = arg;
		argc++;
	}

	return (argc);
}
354 
355 /*
356  * Determine whether an LD_OPTIONS environment variable is set, and if so,
357  * prepend environment string as a series of options to the argv array.
358  */
359 static int
360 prepend_ldoptions(int *argcp, char ***argvp)
361 {
362 	int	nargc;
363 	char	**nargv, *ld_options;
364 	int	err, count;
365 
366 	if ((ld_options = getenv(MSG_ORIG(MSG_LD_OPTIONS))) == NULL)
367 		return (0);
368 
369 	/*
370 	 * Prevent modification of actual environment strings.
371 	 */
372 	if ((ld_options = strdup(ld_options)) == NULL) {
373 		err = errno;
374 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
375 		return (1);
376 	}
377 
378 	/*
379 	 * Get rid of any leading white space, and make sure the environment
380 	 * string has size.
381 	 */
382 	while (isspace(*ld_options))
383 		ld_options++;
384 	if (*ld_options == '\0')
385 		return (1);
386 
387 	/*
388 	 * Determine the number of options provided.
389 	 */
390 	nargc = process_ldoptions(ld_options, NULL);
391 
392 	/*
393 	 * Allocate a new argv array big enough to hold the new options from
394 	 * the environment string and the old argv options.
395 	 */
396 	if ((nargv = malloc((nargc + *argcp + 1) * sizeof (char *))) == NULL) {
397 		err = errno;
398 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
399 		return (1);
400 	}
401 
402 	/*
403 	 * Initialize first element of new argv array to be the first element
404 	 * of the old argv array (ie. calling programs name).  Then add the new
405 	 * args obtained from the environment.
406 	 */
407 	nargc = 0;
408 	nargv[nargc++] = (*argvp)[0];
409 	nargc += process_ldoptions(ld_options, &nargv[nargc]);
410 
411 	/*
412 	 * Now add the original argv array (skipping argv[0]) to the end of the
413 	 * new argv array, and re-vector argc and argv to reference this new
414 	 * array
415 	 */
416 	for (count = 1; count < *argcp; count++, nargc++)
417 		nargv[nargc] = (*argvp)[count];
418 
419 	nargv[nargc] = NULL;
420 
421 	*argcp = nargc;
422 	*argvp = nargv;
423 
424 	return (0);
425 }
426 
427 /*
428  * Check to see if there is a LD_ALTEXEC=<path to alternate ld> in the
429  * environment.  If so, first null the environment variable out, and then
430  * exec() the binary pointed to by the environment variable, passing the same
431  * arguments as the originating process.  This mechanism permits using
432  * alternate link-editors (debugging/developer copies) even in complex build
433  * environments.
434  */
435 static int
436 ld_altexec(char **argv, char **envp)
437 {
438 	char	*execstr;
439 	char	**str;
440 	int	err;
441 
442 	for (str = envp; *str; str++) {
443 		if (strncmp(*str, MSG_ORIG(MSG_LD_ALTEXEC),
444 		    MSG_LD_ALTEXEC_SIZE) == 0) {
445 			break;
446 		}
447 	}
448 
449 	/*
450 	 * If LD_ALTEXEC isn't set, return to continue executing the present
451 	 * link-editor.
452 	 */
453 	if (*str == 0)
454 		return (0);
455 
456 	/*
457 	 * Get a pointer to the actual string.  If it's a null entry, return.
458 	 */
459 	execstr = strdup(*str + MSG_LD_ALTEXEC_SIZE);
460 	if (*execstr == '\0')
461 		return (0);
462 
463 	/*
464 	 * Null out the LD_ALTEXEC= environment entry.
465 	 */
466 	(*str)[MSG_LD_ALTEXEC_SIZE] = '\0';
467 
468 	/*
469 	 * Set argv[0] to point to our new linker
470 	 */
471 	argv[0] = execstr;
472 
473 	/*
474 	 * And attempt to execute it.
475 	 */
476 	(void) execve(execstr, argv, envp);
477 
478 	/*
479 	 * If the exec() fails, return a failure indication.
480 	 */
481 	err = errno;
482 	eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_EXEC), execstr,
483 	    strerror(err));
484 	return (1);
485 }
486 
487 int
488 main(int argc, char **argv, char **envp)
489 {
490 	char		**oargv = argv;
491 	uchar_t 	aoutclass, ldclass, checkclass;
492 	Half		mach;
493 
494 	/*
495 	 * Establish locale.
496 	 */
497 	(void) setlocale(LC_MESSAGES, MSG_ORIG(MSG_STR_EMPTY));
498 	(void) textdomain(MSG_ORIG(MSG_SUNW_OST_SGS));
499 
500 	/*
501 	 * Execute an alternate linker if the LD_ALTEXEC environment variable is
502 	 * set.  If a specified alternative could not be found, bail.
503 	 */
504 	if (ld_altexec(argv, envp))
505 		return (1);
506 
507 	/*
508 	 * Check the LD_OPTIONS environment variable, and if present prepend
509 	 * the arguments specified to the command line argument list.
510 	 */
511 	if (prepend_ldoptions(&argc, &argv))
512 		return (1);
513 
514 	/*
515 	 * Examine the command arguments to determine:
516 	 *	- object class
517 	 *	- link-editor class
518 	 *	- target machine
519 	 */
520 	if (process_args(argc, argv, &aoutclass, &ldclass, &mach))
521 		return (1);
522 
523 	/*
524 	 * If we're processing 64-bit objects, or the user specifically asked
525 	 * for a 64-bit link-editor, determine if a 64-bit ld() can be executed.
526 	 * Bail if a 64-bit ld() was explicitly asked for, but one could not be
527 	 * found.
528 	 */
529 	if ((aoutclass == ELFCLASS64) || (ldclass == ELFCLASS64))
530 		checkclass = conv_check_native(oargv, envp);
531 
532 	if ((ldclass == ELFCLASS64) && (checkclass != ELFCLASS64)) {
533 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_64));
534 		return (1);
535 	}
536 
537 	/*
538 	 * Reset the getopt(3c) error message flag, and call the generic entry
539 	 * point using the appropriate class.
540 	 */
541 	if (aoutclass == ELFCLASS64)
542 		return (ld64_main(argc, argv, mach));
543 	else
544 		return (ld32_main(argc, argv, mach));
545 }
546 
547 /*
548  * Exported interfaces required by our dependencies.  libld and friends bind to
549  * the different implementations of these provided by either ld or ld.so.1.
550  */
551 const char *
552 _ld_msg(Msg mid)
553 {
554 	return (gettext(MSG_ORIG(mid)));
555 }
556