xref: /illumos-gate/usr/src/lib/libc/amd64/gen/strncat.S (revision 55fea89d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26	.file	"strncat.s"
27
28/
29/ strncat(s1, s2, n)
30/
31/ Concatenates s2 on the end of s1.  s1's space must be large enough.
32/ At most n characters are moved.
33/ Returns s1.
34/
35/ Fast assembly language version of the following C-program strncat
36/ which represents the `standard' for the C-library.
37/
38/	char *
39/	strncat(char *s1, const char *s2, size_t n)
40/	{
41/		char	*os1 = s1;
42/
43/		n++;
44/		while (*s1++)
45/			;
46/		--s1;
47/		while (*s1++ = *s2++)
48/			if (--n == 0) {
49/				s1[-1] = '\0';
50/				break;
51/			}
52/		return (os1);
53/	}
54/
55/ In this assembly language version, the following expression is used
56/ to check if a 32-bit word data contains a null byte or not:
57/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
58/ If the above expression generates a value other than 0x80808080,
59/ that means the 32-bit word data contains a null byte.
60/
61/ The above has been extended for 64-bit support.
62/
63
64#include "SYS.h"
65
66	ENTRY(strncat)		/* (char *, char *, size_t) */
67	/
68	/ In: %rdi = s1, %rsi = s2, %rdx = n.  Out: %rax = s1.  No stack use.
69	/	%rdi, %rsi	cursors into the destination and the source
70	/	%rdx		number of source bytes that may still be copied
71	/	%r8, %r9	0x7f7f... and 0x8080... masks for the null-byte test
72	/	%r11, %rcx	data in flight and scratch for the null-byte test
73	/
74	/ The source is copied byte by byte until %rsi is quadword aligned,
75	/ then a quadword at a time while n >= 8 and no null byte is seen,
76	/ and byte by byte again for whatever remains.
77	/
78	movq	%rdi, %rax		/ return value = s1
79	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f...
80	movq	%r8, %r9
81	notq	%r9				/ %r9 = 0x80...
82	testq	$7, %rdi		/ destination quadword aligned?
83	jnz	.L1			/ no: scan bytes until it is
84	.align	4
85.L2:
86	/ Scan the destination one aligned quadword at a time.
87	movq	(%rdi), %r11
88	movq	%r8, %rcx
89	andq	%r11, %rcx		/ clear bit 7 of every byte
90	addq	$8, %rdi		/ advance (undone below on a hit)
91	addq	%r8, %rcx		/ bit 7 set iff low 7 bits nonzero
92	orq	%r11, %rcx		/ bit 7 set iff the byte is nonzero
93	andq	%r9, %rcx		/ keep bit 7 of every byte
94	cmpq	%r9, %rcx		/ all eight bytes nonzero?
95	je	.L2			/ yes: next quadword
96	subq	$8, %rdi		/ back to the quadword with the null
97.L1:
98	cmpb	$0, (%rdi)		/ end of the destination string?
99	je	.L3			/ yes: start appending
100	incq	%rdi
101	testq	$7, %rdi		/ quadword aligned yet?
102	jnz	.L1			/ no: keep scanning bytes
103	jmp	.L2			/ yes: scan quadwords
104	.align	4
105.L3:
106	/ %rdi points to the null byte that ends the destination string
107	testq	$7, %rsi		/ source quadword aligned?
108	jnz	.L7			/ no: copy bytes until it is
109.L4:
110	cmpq	$8, %rdx		/ fewer than 8 bytes left to copy?
111	jb	.L7			/ yes: finish byte by byte
112	.align	4
113.L5:
114	movq	(%rsi), %r11		/ next aligned source quadword
115	movq	%r8, %rcx
116	andq	%r11, %rcx		/ same null-byte test as in .L2
117	addq	$8, %rsi
118	addq	%r8, %rcx
119	orq	%r11, %rcx
120	andq	%r9, %rcx
121	cmpq	%r9, %rcx		/ null byte in this quadword?
122	jne	.L6			/ yes: copy it byte by byte
123	movq	%r11, (%rdi)		/ no: store all eight bytes
124	subq	$8, %rdx
125	addq	$8, %rdi
126	cmpq	$8, %rdx		/ at least 8 bytes still allowed?
127	jae	.L5			/ yes: next quadword
128	jmp	.L7
129.L6:
130	subq	$8, %rsi		/ back to the quadword with the null
131	.align	4
132.L7:
133	/ byte copy: %rsi unaligned, n < 8, or a null byte lies just ahead
134	testq	%rdx, %rdx		/ n exhausted?
135	jz	.L8			/ yes: terminate and return
136	movb	(%rsi), %r11b
137	decq	%rdx
138	movb	%r11b, (%rdi)
139	incq	%rsi
140	incq	%rdi
141	testb	%r11b, %r11b		/ was that the terminating null?
142	jz	.L9			/ yes: done, null already stored
143	testq	$7, %rsi		/ source quadword aligned now?
144	jnz	.L7			/ no: next byte
145	jmp	.L4			/ yes: try the quadword loop
146	.align	4
147.L8:
148	movb	$0, (%rdi)		/ null termination
149.L9:
150	ret
151	SET_SIZE(strncat)
152