1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Converts files from one char set to another
29 *
30 * Written 11/09/87 Eddy Bell
31 *
32 */
33
34
/*
 * INCLUDES AND DEFINES
 */
#include <stdio.h>
#include <fcntl.h>
#include <sys/systeminfo.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
44
45 /*#include <io.h> for microsoft c 4.0 */
46
47 #define CONTENTS_ASCII 0
48 #define CONTENTS_ASCII8 1
49 #define CONTENTS_ISO 2
50 #define CONTENTS_DOS 3
51 #ifdef _F_BIN
52 #define DOS_BUILD 1
53 #else
54 #define UNIX_BUILD 1
55 #endif
56
57 /******************************************************************************
58 * INCLUDES AND DEFINES
59 ******************************************************************************/
60 #ifdef UNIX_BUILD
61 #include <sys/types.h>
62 #include <sys/kbio.h>
63 #include <sys/time.h>
64 #include <fcntl.h>
65 #include "../sys/dos_iso.h"
66 #endif
67
68 #ifdef DOS_BUILD
69 #include <dos.h>
70 #include "..\sys\dos_iso.h"
71 #endif
72
73
74 #define GLOBAL
75 #define LOCAL static
76 #define VOID int
77 #define BOOL int
78
79 #define FALSE 0
80 #define TRUE ~FALSE
81
82 #define CR 0x0D
83 #define LF 0x0A
84 #define DOS_EOF 0x1A
85 #define MAXLEN 1024
86
87
88 /******************************************************************************
89 * FUNCTION AND VARIABLE DECLARATIONS
90 ******************************************************************************/
/*
 * error() prints "dos2unix: <message> <strerror(errno)>." on stderr and
 * exits with status 1; usage() prints the command synopsis and exits with
 * status 1.  Both are declared with full prototypes so that calls are
 * type-checked (an empty parameter list disables checking and means
 * "no parameters" from C23 onward).
 *
 * tmpfd receives the descriptor returned by mkstemp() when the input and
 * output file names are identical; it stays -1 otherwise.
 */
static void error(char *format, char *args);
static void usage(void);
static int tmpfd = -1;
94
95 /******************************************************************************
96 * ENTRY POINTS
97 ******************************************************************************/
98
99 int
main(int argc,char ** argv)100 main(int argc, char **argv)
101 {
102 FILE *in_stream = NULL;
103 FILE *out_stream = NULL;
104 unsigned char tmp_buff[512];
105 unsigned char *src_str, *dest_str;
106 char *in_file_name, *out_file_name;
107 int num_read, i, j, out_len, translate_mode, same_name; /* char count for fread() */
108 unsigned char * dos_to_iso;
109 int type;
110 int code_page_overide; /* over ride of default codepage */
111 #ifdef UNIX_BUILD
112 int kbdfd;
113 #endif
114 char sysinfo_str[MAXLEN];
115
116 same_name = FALSE;
117 out_file_name = (char *)0;
118
119 /* The filename parameter is positionally dependent - it must be the
120 * second argument, immediately following the program name. Except
121 * when a char set switch is passed then the file name must be third
122 * argument.
123 */
124
125 argv++;
126 in_stream = stdin;
127 out_stream = stdout;
128 j = 0; /* count for file names 0 -> source 1-> dest */
129 translate_mode = CONTENTS_ISO; /*default trans mode*/
130 code_page_overide = 0;
131 for (i=1; i<argc; i++) {
132 if (*argv[0] == '-') {
133 if (argc > 1 && !strncmp(*argv,"-iso",4)) {
134 translate_mode = CONTENTS_ISO;
135 argv++;
136 } else if (argc > 1 && !strncmp(*argv,"-7",2)) {
137 translate_mode = CONTENTS_ASCII;
138 argv++;
139 } else if (argc > 1 && !strncmp(*argv,"-ascii",6)) {
140 translate_mode = CONTENTS_DOS;
141 argv++;
142 } else if (argc > 1 && !strncmp(*argv,"-437",4)) {
143 code_page_overide = CODE_PAGE_US;
144 argv++;
145 } else if (argc > 1 && !strncmp(*argv,"-850",4)) {
146 code_page_overide = CODE_PAGE_MULTILINGUAL;
147 argv++;
148 } else if (argc > 1 && !strncmp(*argv,"-860",4)) {
149 code_page_overide = CODE_PAGE_PORTUGAL;
150 argv++;
151 } else if (argc > 1 && !strncmp(*argv,"-863",4)) {
152 code_page_overide = CODE_PAGE_CANADA_FRENCH;
153 argv++;
154 } else if (argc > 1 && !strncmp(*argv,"-865",4)) {
155 code_page_overide = CODE_PAGE_NORWAY;
156 argv++;
157 } else
158 argv++;
159 continue;
160 }else{ /* not a command so must be filename */
161 switch(j){
162 case IN_FILE: /* open in file from cmdline */
163 in_file_name = *argv;
164 j++; /* next file name is outfile */
165 break;
166
167 case OUT_FILE: /* open out file from cmdline */
168 out_file_name = *argv;
169 j++;
170 break;
171
172 default:
173 usage();
174 }
175 }
176
177
178 argv++;
179 }
180
181 /* input file is specified */
182 if (j > 0) {
183 in_stream = fopen(in_file_name, "r");
184 if (in_stream == NULL)
185 error("Couldn't open input file %s.", in_file_name);
186 }
187
188 /* output file is secified */
189 if (j > 1) {
190 if(!strcmp(in_file_name, out_file_name)){
191 /* input and output have same name */
192 if (access(out_file_name, 2))
193 error("%s not writable.", out_file_name);
194 strcpy(out_file_name, "/tmp/udXXXXXX");
195 tmpfd = mkstemp(out_file_name);
196 if (tmpfd == -1) {
197 error("Couldn't create output file %s.",
198 out_file_name);
199 }
200 (void) close(tmpfd);
201 same_name = TRUE;
202 } else
203 same_name = FALSE;
204 out_stream = fopen(out_file_name, "w");
205 if (out_stream == NULL) {
206 (void) unlink(out_file_name);
207 error("Couldn't open output file %s.", out_file_name);
208 }
209 }
210
211 #ifdef _F_BIN
212 setmode(fileno(in_stream), O_BINARY);
213 setmode(fileno(out_stream), O_BINARY);
214 #endif
215
216 #ifdef UNIX_BUILD
217 if(!code_page_overide){
218 if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN) < 0) {
219 fprintf(stderr,"could not obtain system information\n");
220 (void) unlink(out_file_name);
221 exit(1);
222
223 }
224 if (strcmp(sysinfo_str,"i386")) {
225 if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) {
226 fprintf(stderr, "could not open /dev/kbd to "
227 "get keyboard type US keyboard assumed\n");
228 }
229 if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) {
230 fprintf(stderr,"could not get keyboard type US keyboard assumed\n");
231 }
232 } else {
233 type = 0;
234 }
235 switch(type){
236 case 0:
237 case 1: /* United States */
238 dos_to_iso = &dos_to_iso_cp_437[0];
239 break;
240
241 case 2: /* Belgian French */
242 dos_to_iso = &dos_to_iso_cp_437[0];
243 break;
244
245 case 3: /* Canadian French */
246 dos_to_iso = &dos_to_iso_cp_863[0];
247 break;
248
249 case 4: /* Danish */
250 dos_to_iso = &dos_to_iso_cp_865[0];
251 break;
252
253 case 5: /* German */
254 dos_to_iso = &dos_to_iso_cp_437[0];
255 break;
256
257 case 6: /* Italian */
258 dos_to_iso = &dos_to_iso_cp_437[0];
259 break;
260
261 case 7: /* Netherlands Dutch */
262 dos_to_iso = &dos_to_iso_cp_437[0];
263 break;
264
265 case 8: /* Norwegian */
266 dos_to_iso = &dos_to_iso_cp_865[0];
267 break;
268
269 case 9: /* Portuguese */
270 dos_to_iso = &dos_to_iso_cp_860[0];
271 break;
272
273 case 10: /* Spanish */
274 dos_to_iso = &dos_to_iso_cp_437[0];
275 break;
276
277 case 11: /* Swedish Finnish */
278 dos_to_iso = &dos_to_iso_cp_437[0];
279 break;
280
281 case 12: /* Swiss French */
282 dos_to_iso = &dos_to_iso_cp_437[0];
283 break;
284
285 case 13: /* Swiss German */
286 dos_to_iso = &dos_to_iso_cp_437[0];
287 break;
288
289 case 14: /* United Kingdom */
290 dos_to_iso = &dos_to_iso_cp_437[0];
291
292 break;
293
294 default:
295 dos_to_iso = &dos_to_iso_cp_437[0];
296 break;
297 }
298 }else{
299 switch(code_page_overide){
300 case CODE_PAGE_US:
301 dos_to_iso = &dos_to_iso_cp_437[0];
302 break;
303
304 case CODE_PAGE_MULTILINGUAL:
305 dos_to_iso = &dos_to_iso_cp_850[0];
306 break;
307
308 case CODE_PAGE_PORTUGAL:
309 dos_to_iso = &dos_to_iso_cp_860[0];
310 break;
311
312 case CODE_PAGE_CANADA_FRENCH:
313 dos_to_iso = &dos_to_iso_cp_863[0];
314 break;
315
316 case CODE_PAGE_NORWAY:
317 dos_to_iso = &dos_to_iso_cp_865[0];
318 break;
319 }
320 }
321
322 #endif
323 #ifdef DOS_BUILD
324 if(!code_page_overide){
325 {
326 union REGS regs;
327 regs.h.ah = 0x66; /* get/set global code page */
328 regs.h.al = 0x01; /* get */
329 intdos(®s, ®s);
330 type = regs.x.bx;
331 }
332 switch(type){
333 case 437: /* United States */
334 dos_to_iso = &dos_to_iso_cp_437[0];
335 break;
336
337 case 850: /* Multilingual */
338 dos_to_iso = &dos_to_iso_cp_850[0];
339 break;
340
341 case 860: /* Portuguese */
342 dos_to_iso = &dos_to_iso_cp_860[0];
343 break;
344
345 case 863: /* Canadian French */
346 dos_to_iso = &dos_to_iso_cp_863[0];
347 break;
348
349 case 865: /* Danish */
350 dos_to_iso = &dos_to_iso_cp_865[0];
351 break;
352
353 default:
354 dos_to_iso = &dos_to_iso_cp_437[0];
355 break;
356 }
357 }else{
358 switch(code_page_overide){
359 case CODE_PAGE_US:
360 dos_to_iso = &dos_to_iso_cp_437[0];
361 break;
362
363 case CODE_PAGE_MULTILINGUAL:
364 dos_to_iso = &dos_to_iso_cp_850[0];
365 break;
366
367 case CODE_PAGE_PORTUGAL:
368 dos_to_iso = &dos_to_iso_cp_860[0];
369 break;
370
371 case CODE_PAGE_CANADA_FRENCH:
372 dos_to_iso = &dos_to_iso_cp_863[0];
373 break;
374
375 case CODE_PAGE_NORWAY:
376 dos_to_iso = &dos_to_iso_cp_865[0];
377 break;
378 }
379 }
380
381
382 #endif
383
384 /* While not EOF, read in chars and send them to out_stream
385 * if current char is not a CR.
386 */
387
388 do {
389 num_read = fread(&tmp_buff[0], 1, 100, in_stream);
390 i = 0;
391 out_len = 0;
392 src_str = dest_str = &tmp_buff[0];
393 switch (translate_mode){
394 case CONTENTS_ISO:
395 {
396 while ( i++ != num_read ){
397 if( *src_str == '\r'){
398 src_str++;
399 }
400 else{
401 out_len++;
402 *dest_str++ = dos_to_iso[*src_str++];
403 }
404 }
405 }
406 break;
407
408 case CONTENTS_ASCII:
409 {
410 while ( i++ != num_read){
411 if( *src_str == '\r'){
412 src_str++;
413 continue;
414 }
415 else if ( *src_str > 127 ){
416 *dest_str++ = (unsigned char) ' ';
417 src_str++;
418 out_len++;
419 }
420 else{
421 out_len++;
422 *dest_str++ = *src_str++;
423 }
424 }
425 }
426 break;
427
428 case CONTENTS_DOS:
429 {
430 while ( i++ != num_read){
431 if( *src_str == '\r'){
432 src_str++;
433 continue;
434 }
435 *dest_str++ = *src_str++;
436 out_len++;
437 }
438 }
439 break;
440 }
441 if (out_len > num_read)
442 out_len = num_read;
443 if (tmp_buff[out_len-2] == DOS_EOF)
444 out_len -= 2;
445 else if (tmp_buff[out_len-1] == DOS_EOF)
446 out_len -= 1;
447
448 if( out_len > 0 &&
449 out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream)))
450 error("Error writing %s.", out_file_name);
451
452 } while (!feof(in_stream));
453
454 fclose(out_stream);
455 fclose(in_stream);
456 if(same_name){
457 unlink(in_file_name);
458 in_stream = fopen(out_file_name, "r");
459 out_stream = fopen(in_file_name, "w");
460 #ifdef _F_BIN
461 setmode(fileno(in_stream), O_BINARY);
462 setmode(fileno(out_stream), O_BINARY);
463 #endif
464 while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) {
465 if( num_read != fwrite(tmp_buff, 1, num_read, out_stream))
466 error("Error writing %s.", in_file_name);
467 }
468 fclose(out_stream);
469 fclose(in_stream);
470 unlink(out_file_name);
471 }
472 return (0);
473 }
474
/*
 * error - report a fatal error on stderr and terminate the program.
 *
 * format: printf-style format string with at most one %s conversion.
 * args:   string substituted for that conversion.
 *
 * The output is "dos2unix: <formatted message> <strerror(errno)>.\n".
 * errno is captured on entry because the fprintf() calls that precede
 * strerror() are allowed to modify it.  Exits with status 1; never returns.
 */
static void
error(char *format, char *args)
{
	int saved_errno = errno;

	(void) fprintf(stderr, "dos2unix: ");
	(void) fprintf(stderr, format, args);
	(void) fprintf(stderr, " %s.\n", strerror(saved_errno));
	exit(1);
}
484
/*
 * usage - print the command synopsis on stderr and exit with status 1.
 *
 * The synopsis lists the code page switches (-437, -850, -860, -863, -865)
 * alongside the translation mode switches, since main() accepts all of them.
 * Never returns.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] "
	    "[ -437 | -850 | -860 | -863 | -865 ] "
	    "[ originalfile [ convertedfile ] ]\n");
	exit(1);
}
490
491