/*
 * C program to read ISO-8859-1 (Latin-1) characters from a file and
 * output them as UTF-8 encoded characters.
 *
 * Note: the terminal window is expected to use UTF-8.
 *
 * To run, be in this directory and type: ./iso2utf
 */

#include <stdio.h>

/*
 * Convert one ISO-8859-1 character c to one or two bytes of UTF-8 and
 * store them in the string starting at address s.
 *
 * Returns the advanced pointer (one past the last byte written).
 * The caller must guarantee room for at least two bytes at s.
 * No terminating '\0' is written.
 *
 * It would probably have been better to process a whole line, with args
 * of both string pointers, much like strcpy().
 */
char *iso2utf(char *s, char c)
{
    /*
     * Whether plain char is signed is implementation-defined, so work on
     * the unsigned byte value: the range test and the shift below are then
     * well-defined everywhere and need no mask to strip sign-extension bits.
     */
    unsigned char uc = (unsigned char)c;

    if (uc < 0x80) {
        *s++ = (char)uc;                    /* ascii char, 0xxxxxxx */
    } else {
        /* extended char, 1xxxxxxx: becomes the two bytes 1100001x 10xxxxxx */
        *s++ = (char)(0xC0 | (uc >> 6));    /* 110000 + 2 leftmost bits */
        *s++ = (char)(0x80 | (uc & 0x3F));  /* 10 + 6 rightmost bits */
    }
    return s;
}

/*
 * Read isotest.txt (a file created with ISO-8859-1 encoding), convert it
 * to UTF-8 and write the result to standard output.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read error
 * occurs.
 */
int main(void)
{
    char str[400];
    char *ptr = str;
    char *const end = str + sizeof str;
    int ch;
    int status = 0;

    FILE *in = fopen("isotest.txt", "r");
    if (in == NULL) {
        perror("isotest.txt");
        return 1;
    }

    while ((ch = fgetc(in)) != EOF) {
        /*
         * One input byte may expand to two output bytes; flush the buffer
         * before it could overflow so input of any length is handled.
         */
        if (end - ptr < 2) {
            fwrite(str, 1, (size_t)(ptr - str), stdout);
            ptr = str;
        }
        ptr = iso2utf(ptr, (char)ch);
    }

    if (ferror(in)) {
        perror("isotest.txt");
        status = 1;
    }
    fclose(in);

    /*
     * Write the remaining bytes verbatim. The data must never be used as a
     * printf format string: a '%' in the input would be interpreted.
     */
    fwrite(str, 1, (size_t)(ptr - str), stdout);

    return status;
}