您的位置:首页 > 编程语言 > Java开发

java中编码方式的总结

2016-08-12 10:14 316 查看

下面有三个函数,分别演示:用错误的字符集编解码后的结果、系统默认编码(GBK)下字符的字节表示,以及 JVM 内部的字符编码方式。

/**

* java中,字符串在JVM内部永远以Unicode(UTF-16)形式存储,

* 但只要是为人所能看到的地方,如:网页、数据库、编码区、控制台等,都会有相应的编码方式,

* 要想正确的编码、解码就必须显式地指明相应的编码或解码方式

* @author Administrator

*

*/

import java.io.*;

/**
 * Console demonstrations of how character encodings interact with Java strings.
 *
 * <p>A Java {@code char} is a UTF-16 code unit regardless of any external encoding.
 * Bytes only appear at boundaries (console, files, network), and every encode or
 * decode step at such a boundary uses some charset - either one named explicitly or
 * the platform default. The three demos show what happens when those choices match
 * or mismatch.
 *
 * <p>Non-ASCII characters are written as Unicode escapes so the compiled constants do
 * not depend on the encoding the compiler assumes for this source file.
 */
public class JavaEncoded {

    /** CJK character U+6211 ("I / me"); encoded in GBK it is the two bytes 0xCE 0xD2. */
    private static final char CJK_SAMPLE = '\u6211';

    /**
     * Runs one of the demos.
     *
     * @param args ignored
     * @throws Exception if the selected demo fails
     */
    public static void main(String[] args) throws Exception {
        // Alternative demos; enable one at a time.
        // isoEncode();
        // encodeLenght();
        jvmEncode();
    }

    /**
     * Reads one line from standard input, decoding the raw bytes as ISO-8859-1, and
     * prints it three ways to show which round trip restores the text the user typed.
     *
     * <p>ISO-8859-1 maps every byte to exactly one char, so the decoded line is a
     * lossless carrier of the original bytes - but it only looks right after those
     * bytes are recovered and decoded with the charset the console really used.
     *
     * <p>Standard input is closed when this method returns. If standard input is
     * already at end of stream, a notice is written to standard error and nothing is
     * printed to standard output.
     *
     * @throws IOException if reading from standard input fails
     */
    public static void isoEncode() throws IOException {
        String strLine;
        // Closing the BufferedReader also closes the wrapped InputStreamReader and
        // System.in, even if readLine() throws.
        try (BufferedReader br =
                new BufferedReader(new InputStreamReader(System.in, "iso8859-1"))) {
            strLine = br.readLine();
        }
        if (strLine == null) {
            // readLine() returns null at end of stream; there is nothing to demonstrate.
            System.err.println("isoEncode: no input available on standard input");
            return;
        }

        // 1) The raw ISO-8859-1 view: one char per input byte (garbled for non-ASCII input).
        System.out.println(strLine);

        // 2) WRONG: encodes with the platform default charset (GB2312/GBK on the
        //    author's machine), which generally cannot represent the Latin-1 chars,
        //    and then decodes as ISO-8859-1. The original bytes are not recovered.
        System.out.println(new String(strLine.getBytes(), "iso8859-1"));

        // 3) RIGHT: encoding with ISO-8859-1 recovers the exact bytes that were read;
        //    decoding them with the platform default charset restores the typed text
        //    (assuming the console delivers input in the platform default charset).
        System.out.println(new String(strLine.getBytes("iso8859-1")));
    }

    /**
     * Prints, for a CJK character (U+6211) and for {@code 'A'}, the encoded length and
     * the signed byte values under the platform default charset and under GBK.
     *
     * <p>In GBK a Chinese character takes two bytes and an ASCII letter one byte. The
     * platform-default figures match the GBK ones only when the default charset is
     * GBK (as on the author's system); elsewhere, e.g. with UTF-8, they differ.
     *
     * <p>Output layout: one empty line, then for each character four lines - default
     * length, GBK length, default bytes, GBK bytes. Every byte is followed by a space.
     *
     * @throws UnsupportedEncodingException if the runtime does not provide GBK
     */
    public static void encodeLenght() throws UnsupportedEncodingException {
        System.out.println();
        // U+6211 in GBK is 0xCE 0xD2, i.e. 11001110 11010010. Java bytes are signed
        // two's complement, so these bit patterns print as -50 and -46.
        printEncodedForms(CJK_SAMPLE);
        printEncodedForms('A');
    }

    /**
     * Prints {@code ch} encoded with the platform default charset and with GBK:
     * the two lengths first, then the two byte sequences, one item per line.
     */
    private static void printEncodedForms(char ch) throws UnsupportedEncodingException {
        String text = String.valueOf(ch);
        byte[] platformBytes = text.getBytes();
        byte[] gbkBytes = text.getBytes("GBK");

        System.out.println(platformBytes.length);
        System.out.println(gbkBytes.length);
        System.out.println(formatBytes(platformBytes));
        System.out.println(formatBytes(gbkBytes));
    }

    /** Renders each byte as a signed decimal number followed by a single space. */
    private static String formatBytes(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            sb.append(b).append(' ');
        }
        return sb.toString();
    }

    /**
     * Shows that a {@code char} is just its Unicode code unit: prints the code of the
     * CJK character U+6C38 ("eternal") in hexadecimal, then prints the character
     * obtained from the numeric value {@code 0x6c38}.
     *
     * <p>Expected output: {@code 6c38} on the first line and the character U+6C38 on
     * the second (rendered according to the console encoding).
     */
    public static void jvmEncode() {
        char han = '\u6c38';
        // Widen to int so the hex output never depends on how negative shorts are formatted.
        System.out.format("%x\n", (int) han);

        // The same character built from its numeric code; printing it yields the glyph.
        char han2 = 0x6c38;
        System.out.println(han2);
    }
}

//~
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: