您的位置：首页 > Web前端

java io系列23之 BufferedReader(字符缓冲输入流)

2016-10-09 17:25 856 查看

本文摘抄自：
skywang12345

摘要： BufferedReader 是缓冲字符输入流，它的功能是为传入的底层字符输入流提供缓冲功能。它会先把底层字符输入流（in）中的字符读取到自己的 buffer（内置的缓存字符数组）中，然后程序调用 BufferedReader 的 read 方法将 buffer 中的字符读取到程序中；当 buffer 中的字符被读取完之后，BufferedReader 会从 in 中读取下一个数据块到 buffer 中供程序读取，直到 in 中的数据被读取完毕。这样做的好处：
一是提高了读取的效率；二是减少了访问存储介质的次数。其中有个关键的方法 fill()…

BufferedReader 介绍

BufferedReader 是缓冲字符输入流。它继承于 Reader。

BufferedReader 的作用是为其它字符输入流添加一些缓冲功能。

BufferedReader 函数列表

BufferedReader(Reader in)
BufferedReader(Reader in, int size)

void     close()
void     mark(int markLimit)
boolean  markSupported()
int      read()
int      read(char[] buffer, int offset, int length)
String   readLine()
boolean  ready()
void     reset()
long     skip(long charCount)

BufferedReader 源码分析(基于jdk1.7.40)

package java.io

/**
 * Buffered character-input stream: wraps another {@link Reader} and serves
 * reads out of an internal char array that is refilled in blocks by
 * {@link #fill()}.
 *
 * <p>Every public operation that touches the buffer runs inside
 * {@code synchronized (lock)}, where {@code lock} is the field inherited
 * from {@link Reader}.
 *
 * <p>Supports {@code mark}/{@code reset}: while a mark is valid, refills keep
 * the characters read since the mark so that {@code reset()} can rewind.
 */
public class BufferedReader extends Reader {

    /** Underlying reader; set to {@code null} by {@link #close()}. */
    private Reader in;

    /** Character buffer. */
    private char cb[];

    /**
     * {@code nChars} is the number of valid characters in {@code cb};
     * {@code nextChar} is the index in {@code cb} of the next character to return.
     */
    private int nChars, nextChar;

    /** markedChar value: a mark was set but was dropped after reading past its limit. */
    private static final int INVALIDATED = -2;
    /** markedChar value: no mark has been set. */
    private static final int UNMARKED = -1;
    /** Index in {@code cb} of the marked character, or one of the two sentinels above. */
    private int markedChar = UNMARKED;
    /** Read-ahead limit passed to the last mark() call. */
    private int readAheadLimit = 0; /* Valid only when markedChar > 0 */

    /** If true, the next character is skipped when it is a '\n' (it follows a '\r'). */
    private boolean skipLF = false;

    /** Value of skipLF at the time the mark was set; restored by reset(). */
    private boolean markedSkipLF = false;

    /** Default buffer size in chars. */
    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    /** Initial capacity of the builder used when a line spans several buffer fills. */
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    /**
     * Creates a buffering reader with a buffer of the given size.
     *
     * @param in the reader to wrap
     * @param sz buffer size in chars
     * @throws IllegalArgumentException if {@code sz <= 0}
     */
    public BufferedReader(Reader in, int sz) {
        super(in);
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        cb = new char[sz];
        nextChar = nChars = 0;
    }

    /**
     * Creates a buffering reader with the default buffer size (8192 chars).
     *
     * @param in the reader to wrap
     */
    public BufferedReader(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /** Throws if the stream has been closed (close() nulls out {@code in}). */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Refills the buffer from the underlying reader. Called when the buffer is
     * empty or fully consumed. If a valid mark exists, the characters between
     * the mark and the current position are kept at the front of the buffer.
     */
    private void fill() throws IOException {
        // dst: index in cb at which newly read characters are stored.
        int dst;
        if (markedChar <= UNMARKED) {
            /* No mark */
            dst = 0;
        } else {
            /* Marked */
            // delta: number of characters consumed since the mark.
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
                /* Gone past read-ahead limit: Invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
                    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                    markedChar = 0;
                    dst = delta;
                } else {
                    /* Reallocate buffer to accommodate read-ahead limit */
                    char ncb[] = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                    markedChar = 0;
                    dst = delta;
                }
                // Only the kept characters remain; this is the state left behind
                // if the read below returns no data.
                nextChar = nChars = delta;
            }
        }

        int n;
        do {
            // Retry for as long as the underlying read returns zero characters.
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return the character as an int, or -1 at end of stream
     * @throws IOException if the stream is closed or the underlying read fails
     */
    @Override
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars) {
                        return -1;
                    }
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        // Drop the '\n' of a "\r\n" pair and look at the next char.
                        nextChar++;
                        continue;
                    }
                }
                return cb[nextChar++];
            }
        }
    }

    /**
     * Copies buffered characters into {@code cbuf}, refilling the buffer at
     * most once. Returns the number of chars copied, or -1 at end of stream.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
            /* If the requested length is at least as large as the buffer, and
               if there is no mark/reset activity, and if line feeds are not
               being skipped, do not bother to copy the characters into the
               local buffer: read straight into the caller's array. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) {
            return -1;
        }
        if (skipLF) {
            skipLF = false;
            if (cb[nextChar] == '\n') {
                nextChar++;
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) {
                    return -1;
                }
            }
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads up to {@code len} characters into {@code cbuf} starting at {@code off}.
     * After the first successful read1() call, keeps calling read1() only while
     * the underlying reader reports ready().
     *
     * @return number of characters read, 0 if {@code len == 0}, or -1 at end of stream
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit {@code cbuf}
     * @throws IOException if the stream is closed or the underlying read fails
     */
    @Override
    public int read(char cbuf[], int off, int len) throws IOException {
        synchronized (lock) {
            ensureOpen();
            if ((off < 0) || (off > cbuf.length) || (len < 0) ||
                ((off + len) > cbuf.length) || ((off + len) < 0)) {
                throw new IndexOutOfBoundsException();
            } else if (len == 0) {
                return 0;
            }

            int n = read1(cbuf, off, len);
            if (n <= 0) {
                return n;
            }
            while ((n < len) && in.ready()) {
                int n1 = read1(cbuf, off + n, len - n);
                if (n1 <= 0) {
                    break;
                }
                n += n1;
            }
            return n;
        }
    }

    /**
     * Reads one line. A line ends at '\n', at '\r', or at end of stream; a
     * '\n' directly after a '\r' is consumed by the following read.
     *
     * @param ignoreLF if true, a leading '\n' is skipped
     * @return the line without its terminator, or {@code null} if end of
     *         stream is reached with no characters collected
     * @throws IOException if the stream is closed or the underlying read fails
     */
    String readLine(boolean ignoreLF) throws IOException {
        // Allocated lazily: only needed when a line spans more than one buffer fill.
        // A StringBuilder is sufficient because it is only touched under 'lock'.
        StringBuilder s = null;
        int startChar;

        synchronized (lock) {
            ensureOpen();
            boolean omitLF = ignoreLF || skipLF;

            for (;;) {

                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    if (s != null && s.length() > 0) {
                        return s.toString();
                    } else {
                        return null;
                    }
                }
                boolean eol = false;
                char c = 0;
                int i;

                /* Skip a leftover '\n', if necessary */
                if (omitLF && (cb[nextChar] == '\n')) {
                    nextChar++;
                }
                skipLF = false;
                omitLF = false;

                // Scan the buffered characters for a line terminator.
                for (i = nextChar; i < nChars; i++) {
                    c = cb[i];
                    if ((c == '\n') || (c == '\r')) {
                        eol = true;
                        break;
                    }
                }

                startChar = nextChar;
                nextChar = i;

                if (eol) {
                    String str;
                    if (s == null) {
                        str = new String(cb, startChar, i - startChar);
                    } else {
                        s.append(cb, startChar, i - startChar);
                        str = s.toString();
                    }
                    nextChar++; // step over the terminator
                    if (c == '\r') {
                        // A '\n' right after this '\r' belongs to the same terminator.
                        skipLF = true;
                    }
                    return str;
                }

                // No terminator in this buffer: stash what we have and refill.
                if (s == null) {
                    s = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
                }
                s.append(cb, startChar, i - startChar);
            }
        }
    }

    /**
     * Reads one line of text; equivalent to {@code readLine(false)}.
     *
     * @return the line without its terminator, or {@code null} at end of stream
     * @throws IOException if the stream is closed or the underlying read fails
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Skips up to {@code n} characters.
     *
     * @return the number of characters actually skipped
     * @throws IllegalArgumentException if {@code n} is negative
     * @throws IOException if the stream is closed or the underlying read fails
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        synchronized (lock) {
            ensureOpen();
            long r = n; // characters still to skip
            while (r > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    break;
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                }
                long d = nChars - nextChar; // characters available in the buffer
                if (r <= d) {
                    nextChar += r;
                    r = 0;
                    break;
                } else {
                    r -= d;
                    nextChar = nChars;
                }
            }
            return n - r;
        }
    }

    /**
     * Tells whether a read can be served: true if the buffer is non-empty or
     * the underlying reader reports ready.
     *
     * @throws IOException if the stream is closed or the underlying reader fails
     */
    @Override
    public boolean ready() throws IOException {
        synchronized (lock) {
            ensureOpen();

            /*
             * If a '\n' must be skipped and it is the next character, consume
             * it now so that it does not count as available input.
             */
            if (skipLF) {
                /* in.ready() is checked first so that fill() is only called
                   when the underlying reader reports data available. */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                if (nextChar < nChars) {
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                    skipLF = false;
                }
            }
            return (nextChar < nChars) || in.ready();
        }
    }

    /** Always returns true: this class implements mark() and reset(). */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the current position. fill() drops the mark once the number of
     * characters read since the mark reaches {@code readAheadLimit}, and grows
     * the buffer to {@code readAheadLimit} chars when the limit exceeds the
     * current buffer length.
     *
     * @param readAheadLimit how many characters may be read while keeping the mark
     * @throws IllegalArgumentException if {@code readAheadLimit < 0}
     * @throws IOException if the stream is closed
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        synchronized (lock) {
            ensureOpen();
            this.readAheadLimit = readAheadLimit;
            markedChar = nextChar;
            markedSkipLF = skipLF;
        }
    }

    /**
     * Rewinds to the position saved by the last mark() call.
     *
     * @throws IOException if the stream is closed, was never marked,
     *         or the mark has been invalidated
     */
    @Override
    public void reset() throws IOException {
        synchronized (lock) {
            ensureOpen();
            if (markedChar < 0) {
                throw new IOException((markedChar == INVALIDATED)
                                      ? "Mark invalid"
                                      : "Stream not marked");
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
        }
    }

    /**
     * Closes the underlying reader and releases the buffer. Calling close()
     * on an already closed stream does nothing.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        synchronized (lock) {
            if (in == null) {
                return;
            }

            in.close();
            in = null;
            cb = null;
        }
    }

}

说明：

要想读懂BufferedReader的源码，就要先理解它的思想。BufferedReader的作用是为其它Reader提供缓冲功能。

创建 BufferedReader 时，我们会通过它的构造函数指定某个 Reader 为参数。

BufferedReader 会将该 Reader 中的数据分批读取，每次读取一部分到缓冲中。

操作完缓冲中的这部分数据之后，再从 Reader 中读取下一部分数据。

为什么需要缓冲呢？原因很简单，效率问题！缓冲中的数据实际上是保存在内存中，而原始数据可能是保存在硬盘或NandFlash中；而我们知道，从内存中读取数据的速度比从硬盘读取数据的速度至少快10倍以上。

那干嘛不干脆一次性将全部数据都读取到缓冲中呢？

第一，读取全部的数据所需要的时间可能会很长。

第二，内存价格很贵，容量不像硬盘那么大。

下面，我就BufferedReader中最重要的函数fill()进行说明。其它的函数很容易理解，我就不详细介绍了，大家可以参考源码中的注释进行理解。我们先看看fill()的源码：

1 private void fill() throws IOException {
2     int dst;
3     if (markedChar <= UNMARKED) {
4         /* No mark */
5         dst = 0;
6     } else {
7         /* Marked */
8         int delta = nextChar - markedChar;
9         if (delta >= readAheadLimit) {
10             /* Gone past read-ahead limit: Invalidate mark */
11             markedChar = INVALIDATED;
12             readAheadLimit = 0;
13             dst = 0;
14         } else {
15             if (readAheadLimit <= cb.length) {
16                 /* Shuffle in the current buffer */
17                 System.arraycopy(cb, markedChar, cb, 0, delta);
18                 markedChar = 0;
19                 dst = delta;
20             } else {
21                 /* Reallocate buffer to accommodate read-ahead limit */
22                 char ncb[] = new char[readAheadLimit];
23                 System.arraycopy(cb, markedChar, ncb, 0, delta);
24                 cb = ncb;
25                 markedChar = 0;
26                 dst = delta;
27             }
28             nextChar = nChars = delta;
29         }
30     }
31
32     int n;
33     do {
34         n = in.read(cb, dst, cb.length - dst);
35     } while (n == 0);
36     if (n > 0) {
37         nChars = dst + n;
38         nextChar = dst;
39     }
40 }

根据fill()中的if…else…，我将fill()分为4种情况进行说明。

情况1：读取完缓冲区的数据，并且缓冲区没有被标记

执行流程如下：

其他函数调用 fill（），来更新缓冲区数据

fill() 执行代码 if (markedChar <= UNMARKED) { … }

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

// Reduced view of fill() for case 1 (no valid mark): only the statements that
// run in this case are shown; the "marked" branches are left out.
private void fill() throws IOException {

    int dst;
    if (markedChar <= UNMARKED) {
        /* No mark */
        dst = 0;
    }

    int n;
    do {
        // Retry for as long as the underlying read returns zero characters.
        n = in.read(cb, dst, cb.length - dst);
    } while (n == 0);

    if (n > 0) {
        nChars = dst + n;
        nextChar = dst;
    }
}

说明：

这种情况发生的情况是——Reader中有很长的数据，我们每一次从中读取一部分数据到缓冲中进行操作。每次当我们读取完缓冲中的数据之后，并且此时BufferedReader没有被标记；那么，就接着从 (BufferedReader 为其提供缓冲功能的 Reader) 中读下一部分的数据到缓冲中。

其中，判断是否读完缓冲中的数据，是通过“比较nextChar和nChars之间大小” 来判断的。其中， nChars 是缓冲区中字符的总的个数，而 nextChar 是缓冲区中下一个要读取的字符的位置。

判断BufferedReader有没有被标记，是通过“markedChar”来判断的。

理解这个思想之后，我们再对这种情况下的fill()的代码进行分析，就特别容易理解了。

if (markedChar <= UNMARKED) 它的作用是判断“BufferedReader是否被标记”。若没有被标记，则dst=0。

in.read(cb, dst, cb.length - dst) 等价于 in.read(cb, 0, cb.length)，意思是从Reader对象 in 中读取 cb.length个数据，并存储到缓冲区 cb中，而且从缓冲区 cb的位置 0 开始存储。该函数返回值等于 n，也就是 n 表示实际读取的字符个数。若 n=0 (即没有读取到数据)，则继续读取，直到读到数据为止。

nChars=dst+n 等价于 nChars=n；意味着，更新缓冲区数据 cb 之后，设置，nChars(缓冲区的数据个数)为n。

nextChar=dst 等价于 nextChar=0；意味着，更新缓冲区数据 cb之后，设置，nextChar(缓冲区中下一个会被读取的字符的索引值)为0。

情况2：读取完缓冲区的数据，缓冲区的标记位置>0，并且“当前标记的长度”超过“标记上限(readAheadLimit)”

执行流程如下：

其它函数调用 fill()，来更新缓冲区的数据

fill() 执行代码 if (delta >= readAheadLimit) { … }

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

// Reduced view of fill() for case 2: a mark exists, but the number of
// characters read since the mark has reached readAheadLimit. Only the
// statements that run in this case are shown.
private void fill() throws IOException {

int dst;
if (markedChar > UNMARKED) {
/* Marked */
// delta: number of characters consumed since the mark.
int delta = nextChar - markedChar;
if (delta >= readAheadLimit) {
/* Gone past read-ahead limit: Invalidate mark */
markedChar = INVALIDATED;
readAheadLimit = 0;
// Nothing is kept, so new data is stored from the start of cb.
dst = 0;
}
}

int n;
do {
// Retry for as long as the underlying read returns zero characters.
n = in.read(cb, dst, cb.length - dst);
} while (n == 0);
if (n > 0) {
nChars = dst + n;
nextChar = dst;
}

}

说明：

这种情况发生的情况是——Reader中有很长的数据，我们每次从中读取一部分数据到缓冲区进行操作。当我们读取完缓冲区中的数据之后，并且此时，BufferedReader存在标记，同时，“当前标记的长度”大于/等于“标记上限”；那么，就发生情况 2。
此时，我们会丢弃“标记”并更新缓冲区。

delta = nextChar - markedChar；其中，delta就是“当前标记的长度”，它是“下一个被读取字符的位置”减去“被标记的位置”的差值。

if (delta >= readAheadLimit)；其中，当delta >= readAheadLimit，就意味着，“当前标记的长度”>=“标记上限”。为什么要有标记上限，即readAheadLimit的值到底有何意义呢？
我们标记一个位置之后，更新缓冲区的时候，被标记的位置之后的数据会被保存；当我们不停地更新缓冲区的时候，需要保存的数据会不停地变长。然而内存的容量是有限的，我们不可能不限制长度地存储被标记的数据。所以，需要readAheadLimit来限制标记的长度！

in.read(cb, dst, cb.length - dst) 等价于 in.read(cb, 0, cb.length)，意思是从Reader对象 in 中读取 cb.length个数据，并存储到缓冲区cb中，而且从缓冲区cb的位置0开始存储。该函数返回值等于n，也就是n表示实际读取的字符个数。若n=0(即没有读取到数据)，则继续读取，直到读到数据为止。

nChars=dst+n 等价于 nChars=n；意味着，更新缓冲区数据cb之后，设置nChars(缓冲区的数据个数)为n。

nextChar=dst 等价于 nextChar=0；意味着，更新缓冲区数据cb之后，设置nextChar(缓冲区中下一个会被读取的字符的索引值)为0。

情况3：读取完缓冲区的数据，缓冲区的标记位置>0，“当前标记的长度”没超过“标记上限(readAheadLimit)”，并且“标记上限(readAheadLimit)”小于/等于“缓冲的长度”；

执行流程如下：

其它函数调用 fill()，来更新缓冲区的数据

fill() 执行代码 if (readAheadLimit <= cb.length) { … }

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

private void fill() throws IOException {

int dst;
if (markedChar > UNMARKED) {
/* Marked */
int delta = nextChar - markedChar;
if (readAheadLimit <= cb.length) {
/* Shuffle in the current buffer */
System.arraycopy(cb, markedChar, cb, 0, delta);
markedChar = 0;
dst = delta;

nextChar = nChars = delta;
}
}

int n;
do {
n = in.read(cb, dst, cb.length - dst);
} while (n == 0);
if (n > 0) {
nChars = dst + n;
nextChar = dst;
}
}

}

说明：

这种情况发生的情况是 ——Reader中有很长的数据，我们每次从中读取一部分数据到缓冲中进行操作。当我们读取完缓冲区的数据之后，并且此时，BufferedReader 存在标记，同时，“当前标记的长度”小于“标记上限”，并且“标记上限”小于/等于“缓冲区长度”；那么，就发生情况3。此时，我们保留“被标记的位置”(即，保留被标记位置开始的数据)，并更新缓冲区(将新增的数据，追加到保留的数据之后)。

情况4：读取完缓冲区的数据，缓冲区的标记位置>0，“当前标记的长度”没超过“标记上限(readAheadLimit)”，并且“标记上限(readAheadLimit)”大于“缓冲的长度”；

执行流程如下：

其它函数调用 fill()，来更新缓冲区的数据

fill() 执行代码 else { char ncb[] = new char[readAheadLimit]; … }

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

// Reduced view of fill() for case 4: a valid mark exists, fewer than
// readAheadLimit characters have been read since the mark, and
// readAheadLimit is larger than the current buffer. Only the statements that
// run in this case are shown.
private void fill() throws IOException {

int dst;
if (markedChar > UNMARKED) {
// delta: number of characters consumed since the mark.
int delta = nextChar - markedChar;
if ((delta < readAheadLimit) &&  (readAheadLimit > cb.length) ) {
// Allocate a buffer of readAheadLimit chars and move the characters
// read since the mark to its front.
char ncb[] = new char[readAheadLimit];
System.arraycopy(cb, markedChar, ncb, 0, delta);
cb = ncb;
markedChar = 0;
dst = delta;

nextChar = nChars = delta;
}
}

int n;
do {
// Retry for as long as the underlying read returns zero characters.
n = in.read(cb, dst, cb.length - dst);
} while (n == 0);
if (n > 0) {
nChars = dst + n;
nextChar = dst;
}

}

说明：

这种情况发生的情况是 — — BufferedReader中有很长的数据，我们每次从中读取一部分数据到缓冲区中进行操作。当我们读取完缓冲区中的数据之后，并且此时，BufferedReader存在标记时，同时，“当前标记的长度”小于“标记上限”，并且“标记上限”大于“缓冲区长度”；那么，就发生情况4。此时，我们要先更新缓冲区的大小，然后再保留“被标记的位置”(即，保留被标记位置开始的数据)，并更新缓冲区数据(将新增的数据，追加到保留的数据之后)。

示例代码

/**
 * Demo of {@code BufferedReader}: reads single characters, marks a position,
 * skips ahead, reads a block and a line, then resets to the mark and reads
 * again. Input comes from the file {@code bufferedreader.txt} in the working
 * directory.
 */
public class BufferedReade {

    /** Number of characters read per step. */
    private static final int LEN = 5;

    public static void main(String[] args) {
        testBufferedReader();
    }

    /**
     * Runs the demo. Any exception is reported with printStackTrace(), as in
     * the original listing.
     */
    private static void testBufferedReader() {
        File file = new File("bufferedreader.txt");

        // try-with-resources closes the reader on every exit path, including
        // the early return below and any exception; the original only closed
        // it when the whole sequence succeeded.
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {

            // Read the first LEN characters one by one ("abcde" with the
            // sample file shown in the article).
            for (int i = 0; i < LEN; i++) {
                // Only read when a character is available.
                if (in.ready()) {
                    int tmp = in.read();
                    System.out.printf("%d : %c\n", i, tmp);
                }
            }

            // Bail out if the stream cannot be marked.
            if (!in.markSupported()) {
                System.out.println("make not supported!");
                return;
            }

            // Mark the current position (the 6th character, "f" in the sample
            // file); 1024 is the read-ahead limit.
            in.mark(1024);

            // Skip 23 characters.
            in.skip(23);

            // Read LEN characters (not bytes) into buf.
            char[] buf = new char[LEN];
            in.read(buf, 0, LEN);
            System.out.printf("buf=%s\n", String.valueOf(buf));
            // Read the rest of the current line. This goes to System.err, so
            // it may appear out of order relative to the System.out lines.
            System.err.printf("readLine=%s\n", in.readLine());

            // Rewind to the marked position ("f" in the sample file).
            in.reset();
            // Read LEN characters again from the marked position ("fghij").
            in.read(buf, 0, LEN);
            System.out.printf("buf=%s\n", String.valueOf(buf));

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

程序中读取的bufferedreader.txt的内容如下：

abcdefghijklmnopqrstuvwxyz

0123456789

ABCDEFGHIJKLMNOPQRSTUVWXYZ

运行结果：

0 : a

1 : b

2 : c

3 : d

4 : e

buf=01234

buf=fghij

readLine=56789

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航