/**
 * Reads the complete content of a file into memory.
 *
 * The file whose path equals mIdxFile is read as-is; every other file is read
 * through a GZIPInputStream, so it must be gzip-compressed.
 *
 * @param file path of the file to read
 * @return all bytes of the file (after decompression for non-index files)
 * @throws IOException if the file cannot be opened, is not valid gzip data
 *                     when gzip is expected, or cannot be read
 */
private static byte[] loadFile(String file) throws IOException
{
    // decide how to decode before any resource is opened
    boolean isIndexFile = mIdxFile.equals(file);
    // collects the bytes read from the file; it holds no system resource,
    // so it does not need to be closed
    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
    InputStream in = null;
    try
    {
        // Open the raw stream first and wrap it afterwards: if the wrapper's
        // constructor fails (e.g. bad gzip header), "in" still refers to the
        // raw stream and the finally block closes it instead of leaking it.
        in = new FileInputStream(file);
        if (isIndexFile)
        {
            in = new BufferedInputStream(in);
        }
        else
        {
            in = new GZIPInputStream(in);
        }
        byte[] buf = new byte[1024];
        int numRead = 0;
        while ((numRead = in.read(buf)) != -1)
        {
            byteStream.write(buf, 0, numRead);
        }
        return byteStream.toByteArray();
    }
    finally
    {
        // "in" is null when the file could not be opened; guarding the close
        // keeps the original IOException from being replaced by a
        // NullPointerException.
        if (in != null)
        {
            in.close();
        }
    }
}
/*
* Index file structure: a sequence of entries, each laid out as
* word (UTF-8 bytes) + '\0' (1 byte) + data offset in dict file (4 bytes, big-endian) + data size in dict file (4 bytes, big-endian)
*
* */
/**
 * Parses the index file (mIdxFile) into a list of words.
 *
 * The file is scanned for the separator byte (first byte of SPLIT_CHAR). The
 * bytes between the previous entry and the separator are decoded as the UTF-8
 * word; the 8 bytes after the separator are read as two big-endian 32-bit
 * integers: the offset and the size of the word's data in the dict file.
 * A trailing entry that does not have all 8 bytes is ignored.
 *
 * @return the words of the index in file order, each with its content,
 *         dictionary name, dict file offset and dict file size set
 * @throws IOException if the index file cannot be read
 */
private static List<StarDictWord> loadDictIndex() throws IOException
{
    List<StarDictWord> words = new ArrayList<StarDictWord>();
    byte[] splitByte = SPLIT_CHAR.getBytes();
    byte[] data = loadFile(mIdxFile);
    int dataLength = data.length;
    // index of the first byte of the word currently being scanned
    int dataIndex = 0;
    int currentIndex = 0;
    while (currentIndex < dataLength)
    {
        if (data[currentIndex] != splitByte[0])
        {
            // still inside the word
            currentIndex += 1;
            continue;
        }
        // the entry is complete only if the 4 offset bytes and the 4 size
        // bytes that follow the separator are all present
        if (currentIndex + 8 < dataLength)
        {
            int wordLength = currentIndex - dataIndex;
            byte[] wordData = new byte[wordLength];
            System.arraycopy(data, dataIndex, wordData, 0, wordLength);
            StarDictWord word = new StarDictWord();
            try
            {
                word.setContent(new String(wordData, "utf-8"));
                word.setDictName(mDictName);
                // Read all 4 bytes of each field. Copying from the second
                // byte on would drop the most significant byte and corrupt
                // every value of 2^24 or more.
                word.setDictFileOffset(ByteBuffer.wrap(data, currentIndex + 1, 4).getInt());
                word.setDictFileSize(ByteBuffer.wrap(data, currentIndex + 5, 4).getInt());
                words.add(word);
            }
            catch (Exception e)
            {
                // best effort: skip the broken entry but keep the cause in the log
                logger.severe("error in " + dataIndex + ": " + e);
            }
        }
        // skip the separator and the 8 bytes of offset/size; the next word
        // starts right after them
        currentIndex += 9;
        dataIndex = currentIndex;
    }
    return words;
}
private static void loadDictData(List<StarDictWord> words) throws IOException
{
// skip for empty list
if (words.size() == 0)
{
return;
}
byte[] data = loadFile(mDictFile);
int dataLength = data.length;
int offset = 0;
int size = 0;
for(StarDictWord word : words)
{
offset = word.getDictFileOffset();
size = word.getDictFileSize();
if (offset + size - 1 < dataLength)
{
byte[] wordData = new byte[size];
for (int i = 0; i < size; i++)
{
wordData[i] = data[i + offset];
}
loadWordData(word, wordData);
}
}
References:
1. http://stardict.sourceforge.net
2. http://www.ohloh.net/p/pystardict