diff --git a/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy b/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy
index 393439ee..28e3c836 100644
--- a/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy
+++ b/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy
@@ -12,12 +12,7 @@ class SearchIndex {
     private final SkipList keywords
 
     SearchIndex(String name) {
-        File f = File.createTempFile(name, "db")
-        if (f.exists())
-            f.delete()
-        f.createNewFile()
-        f.deleteOnExit()
-        BlockFile blockFile = new BlockFile(f, true)
+        BlockFile blockFile = new BlockFile(name, true)
         keywords = blockFile.makeIndex("keywords", new KeySerializer(), new ValueSerializer())
     }
 
diff --git a/core/src/main/java/net/metanotion/io/RAIFile.java b/core/src/main/java/net/metanotion/io/RAIFile.java
index 65cf09cc..8a965db4 100644
--- a/core/src/main/java/net/metanotion/io/RAIFile.java
+++ b/core/src/main/java/net/metanotion/io/RAIFile.java
@@ -40,72 +40,159 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.OpenOption;
 import java.nio.file.StandardOpenOption;
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 
 public class RAIFile implements RandomAccessInterface {
 
-    private static final long MAX_SIZE = 0x1 << 29;
+    static final long MAX_SIZE = 0x1 << 20;
 
-    private File f;
-    private final ByteBuffer byteBuffer;
-    private final FileChannel fileChannel;
-
-    private boolean r=false, w=false;
-
-    private int maxPosition = 2048; // PAGESIZE * 2
+    private final String prefix;
 
-    public RAIFile(RandomAccessFile file) throws IOException {
-        this.f = null;
-        fileChannel = file.getChannel();
-        byteBuffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, MAX_SIZE);
+    private final List<FileChunk> chunkList = new ArrayList<>();
+
+    private FileChunk current;
+    private long maxPosition; // long: FileChunk.position() is a long, keep its full width
+
+    public RAIFile(String prefix) throws IOException {
+        this.prefix = prefix;
+        chunkList.add(new FileChunk(0, prefix));
+        current = chunkList.get(0);
     }
-
-    public RAIFile(File file, boolean read, boolean write) throws FileNotFoundException, IOException {
-        this.f = file;
-        this.r = read;
-        this.w = write;
-        Set openOptionSet = new HashSet<>();
-        if(this.r) { openOptionSet.add(StandardOpenOption.READ); }
-        if(this.w) { openOptionSet.add(StandardOpenOption.WRITE); }
-        fileChannel = (FileChannel) Files.newByteChannel(f.toPath(), openOptionSet);
-        byteBuffer = fileChannel.map(FileChannel.MapMode.READ_WRITE,0, MAX_SIZE);
+
+    private static class FileChunk {
+        private final int index;
+        private final ByteBuffer byteBuffer;
+        private final FileChannel fileChannel;
+
+        FileChunk(int index, String prefix) throws IOException {
+            this.index = index;
+            File f = File.createTempFile(prefix,"db" + index);
+            f.createNewFile();
+            f.deleteOnExit();
+            fileChannel = (FileChannel) Files.newByteChannel(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE);
+            byteBuffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, MAX_SIZE);
+        }
+
+        long position() {
+            return index * MAX_SIZE + byteBuffer.position();
+        }
+
+        void close() throws IOException {
+            fileChannel.close();
+            DataUtil.tryUnmap(byteBuffer);
+        }
+    }
+
+    private void switchChunk(long position) throws IOException {
+        int idx = (int) (position / MAX_SIZE);
+        while(chunkList.size() <= idx)
+            chunkList.add(new FileChunk(chunkList.size(), prefix));
+        current = chunkList.get(idx);
+        current.byteBuffer.position((int)(position % MAX_SIZE));
+    }
+
+    private void ensureCapacity(int size) throws IOException {
+        int remaining = current.byteBuffer.remaining();
+        if (remaining < size)
+            switchChunk(current.position() + remaining);
     }
 
 
     private void updateMaxPosition() {
-        maxPosition = Math.max(maxPosition, byteBuffer.position());
+        maxPosition = Math.max(maxPosition, current.position()); // no (int) cast: would truncate past 2 GiB
     }
 
-    public long getFilePointer() throws IOException { return byteBuffer.position(); }
+    public long getFilePointer() throws IOException { return current.position(); }
     public long length() throws IOException { return maxPosition; }
     public void setLength(long length) { maxPosition = (int)length; }
-    public int read() throws IOException { return byteBuffer.get(); }
-    public int read(byte[] b) throws IOException { byteBuffer.get(b); return b.length; }
-    public int read(byte[] b, int off, int len) throws IOException { byteBuffer.get(b,off,len); return len; }
-    public void seek(long pos) throws IOException { byteBuffer.position((int)pos); updateMaxPosition();}
+
+    public int read() throws IOException {
+        if (current.byteBuffer.remaining() < 1)
+            switchChunk(current.position()); // chunk exhausted: position() already equals the next chunk's first byte
+        return current.byteBuffer.get();
+    }
+
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (len > MAX_SIZE)
+            throw new IllegalArgumentException("length too long " + len);
+        int remaining = current.byteBuffer.remaining();
+        int rv = 0;
+        if (remaining < len) {
+            current.byteBuffer.get(b, off, remaining);
+            off += remaining;
+            len -= remaining;
+            rv += remaining;
+            switchChunk(current.position()); // no +1: that skipped a byte; matches readUTF() and write()
+        }
+        current.byteBuffer.get(b, off, len);
+        return rv + len;
+    }
+    public void seek(long pos) throws IOException {
+        switchChunk(pos);
+        updateMaxPosition();
+    }
 
     // Closeable Methods
     // TODO May need to change.
     public void close() throws IOException {
-        fileChannel.close();
-        DataUtil.tryUnmap(byteBuffer);
+        for (FileChunk chunk : chunkList)
+            chunk.close();
     }
 
 
     // DataInput Methods
-    public boolean readBoolean() throws IOException { return byteBuffer.get() == (byte)1; }
-    public byte readByte() throws IOException { return byteBuffer.get(); }
-    public char readChar() throws IOException { return byteBuffer.getChar(); }
-    public double readDouble() throws IOException { return byteBuffer.getDouble(); }
-    public float readFloat() throws IOException { return byteBuffer.getFloat(); }
-    public void readFully(byte[] b) throws IOException { byteBuffer.get(b); }
-    public void readFully(byte[] b, int off, int len) throws IOException { byteBuffer.get(b,off,len); }
-    public int readInt() throws IOException { return byteBuffer.getInt(); }
-    public long readLong() throws IOException { return byteBuffer.getLong(); }
-    public short readShort() throws IOException { return byteBuffer.getShort(); }
-    public int readUnsignedByte() throws IOException { return byteBuffer.get() & 0xFF; }
-    public int readUnsignedShort() throws IOException { return byteBuffer.getShort() & 0xFFFF; }
+    public boolean readBoolean() throws IOException {
+        ensureCapacity(1);
+        return current.byteBuffer.get() == (byte)1;
+    }
+
+    public byte readByte() throws IOException {
+        ensureCapacity(1);
+        return current.byteBuffer.get();
+    }
+
+    public char readChar() throws IOException {
+        ensureCapacity(2);
+        return current.byteBuffer.getChar();
+    }
+
+    public double readDouble() throws IOException {
+        ensureCapacity(8);
+        return current.byteBuffer.getDouble();
+    }
+
+    public float readFloat() throws IOException {
+        ensureCapacity(4);
+        return current.byteBuffer.getFloat();
+    }
+
+    public void readFully(byte[] b) throws IOException { read(b); }
+    public void readFully(byte[] b, int off, int len) throws IOException { read(b,off,len); }
+
+
+    public int readInt() throws IOException {
+        ensureCapacity(4);
+        return current.byteBuffer.getInt();
+    }
+
+    public long readLong() throws IOException {
+        ensureCapacity(8);
+        return current.byteBuffer.getLong();
+    }
+
+    public short readShort() throws IOException {
+        ensureCapacity(2);
+        return current.byteBuffer.getShort();
+    }
+    public int readUnsignedByte() throws IOException { return read() & 0xFF; }
+    public int readUnsignedShort() throws IOException { return readShort() & 0xFFFF; }
 
     /** Read a UTF encoded string
         I would delegate here. But Java's read/writeUTF combo suck.
@@ -116,34 +203,104 @@ public class RAIFile implements RandomAccessInterface {
         This is followed by the bytes of the UTF encoded string, as
         returned by String.getBytes("UTF-8");
     */
-	public String readUTF() throws IOException {
-        int len = byteBuffer.getInt();
-        if((len < 0) || (len >= 16777216)) { throw new IOException("Bad Length Encoding"); }
+    public String readUTF() throws IOException {
+        ensureCapacity(4);
+        int len = current.byteBuffer.getInt();
+        if((len < 0) || (len >= MAX_SIZE)) { throw new IOException("Bad Length Encoding"); }
         byte[] bytes = new byte[len];
-        byteBuffer.get(bytes);
+
+        int remaining = current.byteBuffer.remaining();
+        int offset = 0;
+        if (remaining < len) {
+            current.byteBuffer.get(bytes, 0, remaining);
+            len -= remaining;
+            offset += remaining;
+            switchChunk(current.position());
+        }
+        current.byteBuffer.get(bytes, offset, len );
         String s = new String(bytes, StandardCharsets.UTF_8);
         return s;
     }
 
     public int skipBytes(int n) throws IOException {
-        byteBuffer.position(byteBuffer.position() + n);
+        long desiredPosition = current.position() + n;
+        switchChunk(desiredPosition);
         updateMaxPosition();
         return n;
     }
 
     // DataOutput Methods
-    public void write(int b) throws IOException { byteBuffer.put((byte)b); updateMaxPosition(); }
-    public void write(byte[] b) throws IOException { byteBuffer.put(b); updateMaxPosition();}
-    public void write(byte[] b, int off, int len) throws IOException { byteBuffer.put(b,off,len); updateMaxPosition();}
-    public void writeBoolean(boolean v) throws IOException { byteBuffer.put(v ? (byte)1 : (byte)0); updateMaxPosition();}
-    public void writeByte(int v) throws IOException { byteBuffer.put((byte)v); updateMaxPosition();}
-    public void writeShort(int v) throws IOException { byteBuffer.putShort((short)v); updateMaxPosition();}
-    public void writeChar(int v) throws IOException { byteBuffer.putChar((char)v); updateMaxPosition();}
-    public void writeInt(int v) throws IOException { byteBuffer.putInt(v); updateMaxPosition();}
-    public void writeLong(long v) throws IOException { byteBuffer.putLong(v); updateMaxPosition();}
-    public void writeFloat(float v) throws IOException { byteBuffer.putFloat(v); updateMaxPosition();}
-    public void writeDouble(double v) throws IOException { byteBuffer.putDouble(v); updateMaxPosition();}
-    public void writeBytes(String s) throws IOException { byteBuffer.put(s.getBytes()); updateMaxPosition();}
+    public void write(int b) throws IOException {
+        ensureCapacity(1);
+        current.byteBuffer.put((byte)b);
+        updateMaxPosition();
+    }
+
+    public void write(byte[] b) throws IOException { write(b, 0, b.length);}
+
+    public void write(byte[] b, int off, int len) throws IOException {
+        if (len > MAX_SIZE)
+            throw new IllegalArgumentException("length too long " + len);
+        int remaining = current.byteBuffer.remaining();
+        if (remaining < len) {
+            current.byteBuffer.put(b, off, remaining);
+            off += remaining;
+            len -= remaining;
+            switchChunk(current.position());
+        }
+        current.byteBuffer.put(b, off, len);
+        updateMaxPosition();
+    }
+
+    public void writeBoolean(boolean v) throws IOException {
+        ensureCapacity(1);
+        current.byteBuffer.put(v ? (byte)1 : (byte)0);
+        updateMaxPosition();
+    }
+
+    public void writeByte(int v) throws IOException {
+        ensureCapacity(1);
+        current.byteBuffer.put((byte)v);
+        updateMaxPosition();
+    }
+
+    public void writeShort(int v) throws IOException {
+        ensureCapacity(2);
+        current.byteBuffer.putShort((short)v);
+        updateMaxPosition();
+    }
+
+    public void writeChar(int v) throws IOException {
+        ensureCapacity(2);
+        current.byteBuffer.putChar((char)v);
+        updateMaxPosition();
+    }
+
+    public void writeInt(int v) throws IOException {
+        ensureCapacity(4);
+        current.byteBuffer.putInt(v);
+        updateMaxPosition();
+    }
+
+    public void writeLong(long v) throws IOException {
+        ensureCapacity(8);
+        current.byteBuffer.putLong(v);
+        updateMaxPosition();
+    }
+
+    public void writeFloat(float v) throws IOException {
+        ensureCapacity(4);
+        current.byteBuffer.putFloat(v);
+        updateMaxPosition();
+    }
+
+    public void writeDouble(double v) throws IOException {
+        ensureCapacity(8);
+        current.byteBuffer.putDouble(v);
+        updateMaxPosition();
+    }
+
+    public void writeBytes(String s) throws IOException { write(s.getBytes());}
 
     /** Write a UTF encoded string
         I would delegate here. But Java's read/writeUTF combo suck.
@@ -156,9 +313,9 @@ public class RAIFile implements RandomAccessInterface {
     */
     public void writeUTF(String str) throws IOException {
         byte[] string = str.getBytes(StandardCharsets.UTF_8);
-        if(string.length >= 16777216) { throw new IOException("String to long for encoding type"); }
-        byteBuffer.putInt(string.length);
-        byteBuffer.put(string);
-        updateMaxPosition();
+        if(string.length >= MAX_SIZE) { throw new IOException("String to long for encoding type"); }
+        ensureCapacity(4);
+        current.byteBuffer.putInt(string.length);
+        write(string);
     }
 }
diff --git a/core/src/main/java/net/metanotion/io/block/BlockFile.java b/core/src/main/java/net/metanotion/io/block/BlockFile.java
index d7d3eae4..8ec8fcbb 100644
--- a/core/src/main/java/net/metanotion/io/block/BlockFile.java
+++ b/core/src/main/java/net/metanotion/io/block/BlockFile.java
@@ -245,7 +245,7 @@ public class BlockFile implements Closeable {
     }
 
     /** File must be writable */
-    public BlockFile(File f, boolean init) throws IOException { this(new RAIFile(f, true, true), init); }
+    public BlockFile(String prefix, boolean init) throws IOException { this(new RAIFile(prefix), init); }
 
     /** Use this constructor with a readonly RAI and init = false for a readonly blockfile */
     public BlockFile(RandomAccessInterface rai, boolean init) throws IOException {
diff --git a/core/src/test/groovy/net/metanotion/io/RAIFileTest.groovy b/core/src/test/groovy/net/metanotion/io/RAIFileTest.groovy
new file mode 100644
index 00000000..37fe5995
--- /dev/null
+++ b/core/src/test/groovy/net/metanotion/io/RAIFileTest.groovy
@@ -0,0 +1,56 @@
+package net.metanotion.io
+
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+
+class RAIFileTest {
+
+    private RAIFile rf
+
+    @Before
+    void setup() {
+        rf = new RAIFile("test")
+    }
+
+    @After
+    void tearDown() {
+        rf.close()
+    }
+
+    @Test
+    void testManySmallWrites() {
+        assert 0 == rf.getFilePointer()
+        assert 0 == rf.length()
+
+        RAIFile.MAX_SIZE.times {rf.writeByte((byte)1)}
+
+        assert RAIFile.MAX_SIZE == rf.getFilePointer()
+        assert RAIFile.MAX_SIZE == rf.length()
+
+        // this should now cause second chunk
+        rf.writeByte((byte)1)
+
+        assert (RAIFile.MAX_SIZE + 1) == rf.getFilePointer()
+        assert (RAIFile.MAX_SIZE + 1) == rf.length()
+    }
+
+    @Test
+    void writeUTF8AtBoundary() {
+        final int zeroes = RAIFile.MAX_SIZE - 8 // 8 bytes left in chunk 0: the 4-byte length fits, the 16-byte payload straddles the boundary
+        final String longString = "long long string"
+        zeroes.times {rf.writeByte(0)}
+        rf.writeUTF(longString)
+        rf.seek(zeroes)
+        assert longString == rf.readUTF()
+    }
+
+    @Test
+    void writeLongAtBoundary() {
+        final int zeroes = RAIFile.MAX_SIZE - 4 // only 4 bytes left in chunk 0, fewer than the 8 a long needs
+        zeroes.times {rf.writeByte(0)}
+        rf.writeLong(Long.MAX_VALUE)
+        rf.seek(zeroes)
+        assert Long.MAX_VALUE == rf.readLong()
+    }
+}