-
Notifications
You must be signed in to change notification settings - Fork 188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Github-issue#1048 : s3-sink with local-file buffer implementation. #2645
Changes from 3 commits
ab1862c
00e2e5f
dbe2811
1cff631
45e1242
00f59cf
c2394dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.dataprepper.plugins.sink.accumulator; | ||
|
||
import org.apache.commons.lang3.time.StopWatch; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import software.amazon.awssdk.core.sync.RequestBody; | ||
import software.amazon.awssdk.services.s3.S3Client; | ||
import software.amazon.awssdk.services.s3.model.PutObjectRequest; | ||
import java.io.BufferedOutputStream; | ||
import java.io.File; | ||
import java.io.FileOutputStream; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import java.util.UUID; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
/** | ||
* A buffer that holds data in a local file and flushes it to S3. | ||
*/ | ||
public class LocalFileBuffer implements Buffer { | ||
|
||
private static final Logger LOG = LoggerFactory.getLogger(LocalFileBuffer.class); | ||
private BufferedOutputStream bufferedOutputStream; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd recommend making this just an
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modified. |
||
private int eventCount; | ||
private final StopWatch watch; | ||
private File localFile; | ||
|
||
LocalFileBuffer() { | ||
try { | ||
localFile = new File(String.valueOf(UUID.randomUUID())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should use a temp file here. It could be in the system default temp, or we can allow a user to configure this directory. Java provides two There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed. Implemented using Java provided functionality. |
||
bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(localFile)); | ||
} catch (IOException e) { | ||
LOG.error("Unable to create local file ", e); | ||
} | ||
|
||
eventCount = 0; | ||
|
||
watch = new StopWatch(); | ||
watch.start(); | ||
} | ||
|
||
@Override | ||
public long getSize() { | ||
try { | ||
bufferedOutputStream.flush(); | ||
} catch (IOException e) { | ||
LOG.error("An exception occurred while flushing data to buffered output stream :", e); | ||
} | ||
return localFile.length(); | ||
} | ||
|
||
@Override | ||
public int getEventCount() { | ||
return eventCount; | ||
} | ||
|
||
@Override | ||
public long getDuration(){ | ||
return watch.getTime(TimeUnit.SECONDS); | ||
} | ||
|
||
/** | ||
* Upload accumulated data to amazon s3. | ||
* @param s3Client s3 client object. | ||
* @param bucket bucket name. | ||
* @param key s3 object key path. | ||
*/ | ||
@Override | ||
public void flushToS3(S3Client s3Client, String bucket, String key) { | ||
try { | ||
bufferedOutputStream.flush(); | ||
bufferedOutputStream.close(); | ||
} catch (IOException e) { | ||
LOG.error("An exception occurred while flushing data to buffered output stream :", e); | ||
} | ||
s3Client.putObject( | ||
PutObjectRequest.builder().bucket(bucket).key(key).build(), | ||
RequestBody.fromFile(localFile)); | ||
removeTemporaryFile(); | ||
} | ||
|
||
/** | ||
* Writes the byte array to the output stream, followed by a line separator. | ||
* @param bytes byte array. | ||
* @throws IOException if writing to the output stream fails. | ||
*/ | ||
@Override | ||
public void writeEvent(byte[] bytes) throws IOException { | ||
bufferedOutputStream.write(bytes); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need some synchronization for these writes. Or the events may end up out-of-order. We can add it here (and to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we have synchronized in the |
||
bufferedOutputStream.write(System.lineSeparator().getBytes()); | ||
eventCount++; | ||
} | ||
|
||
/** | ||
* Remove the local temp file after flushing data to s3. | ||
*/ | ||
private void removeTemporaryFile() { | ||
if (localFile != null) { | ||
try { | ||
Files.deleteIfExists(Paths.get(localFile.toString())); | ||
} catch (IOException e) { | ||
LOG.error("Unable to delete Local file {}", localFile, e); | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.dataprepper.plugins.sink.accumulator; | ||
|
||
public class LocalFileBufferFactory implements BufferFactory { | ||
@Override | ||
public Buffer getBuffer() { | ||
return new LocalFileBuffer(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can probably improve your testing by choosing the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.dataprepper.plugins.sink.accumulator; | ||
|
||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
import static org.hamcrest.CoreMatchers.instanceOf; | ||
import static org.hamcrest.MatcherAssert.assertThat; | ||
|
||
class LocalFileBufferFactoryTest { | ||
@Test | ||
void test_localFileBufferFactory_notNull() { | ||
LocalFileBufferFactory localFileBufferFactory = new LocalFileBufferFactory(); | ||
Assertions.assertNotNull(localFileBufferFactory); | ||
} | ||
|
||
@Test | ||
void test_buffer_notNull() { | ||
LocalFileBufferFactory localFileBufferFactory = new LocalFileBufferFactory(); | ||
Assertions.assertNotNull(localFileBufferFactory); | ||
Buffer buffer = localFileBufferFactory.getBuffer(); | ||
Assertions.assertNotNull(buffer); | ||
assertThat(buffer, instanceOf(Buffer.class)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should assert that this is an instance of
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @deepaksahu562 , Did you push the change here? I don't see a new assertion for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dlvenable, Earlier, I didn't understand. Now I can understand and modify as requested. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be as simple as changing this line to:
Here it is with a little more context.
We already know this is a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please change this line per my latest comment. Then we should be good. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for your clarification, Modified as you suggested. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package org.opensearch.dataprepper.plugins.sink.accumulator; | ||
|
||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.mockito.Mock; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
import software.amazon.awssdk.services.s3.S3Client; | ||
import java.io.IOException; | ||
import java.util.UUID; | ||
|
||
import static org.hamcrest.CoreMatchers.equalTo; | ||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.Matchers.greaterThan; | ||
import static org.hamcrest.Matchers.greaterThanOrEqualTo; | ||
import static org.hamcrest.Matchers.lessThanOrEqualTo; | ||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; | ||
|
||
@ExtendWith(MockitoExtension.class) | ||
class LocalFileBufferTest { | ||
|
||
public static final String BUCKET_NAME = UUID.randomUUID().toString(); | ||
@Mock | ||
private S3Client s3Client; | ||
private LocalFileBuffer localFileBuffer; | ||
|
||
@Test | ||
void test_with_write_event_into_buffer() throws IOException { | ||
localFileBuffer = new LocalFileBuffer(); | ||
|
||
while (localFileBuffer.getEventCount() < 55) { | ||
localFileBuffer.writeEvent(generateByteArray()); | ||
} | ||
assertThat(localFileBuffer.getSize(), greaterThan(1l)); | ||
assertThat(localFileBuffer.getEventCount(), equalTo(55)); | ||
assertThat(localFileBuffer.getDuration(), greaterThanOrEqualTo(0L)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should be able to assert the exact duration here.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed. |
||
} | ||
|
||
@Test | ||
void test_without_write_event_into_buffer() { | ||
localFileBuffer = new LocalFileBuffer(); | ||
assertThat(localFileBuffer.getSize(), equalTo(0L)); | ||
assertThat(localFileBuffer.getEventCount(), equalTo(0)); | ||
assertThat(localFileBuffer.getDuration(), lessThanOrEqualTo(0L)); | ||
|
||
} | ||
|
||
@Test | ||
void test_with_write_event_into_buffer_and_flush_toS3() throws IOException { | ||
localFileBuffer = new LocalFileBuffer(); | ||
|
||
while (localFileBuffer.getEventCount() < 55) { | ||
localFileBuffer.writeEvent(generateByteArray()); | ||
} | ||
assertThat(localFileBuffer.getSize(), greaterThan(1l)); | ||
assertThat(localFileBuffer.getEventCount(), equalTo(55)); | ||
assertThat(localFileBuffer.getDuration(), greaterThanOrEqualTo(0L)); | ||
assertDoesNotThrow(() -> { | ||
localFileBuffer.flushToS3(s3Client, BUCKET_NAME, "log.txt"); | ||
}); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to verify that the file was flushed to S3. Since this is not an integration test, you will do this via mocking. It should look something like the following:
Also, this should validate that the file was actually deleted. You will do this with real files (not mocks). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modified and addressed as per suggestions. |
||
|
||
@Test | ||
void test_uploadedToS3_success() { | ||
localFileBuffer = new LocalFileBuffer(); | ||
Assertions.assertNotNull(localFileBuffer); | ||
assertDoesNotThrow(() -> { | ||
localFileBuffer.flushToS3(s3Client, BUCKET_NAME, "log.txt"); | ||
}); | ||
} | ||
|
||
private byte[] generateByteArray() { | ||
byte[] bytes = new byte[1000]; | ||
for (int i = 0; i < 1000; i++) { | ||
bytes[i] = (byte) i; | ||
} | ||
return bytes; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought the plan is to support both INMEMORY and LOCALFILE. Isn't it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are supporting both INMEMORY and LOCALFILE buffer type. The INMEMORY buffer functionality was raised as separate PR: #2623