Commit 80b0701e authored by Lukasz Waskiewicz's avatar Lukasz Waskiewicz
Browse files

refs #1517 optymalizacja zrównoleglania importu cząsteczek

limit na ilości równoczesnych wykonań create table
parent 86f47524
...@@ -10,6 +10,7 @@ import java.util.concurrent.Callable; ...@@ -10,6 +10,7 @@ import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors; import java.util.stream.Collectors;
...@@ -35,6 +36,8 @@ import com.google.common.collect.PeekingIterator; ...@@ -35,6 +36,8 @@ import com.google.common.collect.PeekingIterator;
import parquet.schema.PrimitiveType.PrimitiveTypeName; import parquet.schema.PrimitiveType.PrimitiveTypeName;
import pl.edu.icm.cocos.fileSystem.FileSystemDecorator; import pl.edu.icm.cocos.fileSystem.FileSystemDecorator;
import pl.edu.icm.cocos.imports.impala.statement.DatabaseUnawareStatement;
import pl.edu.icm.cocos.imports.impala.statement.StatementWithConcurrentExecutionLimit;
@Component @Component
@Scope("step") @Scope("step")
...@@ -95,7 +98,13 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -95,7 +98,13 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
@Value("#{jobParameters['concurrencyLevel']?:1}") @Value("#{jobParameters['concurrencyLevel']?:1}")
private int concurrencyLevel; private int concurrencyLevel;
@Value("#{jobParameters['concurrencyLimit']?:-1}")
private int concurrencyLimit;
private Semaphore semaphore;
public void initialize(StepContext stepContext) throws IOException { public void initialize(StepContext stepContext) throws IOException {
semaphore = new Semaphore(concurrencyLimit <=0 ? Integer.MAX_VALUE : concurrencyLimit);
if (initialized) { if (initialized) {
return; return;
} }
...@@ -136,14 +145,14 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -136,14 +145,14 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
for (Iterator<ProcessClauses> processClauses = partitionInsertIteratorFactory.get().buildProcessClauses(file.getPath()); processClauses for (Iterator<ProcessClauses> processClauses = partitionInsertIteratorFactory.get().buildProcessClauses(file.getPath()); processClauses
.hasNext();) { .hasNext();) {
ProcessClauses clause = processClauses.next(); ProcessClauses clause = processClauses.next();
List<String> concurrentStatements = new ArrayList<String>(); List<Object> concurrentStatements = new ArrayList<>();
String partitionTableName = tableName + "_" + clause.getWhereClause().replace("=", "_"); String partitionTableName = tableName + "_" + clause.getWhereClause().replace("=", "_");
String partitionTableLocation = parquetDirPath + "/" + clause.getWhereClause(); String partitionTableLocation = parquetDirPath + "/" + clause.getWhereClause();
String externalPartitionTableName = getExternalTableName(partitionTableName); String externalPartitionTableName = getExternalTableName(partitionTableName);
String createPartitionTable = new CreateTableBuilder(partitionTableLocation, externalPartitionTableName, sourceTableLikePath).ifNotExists() String createPartitionTable = new CreateTableBuilder(partitionTableLocation, externalPartitionTableName, sourceTableLikePath).ifNotExists()
.external(!deleteProcessedPartitions).build(); .external(!deleteProcessedPartitions).build();
concurrentStatements.add(createPartitionTable); concurrentStatements.add(new StatementWithConcurrentExecutionLimit(createPartitionTable, "createTable"));
concurrentStatements.add(computeStatsStatement(externalPartitionTableName)); concurrentStatements.add(computeStatsStatement(externalPartitionTableName));
String insertStatementFromPart = " FROM " + externalPartitionTableName; String insertStatementFromPart = " FROM " + externalPartitionTableName;
...@@ -181,9 +190,9 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -181,9 +190,9 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
return true; return true;
} }
if (item instanceof List) { if (item instanceof List) {
start.set(start.get() || batchState.equals(statmentListStateIndicator((List<String>) item))); start.set(start.get() || batchState.equals(statmentListStateIndicator((List<?>) item)));
} else { } else {
start.set(start.get() || batchState.equals(item)); start.set(start.get() || batchState.equals(item.toString()));
} }
return false; return false;
}).iterator()); }).iterator());
...@@ -240,7 +249,7 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -240,7 +249,7 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
while (executorException == null && isAnotherConcurrentTaskAvailable()) { while (executorException == null && isAnotherConcurrentTaskAvailable()) {
if (mayStartAnotherConcurrentTask(statementHolder.pendingSize())) { if (mayStartAnotherConcurrentTask(statementHolder.pendingSize())) {
List<String> statement = (List<String>) statementsIterator.next(); List<?> statement = (List<?>) statementsIterator.next();
handleConcurrentStatements(statement); handleConcurrentStatements(statement);
statementHolder.submit(statmentListStateIndicator(statement)); statementHolder.submit(statmentListStateIndicator(statement));
} else { } else {
...@@ -270,8 +279,8 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -270,8 +279,8 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
return executorException; return executorException;
} }
private String statmentListStateIndicator(List<String> statementsList) { private String statmentListStateIndicator(List<?> statementsList) {
return statementsList.get(0); return statementsList.get(0).toString();
} }
private boolean mayStartAnotherConcurrentTask(int taskCounter) { private boolean mayStartAnotherConcurrentTask(int taskCounter) {
...@@ -282,28 +291,50 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet { ...@@ -282,28 +291,50 @@ public class CocosImpalaImportTasklet extends CocosGenericImpalaImportTasklet {
return statementsIterator.hasNext() && statementsIterator.peek() instanceof List; return statementsIterator.hasNext() && statementsIterator.peek() instanceof List;
} }
private void handleConcurrentStatements(List<String> statement) { private void handleConcurrentStatements(List<?> statements) {
executor.submit(new Callable<String>() { executor.submit(new Callable<String>() {
@Override @Override
public String call() throws Exception { public String call() throws Exception {
statement.forEach(single -> handleSingleStatement(single)); statements.forEach((Object single) -> handleSingleStatement(single));
return statmentListStateIndicator(statement); return statmentListStateIndicator(statements);
} }
}); });
} }
/**
 * Dispatches one entry of a concurrent statement list to the
 * {@code handleSingleStatement} overload matching its runtime type.
 *
 * <p>Supported entry types are {@link String}, {@link DatabaseUnawareStatement}
 * and {@link StatementWithConcurrentExecutionLimit}. The three types are
 * unrelated to each other, so the order of the checks does not affect the result.
 *
 * @param statement the list entry to execute
 * @throws IllegalArgumentException if {@code statement} is {@code null} or of an
 *         unsupported type; the message names the offending type so the faulty
 *         list entry can be identified
 */
private void handleSingleStatement(Object statement) {
    if (statement instanceof String) {
        handleSingleStatement((String) statement);
        return;
    }
    if (statement instanceof DatabaseUnawareStatement) {
        handleSingleStatement((DatabaseUnawareStatement) statement);
        return;
    }
    if (statement instanceof StatementWithConcurrentExecutionLimit) {
        handleSingleStatement((StatementWithConcurrentExecutionLimit) statement);
        return;
    }
    // Keep the historical "Wrong config" prefix, but say what was actually received.
    String actualType = statement == null ? "null" : statement.getClass().getName();
    throw new IllegalArgumentException("Wrong config: unsupported statement type " + actualType);
}
protected void handleSingleStatement(DatabaseUnawareStatement statement) { protected void handleSingleStatement(DatabaseUnawareStatement statement) {
customStatementsList.forEach(customStatement -> operations.execute(customStatement + ";")); executeUpdateStatement(statement.toString());
}
operations.update(statement.toString()); protected void handleSingleStatement(StatementWithConcurrentExecutionLimit statement) {
semaphore.acquireUninterruptibly();
try{
handleSingleStatement(statement.toString());
} finally {
semaphore.release();
}
} }
protected void handleSingleStatement(String statement) { protected void handleSingleStatement(String statement) {
String databaseName = databasePrefix + simulationBusinessId; String databaseName = databasePrefix + simulationBusinessId;
operations.execute("USE " + databaseName + ";"); operations.execute("USE " + databaseName + ";");
customStatementsList.forEach(customStatement -> operations.execute(customStatement + ";")); executeUpdateStatement(statement);
}
private void executeUpdateStatement(String statement) {
customStatementsList.forEach(customStatement -> operations.execute(customStatement + ";"));
operations.update(statement); operations.update(statement);
} }
......
...@@ -4,6 +4,8 @@ import java.util.ArrayList; ...@@ -4,6 +4,8 @@ import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import pl.edu.icm.cocos.imports.impala.statement.ConcurrentStatementState;
public class ConcurrentStatementHolder { public class ConcurrentStatementHolder {
private List<ConcurrentStatementState> statements = new ArrayList<>(); private List<ConcurrentStatementState> statements = new ArrayList<>();
......
package pl.edu.icm.cocos.imports.impala; package pl.edu.icm.cocos.imports.impala.statement;
public class ConcurrentStatementState { public class ConcurrentStatementState extends Statement{
private final String statement;
private boolean completed = false; private boolean completed = false;
public ConcurrentStatementState(String statement) { public ConcurrentStatementState(String statement) {
super(); super(statement);
this.statement = statement;
}
public String getStatement() {
return statement;
} }
public boolean isCompleted() { public boolean isCompleted() {
......
package pl.edu.icm.cocos.imports.impala.statement;
import java.io.Serializable;
/**
 * A statement that the import tasklet runs without first switching to the
 * simulation database (its handler does not issue the {@code USE} command that
 * plain {@code String} statements get).
 *
 * <p><b>Serialization.</b> The statement text is stored in the {@link Statement}
 * superclass, which is not itself {@link Serializable} and exposes only a
 * {@code (String)} constructor. Default serialization would therefore not write
 * the text, and deserialization would be rejected because a non-serializable
 * superclass must offer an accessible no-arg constructor. To keep this class
 * usable as a {@code Serializable} value, instances are written and read through
 * a serialization proxy that carries the text and rebuilds the object via the
 * public constructor.
 */
public class DatabaseUnawareStatement extends Statement implements Serializable {

    private static final long serialVersionUID = 2036961357590202563L;

    /**
     * @param statement the statement text to execute
     */
    public DatabaseUnawareStatement(String statement) {
        super(statement);
    }

    /**
     * Substitutes the proxy for this object in the serialized stream.
     *
     * @return a proxy holding the statement text
     */
    private Object writeReplace() {
        return new SerializationProxy(getStatement());
    }

    /**
     * Rejects streams that try to materialize this class directly instead of
     * going through the proxy.
     *
     * @param in the stream being read (unused)
     * @throws java.io.InvalidObjectException always
     */
    private void readObject(java.io.ObjectInputStream in) throws java.io.InvalidObjectException {
        throw new java.io.InvalidObjectException("Serialization proxy required");
    }

    /** Serialized form of {@link DatabaseUnawareStatement}: just the statement text. */
    private static final class SerializationProxy implements Serializable {

        private static final long serialVersionUID = 1L;

        private final String statement;

        SerializationProxy(String statement) {
            this.statement = statement;
        }

        /**
         * Rebuilds the real object from the proxy on deserialization.
         *
         * @return a new {@link DatabaseUnawareStatement} with the stored text
         */
        private Object readResolve() {
            return new DatabaseUnawareStatement(statement);
        }
    }
}
package pl.edu.icm.cocos.imports.impala; package pl.edu.icm.cocos.imports.impala.statement;
import java.io.Serializable; public abstract class Statement {
public class DatabaseUnawareStatement implements Serializable {
private static final long serialVersionUID = 2036961357590202563L;
private final String statement; private final String statement;
public DatabaseUnawareStatement(String statement) { public Statement(String statement) {
super(); super();
this.statement = statement; this.statement = statement;
} }
...@@ -16,7 +12,7 @@ public class DatabaseUnawareStatement implements Serializable { ...@@ -16,7 +12,7 @@ public class DatabaseUnawareStatement implements Serializable {
public String getStatement() { public String getStatement() {
return statement; return statement;
} }
@Override @Override
public String toString() { public String toString() {
return statement; return statement;
......
package pl.edu.icm.cocos.imports.impala.statement;
/**
 * A {@link Statement} labelled with a flag name, used by the import tasklet for
 * statements whose simultaneous executions are capped (the tasklet runs them
 * while holding a semaphore permit).
 *
 * <p>NOTE(review): the flag name (e.g. {@code "createTable"}) is only stored
 * here; the handler visible in the tasklet does not read it — confirm whether
 * per-flag limits are intended.
 */
public class StatementWithConcurrentExecutionLimit extends Statement {

    /** Label identifying the kind of limited statement. */
    private final String flagName;

    /**
     * @param statement the statement text to execute
     * @param flagName  label identifying the kind of limited statement
     */
    public StatementWithConcurrentExecutionLimit(String statement, String flagName) {
        super(statement);
        this.flagName = flagName;
    }

    /**
     * @return the label passed at construction time
     */
    public String getFlagName() {
        return this.flagName;
    }
}
...@@ -59,6 +59,7 @@ public class CocosImpalaImportTaskletTest { ...@@ -59,6 +59,7 @@ public class CocosImpalaImportTaskletTest {
CocosImpalaImportTaskletTest test; CocosImpalaImportTaskletTest test;
String inputPath = "/"; String inputPath = "/";
int concurrencyLevel = 1; int concurrencyLevel = 1;
int concurrencyLimit = -1;
String databasePrefix ="dbPrefix_"; String databasePrefix ="dbPrefix_";
String simulationBusinessId ="simId"; String simulationBusinessId ="simId";
String batchState = StringUtils.EMPTY; String batchState = StringUtils.EMPTY;
...@@ -97,6 +98,11 @@ public class CocosImpalaImportTaskletTest { ...@@ -97,6 +98,11 @@ public class CocosImpalaImportTaskletTest {
this.concurrencyLevel = concurrencyLevel; this.concurrencyLevel = concurrencyLevel;
return this; return this;
} }
/**
 * Fluent setter for the {@code concurrencyLimit} value of this test setup,
 * which is later copied into the tasklet's {@code concurrencyLimit} field via
 * reflection. The setup's default is {@code -1}.
 *
 * @param concurrencyLimit the limit value to inject into the tasklet
 * @return this setup, for call chaining
 */
Setup withConcurrecyLimit(int concurrencyLimit) {
this.concurrencyLimit = concurrencyLimit;
return this;
}
Setup withBatchState(String batchState) { Setup withBatchState(String batchState) {
this.batchState = batchState; this.batchState = batchState;
...@@ -125,6 +131,7 @@ public class CocosImpalaImportTaskletTest { ...@@ -125,6 +131,7 @@ public class CocosImpalaImportTaskletTest {
reflectionSet(tasklet, "inputPath", inputPath); reflectionSet(tasklet, "inputPath", inputPath);
reflectionSet(tasklet, "concurrencyLevel", concurrencyLevel); reflectionSet(tasklet, "concurrencyLevel", concurrencyLevel);
reflectionSet(tasklet, "concurrencyLimit", concurrencyLimit);
reflectionSet(tasklet, "databasePrefix", databasePrefix); reflectionSet(tasklet, "databasePrefix", databasePrefix);
reflectionSet(tasklet, "simulationBusinessId", simulationBusinessId); reflectionSet(tasklet, "simulationBusinessId", simulationBusinessId);
reflectionSet(tasklet, "customStatements", customStatements); reflectionSet(tasklet, "customStatements", customStatements);
...@@ -566,6 +573,7 @@ public class CocosImpalaImportTaskletTest { ...@@ -566,6 +573,7 @@ public class CocosImpalaImportTaskletTest {
final int minCount = flatSize(expectedUpdateStatements) - DATABASE_UNAWARE_STATEMENTS; final int minCount = flatSize(expectedUpdateStatements) - DATABASE_UNAWARE_STATEMENTS;
validateExecuteStatements(minCount, minCount+8, "USE dbPrefix_simId;"); validateExecuteStatements(minCount, minCount+8, "USE dbPrefix_simId;");
} }
@Test(invocationCount = 30) @Test(invocationCount = 30)
public void shouldHandleFailWhenRunMultithreadedImportWithIteratorFactoryWhenFailingSecond() throws Exception { public void shouldHandleFailWhenRunMultithreadedImportWithIteratorFactoryWhenFailingSecond() throws Exception {
final CocosImpalaTableSpec spec = new CocosImpalaTableSpec(); final CocosImpalaTableSpec spec = new CocosImpalaTableSpec();
...@@ -664,6 +672,7 @@ public class CocosImpalaImportTaskletTest { ...@@ -664,6 +672,7 @@ public class CocosImpalaImportTaskletTest {
private void validateUpdateStatements(List<String> expectedUpdateStatements) { private void validateUpdateStatements(List<String> expectedUpdateStatements) {
ArgumentCaptor<String> updateCaptor = ArgumentCaptor.forClass(String.class); ArgumentCaptor<String> updateCaptor = ArgumentCaptor.forClass(String.class);
// could use times(expectedUpdateStatements.size()), but the assertion below gives clearer failure messages
verify(operations, atLeastOnce()).update(updateCaptor.capture()); verify(operations, atLeastOnce()).update(updateCaptor.capture());
Assertions.assertThat(updateCaptor.getAllValues()).containsExactlyElementsOf(expectedUpdateStatements); Assertions.assertThat(updateCaptor.getAllValues()).containsExactlyElementsOf(expectedUpdateStatements);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment