Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ cuebot/.project
/pycue/opencue/compiled_proto/
/rqd/rqd/compiled_proto/
docker-compose-local.yml
/sandbox/kafka*
/sandbox/zookeeper*
docs/_site/
docs/bin/
sandbox/kafka-data
sandbox/zookeeper-data
sandbox/zookeeper-logs
docs/_data/version.yml
target/*
2 changes: 1 addition & 1 deletion VERSION.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.14
1.15
4 changes: 2 additions & 2 deletions cuebot/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ dependencies {

compileJava {
dependsOn generateProto
options.compilerArgs << "-Xlint:all" << "-Werror"
options.compilerArgs << "-Xlint:all,-serial" << "-Werror"
}

compileTestJava {
dependsOn generateProto
options.compilerArgs << "-Xlint:all" << "-Werror"
options.compilerArgs << "-Xlint:all,-serial" << "-Werror"
}

protobuf {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class LayerDetail extends LayerEntity implements LayerInterface {
public int timeout_llu;
public int dispatchOrder;
public int totalFrameCount;
public int slotsRequired;

public Set<String> tags = new LinkedHashSet<String>();
public Set<String> services = new LinkedHashSet<String>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ public interface HostDao {
*/
void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap,
long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, long freeGpuMemory,
int load, Timestamp bootTime, String os);
int load, Timestamp bootTime, String os, int runningProcs);

/**
* Return true if the HardwareState is Up, false if it is anything else.
Expand Down
8 changes: 8 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,14 @@ public interface LayerDao {
*/
void updateTimeoutLLU(LayerInterface layer, int timeout_llu);

/**
* Updates the slots required for a layer.
*
* @param layer the layer to update
* @param slots the number of slots required (<=0 means not slot-based)
*/
void updateLayerSlotsRequired(LayerInterface layer, int slots);

/**
* Lowers the minimum memory on a layer if the layer is using less memory and the currnet min
* memory is the dispatcher default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,22 +395,23 @@ public CallableStatement createCallableStatement(Connection con) throws SQLExcep
+ " int_load = ?, "
+ " ts_booted = ?, "
+ " ts_ping = current_timestamp, "
+ " str_os = ? "
+ " str_os = ?, "
+ " int_running_procs = ? "
+ "WHERE "
+ " pk_host = ?";

@Override
public void updateHostStats(HostInterface host, long totalMemory, long freeMemory,
long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory,
long freeGpuMemory, int load, Timestamp bootTime, String os) {
long freeGpuMemory, int load, Timestamp bootTime, String os, int runningProcs) {

if (os == null) {
os = Dispatcher.OS_DEFAULT;
}

getJdbcTemplate().update(UPDATE_RENDER_HOST, totalMemory, freeMemory, totalSwap, freeSwap,
totalMcp, freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os,
host.getHostId());
runningProcs, host.getHostId());
}

@Override
Expand Down Expand Up @@ -631,7 +632,7 @@ public boolean isNimbyHost(HostInterface h) {
/**
* Checks if the passed in name looks like a fully qualified domain name. If so, returns the
* hostname without the domain. Otherwise returns the passed in name unchanged.
*
*
* @param fqdn - String
* @return String - hostname
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException {
layer.services.addAll(Lists.newArrayList(rs.getString("str_services").split(",")));
layer.timeout = rs.getInt("int_timeout");
layer.timeout_llu = rs.getInt("int_timeout_llu");
layer.slotsRequired = rs.getInt("int_slots_required");
return layer;
}
};
Expand Down Expand Up @@ -241,7 +242,8 @@ public LayerInterface getLayer(String id) {
+ "int_dispatch_order, " + "str_tags, " + "str_type," + "int_cores_min, "
+ "int_cores_max, " + "b_threadable, " + "int_mem_min, " + "int_gpus_min, "
+ "int_gpus_max, " + "int_gpu_mem_min, " + "str_services, " + "int_timeout,"
+ "int_timeout_llu " + ") " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
+ "int_timeout_llu, " + "int_slots_required " + ") "
+ "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";

@Override
public void insertLayerDetail(LayerDetail l) {
Expand All @@ -250,7 +252,7 @@ public void insertLayerDetail(LayerDetail l) {
l.chunkSize, l.dispatchOrder, StringUtils.join(l.tags, " | "), l.type.toString(),
l.minimumCores, l.maximumCores, l.isThreadable, l.minimumMemory, l.minimumGpus,
l.maximumGpus, l.minimumGpuMemory, StringUtils.join(l.services, ","), l.timeout,
l.timeout_llu);
l.timeout_llu, l.slotsRequired);
}

@Override
Expand Down Expand Up @@ -553,6 +555,12 @@ public void updateTimeoutLLU(LayerInterface layer, int timeout_llu) {
layer.getLayerId());
}

@Override
public void updateLayerSlotsRequired(LayerInterface layer, int slots) {
getJdbcTemplate().update("UPDATE layer SET int_slots_required=? WHERE pk_layer=?", slots,
layer.getLayerId());
}

@Override
public void enableMemoryOptimizer(LayerInterface layer, boolean value) {
getJdbcTemplate().update("UPDATE layer SET b_optimize=? WHERE pk_layer=?", value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1183,7 +1183,8 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException {
Arrays.asList(SqlUtil.getString(rs, "str_limit_names").split(",")))
.setMemoryOptimizerEnabled(rs.getBoolean("b_optimize"))
.setTimeout(rs.getInt("int_timeout"))
.setTimeoutLlu(rs.getInt("int_timeout_llu"));
.setTimeoutLlu(rs.getInt("int_timeout_llu"))
.setSlotsRequired(rs.getInt("int_slots_required"));

LayerStats.Builder statsBuilder = LayerStats.newBuilder()
.setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ public void handleHostReport(HostReport report, boolean isBoot) {
rhost.getTotalSwap(), rhost.getFreeSwap(), rhost.getTotalMcp(),
rhost.getFreeMcp(), rhost.getTotalGpuMem(), rhost.getFreeGpuMem(),
rhost.getLoad(), new Timestamp(rhost.getBootTime() * 1000l),
rhost.getAttributesMap().get("SP_OS"));
rhost.getAttributesMap().get("SP_OS"), report.getFramesCount());

// Both logics are conflicting, only change hardware state if
// there was no need for a tempDirStorage state change
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@
import com.imageworks.spcue.grpc.job.LayerSetTimeoutResponse;
import com.imageworks.spcue.grpc.job.LayerSetTimeoutLLURequest;
import com.imageworks.spcue.grpc.job.LayerSetTimeoutLLUResponse;
import com.imageworks.spcue.grpc.job.LayerSetSlotsRequiredRequest;
import com.imageworks.spcue.grpc.job.LayerSetSlotsRequiredResponse;
import com.imageworks.spcue.grpc.job.LayerStaggerFramesRequest;
import com.imageworks.spcue.grpc.job.LayerStaggerFramesResponse;
import com.imageworks.spcue.grpc.limit.Limit;
Expand Down Expand Up @@ -432,6 +434,15 @@ public void setTimeoutLLU(LayerSetTimeoutLLURequest request,
}
}

@Override
public void setSlotsRequired(LayerSetSlotsRequiredRequest request,
StreamObserver<LayerSetSlotsRequiredResponse> responseObserver) {
updateLayer(request.getLayer());
jobManager.setLayerSlotsRequired(layer, request.getSlots());
responseObserver.onNext(LayerSetSlotsRequiredResponse.newBuilder().build());
responseObserver.onCompleted();
}

@Override
public void addLimit(LayerAddLimitRequest request,
StreamObserver<LayerAddLimitResponse> responseObserver) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public interface HostManager {
*/
void setHostStatistics(HostInterface host, long totalMemory, long freeMemory, long totalSwap,
long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, long freeGpuMemory,
int load, Timestamp bootTime, String os);
int load, Timestamp bootTime, String os, int runningProcs);

void deleteHost(HostInterface host);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ public void rebootNow(HostInterface host) {
@Override
public void setHostStatistics(HostInterface host, long totalMemory, long freeMemory,
long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory,
long freeGpuMemory, int load, Timestamp bootTime, String os) {
long freeGpuMemory, int load, Timestamp bootTime, String os, int runningProcs) {

hostDao.updateHostStats(host, totalMemory, freeMemory, totalSwap, freeSwap, totalMcp,
freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os);
freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os, runningProcs);
}

@Transactional(propagation = Propagation.SUPPORTS, readOnly = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,14 @@ public interface JobManager {
*/
void setLayerMinGpus(LayerInterface layer, int gpuUnits);

/**
* Sets the slots required for a layer.
*
* @param layer the layer to update
* @param slots the number of slots required
*/
void setLayerSlotsRequired(LayerInterface layer, int slots);

/**
* Add a limit to the given layer.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,11 @@ public void setLayerMinGpus(LayerInterface layer, int gpu) {
layerDao.updateLayerMinGpus(layer, gpu);
}

@Override
public void setLayerSlotsRequired(LayerInterface layer, int slots) {
layerDao.updateLayerSlotsRequired(layer, slots);
}

@Override
public void setLayerMaxGpus(LayerInterface layer, int gpu) {
layerDao.updateLayerMaxGpus(layer, gpu);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,10 @@ private void handleLayerTags(BuildableJob buildableJob, Element jobTag) {
layer.timeout_llu = Integer.parseInt(layerTag.getChildTextTrim("timeout_llu"));
}

if (layerTag.getChildTextTrim("slots_required") != null) {
layer.slotsRequired = Integer.parseInt(layerTag.getChildTextTrim("slots_required"));
}

/*
* Handle the layer environment
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Add a field to limit the max amount of concurrent procs a host can run
-- -1 means no limit
alter table host
add int_concurrent_procs_limit INT NOT NULL DEFAULT -1;

alter table host_stat
add int_running_procs INT NOT NULL DEFAULT 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Add a field to mark a layer as requiring at least a specific number of slots
-- <=0 means slots are not required
alter table layer
add int_slots_required INT NOT NULL DEFAULT 0;
106 changes: 106 additions & 0 deletions cuebot/src/main/resources/public/dtd/cjsl-1.16.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<!-- ================================================================= -->
<!-- SpiCue Job Spec DTD ver 1.16 -->
<!-- middle-tier@imageworks.com -->
<!-- ================================================================= -->

<!ELEMENT spec (facility?,dept?,show,shot,user,email?,uid?,job*,depends*)>
<!ELEMENT facility (#PCDATA)*>
<!ELEMENT dept (#PCDATA)*>
<!ELEMENT show (#PCDATA)*>
<!ELEMENT shot (#PCDATA)*>
<!ELEMENT user (#PCDATA)*>
<!ELEMENT email (#PCDATA)*>
<!ELEMENT uid (#PCDATA)*>

<!-- ================================================================= -->
<!-- Jobs -->
<!-- ================================================================= -->
<!ELEMENT job (paused?,priority?,maxretries?,maxcores?,maxgpus?,autoeat?,localbook?,os?,env*,layers?)>
<!ATTLIST job
name NMTOKEN #REQUIRED
>
<!ELEMENT paused (#PCDATA)*>
<!ELEMENT priority (#PCDATA)*>
<!ELEMENT maxretries (#PCDATA)*>
<!ELEMENT maxcores (#PCDATA)*>
<!ELEMENT maxgpus (#PCDATA)*>
<!ELEMENT autoeat (#PCDATA)*>
<!ELEMENT os (#PCDATA)*>
<!ELEMENT localbook (#PCDATA)*>
<!ATTLIST localbook
host NMTOKEN #REQUIRED
cores NMTOKEN #REQUIRED
memory NMTOKEN #REQUIRED
threads NMTOKEN #REQUIRED
gpu NMTOKEN #REQUIRED
>

<!-- ================================================================= -->
<!-- Layers -->
<!-- ================================================================= -->

<!ELEMENT layers (layer+)>
<!ELEMENT layer (cmd,range,chunk,slots_required?,cores?,threadable?,memory?,gpus?,gpu?,gpu_memory?,timeout?,timeout_llu?,tags?,limits?,env*,services?,outputs*)>
<!ATTLIST layer
name NMTOKEN #REQUIRED
type (Render | Util | Post) #REQUIRED
>
<!ELEMENT cmd (#PCDATA)*>
<!ELEMENT range (#PCDATA)*>
<!ELEMENT chunk (#PCDATA)*>
<!ELEMENT slots_required (#PCDATA)*>
<!ELEMENT cores (#PCDATA)*>
<!ELEMENT threadable (#PCDATA)*>
<!ELEMENT memory (#PCDATA)*>
<!ELEMENT gpus (#PCDATA)*>
<!ELEMENT gpu (#PCDATA)*> <!-- deprecated: use gpu_memory -->
<!ELEMENT gpu_memory (#PCDATA)*>
<!ELEMENT timeout (#PCDATA)*>
<!ELEMENT timeout_llu (#PCDATA)*>
<!ELEMENT tags (#PCDATA)*>
<!ELEMENT limits (limit+)>
<!ELEMENT services (service+)>
<!ELEMENT outputs (output*)>
<!ELEMENT env (key*)>
<!-- ================================================================= -->
<!-- Layer Services -->
<!-- ================================================================= -->
<!ELEMENT service (#PCDATA)*>

<!-- ================================================================= -->
<!-- Layer Ouuputs -->
<!-- ================================================================= -->
<!ELEMENT output (#PCDATA)*>
<!ATTLIST output
name NMTOKEN #REQUIRED
>
<!-- ================================================================= -->
<!-- Layer Limits -->
<!-- ================================================================= -->
<!ELEMENT limit (#PCDATA)*>

<!-- ================================================================= -->
<!-- Environment Variables -->
<!-- ================================================================= -->

<!ELEMENT key (#PCDATA)*>
<!ATTLIST key
name NMTOKEN #REQUIRED
>

<!-- ================================================================= -->
<!-- Dependencies -->
<!-- ================================================================= -->

<!ELEMENT depends (depend*)>
<!ELEMENT depend (depjob,deplayer?,depframe?,onjob,onlayer?,onframe?)>
<!ATTLIST depend
anyframe NMTOKEN #IMPLIED
type (LAYER_ON_SIM_FRAME|PREVIOUS_FRAME|JOB_ON_JOB|JOB_ON_LAYER|JOB_ON_FRAME|LAYER_ON_JOB|LAYER_ON_FRAME|LAYER_ON_LAYER|FRAME_ON_JOB|FRAME_ON_LAYER|FRAME_ON_FRAME|FRAME_BY_FRAME) #REQUIRED
>
<!ELEMENT depjob (#PCDATA)*>
<!ELEMENT onjob (#PCDATA)*>
<!ELEMENT deplayer (#PCDATA)*>
<!ELEMENT onlayer (#PCDATA)*>
<!ELEMENT depframe (#PCDATA)*>
<!ELEMENT onframe (#PCDATA)*>
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@ public void updateHostStats() {

DispatchHost dispatchHost = hostDao.findDispatchHost(TEST_HOST);
hostDao.updateHostStats(dispatchHost, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8,
CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1");
CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1",
2);

Map<String, Object> result = jdbcTemplate
.queryForMap("SELECT * FROM host_stat WHERE pk_host=?", dispatchHost.getHostId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,26 @@ public void testParseMaxCoresAndMaxGpus() {
assertEquals(job.maxGpusOverride, Integer.valueOf(42));
}

@Test
public void testParseSlotsRequired() {
String xml = readJobSpec("jobspec_1_16.xml");
JobSpec spec = jobLauncher.parse(xml);
assertEquals(spec.getDoc().getDocType().getPublicID(), "SPI Cue Specification Language");
assertEquals(spec.getDoc().getDocType().getSystemID(),
"http://localhost:8080/spcue/dtd/cjsl-1.16.dtd");
assertEquals(spec.getJobs().size(), 1);
BuildableJob job = spec.getJobs().get(0);
assertEquals(job.getBuildableLayers().size(), 2);

// First layer uses slot-based booking
LayerDetail slotBasedLayer = job.getBuildableLayers().get(0).layerDetail;
assertEquals(slotBasedLayer.name, "slot_based_layer");
assertEquals(slotBasedLayer.slotsRequired, 4);

// Second layer uses regular resource booking (default slots_required = 0)
LayerDetail regularLayer = job.getBuildableLayers().get(1).layerDetail;
assertEquals(regularLayer.name, "regular_layer");
assertEquals(regularLayer.slotsRequired, 0);
}

}
Loading