package neureka.devices.host;
import neureka.Data;
import neureka.Tensor;
import neureka.backend.api.Operation;
import neureka.common.utility.DataConverter;
import neureka.common.utility.LogUtil;
import neureka.devices.AbstractDevice;
import neureka.devices.Device;
import neureka.devices.host.concurrent.Parallelism;
import neureka.devices.host.concurrent.WorkScheduler;
import neureka.devices.host.machine.ConcreteMachine;
import neureka.dtype.DataType;
import neureka.dtype.custom.*;
import neureka.math.Function;
import neureka.ndim.NDConstructor;
import neureka.ndim.config.NDConfiguration;
import neureka.ndim.config.types.views.virtual.VirtualNDConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.IntSupplier;
import java.util.stream.IntStream;
/**
 *  The CPU class, one of many implementations of the {@link Device} interface,
 *  is simply supposed to be an API for dispatching threaded workloads onto the CPU
 *  as well as reading from or writing to the tensors it stores.
 *  Contrary to other types of devices, the CPU will represent a tensor's data by default, simply
 *  because tensors are stored in RAM (JVM heap) if no other device was specified.
 *  This means that they are implicitly "stored" on the {@link CPU} device.
 *  The class is also a singleton instead of being part of a {@link neureka.backend.api.BackendExtension}.
 */
public class CPU extends AbstractDevice<Object>
// Logger used by this class.
private static final Logger _LOG = LoggerFactory.getLogger( CPU.class );
// The one shared instance handed out by get(); created in the static initializer below.
private static final CPU _INSTANCE;
// Work divider built on top of the executor's thread pool; used by the JVMExecutor to split workloads.
private static final WorkScheduler.Divider _DIVIDER;
// Passed to the divider as its parallelism setting whenever work is dispatched.
// NOTE(review): no assignment to this final field is visible in this part of the file - confirm it is initialized.
private static final IntSupplier _PARALLELISM;
// NOTE(review): not referenced in the visible code; presumably a workload size limit used by callers - confirm.
public static final int PARALLELIZATION_THRESHOLD = 32;
// Prefix from which the name of the executor's thread group is derived.
public static final String THREAD_PREFIX = "neureka-daemon";
static {
// The instance has to exist first: its '_executor' field owns the pool the divider is built upon.
_INSTANCE = new CPU();
_DIVIDER = new WorkScheduler.Divider(_INSTANCE._executor._pool);
// The executor (and with it the thread pool) belonging to this device instance.
private final JVMExecutor _executor = new JVMExecutor();
// Private because instances are only created by the static initializer (singleton).
private CPU() { super(); }
* Use this method to access the singleton instance of this {@link CPU} class,
* which is a {@link Device} type and default location for freshly instantiated {@link Tensor} instances.
* {@link Tensor} instances located on the {@link CPU} device will reside in regular RAM
* causing operations to run on the JVM and thereby the CPU.
* @return The singleton instance of this {@link CPU} class.
public static CPU get() { return _INSTANCE; }
* The {@link JVMExecutor} offers a similar functionality as the parallel stream API,
* however it differs in that the {@link JVMExecutor} is processing {@link RangeWorkload} lambdas
* instead of simply exposing a single index or concrete elements for a given workload size.
* @return A parallel range based execution API running on the JVM.
public JVMExecutor getExecutor() { return _executor; }
/**
 *  Approves every execution request unconditionally; none of the arguments are inspected.
 *
 * @param tensors The tensors taking part in the execution (ignored).
 * @param d An index argument of the execution (ignored).
 * @param operation The operation which ought to be executed (ignored).
 * @return Always {@code true}.
 */
protected boolean _approveExecutionOf( Tensor<?>[] tensors, int d, Operation operation ) {
    return true;
}
* This method will shut down the internal thread-pool used by this
* class to execute JVM/CPU based operations in parallel.
public void dispose() {
_numberOfTensors = 0;
"Main thread pool in '"+this.getClass()+"' shutting down! " +
"Newly incoming operations will not be executed in parallel."
/**
 *  A no-op for this device: the tensor is not touched.
 *
 * @param tensor The tensor which ought to be restored (ignored).
 * @return This very {@link CPU} instance to allow for method chaining.
 */
public CPU restore( Tensor<Object> tensor ) {
    return this;
}
public <T> CPU store( Tensor<T> tensor ) {
if ( !this.has( tensor ) )
tensor.getMut().getData().owner().restore( tensor );
return this;
protected final <T> int _sizeOccupiedBy(Tensor<T> tensor) {
Object data = tensor.getMut().getData().getOrNull();
if ( data instanceof float[] ) return ( (float[]) data).length;
else if ( data instanceof double[] ) return ( (double[]) data).length;
else if ( data instanceof short[] ) return ( (short[]) data).length;
else if ( data instanceof int[] ) return ( (int[]) data).length;
else if ( data instanceof byte[] ) return ( (byte[]) data).length;
else if ( data instanceof long[] ) return ( (long[]) data).length;
else if ( data instanceof boolean[] ) return ( (boolean[]) data).length;
else if ( data instanceof char[] ) return ( (char[]) data).length;
else return ( (Object[]) data).length;
protected final <T> Object _readAll(Tensor<T> tensor, boolean clone ) {
Object data = tensor.getMut().getData().getOrNull();
if ( clone ) {
if ( data instanceof double[] ) return ( (double[]) data ).clone();
if ( data instanceof float[] ) return ( (float[]) data ).clone();
if ( data instanceof byte[] ) return ( (byte[]) data ).clone();
if ( data instanceof short[] ) return ( (short[]) data ).clone();
if ( data instanceof int[] ) return ( (int[]) data ).clone();
if ( data instanceof long[] ) return ( (long[]) data ).clone();
if ( data instanceof char[] ) return ( (char[]) data ).clone();
if ( data instanceof boolean[] ) return ( (boolean[]) data ).clone();
if ( data instanceof Object[] ) return ( (Object[]) data ).clone();
return data;
protected final <T> T _readItem(Tensor<T> tensor, int index ) {
Object data = tensor.getMut().getData().getOrNull();
if ( data instanceof float[] ) return (T)Float.valueOf( ((float[]) data)[ index ] );
else if ( data instanceof double[] ) return (T)Double.valueOf( ((double[]) data)[ index ] );
else if ( data instanceof short[] ) return (T)Short.valueOf( ((short[]) data)[ index ] );
else if ( data instanceof int[] ) return (T)Integer.valueOf( ((int[]) data)[ index ] );
else if ( data instanceof byte[] ) return (T)Byte.valueOf( ((byte[]) data)[ index ] );
else if ( data instanceof long[] ) return (T)Long.valueOf( ((long[]) data)[ index ] );
else if ( data instanceof boolean[] ) return (T)Boolean.valueOf( ((boolean[]) data)[ index ] );
else if ( data instanceof char[] ) return (T)Character.valueOf( ((char[]) data)[ index ] );
else return (T)( (Object[]) data)[ index ];
protected final <T, A> A _readArray(
Tensor<T> tensor, Class<A> arrayType, int start, int size
) {
if ( arrayType == float[].class ) {
float[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), float[].class);
float[] data = new float[size];
System.arraycopy(source, start, data, 0, size);
return (A) data;
} else if ( arrayType == short[].class ){
short[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), short[].class);
short[] data = new short[size];
System.arraycopy(source, start, data, 0, size);
return (A) data;
} else if ( arrayType == byte[].class ){
byte[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), byte[].class);
byte[] data = new byte[size];
System.arraycopy(source, start, data, 0, size);
return (A) data;
} else if ( arrayType == boolean[].class ){
boolean[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), boolean[].class);
boolean[] data = new boolean[size];
System.arraycopy(source, start, data, 0, size);
return (A) data;
} else if ( arrayType == char[].class ){
char[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), char[].class);
char[] data = new char[size];
System.arraycopy(source, start, data, 0, size);
return (A) data;
} else if ( arrayType == double[].class ){
double[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), double[].class);
return (A) java.util.Arrays.stream(source, start, start + size).toArray();
} else if ( arrayType == int[].class ){
int[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), int[].class);
return (A) java.util.Arrays.stream(source, start, start + size).toArray();
} else if ( arrayType == long[].class ){
long[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), long[].class);
return (A) java.util.Arrays.stream(source, start, start + size).toArray();
} else if ( arrayType == Object[].class ){
Object[] source = DataConverter.get().convert(tensor.getMut().getData().getOrNull(), Object[].class);
return (A) java.util.Arrays.stream(source, start, start + size).toArray();
throw new IllegalArgumentException("Array type '"+arrayType.getSimpleName()+"' not supported!");
protected final <T> void _writeItem(Tensor<T> tensor, T item, int start, int size ) {
Object data = tensor.getMut().getData().getOrNull();
Class<?> arrayType = data.getClass();
if ( arrayType == float[].class ) {
float source = DataConverter.get().convert(item, Float.class);
float[] target = (float[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == short[].class ){
short source = DataConverter.get().convert(item, Short.class);
short[] target = (short[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == byte[].class ){
byte source = DataConverter.get().convert(item, Byte.class);
byte[] target = (byte[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == boolean[].class ){
boolean source = DataConverter.get().convert(item, Boolean.class);
boolean[] target = (boolean[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == double[].class ){
double source = DataConverter.get().convert(item, Double.class);
double[] target = (double[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == int[].class ){
int source = DataConverter.get().convert(item, Integer.class);
int[] target = (int[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == long[].class ){
long source = DataConverter.get().convert(item, Long.class);
long[] target = (long[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == char[].class ){
char source = DataConverter.get().convert(item, Character.class);
char[] target = (char[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = source;
} else if ( arrayType == Object[].class ) {
Object[] target = (Object[]) data;
for ( int i = start; i < (start+size); i++ ) target[i] = item;
private <V> CPUData<V> _createDataFor( Object reference, DataType<V> dataType ) {
return new CPUData<>( this, reference, dataType);
protected final <T> void _writeArray(
Tensor<T> tensor, Object array, int offset, int start, int size
) {
Object data = tensor.getMut().getData() == null ? null : tensor.getMut().getData().getOrNull();
if ( data == null ) {
DataType<?> dataType = tensor.getDataType() != null ? tensor.getDataType() : _dataTypeOf(array);
tensor.getMut().setData( _createDataFor( array, (DataType<T>) dataType) );
Class<?> arrayType = data.getClass();
if ( arrayType == float[].class ) {
float[] source = DataConverter.get().convert(array, float[].class);
float[] target = (float[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == short[].class ){
short[] source = DataConverter.get().convert(array, short[].class);
short[] target = (short[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == byte[].class ){
byte[] source = DataConverter.get().convert(array, byte[].class);
byte[] target = (byte[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == boolean[].class ){
boolean[] source = DataConverter.get().convert(array, boolean[].class);
boolean[] target = (boolean[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == double[].class ){
double[] source = DataConverter.get().convert(array, double[].class);
double[] target = (double[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == int[].class ){
int[] source = DataConverter.get().convert(array, int[].class);
int[] target = (int[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == char[].class ){
char[] source = DataConverter.get().convert(array, char[].class);
char[] target = (char[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == long[].class ){
long[] source = DataConverter.get().convert(array, long[].class);
long[] target = (long[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
} else if ( arrayType == Object[].class ){
Object[] source = DataConverter.get().convert(array, Object[].class);
Object[] target = (Object[]) data;
System.arraycopy(source, offset, target, start, Math.min(size, source.length));
else throw new IllegalArgumentException("Array type '"+arrayType.getSimpleName()+"' not supported!");
public <T> Data<T> allocateFromOne(DataType<T> dataType, NDConfiguration ndc, T initialValue ) {
int size = ndc instanceof VirtualNDConfiguration ? 1 : ndc.size();
Class<?> type = dataType.getItemTypeClass();
Data<T> array = allocate( dataType, size );
Object data = array.getOrNull();
if ( type == Double .class ) Arrays.fill((double[]) data, (Double) initialValue);
else if ( type == Float .class ) Arrays.fill((float[]) data, (Float) initialValue);
else if ( type == Integer .class ) Arrays.fill((int[]) data, (Integer) initialValue);
else if ( type == Short .class ) Arrays.fill((short[]) data, (Short) initialValue);
else if ( type == Byte .class ) Arrays.fill((byte[]) data, (Byte) initialValue);
else if ( type == Long .class ) Arrays.fill((long[]) data, (Long) initialValue);
else if ( type == Boolean .class ) Arrays.fill((boolean[]) data, (Boolean) initialValue);
else if ( type == Character.class ) Arrays.fill((char[]) data, (Character)initialValue);
else Arrays.fill((Object[]) data, initialValue);
return array;
public <T> Data<T> allocateFromAll( DataType<T> dataType, NDConfiguration ndc, Object jvmData )
int desiredSize = ndc.size();
Data<T> data = (Data<T>) _createDataFor( jvmData, (DataType<Object>) (dataType != null ? dataType : _dataTypeOf(jvmData)));
if ( jvmData instanceof int[] ) {
int[] array = (int[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate( DataType.of(I32.class), desiredSize );
for ( int i = 0; i < desiredSize; i++ ) data.as(int[].class)[ i ] = array[ i % array.length ];
return data;
} else if ( jvmData instanceof float[] ) {
float[] array = (float[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate( DataType.of(F32.class), desiredSize );
for ( int i = 0; i < desiredSize; i++ ) data.as(float[].class)[ i ] = array[ i % array.length ];
return data;
} else if ( jvmData instanceof double[] ) {
double[] array = (double[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate( DataType.of(F64.class), desiredSize );
for ( int i = 0; i < desiredSize; i++ ) data.as(double[].class)[ i ] = array[ i % array.length ];
return data;
} else if ( jvmData instanceof long[] ) {
long[] array = (long[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate( DataType.of(I64.class), desiredSize );
for ( int i = 0; i < desiredSize; i++ ) data.as(long[].class)[ i ] = array[ i % array.length ];
return data;
} else if ( jvmData instanceof short[] ) {
short[] array = (short[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate( DataType.of(I16.class), desiredSize );
for ( int i = 0; i < desiredSize; i++ ) data.as(short[].class)[ i ] = array[ i % array.length ];
return data;
} else if ( jvmData instanceof byte[] ) {
byte[] array = (byte[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate(DataType.of(I8.class), desiredSize);
for (int i = 0; i < desiredSize; i++) data.as(byte[].class)[i] = array[i % array.length];
return data;
} else if ( jvmData instanceof boolean[] ) {
boolean[] array = (boolean[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate(DataType.of(Boolean.class), desiredSize);
for (int i = 0; i < desiredSize; i++) data.as(boolean[].class)[i] = array[i % array.length];
return data;
} else if ( jvmData instanceof char[] ) {
char[] array = (char[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate(DataType.of(Character.class), desiredSize);
for (int i = 0; i < desiredSize; i++) data.as(char[].class)[i] = array[i % array.length];
return data;
} else if ( jvmData instanceof Object[] ) {
Object[] array = (Object[]) jvmData;
if ( desiredSize != array.length ) {
data = (Data<T>) CPU.get().allocate(DataType.of(Object.class), desiredSize);
for (int i = 0; i < desiredSize; i++) data.as(Object[].class)[i] = array[i % array.length];
return data;
throw new IllegalArgumentException("Array type '"+jvmData.getClass().getSimpleName()+"' not supported!");
private Data<Object> _allocate( Object data ) {
int size;
if ( data instanceof Object[] ) size = ( (Object[]) data ).length;
else if ( data instanceof int[] ) size = ( (int[]) data ).length;
else if ( data instanceof long[] ) size = ( (long[]) data ).length;
else if ( data instanceof float[] ) size = ( (float[]) data ).length;
else if ( data instanceof double[] ) size = ( (double[]) data ).length;
else if ( data instanceof short[] ) size = ( (short[]) data ).length;
else if ( data instanceof byte[] ) size = ( (byte[]) data ).length;
else if ( data instanceof boolean[] ) size = ( (boolean[]) data ).length;
else if ( data instanceof char[] ) size = ( (char[]) data ).length;
throw new IllegalArgumentException( "Unsupported data type: " + data.getClass() );
Data dataArray = CPU.get().allocateFromAll( _dataTypeOf(data), NDConstructor.of(size).produceNDC(false), data );
if ( dataArray.getOrNull() != data )
throw new IllegalStateException( "CPU seems to have reallocated some already valid data unnecessarily! This is most likely a bug." );
return dataArray;
public final <T> Data<T> allocate( Class<T> type, Object data ) {
Data<Object> dataArray = _allocate( data );
// Now we check if the data is of the correct type
Class<?> arrayType = DataType.of( type ).dataArrayType();
if ( !arrayType.isAssignableFrom( dataArray.getOrNull().getClass() ) )
throw new IllegalArgumentException(
"Data is not of the correct type! Expected: " + arrayType.getSimpleName() + ", " +
"but got: " + dataArray.getOrNull().getClass().getSimpleName()
return (Data<T>) dataArray;
public final <T> Data<T> allocate( Class<T> type, int size, Object source ) {
if ( source instanceof Object[] )
source = _autoConvertAndOptimizeObjectArray( (Object[]) source, DataType.of(type), size );
Data<Object> dataArray = _allocate( source );
// Now we check if the data is of the correct type
Class<?> arrayType = DataType.of( type ).dataArrayType();
if ( !arrayType.isAssignableFrom( dataArray.getOrNull().getClass() ) )
throw new IllegalArgumentException(
"Data is not of the correct type! Expected: " + arrayType.getSimpleName() + ", " +
"but got: " + dataArray.getOrNull().getClass().getSimpleName()
return (Data<T>) dataArray;
private Object _autoConvertAndOptimizeObjectArray( Object[] data, DataType<?> dataType, int size ) {
if ( Arrays.stream( data ).anyMatch( e -> e != null && DataType.of(e.getClass()) != dataType ) )
for ( int i = 0; i < ( data ).length; i++ )
( data )[i] = DataConverter.get().convert( ( (Object[]) data )[i], dataType.getItemTypeClass() );
return _compactAndSizeObjectArray( dataType, data, size );
* If possible, turns the provided {@code Object} array into a memory compact array of primitive types.
* @param dataType The {@link DataType} of the elements in the provided array.
* @param values The array of values which ought to be optimized into a flat array of primitives.
* @param size The size of the optimized array of primitives.
* @return An optimized flat array of primitives.
private static Object _compactAndSizeObjectArray( DataType<?> dataType, Object[] values, int size ) {
Object data = values;
IntStream indices = IntStream.iterate( 0, i -> i + 1 ).limit(size);
if ( size > 1_000 ) indices = indices.parallel();
indices = indices.map( i -> i % values.length );
if ( dataType == DataType.of(Double.class) ) data = indices.mapToDouble( i -> (Double) values[i] ).toArray();
else if ( dataType == DataType.of(Integer.class) ) data = indices.map( i -> (Integer) values[i] ).toArray();
else if ( dataType == DataType.of(Long.class) ) data = indices.mapToLong( i -> (Long) values[i] ).toArray();
else if ( dataType == DataType.of(Float.class) ) {
float[] floats = new float[size];
for( int i = 0; i < size; i++ ) floats[ i ] = (Float) values[ i % values.length ];
data = floats;
else if ( dataType == DataType.of(Byte.class) ) {
byte[] bytes = new byte[size];
for( int i = 0; i < size; i++ ) bytes[ i ] = (Byte) values[ i % values.length ];
data = bytes;
else if ( dataType == DataType.of(Short.class) ) {
short[] shorts = new short[size];
for( int i = 0; i < size; i++ ) shorts[ i ] = (Short) values[ i % values.length ];
data = shorts;
} else if ( dataType == DataType.of(Boolean.class) ) {
boolean[] booleans = new boolean[size];
for( int i = 0; i < size; i++ ) booleans[ i ] = (Boolean) values[ i % values.length ];
data = booleans;
else if ( dataType == DataType.of(Character.class) ) {
char[] chars = new char[size];
for( int i = 0; i < size; i++ ) chars[ i ] = (Character) values[ i % values.length ];
data = chars;
} else if ( values.length != size ) {
Object[] objects = new Object[size];
for( int i = 0; i < size; i++ ) objects[ i ] = values[ i % values.length ];
data = objects;
return data;
protected final Data<Object> _actualize( Tensor<?> tensor ) {
Data<Object> data = (Data<Object>) tensor.getMut().getData();
Object value = data.getOrNull();
DataType<?> dataType = tensor.getDataType();
int size = tensor.size();
Class<?> typeClass = dataType.getRepresentativeType();
Object newValue;
if ( typeClass == F64.class ) {
if ( ( (double[]) value ).length == size ) return data;
newValue = new double[ size ];
if ( ( (double[]) value )[ 0 ] != 0d ) Arrays.fill( (double[]) newValue, ( (double[]) value )[ 0 ] );
} else if ( typeClass == F32.class ) {
if ( ( (float[]) value ).length == size ) return data;
newValue = new float[size];
if ( ( (float[]) value )[ 0 ] != 0f ) Arrays.fill( (float[]) newValue, ( (float[]) value )[ 0 ] );
} else if ( typeClass == I32.class ) {
if ( ( (int[]) value ).length == size ) return data;
newValue = new int[ size ];
if ( ( (int[]) value )[ 0 ] != 0 ) Arrays.fill( (int[]) newValue, ( (int[]) value )[ 0 ] );
} else if ( typeClass == I16.class ) {
if ( ( (short[]) value ).length == size ) return data;
newValue = new short[ size ];
if ( ( (short[]) value )[ 0 ] != 0 ) Arrays.fill( (short[]) newValue, ( (short[]) value )[ 0 ] );
} else if ( typeClass == I8.class ) {
if ( ( (byte[]) value ).length == size ) return data;
newValue = new byte[ size ];
if ( ( (byte[]) value )[ 0 ] != 0 ) Arrays.fill( (byte[]) newValue, ( (byte[]) value )[ 0 ] );
} else if ( typeClass == I64.class ) {
if ( ( (long[]) value ).length == size ) return data;
newValue = new long[ size ];
if ( ( (long[]) value )[ 0 ] != 0 ) Arrays.fill( (long[]) newValue, ( (long[]) value )[ 0 ] );
} else if ( typeClass == Boolean.class ) {
if ( ( (boolean[]) value ).length == size ) return data;
newValue = new boolean[ size ];
Arrays.fill( (boolean[]) newValue, ( (boolean[]) value )[ 0 ] );
} else if ( typeClass == Character.class ) {
if ( ( (char[]) value ).length == size ) return data;
newValue = new char[ size ];
if ( ( (char[]) value )[ 0 ] != (char) 0 ) Arrays.fill( (char[]) newValue, ( (char[]) value )[ 0 ] );
} else {
if ( ( (Object[]) value ).length == size ) return data;
newValue = new Object[ size ];
if ( ( (Object[]) value )[ 0 ] != null ) Arrays.fill( (Object[]) newValue, ( (Object[]) value )[ 0 ] );
return _createDataFor( newValue, (DataType<Object>) dataType);
protected final Data<Object> _virtualize( Tensor<?> tensor ) {
Class<?> typeClass = tensor.getDataType().getRepresentativeType();
Data data = tensor.getMut().getData();
Object value = data == null ? null : data.getOrNull();
assert value != null;
Object newValue;
if ( typeClass == F64.class )
newValue = ( ( (double[]) value ).length <= 1 ) ? value : new double[]{ ( (double[]) value )[ 0 ] };
else if ( typeClass == F32.class )
newValue = ( ( (float[]) value ).length <= 1 ) ? value : new float[]{ ( (float[]) value )[ 0 ] };
else if ( typeClass == I64.class )
newValue = ( ( (long[]) value ).length <= 1 ) ? value : new long[]{ ( (long[]) value )[ 0 ] };
else if ( typeClass == I32.class )
newValue = ( ( (int[]) value ).length <= 1 ) ? value : new int[]{ ( (int[]) value )[ 0 ] };
else if ( typeClass == I16.class )
newValue = ( ( (short[]) value ).length <= 1 ) ? value : new short[]{ ( (short[]) value )[ 0 ] };
else if ( typeClass == I8.class )
newValue = ( ( (byte[]) value ).length <= 1 ) ? value : new byte[]{ ( (byte[]) value )[ 0 ] };
newValue = ( ( (Object[]) value ).length <= 1 ) ? value : new Object[]{ ( (Object[]) value )[ 0 ] };
return CPU.get().allocateFromAll( data.dataType(), NDConstructor.of(1).produceNDC(false), newValue);
protected final DataType<?> _dataTypeOf(Object rawData) {
LogUtil.nullArgCheck( rawData, "rawData", Object.class );
if ( rawData instanceof double[] ) return DataType.of( F64.class );
if ( rawData instanceof float[] ) return DataType.of( F32.class );
if ( rawData instanceof int[] ) return DataType.of( I32.class );
if ( rawData instanceof short[] ) return DataType.of( I16.class );
if ( rawData instanceof byte[] ) return DataType.of( I8.class );
if ( rawData instanceof long[] ) return DataType.of( I64.class );
if ( rawData instanceof boolean[] ) return DataType.of( Boolean.class );
if ( rawData instanceof char[] ) return DataType.of( Character.class );
if ( rawData instanceof Object[] ) return DataType.of( Object.class );
throw new IllegalArgumentException( "Unsupported data type: " + rawData.getClass() );
public <T> CPU free( Tensor<T> tensor ) {
LogUtil.nullArgCheck( tensor, "tensor", Tensor.class );
return this;
/**
 *  Intentionally empty: this device does nothing when one tensor replaces another.
 *
 * @param former The tensor which is being replaced (ignored).
 * @param replacement The tensor taking the place of the former one (ignored).
 * @param <T> The item type of both tensors.
 */
protected <T> void _swap( Tensor<T> former, Tensor<T> replacement ) {
    // Nothing to do.
}
public <T> Data<T> allocate( DataType<T> dataType, NDConfiguration ndc ) {
int size;
if ( ndc instanceof VirtualNDConfiguration )
size = 1;
size = ndc.size();
Class<?> typeClass = dataType.getRepresentativeType();
if ( typeClass == F64.class )
return _createDataFor( new double[ size ], dataType );
else if ( typeClass == F32.class )
return _createDataFor( new float[ size ], dataType );
else if ( typeClass == I32.class || typeClass == UI32.class )
return _createDataFor( new int[ size ], dataType );
else if ( typeClass == I16.class || typeClass == UI16.class )
return _createDataFor( new short[ size ], dataType );
else if ( typeClass == I8.class || typeClass == UI8.class )
return _createDataFor( new byte[ size ], dataType );
else if ( typeClass == I64.class || typeClass == UI64.class )
return _createDataFor( new long[ size ], dataType );
else if ( dataType.getItemTypeClass() == Boolean.class )
return _createDataFor( new boolean[ size ], dataType );
else if ( dataType.getItemTypeClass() == Character.class )
return _createDataFor( new char[ size ], dataType );
return _createDataFor( new Object[ size ], dataType );
/**
 *  Not supported by this device: every call fails.
 *
 * @param function The function which ought to be turned into an operation (ignored).
 * @param name The name of the operation (ignored).
 * @return Nothing, this method never returns normally.
 * @throws IllegalStateException Always.
 */
public Operation optimizedOperationOf( Function function, String name ) {
    throw new IllegalStateException();
}
* This method is part of the component system built into the {@link Tensor} class.
* Do not use this as part of anything but said component system.
* @param changeRequest An API which describes the type of update and a method for executing said update.
* @return The truth value determining if this {@link Device} ought to be added to a tensor (Here always false!).
public boolean update( OwnerChangeRequest<Tensor<Object>> changeRequest ) {
super.update( changeRequest );
return false; // This type of device can not be a component simply because it is the default device
* Returns the number of CPU cores available to the Java virtual machine.
* This value may change during a particular invocation of the virtual machine.
* Applications that are sensitive to the number of available processors should
* therefore occasionally poll this property and adjust their resource usage appropriately.
* @return The maximum number of CPU cores available to the JVM.
* This number is never smaller than one!
public int getCoreCount() { return Runtime.getRuntime().availableProcessors(); }
/**
 * @return The simple name of this class followed by the number of available cores,
 *         formatted as {@code Name[cores=N]}.
 */
public String toString() {
    final String typeName = this.getClass().getSimpleName();
    return typeName + "[cores=" + getCoreCount() + "]";
}
/**
 *  A simple functional interface for executing a range whose implementations will
 *  either be executed sequentially or be dispatched to
 *  a thread-pool, given that the provided workload is large enough.
 */
@FunctionalInterface
public interface RangeWorkload {
    /**
     *  Processes the provided index range.
     *
     * @param start The first index of the range.
     * @param end The end of the range.
     */
    void execute( int start, int end );
}
/**
 *  A simple functional interface for executing a workload which is identified by a single index.
 */
@FunctionalInterface
public interface IndexedWorkload {
    /**
     *  Processes the workload with the provided index.
     *
     * @param i The index of the workload to be processed.
     */
    void execute( int i );
}
/**
 *  The {@link JVMExecutor} offers functionality similar to the parallel stream API,
 *  however it differs in that the {@link JVMExecutor} processes {@link RangeWorkload} lambdas
 *  instead of simply exposing a single index or concrete elements for a given workload size.
 *  This means that a {@link RangeWorkload} lambda will be called with the work range of a single worker thread
 *  processing its current workload.
 *  This range depends on the number of available threads as well as on the size of the workload.
 *  If the workload is very small, then the current main thread will process the entire workload range,
 *  and the underlying {@link ThreadPoolExecutor} will not be used, in order to avoid unnecessary overhead.
 */
public static class JVMExecutor
// Counts the threads created by the thread factories of this class; its value becomes part of each thread name.
private static final AtomicInteger _COUNTER = new AtomicInteger();
// Default thread group for threads created by the thread factories of this class.
private static final ThreadGroup _GROUP = new ThreadGroup(THREAD_PREFIX+"-group");
/*
    The following 2 constants determine whether any given workload size will be parallelized or not...
    We might want to adjust these some more for better performance...
*/
// Workloads smaller than this are always executed sequentially on the calling thread.
private static final int _MIN_THREADED_WORKLOAD_SIZE = 32;
// Minimum share of a workload (workload size divided by the core count) required for parallel execution.
private static final int _MIN_WORKLOAD_PER_THREAD = 8;
// The thread pool backing this executor.
// NOTE(review): the remaining constructor arguments are not visible in this part of the file - confirm them.
private final ThreadPoolExecutor _pool =
new ThreadPoolExecutor(
new SynchronousQueue<Runnable>(), // A synchronous queue has no capacity, tasks are handed directly to a thread
private static ThreadFactory _newThreadFactory( final String name ) {
return _newThreadFactory( _GROUP, name );
// Creates a thread factory whose threads belong to the provided group and are
// named "<name>-<number>", where the number comes from the shared thread counter.
// NOTE(review): parts of the lambda body are not visible in this part of the file - confirm
// whether the threads receive further configuration before being returned.
private static ThreadFactory _newThreadFactory( final ThreadGroup group, final String name ) {
// Make sure there is exactly one dash between the name and the thread number.
String prefix = name.endsWith("-") ? name : name + "-";
return target -> {
Thread thread = new Thread(
group, target,
prefix + _COUNTER.incrementAndGet() // The name, including the thread number.
return thread;
* Returns the approximate number of threads that are actively
* executing tasks.
* @return the number of threads
public int getActiveThreadCount() { return _pool.getActiveCount(); }
* Returns the core number of threads.
* @return the core number of threads
public int getCorePoolSize() { return _pool.getCorePoolSize(); }
* Returns the approximate total number of tasks that have
* completed execution. Because the states of tasks and threads
* may change dynamically during computation, the returned value
* is only an approximation, but one that does not ever decrease
* across successive calls.
* @return the number of tasks
public long getCompletedTaskCount() { return _pool.getCompletedTaskCount(); }
* This method slices the provided workload size into multiple ranges which can be executed in parallel.
* @param workloadSize The total workload size which ought to be split into multiple ranges.
* @param workload The range lambda which ought to be executed across multiple threads.
public void threaded( int workloadSize, RangeWorkload workload )
LogUtil.nullArgCheck( workload, "workload", RangeWorkload.class );
int cores = get().getCoreCount();
cores = ( cores == 0 ? 1 : cores );
if ( workloadSize >= _MIN_THREADED_WORKLOAD_SIZE && ( ( workloadSize / cores ) >= _MIN_WORKLOAD_PER_THREAD) ) {
threaded(0, workloadSize, workload );
else sequential( workloadSize, workload );
* Executes the provided workload lambda across multiple threads
* where the provided worker lambda will receive the index/id of the current worker.
* @param numberOfWorkloads The total number of workloads to be executed.
* @param workload The workload lambda to be executed.
public void threaded( int numberOfWorkloads, IndexedWorkload workload ) {
LogUtil.nullArgCheck( workload, "workload", IndexedWorkload.class );
_DIVIDER.parallelism( _PARALLELISM )
.threshold( 1 )
.submit( numberOfWorkloads, (i)-> workload.execute(i) );
* This method will simply execute the provided {@link RangeWorkload} lambda sequentially
* with 0 as the start index and {@code workloadSize} as the exclusive range. <br><br>
* @param workloadSize The workload size which will be passed to the provided {@link RangeWorkload} as second argument.
* @param workload The {@link RangeWorkload} which will be executed sequentially.
public void sequential( int workloadSize, RangeWorkload workload ) {
LogUtil.nullArgCheck( workload, "workload", RangeWorkload.class );
workload.execute( 0, workloadSize );
* Takes the provided range and divides it into multithreaded workloads.
* @param first The start index of the threaded workload range.
* @param limit The limit for the workload range, which is exclusive.
* @param rangeWorkload A workload lambda which will be called by different threads with different sub-ranges.
public void threaded(
final int first,
final int limit,
final RangeWorkload rangeWorkload
) {
LogUtil.nullArgCheck( rangeWorkload, "rangeWorkload", RangeWorkload.class );
_DIVIDER.parallelism( _PARALLELISM )
.divide( first, limit, rangeWorkload);