ParseUtil.java

package neureka.math.parsing;

import neureka.Neureka;
import neureka.backend.api.Operation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Utility for parsing function expressions.
 **/
public final class ParseUtil
{
    private ParseUtil() {}

     
    public static int numberOfOperationsWithin( final List<String> operations ) {
        int counter = 0;
        for( Operation ot : Neureka.get().backend().getOperations() ) {
            if (operations.contains(ot.getOperator())) ++counter;
        }
        return counter;
    }

    
    public static String parsedOperation( final String exp, final int index ) {
        if (exp.length() <= index) return null;
        String operation;
        for ( int i = exp.length()-1; i >= index; i--) {
            operation = exp.substring(index, i);
            if ( ParseUtil.isAnOperation(operation) || ParseUtil.isAnOperation(operation.toLowerCase()) ) {
                return operation;
            }
        }
        return null;
    }

    
    public static String findComponentIn( String exp, final int index ) {
        exp = exp.trim();
        if (exp.length() <= index) return null;
        int bracketDepth = 0;
        StringBuilder component = new StringBuilder();
        for ( int i = index; i < exp.length(); ++i)
        {
            if ( exp.charAt( i ) == ')' ) --bracketDepth;
            else if ( exp.charAt( i ) == '(' ) ++bracketDepth;
            if ( bracketDepth != 0 ) {
                component.append( exp.charAt( i ) );
                continue;
            }
            for ( int ii = exp.length() - 1; ii >= i + 1; ii--) {
                String found = ParseUtil.parsedOperation( exp.substring( i, ii ), i );
                if (
                     found != null && // If the found string is a function then we continue!
                     !Neureka.get().backend().getOperation(found).getOperator().equals(found)
                ) {
                    ii = -1; // end inner loop
                    component.append( found, 0, found.length() - 1 );
                    i += found.length()-1;
                }
                else if ( _isOperationComponent( exp, i, ii ) )
                    return component.append( exp.charAt( i ) ).toString();
            }
            component.append( exp.charAt( i ) );
        }
        return component.toString();
    }

    
    private static boolean _isOperationComponent( String exp, int i, int ii ) {
        String possibleOperation = exp.substring( i + 1, ii );
        return ParseUtil.isAnOperation( possibleOperation )
                &&
               ( exp.charAt(i) == 'j' || !(Character.isLetter(exp.charAt(i)) || exp.charAt(i) == '_') );
    }

    
    public static List<String> findParametersIn( String exp, final int index ) {
        exp = exp.trim();
        if ( exp.length() <= index ) return null;
        int bracketDepth = 0;
        List<String> parameters = new ArrayList<>();
        StringBuilder component = new StringBuilder();
        for ( int i = index; i < exp.length(); ++i )
        {
            if ( exp.charAt( i ) == '(' || exp.charAt( i ) == '[' ) {
                if ( bracketDepth != 0 ) component.append(exp.charAt( i ));
                ++bracketDepth;
            } else if ( exp.charAt( i ) == ')' || exp.charAt( i ) == ']' ) {
                --bracketDepth;
                if ( bracketDepth != 0 ) component.append(exp.charAt( i ));
            } else if ( exp.charAt( i ) != ',' || bracketDepth > 1 ) { // Use depth!
                component.append( exp.charAt( i ) );
            }
            if ( bracketDepth == 0 ) {
                parameters.add( component.toString() );
            } else if ( bracketDepth == 1 && exp.charAt( i ) == ',' ) {
                parameters.add( component.toString() );
                component = new StringBuilder();
            }
        }
        return parameters;
    }

    
    public static boolean isAnOperation( final String operationName ) {
        if ( operationName.length() > 32 ) return false;
        Operation operation = Neureka.get().backend().getOperation( operationName );
        return operation != null;
    }

    
    public static String groupBy(
            final String operation,
            final String currentChain,
            final String currentComponent,
            final String currentOperation
    ) {
        String group = null;
        if (currentOperation != null) {
            if (currentOperation.equals(operation)) {
                group = currentComponent + currentOperation;
                if (currentChain != null) group = currentChain + group;
            }
        } else if (currentChain != null) group = currentChain + currentComponent;
        return group;
    }

    
    private static boolean isForbiddenChar( char c ) {
        return c == '"' || c == '$' || c == '%' || c == '&'  || c == '=' || c == '#' || c == '|' || c == '~' || c == ':'
            || c == ';' || c == '@' || c == '?' || c == '\\' || c == '>' || c == '<' || c == ' ';
    }

    
    public static String cleanedHeadAndTail( String exp ) {
        exp = exp.trim();
        int ci = 0;
        StringBuilder updated = new StringBuilder();
        boolean condition = true;
        while ( condition ) {
            if (ParseUtil.isForbiddenChar(exp.charAt(ci)) || (exp.charAt(ci) >= 'A' && exp.charAt(ci) <= 'Z') || (exp.charAt(ci) >= 'a' && exp.charAt(ci) <= 'z')) {
                ci++;
            } else condition = false;
            if (ci == exp.length()) condition = false;
        }
        for ( int gi = ci; gi < exp.length(); gi++) updated.append(exp.charAt(gi));
        exp = updated.toString();
        updated = new StringBuilder();
        if (exp.length() > 0) {
            ci = 0;
            condition = true;
            int l = exp.length() - 1;
            while ( condition ) {
                if (
                        isForbiddenChar( exp.charAt( ci ) ) ||
                        ( exp.charAt( l - ci ) >= 'A' && exp.charAt( l - ci ) <= 'Z' ) ||
                        ( exp.charAt( l - ci ) >= 'a' && exp.charAt( l - ci ) <= 'z' )
                ) {
                    ci++;
                } else condition = false;
                if ( l - ci < 0 ) condition = false;
            }
            for ( int gi = 0; gi <= l - ci; gi++) updated.append( exp.charAt(gi) );
            exp = updated.toString();
        }
        if ( exp.length() > 0 ) {
            if ( exp.charAt( 0 ) == '(' && exp.charAt( exp.length() - 1 ) != ')' ) {
                exp = exp.substring(1, exp.length()-1);
            }
            if ( exp.charAt(exp.length() - 1) == ')' && exp.charAt( 0 ) != '(' ) {
                exp = exp.substring(1, exp.length()-1);
            }
        }
        exp = exp.trim();
        return exp;
    }

    
    public static String unpackAndCorrect( String exp ) {
        if ( exp == null ) return null;
        if ( exp.length() == 0 ) return "";
        if ( exp.equals("()") ) return "";
        exp = exp.trim();
        exp = exp.replace("sigmoid", "sig");
        exp = exp.replace("quadratic", "quad");
        exp = exp.replace("quadr", "quad");
        exp = exp.replace("lig", "softplus");
        exp = exp.replace("ligmoid", "softplus");
        exp = exp.replace("splus", "softplus");
        exp = exp.replace("spls", "softplus");
        exp = exp.replace("ligm", "softplusd");
        exp = exp.replace("identity", "idy");
        exp = exp.replace("ident", "idy");
        exp = exp.replace("self", "idy");
        exp = exp.replace("copy", "idy");
        exp = exp.replace("gaussian", "gaus");
        exp = exp.replace("gauss", "gaus");
        exp = exp.replace("absolute", "abs");
        exp = exp.replace("summation", "sum");
        exp = exp.replace("product", "prod");

        int bracketDepth = 0;
        for ( int ei = 0; ei < exp.length(); ++ei) {
            if (exp.charAt(ei) == '(') ++bracketDepth;
            else if (exp.charAt(ei) == ')') --bracketDepth;
        }
        if (bracketDepth != 0) {
            if (bracketDepth < 0) {
                StringBuilder expBuilder = new StringBuilder(exp);
                for ( int bi = 0; bi < -bracketDepth; ++bi) {
                    expBuilder.insert(0, "(");
                }
                exp = expBuilder.toString();
            }
            else
                exp = new StringBuilder(exp).append(
                        String.join("", Collections.nCopies( bracketDepth, ")" )) // repeat!
                ).toString();
        }
        boolean parsing = true;
        boolean needsStitching = false;
        while (parsing && (exp.charAt( 0 ) == '(') && (exp.charAt(exp.length() - 1) == ')')) {
            bracketDepth = 0;
            needsStitching = true;
            for ( int i = 0; i < exp.length(); ++i) {
                if (exp.charAt( i ) == ')') --bracketDepth;
                else if (exp.charAt( i ) == '(') ++bracketDepth;
                if (bracketDepth == 0 && i != exp.length() - 1) needsStitching = false;
            }
            if (needsStitching) exp = exp.substring(1, exp.length()-1);
            else parsing = false;
        }
        return exp.trim();
    }

    /**
     *  This method tries to find the next best operation {@link String} the user might have meant.
     *
     * @param expression The expression which should be interpreted as something similar.
     * @return Something similar or null if the expression is not similar enough.
     */
    
    public static String assumptionBasedOn( String expression ) {
        double largest = -1;
        int best = 0;
        for (int i = 0; i< Neureka.get().backend().size(); i++ ) {
            double s = similarity( expression, Neureka.get().backend().getOperation( i ).getOperator() );
            if ( largest == -1 ) largest = s;
            else if ( s > largest ) {
                best = i;
                largest = s;
            }
        }
        return ( largest > 0.1 ) ? Neureka.get().backend().getOperation(best).getOperator() : "";
    }

    /**
     *  This method estimates the similarity between 2 provided {@link String} instances.
     *
     * @param s1 The first string which should be compared to the second string.
     * @param s2 The second string which should be compared to the first string.
     * @return A similarity score between 0 and 1 where 1 would be 100% similar (equal).
     */
    
    public static double similarity( final String s1, final String s2 ) {
            String longer = (s1.length() > s2.length()) ?s1 : s2;
            String shorter = (s1.length() > s2.length()) ? s2 : s1;
            // longer should always have greater length
            if ( longer.length() == 0 ) return 1.0; /* both strings are zero length */

            int delta = (longer.length()-shorter.length());
            double[] alignment = new double[ delta + 1 ];
            double[] weights = new double[ delta + 1 ];
            double currentWeight = longer.length();
            double weightSum = 0;
            double modifier = delta / (double) longer.length();
            for ( int i = 0; i < ( delta + 1 ); i++ ) {
                weights[ i ] = currentWeight;
                weightSum += currentWeight;
                currentWeight *= modifier;
                for ( int si = 0; si < shorter.length(); si++ ) {
                    char lChar = longer.charAt( i + si );
                    char sChar = shorter.charAt( si );
                    if ( lChar == sChar ) alignment[ i ] ++;
                    else if ( // Custom modifiers:
                        Character.toLowerCase( lChar ) == Character.toLowerCase( sChar )
                    ) alignment[ i ] += 0.5;
                    else if (
                        Character.isAlphabetic( lChar ) != Character.isAlphabetic( sChar )
                    ) alignment[ i ] -= 0.13571113;
                }
                alignment[ i ] /= longer.length();
                alignment[ i ] = Math.min( Math.max( alignment[ i ], 0.0 ), 1.0 );
            }
            Arrays.sort( alignment );
            Arrays.sort( weights );
            double similarity = 0;
            for ( int i = 0; i < ( delta + 1 ); i++ ) similarity += alignment[ i ] * ( weights[ i ] / weightSum );
            assert similarity <= 1.0;
            return similarity;
    }


}